diff options
author | Luke T. Shumaker <lukeshu@lukeshu.com> | 2024-06-10 11:31:53 -0600 |
---|---|---|
committer | Luke T. Shumaker <lukeshu@lukeshu.com> | 2024-06-10 11:31:53 -0600 |
commit | 290354461422fff8c05ef9ce37ba154641e3f8fc (patch) | |
tree | 505e4faa4d6fba1e7d0b4addbda39a4e5beb26fd /cmd/generate/forge_pipermail.go | |
parent | 81c8d44fba88dbb049c48363d6b7697224b98a2e (diff) |
Diffstat (limited to 'cmd/generate/forge_pipermail.go')
-rw-r--r-- | cmd/generate/forge_pipermail.go | 192 |
1 files changed, 0 insertions, 192 deletions
diff --git a/cmd/generate/forge_pipermail.go b/cmd/generate/forge_pipermail.go deleted file mode 100644 index ccc58f0..0000000 --- a/cmd/generate/forge_pipermail.go +++ /dev/null @@ -1,192 +0,0 @@ -package main - -import ( - "compress/gzip" - "errors" - "fmt" - "net/mail" - "net/url" - "os" - "regexp" - "strconv" - "strings" - "time" - - "git.lukeshu.com/www/cmd/generate/mailstuff" -) - -var ( - rePiperMailMessage = regexp.MustCompile(`^(https?://.*/pipermail/.*/)([0-4]{4}-(?:January|February|March|April|May|June|July|August|September|October|November|December))/([0-9]+)\.html$`) - rePiperMailDate = regexp.MustCompile(`^\s*<I>([^<]+)</I>\s*$`) - rePiperMailReply = regexp.MustCompile(`^\s*<LINK REL="made" HREF="(.*)">\s*$`) -) - -type PiperMail struct{} - -var _ Forge = PiperMail{} - -func (PiperMail) FetchStatus(urls []string) (string, error) { - return "", nil -} - -func (PiperMail) FetchSubmittedAt(urls []string) (time.Time, error) { - for _, u := range urls { - if !rePiperMailMessage.MatchString(u) { - continue - } - htmlStr, err := httpGet(u, nil) - if err != nil { - return time.Time{}, err - } - for _, line := range strings.Split(htmlStr, "\n") { - if m := rePiperMailDate.FindStringSubmatch(line); m != nil { - return time.Parse(time.UnixDate, m[1]) - } - } - } - return time.Time{}, nil -} - -func (PiperMail) nextMonth(ym string) string { - yStr, mStr, ok := strings.Cut(ym, "-") - if !ok { - panic(fmt.Errorf("invalid year-month: %q", ym)) - } - switch mStr { - case "January": - return yStr + "-February" - case "February": - return yStr + "-March" - case "March": - return yStr + "-April" - case "April": - return yStr + "-May" - case "May": - return yStr + "-June" - case "June": - return yStr + "-July" - case "July": - return yStr + "-August" - case "August": - return yStr + "-September" - case "September": - return yStr + "-October" - case "October": - return yStr + "-November" - case "November": - return yStr + "-December" - case "December": - y, _ := strconv.Atoi(yStr) - return fmt.Sprintf("%d-January", y+1) - default: - panic(fmt.Errorf("invalid year-month: %q", ym)) - } -} - -func (p PiperMail) threadLen(thread *mailstuff.ThreadedMessage) int { - if thread == nil { - return 0 - } - - ret := 0 - if thread.Message != nil { - ret++ - } - for child := range thread.Children { - ret += p.threadLen(child) - } - return ret -} - -func (p PiperMail) FetchLastUpdated(urls []string) (time.Time, User, error) { - for _, u := range urls { - m := rePiperMailMessage.FindStringSubmatch(u) - if m == nil { - continue - } - uBase := m[1] - uYM := m[2] - //uInt := m[3] - - htmlStr, err := httpGet(u, nil) - if err != nil { - return time.Time{}, User{}, fmt.Errorf("could not fetch message: %w", err) - } - var msgid mailstuff.MessageID - for _, line := range strings.Split(htmlStr, "\n") { - if m := rePiperMailReply.FindStringSubmatch(line); m != nil { - ru, err := url.Parse(m[1]) - if err != nil { - continue - } - if msgid = mailstuff.MessageID(ru.Query().Get("In-Reply-To")); msgid != "" { - break - } - } - } - if msgid == "" { - continue - } - - var thread *mailstuff.ThreadedMessage - for ym, mbox := uYM, []*mail.Message(nil); true; ym = p.nextMonth(ym) { - lenBefore := p.threadLen(thread) - - mboxGzStr, err := httpGet(uBase+ym+".txt.gz", nil) - if err != nil { - if (ym == uYM || !errors.Is(err, os.ErrNotExist)) { - return time.Time{}, User{}, fmt.Errorf("could not fetch mbox for %s: %w", ym, err) - } - break - } - gzReader, err := gzip.NewReader(strings.NewReader(mboxGzStr)) - if err != nil { - return time.Time{}, User{}, fmt.Errorf("could not read mbox gz: %w", err) - } - _mbox, err := mailstuff.ReadMBox(gzReader) - if err != nil { - gzReader.Close() - return time.Time{}, User{}, fmt.Errorf("could not parse mbox: %w", err) - } - if err := gzReader.Close(); err != nil { - return time.Time{}, User{}, fmt.Errorf("close gz: %w", err) - } - mbox = append(mbox, _mbox...) - _, messages := mailstuff.ThreadMessages(mbox) - thread = messages[msgid] - - if p.threadLen(thread) == lenBefore { - break - } - } - if thread == nil { - continue - } - - var retTime time.Time - var retUser User - - var walk func(*mailstuff.ThreadedMessage) - walk = func(msg *mailstuff.ThreadedMessage) { - date, dateErr := msg.Header.Date() - froms, fromErr := msg.Header.AddressList("From") - if dateErr == nil && fromErr == nil && len(froms) > 0 && (retTime.IsZero() || date.After(retTime)) { - retTime = date - retUser.Name = froms[0].Name - if retUser.Name == "" { - retUser.Name = froms[0].Address - } - retUser.URL = "mailto:" + froms[0].Address - } - for child := range msg.Children { - walk(child) - } - } - walk(thread) - - if !retTime.IsZero() { - return retTime, retUser, nil - } - } - return time.Time{}, User{}, nil -} |