diff options
Diffstat (limited to 'cmd/generate/forge_part_pipermail.go')
-rw-r--r-- | cmd/generate/forge_part_pipermail.go | 192 |
1 files changed, 192 insertions, 0 deletions
diff --git a/cmd/generate/forge_part_pipermail.go b/cmd/generate/forge_part_pipermail.go new file mode 100644 index 0000000..af6a009 --- /dev/null +++ b/cmd/generate/forge_part_pipermail.go @@ -0,0 +1,192 @@ +package main + +import ( + "compress/gzip" + "errors" + "fmt" + "net/mail" + "net/url" + "os" + "regexp" + "strconv" + "strings" + "time" + + "git.lukeshu.com/www/cmd/generate/mailstuff" +) + +var ( + rePiperMailMessage = regexp.MustCompile(`^(https?://.*/pipermail/.*/)([0-4]{4}-(?:January|February|March|April|May|June|July|August|September|October|November|December))/([0-9]+)\.html$`) + rePiperMailDate = regexp.MustCompile(`^\s*<I>([^<]+)</I>\s*$`) + rePiperMailReply = regexp.MustCompile(`^\s*<LINK REL="made" HREF="(.*)">\s*$`) +) + +type PartPiperMail struct{} + +var _ Forge = PartPiperMail{} + +func (PartPiperMail) FetchStatus(urls []string) (string, error) { + return "", nil +} + +func (PartPiperMail) FetchSubmittedAt(urls []string) (time.Time, error) { + for _, u := range urls { + if !rePiperMailMessage.MatchString(u) { + continue + } + htmlStr, err := httpGet(u, nil) + if err != nil { + return time.Time{}, err + } + for _, line := range strings.Split(htmlStr, "\n") { + if m := rePiperMailDate.FindStringSubmatch(line); m != nil { + return time.Parse(time.UnixDate, m[1]) + } + } + } + return time.Time{}, nil +} + +func (PartPiperMail) nextMonth(ym string) string { + yStr, mStr, ok := strings.Cut(ym, "-") + if !ok { + panic(fmt.Errorf("invalid year-month: %q", ym)) + } + switch mStr { + case "January": + return yStr + "-February" + case "February": + return yStr + "-March" + case "March": + return yStr + "-April" + case "April": + return yStr + "-May" + case "May": + return yStr + "-June" + case "June": + return yStr + "-July" + case "July": + return yStr + "-August" + case "August": + return yStr + "-September" + case "September": + return yStr + "-October" + case "October": + return yStr + "-November" + case "November": + return yStr + "-December" + case "December": + y, _ := strconv.Atoi(yStr) + return fmt.Sprintf("%d-January", y+1) + default: + panic(fmt.Errorf("invalid year-month: %q", ym)) + } +} + +func (p PartPiperMail) threadLen(thread *mailstuff.ThreadedMessage) int { + if thread == nil { + return 0 + } + + ret := 0 + if thread.Message != nil { + ret++ + } + for child := range thread.Children { + ret += p.threadLen(child) + } + return ret +} + +func (p PartPiperMail) FetchLastUpdated(urls []string) (time.Time, User, error) { + for _, u := range urls { + m := rePiperMailMessage.FindStringSubmatch(u) + if m == nil { + continue + } + uBase := m[1] + uYM := m[2] + //uInt := m[3] + + htmlStr, err := httpGet(u, nil) + if err != nil { + return time.Time{}, User{}, fmt.Errorf("could not fetch message: %w", err) + } + var msgid mailstuff.MessageID + for _, line := range strings.Split(htmlStr, "\n") { + if m := rePiperMailReply.FindStringSubmatch(line); m != nil { + ru, err := url.Parse(m[1]) + if err != nil { + continue + } + if msgid = mailstuff.MessageID(ru.Query().Get("In-Reply-To")); msgid != "" { + break + } + } + } + if msgid == "" { + continue + } + + var thread *mailstuff.ThreadedMessage + for ym, mbox := uYM, []*mail.Message(nil); true; ym = p.nextMonth(ym) { + lenBefore := p.threadLen(thread) + + mboxGzStr, err := httpGet(uBase+ym+".txt.gz", nil) + if err != nil { + if ym == uYM || !errors.Is(err, os.ErrNotExist) { + return time.Time{}, User{}, fmt.Errorf("could not fetch mbox for %s: %w", ym, err) + } + break + } + gzReader, err := gzip.NewReader(strings.NewReader(mboxGzStr)) + if err != nil { + return time.Time{}, User{}, fmt.Errorf("could not read mbox gz: %w", err) + } + _mbox, err := mailstuff.ReadMBox(gzReader) + if err != nil { + gzReader.Close() + return time.Time{}, User{}, fmt.Errorf("could not parse mbox: %w", err) + } + if err := gzReader.Close(); err != nil { + return time.Time{}, User{}, fmt.Errorf("close gz: %w", err) + } + mbox = append(mbox, _mbox...) + _, messages := mailstuff.ThreadMessages(mbox) + thread = messages[msgid] + + if p.threadLen(thread) == lenBefore { + break + } + } + if thread == nil { + continue + } + + var retTime time.Time + var retUser User + + var walk func(*mailstuff.ThreadedMessage) + walk = func(msg *mailstuff.ThreadedMessage) { + date, dateErr := msg.Header.Date() + froms, fromErr := msg.Header.AddressList("From") + if dateErr == nil && fromErr == nil && len(froms) > 0 && (retTime.IsZero() || date.After(retTime)) { + retTime = date + retUser.Name = froms[0].Name + if retUser.Name == "" { + retUser.Name = froms[0].Address + } + retUser.URL = "mailto:" + froms[0].Address + } + for child := range msg.Children { + walk(child) + } + } + walk(thread) + + if !retTime.IsZero() { + return retTime, retUser, nil + } + } + return time.Time{}, User{}, nil +} |