summaryrefslogtreecommitdiff
path: root/cmd/generate/forge_part_pipermail.go
diff options
context:
space:
mode:
Diffstat (limited to 'cmd/generate/forge_part_pipermail.go')
-rw-r--r--cmd/generate/forge_part_pipermail.go192
1 files changed, 192 insertions, 0 deletions
diff --git a/cmd/generate/forge_part_pipermail.go b/cmd/generate/forge_part_pipermail.go
new file mode 100644
index 0000000..af6a009
--- /dev/null
+++ b/cmd/generate/forge_part_pipermail.go
@@ -0,0 +1,192 @@
+package main
+
+import (
+ "compress/gzip"
+ "errors"
+ "fmt"
+ "net/mail"
+ "net/url"
+ "os"
+ "regexp"
+ "strconv"
+ "strings"
+ "time"
+
+ "git.lukeshu.com/www/cmd/generate/mailstuff"
+)
+
+var (
+	// rePiperMailMessage matches the URL of one archived message, e.g.
+	// "https://host/pipermail/listname/2019-June/001234.html".
+	// Submatches: 1 = archive base URL (including the trailing slash),
+	// 2 = "YYYY-MonthName" archive period, 3 = numeric message index.
+	// The year is any four decimal digits.
+	rePiperMailMessage = regexp.MustCompile(`^(https?://.*/pipermail/.*/)([0-9]{4}-(?:January|February|March|April|May|June|July|August|September|October|November|December))/([0-9]+)\.html$`)
+	// rePiperMailDate matches a line that consists solely of an <I>...</I>
+	// element; submatch 1 is the element's text.
+	rePiperMailDate = regexp.MustCompile(`^\s*<I>([^<]+)</I>\s*$`)
+	// rePiperMailReply matches a line that consists solely of a
+	// <LINK REL="made" HREF="..."> element; submatch 1 is the HREF value.
+	rePiperMailReply = regexp.MustCompile(`^\s*<LINK REL="made" HREF="(.*)">\s*$`)
+)
+
+// PartPiperMail is a stateless Forge implementation.  Its methods only act on
+// URLs that match rePiperMailMessage and skip every other URL they are given.
+type PartPiperMail struct{}
+
+// Compile-time check that PartPiperMail satisfies the Forge interface.
+var _ Forge = PartPiperMail{}
+
+// FetchStatus always reports an empty status string and a nil error; the
+// given urls are not inspected.
+func (PartPiperMail) FetchStatus(urls []string) (string, error) {
+	var status string // intentionally left as the empty string
+	return status, nil
+}
+
+// FetchSubmittedAt fetches, in order, each URL in urls that matches
+// rePiperMailMessage and scans the page line by line.  The first line matching
+// rePiperMailDate ends the search: its text is parsed with the time.UnixDate
+// layout and the result of that parse (time or error) is returned as-is.
+//
+// A fetch error is returned immediately.  If no URL matches, or no fetched
+// page contains a date line, the zero time and a nil error are returned.
+func (PartPiperMail) FetchSubmittedAt(urls []string) (time.Time, error) {
+	var none time.Time
+	for _, msgURL := range urls {
+		if rePiperMailMessage.MatchString(msgURL) {
+			page, err := httpGet(msgURL, nil)
+			if err != nil {
+				return none, err
+			}
+			// Walk the page one "\n"-separated line at a time.
+			for rest, more := page, true; more; {
+				var line string
+				line, rest, more = strings.Cut(rest, "\n")
+				found := rePiperMailDate.FindStringSubmatch(line)
+				if found == nil {
+					continue
+				}
+				return time.Parse(time.UnixDate, found[1])
+			}
+		}
+	}
+	return none, nil
+}
+
+// nextMonth returns the archive period that follows ym.  ym has the form
+// "YEAR-MonthName" with a full English month name (e.g. "2023-December"), and
+// the result has the same form (e.g. "2024-January").
+//
+// For January through November the year text is carried over unchanged.  For
+// December the year is parsed as a decimal integer and incremented.
+//
+// nextMonth panics if ym contains no "-", if the month name is not recognized,
+// or if a December year is not a valid integer.
+func (PartPiperMail) nextMonth(ym string) string {
+	yStr, mStr, ok := strings.Cut(ym, "-")
+	if !ok {
+		panic(fmt.Errorf("invalid year-month: %q", ym))
+	}
+	for month := time.January; month <= time.December; month++ {
+		if month.String() != mStr {
+			continue
+		}
+		if month < time.December {
+			return yStr + "-" + (month + 1).String()
+		}
+		// December rolls over into January of the following year, which is
+		// the only case that needs the year as a number.
+		y, err := strconv.Atoi(yStr)
+		if err != nil {
+			panic(fmt.Errorf("invalid year-month: %q: %w", ym, err))
+		}
+		return fmt.Sprintf("%d-January", y+1)
+	}
+	panic(fmt.Errorf("invalid year-month: %q", ym))
+}
+
+// threadLen counts the nodes in the tree rooted at thread (the root itself and
+// everything reachable through Children) whose Message is non-nil.  A nil
+// thread counts as 0.
+func (p PartPiperMail) threadLen(thread *mailstuff.ThreadedMessage) int {
+	total := 0
+	// Explicit work stack instead of recursion; visiting order does not
+	// matter because only a count is produced.
+	pending := []*mailstuff.ThreadedMessage{thread}
+	for len(pending) > 0 {
+		last := len(pending) - 1
+		node := pending[last]
+		pending = pending[:last]
+		if node == nil {
+			continue
+		}
+		if node.Message != nil {
+			total++
+		}
+		for kid := range node.Children {
+			pending = append(pending, kid)
+		}
+	}
+	return total
+}
+
+// FetchLastUpdated reports the date and sender of the latest message in the
+// mail thread identified by the first usable URL in urls.
+//
+// For each URL matching rePiperMailMessage it:
+//
+//  1. fetches the message page and takes a message ID from the "In-Reply-To"
+//     query parameter of the page's <LINK REL="made"> HREF;
+//  2. downloads the gzipped mbox for the message's month, then for each
+//     following month, re-threading the accumulated messages each time, until
+//     a month adds nothing to the thread or a later month's mbox does not
+//     exist;
+//  3. walks the thread and picks the message with the latest Date header that
+//     also has a parseable, non-empty From header.
+//
+// The returned User has Name set to the sender's display name (falling back
+// to the address) and URL set to "mailto:" plus the address.
+//
+// URLs that do not match, that yield no message ID, whose thread cannot be
+// found, or whose thread has no usable message are skipped.  If no URL
+// produces a result, zero values and a nil error are returned.  Fetch, gzip
+// and mbox-parse failures are returned as errors, except that a missing mbox
+// for a month after the first just ends the month scan.
+func (p PartPiperMail) FetchLastUpdated(urls []string) (time.Time, User, error) {
+	for _, u := range urls {
+		m := rePiperMailMessage.FindStringSubmatch(u)
+		if m == nil {
+			continue
+		}
+		uBase := m[1]
+		uYM := m[2]
+		// m[3] (the message's index within the month) is not needed.
+
+		htmlStr, err := httpGet(u, nil)
+		if err != nil {
+			return time.Time{}, User{}, fmt.Errorf("could not fetch message: %w", err)
+		}
+		var msgid mailstuff.MessageID
+		for _, line := range strings.Split(htmlStr, "\n") {
+			rm := rePiperMailReply.FindStringSubmatch(line)
+			if rm == nil {
+				continue
+			}
+			ru, err := url.Parse(rm[1])
+			if err != nil {
+				// An unparseable HREF is not fatal; keep scanning.
+				continue
+			}
+			if msgid = mailstuff.MessageID(ru.Query().Get("In-Reply-To")); msgid != "" {
+				break
+			}
+		}
+		if msgid == "" {
+			continue
+		}
+
+		var thread *mailstuff.ThreadedMessage
+		// mbox accumulates the messages of every month fetched so far, so that
+		// each pass re-threads the whole span rather than one month alone.
+		for ym, mbox := uYM, []*mail.Message(nil); true; ym = p.nextMonth(ym) {
+			lenBefore := p.threadLen(thread)
+
+			mboxGzStr, err := httpGet(uBase+ym+".txt.gz", nil)
+			if err != nil {
+				// The first month must exist; a later month that does not
+				// exist simply marks the end of the archive.
+				if ym == uYM || !errors.Is(err, os.ErrNotExist) {
+					return time.Time{}, User{}, fmt.Errorf("could not fetch mbox for %s: %w", ym, err)
+				}
+				break
+			}
+			gzReader, err := gzip.NewReader(strings.NewReader(mboxGzStr))
+			if err != nil {
+				return time.Time{}, User{}, fmt.Errorf("could not read mbox gz: %w", err)
+			}
+			_mbox, err := mailstuff.ReadMBox(gzReader)
+			if err != nil {
+				gzReader.Close()
+				return time.Time{}, User{}, fmt.Errorf("could not parse mbox: %w", err)
+			}
+			if err := gzReader.Close(); err != nil {
+				return time.Time{}, User{}, fmt.Errorf("close gz: %w", err)
+			}
+			mbox = append(mbox, _mbox...)
+			_, messages := mailstuff.ThreadMessages(mbox)
+			thread = messages[msgid]
+
+			// Stop once a month contributes no new messages to the thread.
+			if p.threadLen(thread) == lenBefore {
+				break
+			}
+		}
+		if thread == nil {
+			continue
+		}
+
+		var retTime time.Time
+		var retUser User
+
+		var walk func(*mailstuff.ThreadedMessage)
+		walk = func(msg *mailstuff.ThreadedMessage) {
+			if msg == nil {
+				return
+			}
+			// A thread node may carry no message (threadLen allows for this
+			// too); reading its headers would dereference a nil pointer, so
+			// only its children are visited.
+			if msg.Message != nil {
+				date, dateErr := msg.Header.Date()
+				froms, fromErr := msg.Header.AddressList("From")
+				if dateErr == nil && fromErr == nil && len(froms) > 0 && (retTime.IsZero() || date.After(retTime)) {
+					retTime = date
+					retUser.Name = froms[0].Name
+					if retUser.Name == "" {
+						retUser.Name = froms[0].Address
+					}
+					retUser.URL = "mailto:" + froms[0].Address
+				}
+			}
+			for child := range msg.Children {
+				walk(child)
+			}
+		}
+		walk(thread)
+
+		if !retTime.IsZero() {
+			return retTime, retUser, nil
+		}
+	}
+	return time.Time{}, User{}, nil
+}