summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Luke T. Shumaker <lukeshu@lukeshu.com>, 2024-06-08 21:45:35 -0600
committer: Luke T. Shumaker <lukeshu@lukeshu.com>, 2024-06-08 21:47:34 -0600
commit: 0bd9be7a0ba08fdecb065790f48d2704c88d22c0 (patch)
tree: 8460db4aef7edbb8ae30d17227fa468a4d08d811
parent: 8cc4eb82796727f20accfce8d049f677e6210824 (diff)
fix pipermail threading
-rw-r--r-- cmd/generate/forge_pipermail.go 32
1 files changed, 25 insertions, 7 deletions
diff --git a/cmd/generate/forge_pipermail.go b/cmd/generate/forge_pipermail.go
index 56e7ef2..ccc58f0 100644
--- a/cmd/generate/forge_pipermail.go
+++ b/cmd/generate/forge_pipermail.go
@@ -1,6 +1,7 @@
package main
import (
+ "compress/gzip"
"errors"
"fmt"
"net/mail"
@@ -17,7 +18,7 @@ import (
var (
rePiperMailMessage = regexp.MustCompile(`^(https?://.*/pipermail/.*/)([0-9]{4}-(?:January|February|March|April|May|June|July|August|September|October|November|December))/([0-9]+)\.html$`)
rePiperMailDate = regexp.MustCompile(`^\s*<I>([^<]+)</I>\s*$`)
- rePiperMailReply = regexp.MustCompile(`^\s*<LINK REL="made" HREF="(.*)">\s$`)
+ rePiperMailReply = regexp.MustCompile(`^\s*<LINK REL="made" HREF="(.*)">\s*$`)
)
type PiperMail struct{}
@@ -109,7 +110,7 @@ func (p PiperMail) FetchLastUpdated(urls []string) (time.Time, User, error) {
htmlStr, err := httpGet(u, nil)
if err != nil {
- return time.Time{}, User{}, err
+ return time.Time{}, User{}, fmt.Errorf("could not fetch message: %w", err)
}
var msgid mailstuff.MessageID
for _, line := range strings.Split(htmlStr, "\n") {
@@ -131,13 +132,24 @@ func (p PiperMail) FetchLastUpdated(urls []string) (time.Time, User, error) {
for ym, mbox := uYM, []*mail.Message(nil); true; ym = p.nextMonth(ym) {
lenBefore := p.threadLen(thread)
- mboxStr, err := httpGet(uBase+ym+".txt.gz", nil)
- if err != nil && (ym == uYM || !errors.Is(err, os.ErrNotExist)) {
- return time.Time{}, User{}, err
+ mboxGzStr, err := httpGet(uBase+ym+".txt.gz", nil)
+ if err != nil {
+ if (ym == uYM || !errors.Is(err, os.ErrNotExist)) {
+ return time.Time{}, User{}, fmt.Errorf("could not fetch mbox for %s: %w", ym, err)
+ }
+ break
+ }
+ gzReader, err := gzip.NewReader(strings.NewReader(mboxGzStr))
+ if err != nil {
+ return time.Time{}, User{}, fmt.Errorf("could not read mbox gz: %w", err)
}
- _mbox, err := mailstuff.ReadMBox(strings.NewReader(mboxStr))
+ _mbox, err := mailstuff.ReadMBox(gzReader)
if err != nil {
- return time.Time{}, User{}, err
+ gzReader.Close()
+ return time.Time{}, User{}, fmt.Errorf("could not parse mbox: %w", err)
+ }
+ if err := gzReader.Close(); err != nil {
+ return time.Time{}, User{}, fmt.Errorf("close gz: %w", err)
}
mbox = append(mbox, _mbox...)
_, messages := mailstuff.ThreadMessages(mbox)
@@ -147,6 +159,9 @@ func (p PiperMail) FetchLastUpdated(urls []string) (time.Time, User, error) {
break
}
}
+ if thread == nil {
+ continue
+ }
var retTime time.Time
var retUser User
@@ -163,6 +178,9 @@ func (p PiperMail) FetchLastUpdated(urls []string) (time.Time, User, error) {
}
retUser.URL = "mailto:" + froms[0].Address
}
+ for child := range msg.Children {
+ walk(child)
+ }
}
walk(thread)