From 0bd9be7a0ba08fdecb065790f48d2704c88d22c0 Mon Sep 17 00:00:00 2001 From: "Luke T. Shumaker" Date: Sat, 8 Jun 2024 21:45:35 -0600 Subject: fix pipermail threading --- cmd/generate/forge_pipermail.go | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) (limited to 'cmd/generate/forge_pipermail.go') diff --git a/cmd/generate/forge_pipermail.go b/cmd/generate/forge_pipermail.go index 56e7ef2..ccc58f0 100644 --- a/cmd/generate/forge_pipermail.go +++ b/cmd/generate/forge_pipermail.go @@ -1,6 +1,7 @@ package main import ( + "compress/gzip" "errors" "fmt" "net/mail" @@ -17,7 +18,7 @@ import ( var ( rePiperMailMessage = regexp.MustCompile(`^(https?://.*/pipermail/.*/)([0-9]{4}-(?:January|February|March|April|May|June|July|August|September|October|November|December))/([0-9]+)\.html$`) rePiperMailDate = regexp.MustCompile(`^\s*([^<]+)\s*$`) - rePiperMailReply = regexp.MustCompile(`^\s*\s$`) + rePiperMailReply = regexp.MustCompile(`^\s*\s*$`) ) type PiperMail struct{} @@ -109,7 +110,7 @@ func (p PiperMail) FetchLastUpdated(urls []string) (time.Time, User, error) { htmlStr, err := httpGet(u, nil) if err != nil { - return time.Time{}, User{}, err + return time.Time{}, User{}, fmt.Errorf("could not fetch message: %w", err) } var msgid mailstuff.MessageID for _, line := range strings.Split(htmlStr, "\n") { @@ -131,13 +132,24 @@ func (p PiperMail) FetchLastUpdated(urls []string) (time.Time, User, error) { for ym, mbox := uYM, []*mail.Message(nil); true; ym = p.nextMonth(ym) { lenBefore := p.threadLen(thread) - mboxStr, err := httpGet(uBase+ym+".txt.gz", nil) - if err != nil && (ym == uYM || !errors.Is(err, os.ErrNotExist)) { - return time.Time{}, User{}, err + mboxGzStr, err := httpGet(uBase+ym+".txt.gz", nil) + if err != nil { + if (ym == uYM || !errors.Is(err, os.ErrNotExist)) { + return time.Time{}, User{}, fmt.Errorf("could not fetch mbox for %s: %w", ym, err) + } + break + } + gzReader, err := gzip.NewReader(strings.NewReader(mboxGzStr)) + 
if err != nil { + return time.Time{}, User{}, fmt.Errorf("could not read mbox gz: %w", err) } - _mbox, err := mailstuff.ReadMBox(strings.NewReader(mboxStr)) + _mbox, err := mailstuff.ReadMBox(gzReader) if err != nil { - return time.Time{}, User{}, err + gzReader.Close() + return time.Time{}, User{}, fmt.Errorf("could not parse mbox: %w", err) + } + if err := gzReader.Close(); err != nil { + return time.Time{}, User{}, fmt.Errorf("close gz: %w", err) } mbox = append(mbox, _mbox...) _, messages := mailstuff.ThreadMessages(mbox) @@ -147,6 +159,9 @@ func (p PiperMail) FetchLastUpdated(urls []string) (time.Time, User, error) { break } } + if thread == nil { + continue + } var retTime time.Time var retUser User @@ -163,6 +178,9 @@ func (p PiperMail) FetchLastUpdated(urls []string) (time.Time, User, error) { } retUser.URL = "mailto:" + froms[0].Address } + for child := range msg.Children { + walk(child) + } } walk(thread) -- cgit