summaryrefslogtreecommitdiff
path: root/cmd/generate
diff options
context:
space:
mode:
author: Luke T. Shumaker <lukeshu@lukeshu.com> 2024-06-08 17:51:41 -0600
committer: Luke T. Shumaker <lukeshu@lukeshu.com> 2024-06-08 17:51:41 -0600
commit: 5dc2e9533a111d75ff91a56dd50af8e03ebf5f5f (patch)
tree: e90d2b74612ecb44fb0e41a19e44483f90a071ba /cmd/generate
parent: f6080300406a674419dba5005c76bc424df35502 (diff)
wip pipermail threading
Diffstat (limited to 'cmd/generate')
-rw-r--r-- cmd/generate/forge_github.go 68
-rw-r--r-- cmd/generate/forge_pipermail.go 133
-rw-r--r-- cmd/generate/mailstuff/mbox.go 38
-rw-r--r-- cmd/generate/mailstuff/thread.go 248
-rw-r--r-- cmd/generate/src_contribs.go 19
5 files changed, 445 insertions, 61 deletions
diff --git a/cmd/generate/forge_github.go b/cmd/generate/forge_github.go
index d3618ce..d29e3f7 100644
--- a/cmd/generate/forge_github.go
+++ b/cmd/generate/forge_github.go
@@ -7,7 +7,10 @@ import (
"time"
)
-var reGitHubPR = regexp.MustCompile(`^https://github\.com/([^/?#]+)/([^/?#]+)/pull/([0-9]+)(?:\?[^#]*)?(?:#.*)?$`)
+var (
+ reGitHubPR = regexp.MustCompile(`^https://github\.com/([^/?#]+)/([^/?#]+)/pull/([0-9]+)(?:\?[^#]*)?(?:#.*)?$`)
+ reGitHubCommit = regexp.MustCompile(`^https://github\.com/([^/?#]+)/([^/?#]+)/commit/([0-9a-f]+)(?:\?[^#]*)?(?:#.*)?$`)
+)
func githubPagination(i int) url.Values {
params := make(url.Values)
@@ -20,6 +23,7 @@ type GitHub struct{}
var _ Forge = GitHub{}
func (GitHub) FetchStatus(urls []string) (string, error) {
+ // PR
for _, u := range urls {
m := reGitHubPR.FindStringSubmatch(u)
if m == nil {
@@ -54,6 +58,31 @@ func (GitHub) FetchStatus(urls []string) (string, error) {
return ret, nil
}
+ // Commits from a non-PR
+ var gitURL string
+ var gitCommits []string
+ for _, u := range urls {
+ if m := reGitHubCommit.FindStringSubmatch(u); m != nil {
+ user := m[1]
+ repo := m[2]
+ hash := m[3]
+
+ gitURL = "https://github.com/" + user + "/" + repo
+ gitCommits = append(gitCommits, hash)
+ }
+ }
+ if len(gitCommits) > 0 {
+ ret := statusMerged
+ tag, err := getGitTagThatContainsAll(gitURL, gitCommits...)
+ if err != nil {
+ return "", err
+ }
+ if tag != "" {
+ ret = fmt.Sprintf(statusReleasedFmt, tag)
+ }
+ return ret, nil
+ }
+ // Nope
return "", nil
}
@@ -81,6 +110,7 @@ func (GitHub) FetchSubmittedAt(urls []string) (time.Time, error) {
}
func (GitHub) FetchLastUpdated(urls []string) (time.Time, User, error) {
+ // PR
for _, u := range urls {
m := reGitHubPR.FindStringSubmatch(u)
if m == nil {
@@ -184,5 +214,41 @@ func (GitHub) FetchLastUpdated(urls []string) (time.Time, User, error) {
return retUpdatedAt, retUser, nil
}
+ // Commits from a non-PR
+ {
+ var ret time.Time
+ for _, u := range urls {
+ if m := reGitHubCommit.FindStringSubmatch(u); m != nil {
+ user := m[1]
+ repo := m[2]
+ hash := m[3]
+
+ urlStr := "https://api.github.com/repos/" + user + "/" + repo + "/commits/" + hash
+ var obj struct {
+ Commit struct {
+ Author struct {
+ Date time.Time `json:"date"`
+ } `json:"author"`
+ Committer struct {
+ Date time.Time `json:"date"`
+ } `json:"committer"`
+ } `json:"commit"`
+ }
+ if err := httpGetJSON(urlStr, nil, &obj); err != nil {
+ return time.Time{}, User{}, err
+ }
+ if obj.Commit.Author.Date.After(ret) {
+ ret = obj.Commit.Author.Date
+ }
+ if obj.Commit.Committer.Date.After(ret) {
+ ret = obj.Commit.Committer.Date
+ }
+ }
+ }
+ if ret.IsZero() {
+ return time.Time{}, User{}, nil
+ }
+ }
+ // Nope
return time.Time{}, User{}, nil
}
diff --git a/cmd/generate/forge_pipermail.go b/cmd/generate/forge_pipermail.go
index 2c5cf01..e015bb5 100644
--- a/cmd/generate/forge_pipermail.go
+++ b/cmd/generate/forge_pipermail.go
@@ -2,14 +2,17 @@ package main
import (
"fmt"
+ "net/url"
"regexp"
+ "strconv"
"strings"
"time"
)
var (
	// rePiperMailMessage matches the URL of a single archived message.
	// Submatches: 1 = archive base URL (with trailing slash),
	// 2 = "YYYY-Month" period directory, 3 = numeric message index.
	// The year is any four digits; the earlier `[0-4]{4}` rejected
	// years containing a digit 5-9 (e.g. 2019).
	rePiperMailMessage = regexp.MustCompile(`^(https?://.*/pipermail/.*/)([0-9]{4}-(?:January|February|March|April|May|June|July|August|September|October|November|December))/([0-9]+)\.html$`)
	// rePiperMailDate matches a line consisting solely of an <I>…</I>
	// element; submatch 1 is the element's text.
	rePiperMailDate = regexp.MustCompile(`^\s*<I>([^<]+)</I>\s*$`)
	// rePiperMailReply matches a <LINK REL="made" HREF="…"> line;
	// submatch 1 is the HREF value.  Trailing whitespace is optional:
	// lines produced by splitting on "\n" normally end right after the
	// closing ">", which the earlier `\s$` could never match.
	rePiperMailReply = regexp.MustCompile(`^\s*<LINK REL="made" HREF="(.*)">\s*$`)
)
type PiperMail struct{}
@@ -17,35 +20,12 @@ type PiperMail struct{}
var _ Forge = PiperMail{}
func (PiperMail) FetchStatus(urls []string) (string, error) {
- var gitURL string
- var gitCommits []string
- for _, u := range urls {
- if m := reGitHubCommit.FindStringSubmatch(u); m != nil {
- user := m[1]
- repo := m[2]
- hash := m[3]
-
- gitURL = "https://github.com/" + user + "/" + repo
- gitCommits = append(gitCommits, hash)
- }
- }
- if len(gitCommits) == 0 {
- return "", nil
- }
- ret := statusMerged
- tag, err := getGitTagThatContainsAll(gitURL, gitCommits...)
- if err != nil {
- return "", err
- }
- if tag != "" {
- ret = fmt.Sprintf(statusReleasedFmt, tag)
- }
- return ret, nil
+ return "", nil
}
func (PiperMail) FetchSubmittedAt(urls []string) (time.Time, error) {
for _, u := range urls {
- if !strings.Contains(u, "/pipermail/") {
+ if !rePiperMailMessage.MatchString(u) {
continue
}
htmlStr, err := httpGet(u, nil)
@@ -61,38 +41,79 @@ func (PiperMail) FetchSubmittedAt(urls []string) (time.Time, error) {
return time.Time{}, nil
}
-func (PiperMail) FetchLastUpdated(urls []string) (time.Time, User, error) {
- var ret time.Time
+func (PiperMail) nextMonth(ym string) string {
+ yStr, mStr, ok := strings.Cut(ym, "-")
+ if !ok {
+ panic(fmt.Errorf("invalid year-month: %q", ym))
+ }
+ switch mStr {
+ case "January":
+ return yStr + "-February"
+ case "February":
+ return yStr + "-March"
+ case "March":
+ return yStr + "-April"
+ case "April":
+ return yStr + "-May"
+ case "May":
+ return yStr + "-June"
+ case "June":
+ return yStr + "-July"
+ case "July":
+ return yStr + "-August"
+ case "August":
+ return yStr + "-September"
+ case "September":
+ return yStr + "-October"
+ case "October":
+ return yStr + "-November"
+ case "November":
+ return yStr + "-December"
+ case "December":
+ y, _ := strconv.Atoi(yStr)
+ return fmt.Sprintf("%d-January", y+1)
+ default:
+ panic(fmt.Errorf("invalid year-month: %q", ym))
+ }
+}
+
+func (PiperMail) messageID(u string) (string, error) {
+}
+
+
+func (p PiperMail) FetchLastUpdated(urls []string) (time.Time, User, error) {
for _, u := range urls {
- if m := reGitHubCommit.FindStringSubmatch(u); m != nil {
- user := m[1]
- repo := m[2]
- hash := m[3]
+ m := rePiperMailMessage.FindStringSubmatch(u)
+ if m == nil {
+ continue
+ }
+ uBase := m[1]
+ uYM := m[2]
+ //uInt := m[3]
- urlStr := "https://api.github.com/repos/" + user + "/" + repo + "/commits/" + hash
- var obj struct {
- Commit struct {
- Author struct {
- Date time.Time `json:"date"`
- } `json:"author"`
- Committer struct {
- Date time.Time `json:"date"`
- } `json:"committer"`
- } `json:"commit"`
- }
- if err := httpGetJSON(urlStr, nil, &obj); err != nil {
- return time.Time{}, User{}, err
- }
- if obj.Commit.Author.Date.After(ret) {
- ret = obj.Commit.Author.Date
- }
- if obj.Commit.Committer.Date.After(ret) {
- ret = obj.Commit.Committer.Date
+ htmlStr, err := httpGet(u, nil)
+ if err != nil {
+ return time.Time{}, User{}, err
+ }
+ var msgid string
+ for _, line := range strings.Split(htmlStr, "\n") {
+ if m := rePiperMailReply.FindStringSubmatch(line); m != nil {
+ ru, err := url.Parse(m[1])
+ if err != nil {
+ continue
+ }
+ if msgid = ru.Query().Get("In-Reply-To"); msgid != "" {
+ break
+ }
}
}
+ if msgid == "" {
+ continue
+ }
+ mboxStr, err := httpGet(uBase+uYM+".txt.gz", nil)
+ if err != nil {
+ return time.Time{}, User{}, err
+ }
}
- if ret.IsZero() {
- return time.Time{}, User{}, nil
- }
- return ret, User{}, nil
+ return time.Time{}, User{}, nil
}
diff --git a/cmd/generate/mailstuff/mbox.go b/cmd/generate/mailstuff/mbox.go
new file mode 100644
index 0000000..8700c24
--- /dev/null
+++ b/cmd/generate/mailstuff/mbox.go
@@ -0,0 +1,38 @@
+package mailstuff
+
+import (
+ "bytes"
+ "io"
+ "net/mail"
+)
+
// ReadMBox reads all of r and parses it as an mbox file, returning the
// contained messages in order.
//
// Messages are delimited by lines that begin with "From " (the mbox
// envelope line).  That envelope line is not an RFC 5322 header, so it
// is removed before the message is handed to mail.ReadMessage.  Input
// that does not begin with an envelope line is parsed as a bare
// message.  Segments that are empty or only whitespace are skipped, so
// empty input yields zero messages and a nil error.
//
// The first read or parse error aborts the whole call.
func ReadMBox(r io.Reader) ([]*mail.Message, error) {
	rest, err := io.ReadAll(r)
	if err != nil {
		return nil, err
	}

	const (
		envelope  = "From "
		separator = "\n" + envelope
	)

	var ret []*mail.Message
	for len(rest) > 0 {
		// Carve the next message off the front of rest; the newline
		// preceding the next envelope line stays with this message.
		part := rest
		if pos := bytes.Index(rest, []byte(separator)); pos >= 0 {
			part, rest = rest[:pos+1], rest[pos+1:]
		} else {
			rest = nil
		}

		// Drop the envelope line itself.
		if bytes.HasPrefix(part, []byte(envelope)) {
			_, part, _ = bytes.Cut(part, []byte("\n"))
		}
		if len(bytes.TrimSpace(part)) == 0 {
			continue
		}

		msg, err := mail.ReadMessage(bytes.NewReader(part))
		if err != nil {
			return nil, err
		}
		ret = append(ret, msg)
	}

	return ret, nil
}
diff --git a/cmd/generate/mailstuff/thread.go b/cmd/generate/mailstuff/thread.go
new file mode 100644
index 0000000..c6fa181
--- /dev/null
+++ b/cmd/generate/mailstuff/thread.go
@@ -0,0 +1,248 @@
+package mailstuff
+
+import (
+ "regexp"
+ "strings"
+)
+
// set is a minimal unordered collection of distinct values.
type set[T comparable] map[T]struct{}

// Insert adds val to the set; inserting an existing value is a no-op.
func (s set[T]) Insert(val T) {
	s[val] = struct{}{}
}

// Has reports whether val is in the set.
func (s set[T]) Has(val T) bool {
	_, ok := s[val]
	return ok
}

// PickOne returns an arbitrary member of the set, or the zero value
// of T if the set is empty.
func (s set[T]) PickOne() T {
	for v := range s {
		return v
	}
	var zero T
	return zero
}

////////////////////////////////////////////////////////////////////////////////

// https://www.jwz.org/doc/threading.html

// Definitions /////////////////////////////////////////////////////////////////

// jwzContainer is a node in the thread tree.  Message is nil for a
// placeholder: an ID that was referenced but whose message was not
// supplied, or a grouping node created while merging by subject.
type jwzContainer struct {
	Message  *jwzMessage
	Parent   *jwzContainer
	Children set[*jwzContainer]
}

// jwzMessage is the subset of a mail message that threading looks at.
type jwzMessage struct {
	Subject    string
	ID         jwzID
	References []jwzID
}

// jwzID identifies a message.
type jwzID string

// IsAncestorOf reports whether descendent is ancestor itself or is
// reachable from ancestor by following Children links.
func (ancestor *jwzContainer) IsAncestorOf(descendent *jwzContainer) bool {
	if ancestor == descendent {
		return true
	}
	for child := range ancestor.Children {
		if child.IsAncestorOf(descendent) {
			return true
		}
	}
	return false
}

// subject returns the raw subject line that stands for the container:
// its own message's subject, or for a placeholder the subject of the
// child message with the smallest ID (so that repeated calls agree).
// It returns "" if there is no message to take a subject from.
func (c *jwzContainer) subject() string {
	if c.Message != nil {
		return c.Message.Subject
	}
	var best *jwzMessage
	for child := range c.Children {
		if m := child.Message; m != nil && (best == nil || m.ID < best.ID) {
			best = m
		}
	}
	if best == nil {
		return ""
	}
	return best.Subject
}

// The Algorithm ///////////////////////////////////////////////////////////////

// jwzSubjectRE matches the (possibly empty) run of "Re:" / "Re[n]:"
// markers at the start of a subject.  Because it matches the empty
// string, MatchString is always true; use jwzSplitSubject to learn
// whether a subject actually carries a reply marker.
var jwzSubjectRE = regexp.MustCompile(`^(?:\s*[Rr][Ee](?:\[[0-9]+\])?:)*`)

// jwzSplitSubject strips leading reply markers and surrounding
// whitespace from subject, and reports whether any marker was present.
func jwzSplitSubject(subject string) (base string, isReply bool) {
	prefix := jwzSubjectRE.FindString(subject)
	return strings.TrimSpace(subject[len(prefix):]), prefix != ""
}

// jwzThreadMessages arranges msgs into thread trees following the
// numbered steps of the algorithm described at the URL above, and
// returns the set of thread roots.  Every returned root has a nil
// Parent; a root with a nil Message is a placeholder that groups its
// children.  The messages themselves are not modified.
func jwzThreadMessages(msgs map[jwzID]*jwzMessage) set[*jwzContainer] {
	idTable := make(map[jwzID]*jwzContainer, len(msgs))

	// 1. For each message
	for _, msg := range msgs {
		// A. Find or create this message's container.
		msgContainer := idTable[msg.ID]
		if msgContainer != nil && msgContainer.Message == nil {
			msgContainer.Message = msg
		} else {
			msgContainer = &jwzContainer{
				Message:  msg,
				Children: make(set[*jwzContainer]),
			}
			idTable[msg.ID] = msgContainer
		}

		// B. Make sure every referenced ID has a container, then link
		// consecutive references as parent -> child.  Existing links
		// are left alone (a child that already has a parent keeps it),
		// and no link is added that would create a loop.
		for _, refID := range msg.References {
			if idTable[refID] == nil {
				idTable[refID] = &jwzContainer{
					Children: make(set[*jwzContainer]),
				}
			}
		}
		for i := 0; i+1 < len(msg.References); i++ {
			parent := idTable[msg.References[i]]
			child := idTable[msg.References[i+1]]
			if child.Parent == nil && !parent.IsAncestorOf(child) && !child.IsAncestorOf(parent) {
				parent.Children.Insert(child)
				child.Parent = parent
			}
		}

		// C. The last reference is this message's parent.  Any parent
		// guessed earlier from another message's References is
		// discarded first.  If the new link would form a loop
		// (including a message that references itself), the message
		// is left without a parent.
		var parent *jwzContainer
		if n := len(msg.References); n > 0 {
			parent = idTable[msg.References[n-1]]
			if msgContainer.IsAncestorOf(parent) {
				parent = nil
			}
		}
		if old := msgContainer.Parent; old != nil {
			delete(old.Children, msgContainer)
			msgContainer.Parent = nil
		}
		if parent != nil {
			parent.Children.Insert(msgContainer)
			msgContainer.Parent = parent
		}
	}

	// 2. Find the root set
	roots := make(set[*jwzContainer])
	for _, container := range idTable {
		if container.Parent == nil {
			roots.Insert(container)
		}
	}

	// 3. Discard id_table
	idTable = nil

	// 4. Prune empty containers
	//
	// A temporary pseudo-root whose Children *is* the root set lets
	// the pruning code treat "promote to the root set" like any other
	// promotion to a parent.
	pseudoRoot := &jwzContainer{
		Children: roots,
	}
	for root := range roots {
		root.Parent = pseudoRoot
	}
	var prune func(*jwzContainer)
	prune = func(container *jwzContainer) {
		// Children first.  Pruning a child may insert its children
		// into container.Children, and a range loop is not guaranteed
		// to produce entries inserted during iteration, so repeat
		// until a full pass visits nothing new.
		visited := make(set[*jwzContainer])
		for {
			before := len(visited)
			for child := range container.Children {
				if visited.Has(child) {
					continue
				}
				prune(child)
				visited.Insert(child)
			}
			if len(visited) == before {
				break
			}
		}
		if container.Message != nil {
			return
		}
		switch {
		case len(container.Children) == 0:
			// A. Empty and childless: drop it.
			delete(container.Parent.Children, container)
		case len(container.Children) == 1 || container.Parent != pseudoRoot:
			// B. Empty with children: promote the children, except
			// that several children are not promoted into the root
			// set.
			for child := range container.Children {
				container.Parent.Children.Insert(child)
				child.Parent = container.Parent
			}
			delete(container.Parent.Children, container)
		}
	}
	for root := range roots {
		prune(root)
	}
	for root := range roots {
		root.Parent = nil
	}
	pseudoRoot = nil

	// 5. Group root set by subject
	// A.
	subjectTable := make(map[string]*jwzContainer)
	// B. Choose one representative root per base subject: prefer a
	// placeholder over a real message, and a non-reply over a reply.
	for root := range roots {
		subject, isReply := jwzSplitSubject(root.subject())
		if subject == "" {
			continue
		}
		other := subjectTable[subject]
		switch {
		case other == nil:
			subjectTable[subject] = root
		case root.Message == nil && other.Message != nil:
			subjectTable[subject] = root
		case root.Message != nil && other.Message != nil && !isReply:
			if _, otherIsReply := jwzSplitSubject(other.Message.Subject); otherIsReply {
				subjectTable[subject] = root
			}
		}
	}
	// C. Merge every other root into the representative for its
	// subject.  A container that becomes a child leaves the root set.
	for root := range roots {
		subject, isReply := jwzSplitSubject(root.subject())

		other := subjectTable[subject]
		if other == nil || other == root {
			continue
		}

		otherIsReply := false
		if other.Message != nil {
			_, otherIsReply = jwzSplitSubject(other.Message.Subject)
		}

		switch {
		case root.Message == nil && other.Message == nil:
			// Two placeholders: fold this one's children into the other.
			for child := range root.Children {
				other.Children.Insert(child)
				child.Parent = other
			}
			delete(roots, root)
		case root.Message == nil || other.Message == nil:
			// Exactly one placeholder: the real message joins it.
			empty, nonEmpty := other, root
			if root.Message == nil {
				empty, nonEmpty = root, other
			}
			empty.Children.Insert(nonEmpty)
			nonEmpty.Parent = empty
			delete(roots, nonEmpty)
			subjectTable[subject] = empty
		case isReply && !otherIsReply:
			// A reply whose references were lost: hang it off the
			// original.  The reverse case cannot reach here because
			// step B prefers the non-reply as the representative.
			other.Children.Insert(root)
			root.Parent = other
			delete(roots, root)
		default:
			// Siblings: group both under a fresh placeholder.
			newRoot := &jwzContainer{
				Children: make(set[*jwzContainer], 2),
			}
			newRoot.Children.Insert(root)
			root.Parent = newRoot
			newRoot.Children.Insert(other)
			other.Parent = newRoot
			subjectTable[subject] = newRoot
			roots.Insert(newRoot)
			delete(roots, root)
			delete(roots, other)
		}
	}

	// 6. Now you're done threading
	return roots
}
diff --git a/cmd/generate/src_contribs.go b/cmd/generate/src_contribs.go
index 9c7bcd6..0ead1cd 100644
--- a/cmd/generate/src_contribs.go
+++ b/cmd/generate/src_contribs.go
@@ -130,15 +130,19 @@ func (c Contribution) fetchStatus() (string, error) {
}
func (c Contribution) fetchSubmittedAt() (time.Time, error) {
+ var ret time.Time
for _, forge := range forges {
submittedAt, err := forge.FetchSubmittedAt(c.URLs)
if err != nil {
return time.Time{}, err
}
- if !submittedAt.IsZero() {
- return submittedAt, nil
+ if !submittedAt.IsZero() && (ret.IsZero() || submittedAt.Before(ret)) {
+ ret = submittedAt
}
}
+ if !ret.IsZero() {
+ return ret, nil
+ }
return time.Time{}, fmt.Errorf("idk how to get created timestamp for %q", c.URLs[0])
}
@@ -151,14 +155,21 @@ func withinOneSecond(a, b time.Time) bool {
}
func (c Contribution) fetchLastUpdated() (time.Time, User, error) {
+ var ret struct {
+ time.Time
+ User
+ }
for _, forge := range forges {
updatedAt, updatedBy, err := forge.FetchLastUpdated(c.URLs)
if err != nil {
return time.Time{}, User{}, err
}
- if !updatedAt.IsZero() {
- return updatedAt, updatedBy, nil
+ if !updatedAt.IsZero() && (ret.Time.IsZero() || updatedAt.After(ret.Time)) {
+ ret.Time, ret.User = updatedAt, updatedBy
}
}
+ if !ret.Time.IsZero() {
+ return ret.Time, ret.User, nil
+ }
return time.Time{}, User{}, nil //fmt.Errorf("idk how to get updated timestamp for %q", c.URLs[0])
}