summaryrefslogtreecommitdiff
path: root/cmd/generate/mailstuff
diff options
context:
space:
mode:
Diffstat (limited to 'cmd/generate/mailstuff')
-rw-r--r--cmd/generate/mailstuff/mbox.go38
-rw-r--r--cmd/generate/mailstuff/thread.go248
2 files changed, 286 insertions, 0 deletions
diff --git a/cmd/generate/mailstuff/mbox.go b/cmd/generate/mailstuff/mbox.go
new file mode 100644
index 0000000..8700c24
--- /dev/null
+++ b/cmd/generate/mailstuff/mbox.go
@@ -0,0 +1,38 @@
+package mailstuff
+
+import (
+ "bytes"
+ "io"
+ "net/mail"
+)
+
+func ReadMBox(r io.Reader) ([]*mail.Message, error) {
+ rest, err := io.ReadAll(r)
+ if err != nil {
+ return nil, err
+ }
+
+ const terminator = "\nFrom "
+
+ var parts [][]byte
+ for {
+ pos := bytes.Index(rest, []byte(terminator))
+ if pos < 0 {
+ parts = append(parts, rest)
+ break
+ }
+ parts = append(parts, rest[:pos+1])
+ rest = rest[pos+1:]
+ }
+
+ ret := make([]*mail.Message, len(parts))
+ for i := range len(parts) {
+ msg, err := mail.ReadMessage(bytes.NewReader(parts[i]))
+ if err != nil {
+ return nil, err
+ }
+ ret[i] = msg
+ }
+
+ return ret, nil
+}
diff --git a/cmd/generate/mailstuff/thread.go b/cmd/generate/mailstuff/thread.go
new file mode 100644
index 0000000..c6fa181
--- /dev/null
+++ b/cmd/generate/mailstuff/thread.go
@@ -0,0 +1,248 @@
+package mailstuff
+
+import (
+ "regexp"
+ "strings"
+)
+
+type set[T comparable] map[T]struct{}
+
+func (s set[T]) Insert(val T) {
+ s[val] = struct{}{}
+}
+
+func (s set[T]) Has(val T) bool {
+ _, ok := s[val]
+ return ok
+}
+
+func (s set[T]) PickOne() T {
+ for v := range s {
+ return v
+ }
+ var zero T
+ return zero
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// https://www.jwz.org/doc/threading.html
+
+// Definitions /////////////////////////////////////////////////////////////////
+
+type jwzContainer struct {
+ Message *jwzMessage
+ Parent *jwzContainer
+ Children set[*jwzContainer]
+}
+
+type jwzMessage struct {
+ Subject string
+ ID jwzID
+ References []jwzID
+}
+
+type jwzID string
+
+func (ancestor *jwzContainer) IsAncestorOf(descendent *jwzContainer) bool {
+ if ancestor == descendent {
+ return true
+ }
+ for child := range ancestor.Children {
+ if child.IsAncestorOf(descendent) {
+ return true
+ }
+ }
+ return false
+}
+
+// The Algorithm ///////////////////////////////////////////////////////////////
+
+var jwzSubjectRE = regexp.MustCompile(`^(?:\s*[Rr][Ee](?:\[[0-9]+\])?:)*`)
+
+func jwzThreadMessages(msgs map[jwzID]*jwzMessage) set[*jwzContainer] {
+ idTable := make(map[jwzID]*jwzContainer, len(msgs))
+
+ // 1. For each message
+ for _, msg := range msgs {
+ // A.
+ msgContainer := idTable[msg.ID]
+ if msgContainer != nil && msgContainer.Message == nil {
+ msgContainer.Message = msg
+ } else {
+ msgContainer = &jwzContainer{
+ Message: msg,
+ Children: make(set[*jwzContainer]),
+ }
+ idTable[msg.ID] = msgContainer
+ }
+ // B.
+ for _, refID := range msg.References {
+ refContainer := idTable[refID]
+ if refContainer == nil {
+ refContainer = &jwzContainer{
+ Children: make(set[*jwzContainer]),
+ }
+ idTable[refID] = refContainer
+ }
+ }
+ for i := 0; i+1 < len(msg.References); i++ {
+ parent := idTable[msg.References[i]]
+ child := idTable[msg.References[i+1]]
+ if !parent.IsAncestorOf(child) && !child.IsAncestorOf(parent) {
+ parent.Children.Insert(child)
+ child.Parent = parent
+ }
+ }
+ // C.
+ if len(msg.References) == 0 {
+ if msgContainer.Parent != nil {
+ delete(msgContainer.Parent.Children, msgContainer)
+ }
+ msgContainer.Parent = nil
+ } else {
+ msgContainer.Parent = idTable[msg.References[len(msg.References)-1]]
+ msgContainer.Parent.Children.Insert(msgContainer)
+ }
+ }
+
+ // 2. Find the root set
+ roots := make(set[*jwzContainer])
+ for _, container := range idTable {
+ if container.Parent == nil {
+ roots.Insert(container)
+ }
+ }
+
+ // 3. Discard id_table
+ idTable = nil
+
+ // 4. Prune empty containers
+ pseudoRoot := &jwzContainer{
+ Children: roots,
+ }
+ for root := range roots {
+ root.Parent = pseudoRoot
+ }
+ var recurse func(*jwzContainer)
+ recurse = func(container *jwzContainer) {
+ // Recurse. This is a touch complicated because
+ // `recurse(child)` might insert into
+ // `container.Children`, and those insertions might
+ // not be emitted by the range loop
+ for visited := make(set[*jwzContainer]); ; {
+ beforeSize := len(visited)
+ for child := range container.Children {
+ if visited.Has(child) {
+ continue
+ }
+ recurse(child)
+ visited.Insert(child)
+ }
+ if len(visited) == beforeSize {
+ break
+ }
+ }
+ // Main.
+ if container.Message == nil {
+ if len(container.Children) == 0 { // A.
+ delete(container.Parent.Children, container)
+ } else { // B.
+ if len(container.Children) == 1 || container.Parent != pseudoRoot {
+ for child := range container.Children {
+ container.Parent.Children.Insert(child)
+ child.Parent = container.Parent
+ }
+ delete(container.Parent.Children, container)
+ }
+ }
+ }
+ }
+ for root := range roots {
+ recurse(root)
+ }
+ for root := range roots {
+ root.Parent = nil
+ }
+ pseudoRoot = nil
+
+ // 5. Group root set by subject
+ // A.
+ subjectTable := make(map[string]*jwzContainer)
+ // B.
+ for root := range roots {
+ var subject string
+ if root.Message != nil {
+ subject = root.Message.Subject
+ } else {
+ subject = root.Children.PickOne().Message.Subject
+ }
+ prefix := jwzSubjectRE.FindString(subject)
+ subject = strings.TrimSpace(subject[len(prefix):])
+ if subject == "" {
+ continue
+ }
+ if other := subjectTable[subject]; other == nil {
+ subjectTable[subject] = root
+ } else if other.Message == nil {
+ subjectTable[subject] = root
+ } else if jwzSubjectRE.MatchString(other.Message.Subject) && prefix == "" {
+ subjectTable[subject] = root
+ }
+ }
+ // C.
+ for root := range roots {
+ var subject string
+ if root.Message != nil {
+ subject = root.Message.Subject
+ } else {
+ subject = root.Children.PickOne().Message.Subject
+ }
+ prefix := jwzSubjectRE.FindString(subject)
+ subject = strings.TrimSpace(subject[len(prefix):])
+
+ other := subjectTable[subject]
+ if other == nil || other == root {
+ continue
+ }
+
+ switch {
+ case root.Message == nil && other.Message == nil:
+ for child := range root.Children {
+ other.Children.Insert(child)
+ child.Parent = other
+ }
+ delete(roots, root)
+ case (root.Message == nil) != (other.Message == nil):
+ var empty, nonEmpty *jwzContainer
+ if root.Message == nil {
+ empty = root
+ nonEmpty = other
+ } else {
+ empty = other
+ nonEmpty = root
+ }
+ empty.Children.Insert(nonEmpty)
+ nonEmpty.Parent = empty
+ case other.Message != nil && !jwzSubjectRE.MatchString(other.Message.Subject) && prefix != "":
+ other.Children.Insert(root)
+ root.Parent = other
+ // skip the reverse of the above case--it happened implicitly
+ default:
+ newRoot := &jwzContainer{
+ Children: make(set[*jwzContainer], 2),
+ }
+ newRoot.Children.Insert(root)
+ root.Parent = newRoot
+ newRoot.Children.Insert(other)
+ other.Parent = newRoot
+ subjectTable[subject] = newRoot
+ roots.Insert(newRoot)
+ delete(roots, root)
+ delete(roots, other)
+ }
+ }
+
+ // 6. Now you're done threading
+ return roots
+}