diff options
Diffstat (limited to 'cmd/generate/mailstuff/thread_alg.go')
-rw-r--r-- | cmd/generate/mailstuff/thread_alg.go | 226 |
1 files changed, 226 insertions, 0 deletions
diff --git a/cmd/generate/mailstuff/thread_alg.go b/cmd/generate/mailstuff/thread_alg.go new file mode 100644 index 0000000..1b351e9 --- /dev/null +++ b/cmd/generate/mailstuff/thread_alg.go @@ -0,0 +1,226 @@ +package mailstuff + +import ( + "regexp" + "strings" +) + +// https://www.jwz.org/doc/threading.html + +// TODO: See ./jwz.md for RFC 5256 changes we might want to bring in. + +// Definitions ///////////////////////////////////////////////////////////////// + +type jwzContainer struct { + Message *jwzMessage + Parent *jwzContainer + Children Set[*jwzContainer] +} + +type jwzMessage struct { + Subject string + ID jwzID + References []jwzID +} + +type jwzID = MessageID //string + +func (ancestor *jwzContainer) IsAncestorOf(descendent *jwzContainer) bool { + if ancestor == descendent { + return true + } + for child := range ancestor.Children { + if child.IsAncestorOf(descendent) { + return true + } + } + return false +} + +// The Algorithm /////////////////////////////////////////////////////////////// + +var jwzSubjectRE = regexp.MustCompile(`^(?:\s*[Rr][Ee](?:\[[0-9]+\])?:)*`) + +func jwzThreadMessages(msgs map[jwzID]*jwzMessage) Set[*jwzContainer] { + idTable := make(map[jwzID]*jwzContainer, len(msgs)) + + // 1. For each message + for _, msg := range msgs { + // A. + msgContainer := idTable[msg.ID] + if msgContainer != nil && msgContainer.Message == nil { + msgContainer.Message = msg + } else { + msgContainer = &jwzContainer{ + Message: msg, + Children: make(Set[*jwzContainer]), + } + idTable[msg.ID] = msgContainer + } + // B. + for _, refID := range msg.References { + refContainer := idTable[refID] + if refContainer == nil { + refContainer = &jwzContainer{ + Children: make(Set[*jwzContainer]), + } + idTable[refID] = refContainer + } + } + for i := 0; i+1 < len(msg.References); i++ { + parent := idTable[msg.References[i]] + child := idTable[msg.References[i+1]] + if child.Parent == nil && !parent.IsAncestorOf(child) && !child.IsAncestorOf(parent) { + parent.Children.Insert(child) + child.Parent = parent + } + } + // C. + if len(msg.References) == 0 { + if msgContainer.Parent != nil { + delete(msgContainer.Parent.Children, msgContainer) + } + msgContainer.Parent = nil + } else { + msgContainer.Parent = idTable[msg.References[len(msg.References)-1]] + msgContainer.Parent.Children.Insert(msgContainer) + } + } + + // 2. Find the root Set + root := &jwzContainer{ + Children: make(Set[*jwzContainer]), + } + for _, container := range idTable { + if container.Parent == nil { + container.Parent = root + root.Children.Insert(container) + } + } + + // 3. Discard id_table + idTable = nil + + // 4. Prune empty containers + var recurse func(*jwzContainer) + recurse = func(container *jwzContainer) { + // Recurse. This is a touch complicated because + // `recurse(child)` might insert into + // `container.Children`, and those insertions might + // not be emitted by the range loop + for visited := make(Set[*jwzContainer]); ; { + beforeSize := len(visited) + for child := range container.Children { + if visited.Has(child) { + continue + } + recurse(child) + visited.Insert(child) + } + if len(visited) == beforeSize { + break + } + } + if container.Parent == nil { + return + } + // Main. + if container.Message == nil { + if len(container.Children) == 0 { // A. + delete(container.Parent.Children, container) + } else { // B. + if len(container.Children) == 1 || container.Parent != root { + for child := range container.Children { + container.Parent.Children.Insert(child) + child.Parent = container.Parent + } + delete(container.Parent.Children, container) + } + } + } + } + recurse(root) + + // 5. Group root Set by subject + // A. + subjectTable := make(map[string]*jwzContainer) + // B. + for this := range root.Children { + var subject string + if this.Message != nil { + subject = this.Message.Subject + } else { + subject = this.Children.PickOne().Message.Subject + } + prefix := jwzSubjectRE.FindString(subject) + subject = strings.TrimSpace(subject[len(prefix):]) + if subject == "" { + continue + } + if other := subjectTable[subject]; other == nil { + subjectTable[subject] = this + } else if other.Message == nil { + subjectTable[subject] = this + } else if jwzSubjectRE.MatchString(other.Message.Subject) && prefix == "" { + subjectTable[subject] = this + } + } + // C. + for this := range root.Children { + var subject string + if this.Message != nil { + subject = this.Message.Subject + } else { + subject = this.Children.PickOne().Message.Subject + } + prefix := jwzSubjectRE.FindString(subject) + subject = strings.TrimSpace(subject[len(prefix):]) + + other := subjectTable[subject] + if other == nil || other == this { + continue + } + + switch { + case this.Message == nil && other.Message == nil: + for child := range this.Children { + other.Children.Insert(child) + child.Parent = other + } + delete(root.Children, this) + case (this.Message == nil) != (other.Message == nil): + var empty, nonEmpty *jwzContainer + if this.Message == nil { + empty = this + nonEmpty = other + } else { + empty = other + nonEmpty = this + } + empty.Children.Insert(nonEmpty) + nonEmpty.Parent = empty + case other.Message != nil && !jwzSubjectRE.MatchString(other.Message.Subject) && prefix != "": + other.Children.Insert(this) + this.Parent = other + // skip the reverse of the above case--it happened implicitly + default: + newParent := &jwzContainer{ + Children: make(Set[*jwzContainer], 2), + } + newParent.Children.Insert(this) + this.Parent = newParent + newParent.Children.Insert(other) + other.Parent = newParent + subjectTable[subject] = newParent + root.Children.Insert(newParent) + delete(root.Children, this) + delete(root.Children, other) + } + } + + // 6. Now you're done threading + for child := range root.Children { + child.Parent = nil + } + return root.Children +} |