package mailstuff import ( "regexp" "strings" ) // https://www.jwz.org/doc/threading.html // TODO: See ./jwz.md for RFC 5256 changes we might want to bring in. // Definitions ///////////////////////////////////////////////////////////////// type jwzContainer struct { Message *jwzMessage Parent *jwzContainer Children Set[*jwzContainer] } type jwzMessage struct { Subject string ID jwzID References []jwzID } type jwzID = MessageID //string func (ancestor *jwzContainer) IsAncestorOf(descendent *jwzContainer) bool { if ancestor == descendent { return true } for child := range ancestor.Children { if child.IsAncestorOf(descendent) { return true } } return false } // The Algorithm /////////////////////////////////////////////////////////////// var jwzSubjectRE = regexp.MustCompile(`^(?:\s*[Rr][Ee](?:\[[0-9]+\])?:)*`) func jwzThreadMessages(msgs map[jwzID]*jwzMessage) Set[*jwzContainer] { idTable := make(map[jwzID]*jwzContainer, len(msgs)) // 1. For each message for _, msg := range msgs { // A. msgContainer := idTable[msg.ID] if msgContainer != nil && msgContainer.Message == nil { msgContainer.Message = msg } else { msgContainer = &jwzContainer{ Message: msg, Children: make(Set[*jwzContainer]), } idTable[msg.ID] = msgContainer } // B. for _, refID := range msg.References { refContainer := idTable[refID] if refContainer == nil { refContainer = &jwzContainer{ Children: make(Set[*jwzContainer]), } idTable[refID] = refContainer } } for i := 0; i+1 < len(msg.References); i++ { parent := idTable[msg.References[i]] child := idTable[msg.References[i+1]] if child.Parent == nil && !parent.IsAncestorOf(child) && !child.IsAncestorOf(parent) { parent.Children.Insert(child) child.Parent = parent } } // C. if len(msg.References) == 0 { if msgContainer.Parent != nil { delete(msgContainer.Parent.Children, msgContainer) } msgContainer.Parent = nil } else { msgContainer.Parent = idTable[msg.References[len(msg.References)-1]] msgContainer.Parent.Children.Insert(msgContainer) } } // 2. Find the root Set root := &jwzContainer{ Children: make(Set[*jwzContainer]), } for _, container := range idTable { if container.Parent == nil { container.Parent = root root.Children.Insert(container) } } // 3. Discard id_table idTable = nil // 4. Prune empty containers var recurse func(*jwzContainer) recurse = func(container *jwzContainer) { // Recurse. This is a touch complicated because // `recurse(child)` might insert into // `container.Children`, and those insertions might // not be emitted by the range loop for visited := make(Set[*jwzContainer]); ; { beforeSize := len(visited) for child := range container.Children { if visited.Has(child) { continue } recurse(child) visited.Insert(child) } if len(visited) == beforeSize { break } } if container.Parent == nil { return } // Main. if container.Message == nil { if len(container.Children) == 0 { // A. delete(container.Parent.Children, container) } else { // B. if len(container.Children) == 1 || container.Parent != root { for child := range container.Children { container.Parent.Children.Insert(child) child.Parent = container.Parent } delete(container.Parent.Children, container) } } } } recurse(root) // 5. Group root Set by subject // A. subjectTable := make(map[string]*jwzContainer) // B. for this := range root.Children { var subject string if this.Message != nil { subject = this.Message.Subject } else { subject = this.Children.PickOne().Message.Subject } prefix := jwzSubjectRE.FindString(subject) subject = strings.TrimSpace(subject[len(prefix):]) if subject == "" { continue } if other := subjectTable[subject]; other == nil { subjectTable[subject] = this } else if other.Message == nil { subjectTable[subject] = this } else if jwzSubjectRE.MatchString(other.Message.Subject) && prefix == "" { subjectTable[subject] = this } } // C. for this := range root.Children { var subject string if this.Message != nil { subject = this.Message.Subject } else { subject = this.Children.PickOne().Message.Subject } prefix := jwzSubjectRE.FindString(subject) subject = strings.TrimSpace(subject[len(prefix):]) other := subjectTable[subject] if other == nil || other == this { continue } switch { case this.Message == nil && other.Message == nil: for child := range this.Children { other.Children.Insert(child) child.Parent = other } delete(root.Children, this) case (this.Message == nil) != (other.Message == nil): var empty, nonEmpty *jwzContainer if this.Message == nil { empty = this nonEmpty = other } else { empty = other nonEmpty = this } empty.Children.Insert(nonEmpty) nonEmpty.Parent = empty case other.Message != nil && !jwzSubjectRE.MatchString(other.Message.Subject) && prefix != "": other.Children.Insert(this) this.Parent = other // skip the reverse of the above case--it happened implicitly default: newParent := &jwzContainer{ Children: make(Set[*jwzContainer], 2), } newParent.Children.Insert(this) this.Parent = newParent newParent.Children.Insert(other) other.Parent = newParent subjectTable[subject] = newParent root.Children.Insert(newParent) delete(root.Children, this) delete(root.Children, other) } } // 6. Now you're done threading for child := range root.Children { child.Parent = nil } return root.Children }