package mailstuff import ( "regexp" "strings" ) type set[T comparable] map[T]struct{} func (s set[T]) Insert(val T) { s[val] = struct{}{} } func (s set[T]) Has(val T) bool { _, ok := s[val] return ok } func (s set[T]) PickOne() T { for v := range s { return v } var zero T return zero } //////////////////////////////////////////////////////////////////////////////// // https://www.jwz.org/doc/threading.html // Definitions ///////////////////////////////////////////////////////////////// type jwzContainer struct { Message *jwzMessage Parent *jwzContainer Children set[*jwzContainer] } type jwzMessage struct { Subject string ID jwzID References []jwzID } type jwzID string func (ancestor *jwzContainer) IsAncestorOf(descendent *jwzContainer) bool { if ancestor == descendent { return true } for child := range ancestor.Children { if child.IsAncestorOf(descendent) { return true } } return false } // The Algorithm /////////////////////////////////////////////////////////////// var jwzSubjectRE = regexp.MustCompile(`^(?:\s*[Rr][Ee](?:\[[0-9]+\])?:)*`) func jwzThreadMessages(msgs map[jwzID]*jwzMessage) set[*jwzContainer] { idTable := make(map[jwzID]*jwzContainer, len(msgs)) // 1. For each message for _, msg := range msgs { // A. msgContainer := idTable[msg.ID] if msgContainer != nil && msgContainer.Message == nil { msgContainer.Message = msg } else { msgContainer = &jwzContainer{ Message: msg, Children: make(set[*jwzContainer]), } idTable[msg.ID] = msgContainer } // B. for _, refID := range msg.References { refContainer := idTable[refID] if refContainer == nil { refContainer = &jwzContainer{ Children: make(set[*jwzContainer]), } idTable[refID] = refContainer } } for i := 0; i+1 < len(msg.References); i++ { parent := idTable[msg.References[i]] child := idTable[msg.References[i+1]] if !parent.IsAncestorOf(child) && !child.IsAncestorOf(parent) { parent.Children.Insert(child) child.Parent = parent } } // C. if len(msg.References) == 0 { if msgContainer.Parent != nil { delete(msgContainer.Parent.Children, msgContainer) } msgContainer.Parent = nil } else { msgContainer.Parent = idTable[msg.References[len(msg.References)-1]] msgContainer.Parent.Children.Insert(msgContainer) } } // 2. Find the root set roots := make(set[*jwzContainer]) for _, container := range idTable { if container.Parent == nil { roots.Insert(container) } } // 3. Discard id_table idTable = nil // 4. Prune empty containers pseudoRoot := &jwzContainer{ Children: roots, } for root := range roots { root.Parent = pseudoRoot } var recurse func(*jwzContainer) recurse = func(container *jwzContainer) { // Recurse. This is a touch complicated because // `recurse(child)` might insert into // `container.Children`, and those insertions might // not be emitted by the range loop for visited := make(set[*jwzContainer]); ; { beforeSize := len(visited) for child := range container.Children { if visited.Has(child) { continue } recurse(child) visited.Insert(child) } if len(visited) == beforeSize { break } } // Main. if container.Message == nil { if len(container.Children) == 0 { // A. delete(container.Parent.Children, container) } else { // B. if len(container.Children) == 1 || container.Parent != pseudoRoot { for child := range container.Children { container.Parent.Children.Insert(child) child.Parent = container.Parent } delete(container.Parent.Children, container) } } } } for root := range roots { recurse(root) } for root := range roots { root.Parent = nil } pseudoRoot = nil // 5. Group root set by subject // A. subjectTable := make(map[string]*jwzContainer) // B. for root := range roots { var subject string if root.Message != nil { subject = root.Message.Subject } else { subject = root.Children.PickOne().Message.Subject } prefix := jwzSubjectRE.FindString(subject) subject = strings.TrimSpace(subject[len(prefix):]) if subject == "" { continue } if other := subjectTable[subject]; other == nil { subjectTable[subject] = root } else if other.Message == nil { subjectTable[subject] = root } else if jwzSubjectRE.MatchString(other.Message.Subject) && prefix == "" { subjectTable[subject] = root } } // C. for root := range roots { var subject string if root.Message != nil { subject = root.Message.Subject } else { subject = root.Children.PickOne().Message.Subject } prefix := jwzSubjectRE.FindString(subject) subject = strings.TrimSpace(subject[len(prefix):]) other := subjectTable[subject] if other == nil || other == root { continue } switch { case root.Message == nil && other.Message == nil: for child := range root.Children { other.Children.Insert(child) child.Parent = other } delete(roots, root) case (root.Message == nil) != (other.Message == nil): var empty, nonEmpty *jwzContainer if root.Message == nil { empty = root nonEmpty = other } else { empty = other nonEmpty = root } empty.Children.Insert(nonEmpty) nonEmpty.Parent = empty case other.Message != nil && !jwzSubjectRE.MatchString(other.Message.Subject) && prefix != "": other.Children.Insert(root) root.Parent = other // skip the reverse of the above case--it happened implicitly default: newRoot := &jwzContainer{ Children: make(set[*jwzContainer], 2), } newRoot.Children.Insert(root) root.Parent = newRoot newRoot.Children.Insert(other) other.Parent = newRoot subjectTable[subject] = newRoot roots.Insert(newRoot) delete(roots, root) delete(roots, other) } } // 6. Now you're done threading return roots }