jwz.go

  1package threading
  2
  3import (
  4	"regexp"
  5	"sort"
  6	"strings"
  7	"time"
  8)
  9
 10type EmailHeader struct {
 11	ID         string
 12	InReplyTo  string
 13	References []string
 14	Subject    string
 15	Date       time.Time
 16	EmailID    string
 17	Sender     string
 18}
 19
 20type Thread struct {
 21	Root     *ThreadNode
 22	LatestAt time.Time
 23	Count    int
 24	Subject  string
 25	Senders  []string
 26}
 27
 28type ThreadNode struct {
 29	EmailID  string
 30	Children []*ThreadNode
 31	Date     time.Time
 32	Sender   string
 33	Subject  string
 34}
 35
 36type container struct {
 37	id       string
 38	node     *ThreadNode
 39	parent   *container
 40	children []*container
 41}
 42
 43var messageIDRE = regexp.MustCompile(`<[^>]+>`)
 44
 45func Build(headers []EmailHeader) []Thread {
 46	containers := make(map[string]*container)
 47	ordered := make([]*container, 0, len(headers))
 48
 49	get := func(id string) *container {
 50		if c := containers[id]; c != nil {
 51			return c
 52		}
 53		c := &container{id: id}
 54		containers[id] = c
 55		ordered = append(ordered, c)
 56		return c
 57	}
 58
 59	for _, h := range headers {
 60		msgID := normalizeMessageID(h.ID)
 61		if msgID == "" {
 62			msgID = "email:" + h.EmailID
 63		}
 64		c := get(msgID)
 65		if c.node != nil {
 66			msgID = msgID + "#email:" + h.EmailID
 67			c = get(msgID)
 68		}
 69		c.node = &ThreadNode{
 70			EmailID: h.EmailID,
 71			Date:    h.Date,
 72			Sender:  h.Sender,
 73			Subject: h.Subject,
 74		}
 75
 76		var prev *container
 77		refs := normalizeReferences(h.References)
 78		for _, ref := range refs {
 79			refc := get(ref)
 80			if prev != nil {
 81				link(prev, refc)
 82			}
 83			prev = refc
 84		}
 85
 86		parentID := normalizeMessageID(h.InReplyTo)
 87		if parentID == "" && len(refs) > 0 {
 88			parentID = refs[len(refs)-1]
 89		}
 90		if parentID != "" {
 91			link(get(parentID), c)
 92		}
 93	}
 94
 95	var roots []*container
 96	for _, c := range ordered {
 97		if c.parent == nil {
 98			if root := prune(c); root != nil {
 99				roots = append(roots, root)
100			}
101		}
102	}
103	roots = groupBySubject(roots)
104
105	threads := make([]Thread, 0, len(roots))
106	for _, root := range roots {
107		sortContainer(root)
108		thread := buildThread(root)
109		if thread.Count > 0 {
110			threads = append(threads, thread)
111		}
112	}
113
114	sort.SliceStable(threads, func(i, j int) bool {
115		if !threads[i].LatestAt.Equal(threads[j].LatestAt) {
116			return threads[i].LatestAt.After(threads[j].LatestAt)
117		}
118		return threadKey(threads[i].Root) < threadKey(threads[j].Root)
119	})
120
121	return threads
122}
123
124func normalizeReferences(refs []string) []string {
125	seen := make(map[string]bool)
126	var out []string
127	for _, ref := range refs {
128		for _, id := range extractMessageIDs(ref) {
129			if !seen[id] {
130				out = append(out, id)
131				seen[id] = true
132			}
133		}
134	}
135	return out
136}
137
138func extractMessageIDs(s string) []string {
139	matches := messageIDRE.FindAllString(s, -1)
140	if len(matches) == 0 {
141		if id := normalizeMessageID(s); id != "" {
142			return []string{id}
143		}
144		return nil
145	}
146	ids := make([]string, 0, len(matches))
147	for _, match := range matches {
148		if id := normalizeMessageID(match); id != "" {
149			ids = append(ids, id)
150		}
151	}
152	return ids
153}
154
155func normalizeMessageID(id string) string {
156	id = strings.TrimSpace(id)
157	if id == "" {
158		return ""
159	}
160	if matches := messageIDRE.FindAllString(id, -1); len(matches) > 0 {
161		id = matches[len(matches)-1]
162	}
163	id = strings.TrimSpace(id)
164	id = strings.TrimPrefix(id, "<")
165	id = strings.TrimSuffix(id, ">")
166	id = strings.TrimSpace(id)
167	return strings.ToLower(id)
168}
169
170func link(parent, child *container) {
171	if parent == nil || child == nil || parent == child {
172		return
173	}
174	if child.parent != nil || child.hasDescendant(parent) {
175		return
176	}
177	child.parent = parent
178	for _, existing := range parent.children {
179		if existing == child {
180			return
181		}
182	}
183	parent.children = append(parent.children, child)
184}
185
186func (c *container) hasDescendant(target *container) bool {
187	for _, child := range c.children {
188		if child == target || child.hasDescendant(target) {
189			return true
190		}
191	}
192	return false
193}
194
195func prune(c *container) *container {
196	if c == nil {
197		return nil
198	}
199	var children []*container
200	for _, child := range c.children {
201		if pruned := prune(child); pruned != nil {
202			pruned.parent = c
203			children = append(children, pruned)
204		}
205	}
206	c.children = children
207
208	if c.node != nil {
209		return c
210	}
211	switch len(c.children) {
212	case 0:
213		return nil
214	case 1:
215		child := c.children[0]
216		child.parent = c.parent
217		return child
218	default:
219		return c
220	}
221}
222
223func groupBySubject(roots []*container) []*container {
224	subjects := make(map[string]*container)
225	var grouped []*container
226	for _, root := range roots {
227		subject := firstSubject(root)
228		if subject == "" {
229			grouped = append(grouped, root)
230			continue
231		}
232		if existing := subjects[subject]; existing != nil {
233			link(existing, root)
234			continue
235		}
236		subjects[subject] = root
237		grouped = append(grouped, root)
238	}
239	return grouped
240}
241
242func firstSubject(c *container) string {
243	if c == nil {
244		return ""
245	}
246	if c.node != nil {
247		return canonicalSubject(c.node.Subject)
248	}
249	for _, child := range c.children {
250		if subject := firstSubject(child); subject != "" {
251			return subject
252		}
253	}
254	return ""
255}
256
257func sortContainer(c *container) {
258	for _, child := range c.children {
259		sortContainer(child)
260	}
261	sort.SliceStable(c.children, func(i, j int) bool {
262		a, b := c.children[i], c.children[j]
263		ad, bd := containerDate(a), containerDate(b)
264		if !ad.Equal(bd) {
265			return ad.Before(bd)
266		}
267		return containerKey(a) < containerKey(b)
268	})
269}
270
271func buildThread(root *container) Thread {
272	node := toThreadNode(root)
273	thread := Thread{Root: node, Subject: canonicalSubject(firstDisplaySubject(node))}
274	seenSenders := make(map[string]bool)
275	walkThread(node, &thread, seenSenders)
276	return thread
277}
278
279func toThreadNode(c *container) *ThreadNode {
280	node := &ThreadNode{}
281	if c.node != nil {
282		*node = *c.node
283		node.Children = nil
284	}
285	for _, child := range c.children {
286		node.Children = append(node.Children, toThreadNode(child))
287	}
288	return node
289}
290
291func walkThread(node *ThreadNode, thread *Thread, seenSenders map[string]bool) {
292	if node == nil {
293		return
294	}
295	if node.EmailID != "" {
296		thread.Count++
297		if node.Date.After(thread.LatestAt) {
298			thread.LatestAt = node.Date
299		}
300		if node.Sender != "" && !seenSenders[node.Sender] {
301			thread.Senders = append(thread.Senders, node.Sender)
302			seenSenders[node.Sender] = true
303		}
304	}
305	for _, child := range node.Children {
306		walkThread(child, thread, seenSenders)
307	}
308}
309
310func containerDate(c *container) time.Time {
311	if c == nil {
312		return time.Time{}
313	}
314	if c.node != nil {
315		return c.node.Date
316	}
317	var earliest time.Time
318	for _, child := range c.children {
319		date := containerDate(child)
320		if earliest.IsZero() || (!date.IsZero() && date.Before(earliest)) {
321			earliest = date
322		}
323	}
324	return earliest
325}
326
327func containerKey(c *container) string {
328	if c == nil {
329		return ""
330	}
331	if c.node != nil && c.node.EmailID != "" {
332		return c.node.EmailID
333	}
334	return c.id
335}
336
337func threadKey(n *ThreadNode) string {
338	if n == nil {
339		return ""
340	}
341	if n.EmailID != "" {
342		return n.EmailID
343	}
344	for _, child := range n.Children {
345		if key := threadKey(child); key != "" {
346			return key
347		}
348	}
349	return ""
350}
351
352func firstDisplaySubject(node *ThreadNode) string {
353	if node == nil {
354		return ""
355	}
356	if node.Subject != "" {
357		return node.Subject
358	}
359	for _, child := range node.Children {
360		if subject := firstDisplaySubject(child); subject != "" {
361			return subject
362		}
363	}
364	return ""
365}