1package threading
2
3import (
4 "regexp"
5 "sort"
6 "strings"
7 "time"
8)
9
10type EmailHeader struct {
11 ID string
12 InReplyTo string
13 References []string
14 Subject string
15 Date time.Time
16 EmailID string
17 Sender string
18}
19
20type Thread struct {
21 Root *ThreadNode
22 LatestAt time.Time
23 Count int
24 Subject string
25 Senders []string
26}
27
28type ThreadNode struct {
29 EmailID string
30 Children []*ThreadNode
31 Date time.Time
32 Sender string
33 Subject string
34}
35
36type container struct {
37 id string
38 node *ThreadNode
39 parent *container
40 children []*container
41}
42
43var messageIDRE = regexp.MustCompile(`<[^>]+>`)
44
45func Build(headers []EmailHeader) []Thread {
46 containers := make(map[string]*container)
47 ordered := make([]*container, 0, len(headers))
48
49 get := func(id string) *container {
50 if c := containers[id]; c != nil {
51 return c
52 }
53 c := &container{id: id}
54 containers[id] = c
55 ordered = append(ordered, c)
56 return c
57 }
58
59 for _, h := range headers {
60 msgID := normalizeMessageID(h.ID)
61 if msgID == "" {
62 msgID = "email:" + h.EmailID
63 }
64 c := get(msgID)
65 if c.node != nil {
66 msgID = msgID + "#email:" + h.EmailID
67 c = get(msgID)
68 }
69 c.node = &ThreadNode{
70 EmailID: h.EmailID,
71 Date: h.Date,
72 Sender: h.Sender,
73 Subject: h.Subject,
74 }
75
76 var prev *container
77 refs := normalizeReferences(h.References)
78 for _, ref := range refs {
79 refc := get(ref)
80 if prev != nil {
81 link(prev, refc)
82 }
83 prev = refc
84 }
85
86 parentID := normalizeMessageID(h.InReplyTo)
87 if parentID == "" && len(refs) > 0 {
88 parentID = refs[len(refs)-1]
89 }
90 if parentID != "" {
91 link(get(parentID), c)
92 }
93 }
94
95 var roots []*container
96 for _, c := range ordered {
97 if c.parent == nil {
98 if root := prune(c); root != nil {
99 roots = append(roots, root)
100 }
101 }
102 }
103 roots = groupBySubject(roots)
104
105 threads := make([]Thread, 0, len(roots))
106 for _, root := range roots {
107 sortContainer(root)
108 thread := buildThread(root)
109 if thread.Count > 0 {
110 threads = append(threads, thread)
111 }
112 }
113
114 sort.SliceStable(threads, func(i, j int) bool {
115 if !threads[i].LatestAt.Equal(threads[j].LatestAt) {
116 return threads[i].LatestAt.After(threads[j].LatestAt)
117 }
118 return threadKey(threads[i].Root) < threadKey(threads[j].Root)
119 })
120
121 return threads
122}
123
124func normalizeReferences(refs []string) []string {
125 seen := make(map[string]bool)
126 var out []string
127 for _, ref := range refs {
128 for _, id := range extractMessageIDs(ref) {
129 if !seen[id] {
130 out = append(out, id)
131 seen[id] = true
132 }
133 }
134 }
135 return out
136}
137
138func extractMessageIDs(s string) []string {
139 matches := messageIDRE.FindAllString(s, -1)
140 if len(matches) == 0 {
141 if id := normalizeMessageID(s); id != "" {
142 return []string{id}
143 }
144 return nil
145 }
146 ids := make([]string, 0, len(matches))
147 for _, match := range matches {
148 if id := normalizeMessageID(match); id != "" {
149 ids = append(ids, id)
150 }
151 }
152 return ids
153}
154
155func normalizeMessageID(id string) string {
156 id = strings.TrimSpace(id)
157 if id == "" {
158 return ""
159 }
160 if matches := messageIDRE.FindAllString(id, -1); len(matches) > 0 {
161 id = matches[len(matches)-1]
162 }
163 id = strings.TrimSpace(id)
164 id = strings.TrimPrefix(id, "<")
165 id = strings.TrimSuffix(id, ">")
166 id = strings.TrimSpace(id)
167 return strings.ToLower(id)
168}
169
170func link(parent, child *container) {
171 if parent == nil || child == nil || parent == child {
172 return
173 }
174 if child.parent != nil || child.hasDescendant(parent) {
175 return
176 }
177 child.parent = parent
178 for _, existing := range parent.children {
179 if existing == child {
180 return
181 }
182 }
183 parent.children = append(parent.children, child)
184}
185
186func (c *container) hasDescendant(target *container) bool {
187 for _, child := range c.children {
188 if child == target || child.hasDescendant(target) {
189 return true
190 }
191 }
192 return false
193}
194
195func prune(c *container) *container {
196 if c == nil {
197 return nil
198 }
199 var children []*container
200 for _, child := range c.children {
201 if pruned := prune(child); pruned != nil {
202 pruned.parent = c
203 children = append(children, pruned)
204 }
205 }
206 c.children = children
207
208 if c.node != nil {
209 return c
210 }
211 switch len(c.children) {
212 case 0:
213 return nil
214 case 1:
215 child := c.children[0]
216 child.parent = c.parent
217 return child
218 default:
219 return c
220 }
221}
222
223func groupBySubject(roots []*container) []*container {
224 subjects := make(map[string]*container)
225 var grouped []*container
226 for _, root := range roots {
227 subject := firstSubject(root)
228 if subject == "" {
229 grouped = append(grouped, root)
230 continue
231 }
232 if existing := subjects[subject]; existing != nil {
233 link(existing, root)
234 continue
235 }
236 subjects[subject] = root
237 grouped = append(grouped, root)
238 }
239 return grouped
240}
241
242func firstSubject(c *container) string {
243 if c == nil {
244 return ""
245 }
246 if c.node != nil {
247 return canonicalSubject(c.node.Subject)
248 }
249 for _, child := range c.children {
250 if subject := firstSubject(child); subject != "" {
251 return subject
252 }
253 }
254 return ""
255}
256
257func sortContainer(c *container) {
258 for _, child := range c.children {
259 sortContainer(child)
260 }
261 sort.SliceStable(c.children, func(i, j int) bool {
262 a, b := c.children[i], c.children[j]
263 ad, bd := containerDate(a), containerDate(b)
264 if !ad.Equal(bd) {
265 return ad.Before(bd)
266 }
267 return containerKey(a) < containerKey(b)
268 })
269}
270
271func buildThread(root *container) Thread {
272 node := toThreadNode(root)
273 thread := Thread{Root: node, Subject: canonicalSubject(firstDisplaySubject(node))}
274 seenSenders := make(map[string]bool)
275 walkThread(node, &thread, seenSenders)
276 return thread
277}
278
279func toThreadNode(c *container) *ThreadNode {
280 node := &ThreadNode{}
281 if c.node != nil {
282 *node = *c.node
283 node.Children = nil
284 }
285 for _, child := range c.children {
286 node.Children = append(node.Children, toThreadNode(child))
287 }
288 return node
289}
290
291func walkThread(node *ThreadNode, thread *Thread, seenSenders map[string]bool) {
292 if node == nil {
293 return
294 }
295 if node.EmailID != "" {
296 thread.Count++
297 if node.Date.After(thread.LatestAt) {
298 thread.LatestAt = node.Date
299 }
300 if node.Sender != "" && !seenSenders[node.Sender] {
301 thread.Senders = append(thread.Senders, node.Sender)
302 seenSenders[node.Sender] = true
303 }
304 }
305 for _, child := range node.Children {
306 walkThread(child, thread, seenSenders)
307 }
308}
309
310func containerDate(c *container) time.Time {
311 if c == nil {
312 return time.Time{}
313 }
314 if c.node != nil {
315 return c.node.Date
316 }
317 var earliest time.Time
318 for _, child := range c.children {
319 date := containerDate(child)
320 if earliest.IsZero() || (!date.IsZero() && date.Before(earliest)) {
321 earliest = date
322 }
323 }
324 return earliest
325}
326
327func containerKey(c *container) string {
328 if c == nil {
329 return ""
330 }
331 if c.node != nil && c.node.EmailID != "" {
332 return c.node.EmailID
333 }
334 return c.id
335}
336
337func threadKey(n *ThreadNode) string {
338 if n == nil {
339 return ""
340 }
341 if n.EmailID != "" {
342 return n.EmailID
343 }
344 for _, child := range n.Children {
345 if key := threadKey(child); key != "" {
346 return key
347 }
348 }
349 return ""
350}
351
352func firstDisplaySubject(node *ThreadNode) string {
353 if node == nil {
354 return ""
355 }
356 if node.Subject != "" {
357 return node.Subject
358 }
359 for _, child := range node.Children {
360 if subject := firstDisplaySubject(child); subject != "" {
361 return subject
362 }
363 }
364 return ""
365}