1//go:build cgo
2
3package clib
4
5/*
6#include "htmlconv.h"
7#include <stdlib.h>
8*/
9import "C"
10import "unsafe"
11
12// HTMLToElements parses HTML and returns structured elements.
13// This is a single-pass C parser that replaces goquery-based DOM parsing.
14func HTMLToElements(html string) ([]HTMLElement, bool) {
15 if len(html) == 0 {
16 return nil, true
17 }
18
19 cHTML := C.CString(html)
20 defer C.free(unsafe.Pointer(cHTML))
21
22 result := C.html_to_elements(cHTML, C.size_t(len(html)))
23 if result.ok == 0 {
24 return nil, false
25 }
26 defer C.free_html_result(&result) //nolint:gocritic
27
28 count := int(result.count)
29 if count == 0 {
30 return nil, true
31 }
32
33 elements := make([]HTMLElement, count)
34
35 // Access the C array via pointer arithmetic.
36 cElems := (*[1 << 20]C.HTMLElement)(unsafe.Pointer(result.elements))[:count:count]
37
38 for i := 0; i < count; i++ {
39 ce := &cElems[i]
40 elements[i] = HTMLElement{
41 Type: int(ce._type),
42 }
43 if ce.text != nil {
44 elements[i].Text = C.GoString(ce.text)
45 }
46 if ce.attr1 != nil {
47 elements[i].Attr1 = C.GoString(ce.attr1)
48 }
49 if ce.attr2 != nil {
50 elements[i].Attr2 = C.GoString(ce.attr2)
51 }
52 }
53
54 return elements, true
55}