htmlconv.go

 1//go:build cgo
 2
 3package clib
 4
 5/*
 6#include "htmlconv.h"
 7#include <stdlib.h>
 8*/
 9import "C"
10import "unsafe"
11
12// HTMLToElements parses HTML and returns structured elements.
13// This is a single-pass C parser that replaces goquery-based DOM parsing.
14func HTMLToElements(html string) ([]HTMLElement, bool) {
15	if len(html) == 0 {
16		return nil, true
17	}
18
19	cHTML := C.CString(html)
20	defer C.free(unsafe.Pointer(cHTML))
21
22	result := C.html_to_elements(cHTML, C.size_t(len(html)))
23	if result.ok == 0 {
24		return nil, false
25	}
26	defer C.free_html_result(&result) //nolint:gocritic
27
28	count := int(result.count)
29	if count == 0 {
30		return nil, true
31	}
32
33	elements := make([]HTMLElement, count)
34
35	// Access the C array via pointer arithmetic.
36	cElems := (*[1 << 20]C.HTMLElement)(unsafe.Pointer(result.elements))[:count:count]
37
38	for i := 0; i < count; i++ {
39		ce := &cElems[i]
40		elements[i] = HTMLElement{
41			Type: int(ce._type),
42		}
43		if ce.text != nil {
44			elements[i].Text = C.GoString(ce.text)
45		}
46		if ce.attr1 != nil {
47			elements[i].Attr1 = C.GoString(ce.attr1)
48		}
49		if ce.attr2 != nil {
50			elements[i].Attr2 = C.GoString(ce.attr2)
51		}
52	}
53
54	return elements, true
55}