1package tools
2
3import (
4 "context"
5 "encoding/json"
6 "fmt"
7 "io"
8 "net/http"
9 "strings"
10 "time"
11
12 md "github.com/JohannesKaufmann/html-to-markdown"
13 "github.com/PuerkitoBio/goquery"
14 "github.com/kujtimiihoxha/termai/internal/config"
15 "github.com/kujtimiihoxha/termai/internal/permission"
16)
17
// FetchParams is the JSON payload decoded from a fetch tool call.
type FetchParams struct {
	// URL is the address to fetch.
	URL string `json:"url"`
	// Format names the representation the content should be returned in.
	Format string `json:"format"`
	// Timeout is an optional timeout in seconds; it is left out of the
	// encoded JSON when zero.
	Timeout int `json:"timeout,omitempty"`
}
23
// FetchPermissionsParams carries the details of a fetch call that are
// attached to the permission request for that call.
type FetchPermissionsParams struct {
	// URL is the address the tool wants to fetch.
	URL string `json:"url"`
	// Format is the output format that was requested.
	Format string `json:"format"`
	// Timeout is the requested timeout in seconds; it is left out of the
	// encoded JSON when zero.
	Timeout int `json:"timeout,omitempty"`
}
29
30type fetchTool struct {
31 client *http.Client
32 permissions permission.Service
33}
34
// FetchToolName is the name the fetch tool reports in its ToolInfo and in
// its permission requests.
const FetchToolName = "fetch"

// fetchToolDescription is the usage text returned as the tool description.
const fetchToolDescription = `Fetches content from a URL and returns it in the specified format.

WHEN TO USE THIS TOOL:
- Use when you need to download content from a URL
- Helpful for retrieving documentation, API responses, or web content
- Useful for getting external information to assist with tasks

HOW TO USE:
- Provide the URL to fetch content from
- Specify the desired output format (text, markdown, or html)
- Optionally set a timeout for the request

FEATURES:
- Supports three output formats: text, markdown, and html
- Automatically handles HTTP redirects
- Sets reasonable timeouts to prevent hanging
- Validates input parameters before making requests

LIMITATIONS:
- Maximum response size is 5MB
- Only supports HTTP and HTTPS protocols
- Cannot handle authentication or cookies
- Some websites may block automated requests

TIPS:
- Use text format for plain text content or simple API responses
- Use markdown format for content that should be rendered with formatting
- Use html format when you need the raw HTML structure
- Set appropriate timeouts for potentially slow websites`
67
68func NewFetchTool(permissions permission.Service) BaseTool {
69 return &fetchTool{
70 client: &http.Client{
71 Timeout: 30 * time.Second,
72 },
73 permissions: permissions,
74 }
75}
76
77func (t *fetchTool) Info() ToolInfo {
78 return ToolInfo{
79 Name: FetchToolName,
80 Description: fetchToolDescription,
81 Parameters: map[string]any{
82 "url": map[string]any{
83 "type": "string",
84 "description": "The URL to fetch content from",
85 },
86 "format": map[string]any{
87 "type": "string",
88 "description": "The format to return the content in (text, markdown, or html)",
89 },
90 "timeout": map[string]any{
91 "type": "number",
92 "description": "Optional timeout in seconds (max 120)",
93 },
94 },
95 Required: []string{"url", "format"},
96 }
97}
98
99func (t *fetchTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error) {
100 var params FetchParams
101 if err := json.Unmarshal([]byte(call.Input), ¶ms); err != nil {
102 return NewTextErrorResponse("Failed to parse fetch parameters: " + err.Error()), nil
103 }
104
105 if params.URL == "" {
106 return NewTextErrorResponse("URL parameter is required"), nil
107 }
108
109 format := strings.ToLower(params.Format)
110 if format != "text" && format != "markdown" && format != "html" {
111 return NewTextErrorResponse("Format must be one of: text, markdown, html"), nil
112 }
113
114 if !strings.HasPrefix(params.URL, "http://") && !strings.HasPrefix(params.URL, "https://") {
115 return NewTextErrorResponse("URL must start with http:// or https://"), nil
116 }
117
118 p := t.permissions.Request(
119 permission.CreatePermissionRequest{
120 Path: config.WorkingDirectory(),
121 ToolName: FetchToolName,
122 Action: "fetch",
123 Description: fmt.Sprintf("Fetch content from URL: %s", params.URL),
124 Params: FetchPermissionsParams{
125 URL: params.URL,
126 Format: params.Format,
127 Timeout: params.Timeout,
128 },
129 },
130 )
131
132 if !p {
133 return NewTextErrorResponse("Permission denied to fetch from URL: " + params.URL), nil
134 }
135
136 client := t.client
137 if params.Timeout > 0 {
138 maxTimeout := 120 // 2 minutes
139 if params.Timeout > maxTimeout {
140 params.Timeout = maxTimeout
141 }
142 client = &http.Client{
143 Timeout: time.Duration(params.Timeout) * time.Second,
144 }
145 }
146
147 req, err := http.NewRequestWithContext(ctx, "GET", params.URL, nil)
148 if err != nil {
149 return NewTextErrorResponse("Failed to create request: " + err.Error()), nil
150 }
151
152 req.Header.Set("User-Agent", "termai/1.0")
153
154 resp, err := client.Do(req)
155 if err != nil {
156 return NewTextErrorResponse("Failed to execute request: " + err.Error()), nil
157 }
158 defer resp.Body.Close()
159
160 if resp.StatusCode != http.StatusOK {
161 return NewTextErrorResponse(fmt.Sprintf("Request failed with status code: %d", resp.StatusCode)), nil
162 }
163
164 maxSize := int64(5 * 1024 * 1024) // 5MB
165 body, err := io.ReadAll(io.LimitReader(resp.Body, maxSize))
166 if err != nil {
167 return NewTextErrorResponse("Failed to read response body: " + err.Error()), nil
168 }
169
170 content := string(body)
171 contentType := resp.Header.Get("Content-Type")
172
173 switch format {
174 case "text":
175 if strings.Contains(contentType, "text/html") {
176 text, err := extractTextFromHTML(content)
177 if err != nil {
178 return NewTextErrorResponse("Failed to extract text from HTML: " + err.Error()), nil
179 }
180 return NewTextResponse(text), nil
181 }
182 return NewTextResponse(content), nil
183
184 case "markdown":
185 if strings.Contains(contentType, "text/html") {
186 markdown, err := convertHTMLToMarkdown(content)
187 if err != nil {
188 return NewTextErrorResponse("Failed to convert HTML to Markdown: " + err.Error()), nil
189 }
190 return NewTextResponse(markdown), nil
191 }
192
193 return NewTextResponse("```\n" + content + "\n```"), nil
194
195 case "html":
196 return NewTextResponse(content), nil
197
198 default:
199 return NewTextResponse(content), nil
200 }
201}
202
203func extractTextFromHTML(html string) (string, error) {
204 doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
205 if err != nil {
206 return "", err
207 }
208
209 text := doc.Text()
210 text = strings.Join(strings.Fields(text), " ")
211
212 return text, nil
213}
214
215func convertHTMLToMarkdown(html string) (string, error) {
216 converter := md.NewConverter("", true, nil)
217
218 markdown, err := converter.ConvertString(html)
219 if err != nil {
220 return "", err
221 }
222
223 return markdown, nil
224}