fetch.go

  1package tools
  2
  3import (
  4	"context"
  5	"encoding/json"
  6	"fmt"
  7	"io"
  8	"net/http"
  9	"strings"
 10	"time"
 11
 12	md "github.com/JohannesKaufmann/html-to-markdown"
 13	"github.com/PuerkitoBio/goquery"
 14	"github.com/kujtimiihoxha/termai/internal/config"
 15	"github.com/kujtimiihoxha/termai/internal/permission"
 16)
 17
// FetchParams holds the arguments of a fetch tool call. Run decodes the
// call's JSON input into this struct.
type FetchParams struct {
	// URL is the address to fetch. Run rejects it unless it is non-empty and
	// starts with "http://" or "https://".
	URL     string `json:"url"`
	// Format selects the output shape. Run lower-cases it and accepts only
	// "text", "markdown" or "html".
	Format  string `json:"format"`
	// Timeout is an optional request timeout in seconds. Run applies it only
	// when it is positive and caps it at 120.
	Timeout int    `json:"timeout,omitempty"`
}
 23
// FetchPermissionsParams is the payload Run attaches as Params to the
// permission request it makes before fetching. Its fields are copied from the
// FetchParams of the call.
type FetchPermissionsParams struct {
	// URL is the address the tool was asked to fetch.
	URL     string `json:"url"`
	// Format is the requested output format, exactly as supplied by the caller.
	Format  string `json:"format"`
	// Timeout is the requested timeout in seconds, exactly as supplied by the
	// caller (before Run caps it).
	Timeout int    `json:"timeout,omitempty"`
}
 29
// fetchTool is the tool that downloads a URL and returns its content as text,
// Markdown or HTML.
type fetchTool struct {
	// client performs the HTTP requests when the call does not supply its own
	// timeout.
	client      *http.Client
	// permissions is asked for approval before any request is sent.
	permissions permission.Service
}
 34
const (
	// FetchToolName is the name the fetch tool reports in Info and uses as
	// ToolName in its permission requests.
	FetchToolName        = "fetch"
	// fetchToolDescription is the usage text returned as the Description in
	// Info.
	fetchToolDescription = `Fetches content from a URL and returns it in the specified format.

WHEN TO USE THIS TOOL:
- Use when you need to download content from a URL
- Helpful for retrieving documentation, API responses, or web content
- Useful for getting external information to assist with tasks

HOW TO USE:
- Provide the URL to fetch content from
- Specify the desired output format (text, markdown, or html)
- Optionally set a timeout for the request

FEATURES:
- Supports three output formats: text, markdown, and html
- Automatically handles HTTP redirects
- Sets reasonable timeouts to prevent hanging
- Validates input parameters before making requests

LIMITATIONS:
- Maximum response size is 5MB
- Only supports HTTP and HTTPS protocols
- Cannot handle authentication or cookies
- Some websites may block automated requests

TIPS:
- Use text format for plain text content or simple API responses
- Use markdown format for content that should be rendered with formatting
- Use html format when you need the raw HTML structure
- Set appropriate timeouts for potentially slow websites`
)
 67
 68func NewFetchTool(permissions permission.Service) BaseTool {
 69	return &fetchTool{
 70		client: &http.Client{
 71			Timeout: 30 * time.Second,
 72		},
 73		permissions: permissions,
 74	}
 75}
 76
 77func (t *fetchTool) Info() ToolInfo {
 78	return ToolInfo{
 79		Name:        FetchToolName,
 80		Description: fetchToolDescription,
 81		Parameters: map[string]any{
 82			"url": map[string]any{
 83				"type":        "string",
 84				"description": "The URL to fetch content from",
 85			},
 86			"format": map[string]any{
 87				"type":        "string",
 88				"description": "The format to return the content in (text, markdown, or html)",
 89			},
 90			"timeout": map[string]any{
 91				"type":        "number",
 92				"description": "Optional timeout in seconds (max 120)",
 93			},
 94		},
 95		Required: []string{"url", "format"},
 96	}
 97}
 98
 99func (t *fetchTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error) {
100	var params FetchParams
101	if err := json.Unmarshal([]byte(call.Input), &params); err != nil {
102		return NewTextErrorResponse("Failed to parse fetch parameters: " + err.Error()), nil
103	}
104
105	if params.URL == "" {
106		return NewTextErrorResponse("URL parameter is required"), nil
107	}
108
109	format := strings.ToLower(params.Format)
110	if format != "text" && format != "markdown" && format != "html" {
111		return NewTextErrorResponse("Format must be one of: text, markdown, html"), nil
112	}
113
114	if !strings.HasPrefix(params.URL, "http://") && !strings.HasPrefix(params.URL, "https://") {
115		return NewTextErrorResponse("URL must start with http:// or https://"), nil
116	}
117
118	p := t.permissions.Request(
119		permission.CreatePermissionRequest{
120			Path:        config.WorkingDirectory(),
121			ToolName:    FetchToolName,
122			Action:      "fetch",
123			Description: fmt.Sprintf("Fetch content from URL: %s", params.URL),
124			Params: FetchPermissionsParams{
125				URL:     params.URL,
126				Format:  params.Format,
127				Timeout: params.Timeout,
128			},
129		},
130	)
131
132	if !p {
133		return NewTextErrorResponse("Permission denied to fetch from URL: " + params.URL), nil
134	}
135
136	client := t.client
137	if params.Timeout > 0 {
138		maxTimeout := 120 // 2 minutes
139		if params.Timeout > maxTimeout {
140			params.Timeout = maxTimeout
141		}
142		client = &http.Client{
143			Timeout: time.Duration(params.Timeout) * time.Second,
144		}
145	}
146
147	req, err := http.NewRequestWithContext(ctx, "GET", params.URL, nil)
148	if err != nil {
149		return NewTextErrorResponse("Failed to create request: " + err.Error()), nil
150	}
151
152	req.Header.Set("User-Agent", "termai/1.0")
153
154	resp, err := client.Do(req)
155	if err != nil {
156		return NewTextErrorResponse("Failed to execute request: " + err.Error()), nil
157	}
158	defer resp.Body.Close()
159
160	if resp.StatusCode != http.StatusOK {
161		return NewTextErrorResponse(fmt.Sprintf("Request failed with status code: %d", resp.StatusCode)), nil
162	}
163
164	maxSize := int64(5 * 1024 * 1024) // 5MB
165	body, err := io.ReadAll(io.LimitReader(resp.Body, maxSize))
166	if err != nil {
167		return NewTextErrorResponse("Failed to read response body: " + err.Error()), nil
168	}
169
170	content := string(body)
171	contentType := resp.Header.Get("Content-Type")
172
173	switch format {
174	case "text":
175		if strings.Contains(contentType, "text/html") {
176			text, err := extractTextFromHTML(content)
177			if err != nil {
178				return NewTextErrorResponse("Failed to extract text from HTML: " + err.Error()), nil
179			}
180			return NewTextResponse(text), nil
181		}
182		return NewTextResponse(content), nil
183
184	case "markdown":
185		if strings.Contains(contentType, "text/html") {
186			markdown, err := convertHTMLToMarkdown(content)
187			if err != nil {
188				return NewTextErrorResponse("Failed to convert HTML to Markdown: " + err.Error()), nil
189			}
190			return NewTextResponse(markdown), nil
191		}
192
193		return NewTextResponse("```\n" + content + "\n```"), nil
194
195	case "html":
196		return NewTextResponse(content), nil
197
198	default:
199		return NewTextResponse(content), nil
200	}
201}
202
203func extractTextFromHTML(html string) (string, error) {
204	doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
205	if err != nil {
206		return "", err
207	}
208
209	text := doc.Text()
210	text = strings.Join(strings.Fields(text), " ")
211
212	return text, nil
213}
214
215func convertHTMLToMarkdown(html string) (string, error) {
216	converter := md.NewConverter("", true, nil)
217
218	markdown, err := converter.ConvertString(html)
219	if err != nil {
220		return "", err
221	}
222
223	return markdown, nil
224}