fetch.go

  1package tools
  2
  3import (
  4	"context"
  5	"encoding/json"
  6	"fmt"
  7	"io"
  8	"net/http"
  9	"strings"
 10	"time"
 11
 12	md "github.com/JohannesKaufmann/html-to-markdown"
 13	"github.com/PuerkitoBio/goquery"
 14	"github.com/kujtimiihoxha/termai/internal/config"
 15	"github.com/kujtimiihoxha/termai/internal/permission"
 16)
 17
 18const (
 19	FetchToolName        = "fetch"
 20	fetchToolDescription = `Fetches content from a URL and returns it in the specified format.
 21
 22WHEN TO USE THIS TOOL:
 23- Use when you need to download content from a URL
 24- Helpful for retrieving documentation, API responses, or web content
 25- Useful for getting external information to assist with tasks
 26
 27HOW TO USE:
 28- Provide the URL to fetch content from
 29- Specify the desired output format (text, markdown, or html)
 30- Optionally set a timeout for the request
 31
 32FEATURES:
 33- Supports three output formats: text, markdown, and html
 34- Automatically handles HTTP redirects
 35- Sets reasonable timeouts to prevent hanging
 36- Validates input parameters before making requests
 37
 38LIMITATIONS:
 39- Maximum response size is 5MB
 40- Only supports HTTP and HTTPS protocols
 41- Cannot handle authentication or cookies
 42- Some websites may block automated requests
 43
 44TIPS:
 45- Use text format for plain text content or simple API responses
 46- Use markdown format for content that should be rendered with formatting
 47- Use html format when you need the raw HTML structure
 48- Set appropriate timeouts for potentially slow websites`
 49)
 50
 51type FetchParams struct {
 52	URL     string `json:"url"`
 53	Format  string `json:"format"`
 54	Timeout int    `json:"timeout,omitempty"`
 55}
 56
 57type FetchPermissionsParams struct {
 58	URL     string `json:"url"`
 59	Format  string `json:"format"`
 60	Timeout int    `json:"timeout,omitempty"`
 61}
 62
 63type fetchTool struct {
 64	client *http.Client
 65}
 66
 67func NewFetchTool() BaseTool {
 68	return &fetchTool{
 69		client: &http.Client{
 70			Timeout: 30 * time.Second,
 71		},
 72	}
 73}
 74
 75func (t *fetchTool) Info() ToolInfo {
 76	return ToolInfo{
 77		Name:        FetchToolName,
 78		Description: fetchToolDescription,
 79		Parameters: map[string]any{
 80			"url": map[string]any{
 81				"type":        "string",
 82				"description": "The URL to fetch content from",
 83			},
 84			"format": map[string]any{
 85				"type":        "string",
 86				"description": "The format to return the content in (text, markdown, or html)",
 87			},
 88			"timeout": map[string]any{
 89				"type":        "number",
 90				"description": "Optional timeout in seconds (max 120)",
 91			},
 92		},
 93		Required: []string{"url", "format"},
 94	}
 95}
 96
 97func (t *fetchTool) Run(ctx context.Context, call ToolCall) (ToolResponse, error) {
 98	var params FetchParams
 99	if err := json.Unmarshal([]byte(call.Input), &params); err != nil {
100		return NewTextErrorResponse("Failed to parse fetch parameters: " + err.Error()), nil
101	}
102
103	if params.URL == "" {
104		return NewTextErrorResponse("URL parameter is required"), nil
105	}
106
107	format := strings.ToLower(params.Format)
108	if format != "text" && format != "markdown" && format != "html" {
109		return NewTextErrorResponse("Format must be one of: text, markdown, html"), nil
110	}
111
112	if !strings.HasPrefix(params.URL, "http://") && !strings.HasPrefix(params.URL, "https://") {
113		return NewTextErrorResponse("URL must start with http:// or https://"), nil
114	}
115
116	p := permission.Default.Request(
117		permission.CreatePermissionRequest{
118			Path:        config.WorkingDirectory(),
119			ToolName:    FetchToolName,
120			Action:      "fetch",
121			Description: fmt.Sprintf("Fetch content from URL: %s", params.URL),
122			Params: FetchPermissionsParams{
123				URL:     params.URL,
124				Format:  params.Format,
125				Timeout: params.Timeout,
126			},
127		},
128	)
129
130	if !p {
131		return NewTextErrorResponse("Permission denied to fetch from URL: " + params.URL), nil
132	}
133
134	client := t.client
135	if params.Timeout > 0 {
136		maxTimeout := 120 // 2 minutes
137		if params.Timeout > maxTimeout {
138			params.Timeout = maxTimeout
139		}
140		client = &http.Client{
141			Timeout: time.Duration(params.Timeout) * time.Second,
142		}
143	}
144
145	req, err := http.NewRequestWithContext(ctx, "GET", params.URL, nil)
146	if err != nil {
147		return NewTextErrorResponse("Failed to create request: " + err.Error()), nil
148	}
149
150	req.Header.Set("User-Agent", "termai/1.0")
151
152	resp, err := client.Do(req)
153	if err != nil {
154		return NewTextErrorResponse("Failed to execute request: " + err.Error()), nil
155	}
156	defer resp.Body.Close()
157
158	if resp.StatusCode != http.StatusOK {
159		return NewTextErrorResponse(fmt.Sprintf("Request failed with status code: %d", resp.StatusCode)), nil
160	}
161
162	maxSize := int64(5 * 1024 * 1024) // 5MB
163	body, err := io.ReadAll(io.LimitReader(resp.Body, maxSize))
164	if err != nil {
165		return NewTextErrorResponse("Failed to read response body: " + err.Error()), nil
166	}
167
168	content := string(body)
169	contentType := resp.Header.Get("Content-Type")
170
171	switch format {
172	case "text":
173		if strings.Contains(contentType, "text/html") {
174			text, err := extractTextFromHTML(content)
175			if err != nil {
176				return NewTextErrorResponse("Failed to extract text from HTML: " + err.Error()), nil
177			}
178			return NewTextResponse(text), nil
179		}
180		return NewTextResponse(content), nil
181
182	case "markdown":
183		if strings.Contains(contentType, "text/html") {
184			markdown, err := convertHTMLToMarkdown(content)
185			if err != nil {
186				return NewTextErrorResponse("Failed to convert HTML to Markdown: " + err.Error()), nil
187			}
188			return NewTextResponse(markdown), nil
189		}
190
191		return NewTextResponse("```\n" + content + "\n```"), nil
192
193	case "html":
194		return NewTextResponse(content), nil
195
196	default:
197		return NewTextResponse(content), nil
198	}
199}
200
201func extractTextFromHTML(html string) (string, error) {
202	doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
203	if err != nil {
204		return "", err
205	}
206
207	text := doc.Text()
208	text = strings.Join(strings.Fields(text), " ")
209
210	return text, nil
211}
212
213func convertHTMLToMarkdown(html string) (string, error) {
214	converter := md.NewConverter("", true, nil)
215
216	markdown, err := converter.ConvertString(html)
217	if err != nil {
218		return "", err
219	}
220
221	return markdown, nil
222}
223