agent: Fix crash with pathological fetch output (#34253)

Cole Miller created

Closes #34029

The crash is due to a stack overflow in our `html_to_markdown`
conversion; I've added a maximum depth of 200 for the recursion in that
crate, so content nested deeper than that is skipped instead of
overflowing the stack.

Separately, we were treating all content-types other than `text/plain`
and `application/json` as HTML; I've changed this to treat only
`text/html` and `application/xhtml+xml` as HTML, and to fall back to
plaintext for everything else (`application/json` is still handled as
JSON). (In the original crash, the content-type was
`application/octet-stream`.)

Release Notes:

- agent: Fixed a potential crash when fetching large non-HTML files.

Change summary

crates/assistant_tools/src/fetch_tool.rs       | 5 ++---
crates/html_to_markdown/src/markdown_writer.rs | 6 ++++--
2 files changed, 6 insertions(+), 5 deletions(-)

Detailed changes

crates/assistant_tools/src/fetch_tool.rs 🔗

@@ -69,10 +69,9 @@ impl FetchTool {
             .to_str()
             .context("invalid Content-Type header")?;
         let content_type = match content_type {
-            "text/html" => ContentType::Html,
-            "text/plain" => ContentType::Plaintext,
+            "text/html" | "application/xhtml+xml" => ContentType::Html,
             "application/json" => ContentType::Json,
-            _ => ContentType::Html,
+            _ => ContentType::Plaintext,
         };
 
         match content_type {

crates/html_to_markdown/src/markdown_writer.rs 🔗

@@ -119,8 +119,10 @@ impl MarkdownWriter {
                 .push_back(current_element.clone());
         }
 
-        for child in node.children.borrow().iter() {
-            self.visit_node(child, handlers)?;
+        if self.current_element_stack.len() < 200 {
+            for child in node.children.borrow().iter() {
+                self.visit_node(child, handlers)?;
+            }
         }
 
         if let Some(current_element) = current_element {