Change summary
crates/assistant/src/slash_command/fetch_command.rs | 1
crates/html_to_markdown/src/markdown.rs | 24 +++++++++++++++
2 files changed, 25 insertions(+)
Detailed changes
@@ -62,6 +62,7 @@ impl FetchSlashCommand {
match content_type {
ContentType::Html => {
let mut handlers: Vec<TagHandler> = vec![
+ Rc::new(RefCell::new(markdown::WebpageChromeRemover)),
Rc::new(RefCell::new(markdown::ParagraphHandler)),
Rc::new(RefCell::new(markdown::HeadingHandler)),
Rc::new(RefCell::new(markdown::ListHandler)),
@@ -1,6 +1,30 @@
use crate::html_element::HtmlElement;
use crate::markdown_writer::{HandleTag, HandlerOutcome, MarkdownWriter, StartTagOutcome};
+/// Tag handler that returns [`StartTagOutcome::Skip`] for `head`, `script`, `style`, and `nav`.
+pub struct WebpageChromeRemover;
+
+impl HandleTag for WebpageChromeRemover {
+    /// Returns `true` only for the tag names this handler skips.
+    fn should_handle(&self, tag: &str) -> bool {
+        matches!(tag, "head" | "script" | "style" | "nav")
+    }
+
+    /// Returns `Skip` for any tag accepted by `should_handle` (one shared tag list),
+    /// and `Continue` for every other tag; the writer is never touched.
+    fn handle_tag_start(
+        &mut self,
+        tag: &HtmlElement,
+        _writer: &mut MarkdownWriter,
+    ) -> StartTagOutcome {
+        if self.should_handle(tag.tag()) {
+            StartTagOutcome::Skip
+        } else {
+            StartTagOutcome::Continue
+        }
+    }
+}
+
pub struct ParagraphHandler;
impl HandleTag for ParagraphHandler {