Detailed changes
@@ -8355,6 +8355,7 @@ dependencies = [
"anyhow",
"assets",
"env_logger 0.11.8",
+ "file_icons",
"gpui",
"language",
"languages",
@@ -8366,6 +8367,7 @@ dependencies = [
"theme",
"ui",
"util",
+ "workspace",
"workspace-hack",
]
@@ -9,6 +9,7 @@ You should only perform actions that modify the user's system if explicitly requ
- If the user asks a question about how to accomplish a task, provide guidance or information, and use read-only tools (e.g., search) to assist. You may suggest potential actions, but do not directly modify the userβs system without explicit instruction.
- If the user clearly requests that you perform an action, carry out the action directly without explaining why you are doing so.
+<style>
Editing code:
- Make sure to take previous edits into account.
- The edits you perform might lead to errors or warnings. At the end of your changes, check whether you introduced any problems, and fix them before providing a summary of the changes you made.
@@ -34,6 +35,106 @@ Responding:
For example, don't say "Now I'm going to check diagnostics to see if there are any warnings or errors," followed by running a tool which checks diagnostics and reports warnings or errors; instead, just request the tool call without saying anything.
- All tool results are provided to you automatically, so DO NOT thank the user when this happens.
+Whenever you mention a code block, you MUST use ONLY use the following format:
+
+```language path/to/Something.blah#L123-456
+(code goes here)
+```
+
+The `#L123-456` means the line number range 123 through 456, and the path/to/Something.blah
+is a path in the project. (If there is no valid path in the project, then you can use
+/dev/null/path.extension for its path.) This is the ONLY valid way to format code blocks, because the Markdown parser
+does not understand the more common ```language syntax, or bare ``` blocks. It only
+understands this path-based syntax, and if the path is missing, then it will error and you will have to do it over again.
+
+Just to be really clear about this, if you ever find yourself writing three backticks followed by a language name, STOP!
+You have made a mistake. You can only ever put paths after triple backticks!
+
+<example>
+Based on all the information I've gathered, here's a summary of how this system works:
+1. The README file is loaded into the system.
+2. The system finds the first two headers, including everything in between. In this case, that would be:
+
+```path/to/README.md#L8-12
+# First Header
+
+This is the info under the first header.
+
+## Sub-header
+```
+
+3. Then the system finds the last header in the README:
+
+```path/to/README.md#L27-29
+## Last Header
+
+This is the last header in the README.
+```
+
+4. Finally, it passes this information on to the next process.
+</example>
+
+<example>
+In Markdown, hash marks signify headings. For example:
+
+```/dev/null/example.md#L1-3
+# Level 1 heading
+## Level 2 heading
+### Level 3 heading
+```
+</example>
+
+Here are examples of ways you must never render code blocks:
+
+<bad_example_do_not_do_this>
+In Markdown, hash marks signify headings. For example:
+
+```
+# Level 1 heading
+## Level 2 heading
+### Level 3 heading
+```
+</bad_example_do_not_do_this>
+
+This example is unacceptable because it does not include the path.
+
+<bad_example_do_not_do_this>
+In Markdown, hash marks signify headings. For example:
+
+```markdown
+# Level 1 heading
+## Level 2 heading
+### Level 3 heading
+```
+</bad_example_do_not_do_this>
+
+This example is unacceptable because it has the language instead of the path.
+
+<bad_example_do_not_do_this>
+In Markdown, hash marks signify headings. For example:
+
+ # Level 1 heading
+ ## Level 2 heading
+ ### Level 3 heading
+</bad_example_do_not_do_this>
+
+This example is unacceptable because it uses indentation to mark the code block
+instead of backticks with a path.
+
+<bad_example_do_not_do_this>
+In Markdown, hash marks signify headings. For example:
+
+```markdown
+/dev/null/example.md#L1-3
+# Level 1 heading
+## Level 2 heading
+### Level 3 heading
+```
+</bad_example_do_not_do_this>
+
+This example is unacceptable because the path is in the wrong place. The path must be directly after the opening backticks.
+</style>
+
The user has opened a project that contains the following root directories/files. Whenever you specify a path in the project, it must be a relative path which begins with one of these root directories/files:
{{#each worktrees}}
@@ -20,6 +20,7 @@ test-support = [
[dependencies]
anyhow.workspace = true
+file_icons.workspace = true
gpui.workspace = true
language.workspace = true
linkify.workspace = true
@@ -28,6 +29,7 @@ pulldown-cmark.workspace = true
theme.workspace = true
ui.workspace = true
util.workspace = true
+workspace.workspace = true
workspace-hack.workspace = true
[dev-dependencies]
@@ -1,9 +1,12 @@
pub mod parser;
+mod path_range;
+use file_icons::FileIcons;
use std::collections::{HashMap, HashSet};
use std::iter;
use std::mem;
use std::ops::Range;
+use std::path::PathBuf;
use std::rc::Rc;
use std::sync::Arc;
use std::time::Duration;
@@ -19,8 +22,9 @@ use language::{Language, LanguageRegistry, Rope};
use parser::{MarkdownEvent, MarkdownTag, MarkdownTagEnd, parse_links_only, parse_markdown};
use pulldown_cmark::Alignment;
use theme::SyntaxTheme;
-use ui::{Tooltip, prelude::*};
+use ui::{ButtonLike, Tooltip, prelude::*};
use util::{ResultExt, TryFutureExt};
+use workspace::Workspace;
use crate::parser::CodeBlockKind;
@@ -210,13 +214,15 @@ impl Markdown {
return anyhow::Ok(ParsedMarkdown {
events: Arc::from(parse_links_only(source.as_ref())),
source,
- languages: HashMap::default(),
+ languages_by_name: HashMap::default(),
+ languages_by_path: HashMap::default(),
});
}
- let (events, language_names) = parse_markdown(&source);
- let mut languages = HashMap::with_capacity(language_names.len());
- for name in language_names {
- if let Some(registry) = language_registry.as_ref() {
+ let (events, language_names, paths) = parse_markdown(&source);
+ let mut languages_by_name = HashMap::with_capacity(language_names.len());
+ let mut languages_by_path = HashMap::with_capacity(paths.len());
+ if let Some(registry) = language_registry.as_ref() {
+ for name in language_names {
let language = if !name.is_empty() {
registry.language_for_name(&name)
} else if let Some(fallback) = &fallback {
@@ -225,14 +231,21 @@ impl Markdown {
continue;
};
if let Ok(language) = language.await {
- languages.insert(name, language);
+ languages_by_name.insert(name, language);
+ }
+ }
+
+ for path in paths {
+ if let Ok(language) = registry.language_for_file_path(&path).await {
+ languages_by_path.insert(path, language);
}
}
}
anyhow::Ok(ParsedMarkdown {
source,
events: Arc::from(events),
- languages,
+ languages_by_name,
+ languages_by_path,
})
});
@@ -308,7 +321,8 @@ impl Selection {
pub struct ParsedMarkdown {
source: SharedString,
events: Arc<[(Range<usize>, MarkdownEvent)]>,
- languages: HashMap<SharedString, Arc<Language>>,
+ languages_by_name: HashMap<SharedString, Arc<Language>>,
+ languages_by_path: HashMap<PathBuf, Arc<Language>>,
}
impl ParsedMarkdown {
@@ -574,7 +588,9 @@ impl Element for MarkdownElement {
} else {
0
};
- for (range, event) in parsed_markdown.events.iter() {
+
+ let code_citation_id = SharedString::from("code-citation-link");
+ for (index, (range, event)) in parsed_markdown.events.iter().enumerate() {
match event {
MarkdownEvent::Start(tag) => {
match tag {
@@ -613,10 +629,102 @@ impl Element for MarkdownElement {
);
}
MarkdownTag::CodeBlock(kind) => {
- let language = if let CodeBlockKind::Fenced(language) = kind {
- parsed_markdown.languages.get(language).cloned()
- } else {
- None
+ let language = match kind {
+ CodeBlockKind::Fenced => None,
+ CodeBlockKind::FencedLang(language) => {
+ parsed_markdown.languages_by_name.get(language).cloned()
+ }
+ CodeBlockKind::FencedSrc(path_range) => {
+ // If the path actually exists in the project, render a link to it.
+ if let Some(project_path) =
+ window.root::<Workspace>().flatten().and_then(|workspace| {
+ workspace
+ .read(cx)
+ .project()
+ .read(cx)
+ .find_project_path(&path_range.path, cx)
+ })
+ {
+ builder.flush_text();
+
+ builder.push_div(
+ div().relative().w_full(),
+ range,
+ markdown_end,
+ );
+
+ builder.modify_current_div(|el| {
+ let file_icon =
+ FileIcons::get_icon(&project_path.path, cx)
+ .map(|path| {
+ Icon::from_path(path)
+ .color(Color::Muted)
+ .into_any_element()
+ })
+ .unwrap_or_else(|| {
+ IconButton::new(
+ "file-path-icon",
+ IconName::File,
+ )
+ .shape(ui::IconButtonShape::Square)
+ .into_any_element()
+ });
+
+ el.child(
+ ButtonLike::new(ElementId::NamedInteger(
+ code_citation_id.clone(),
+ index,
+ ))
+ .child(
+ div()
+ .mb_1()
+ .flex()
+ .items_center()
+ .gap_1()
+ .child(file_icon)
+ .child(
+ Label::new(
+ project_path
+ .path
+ .display()
+ .to_string(),
+ )
+ .color(Color::Muted)
+ .underline(),
+ ),
+ )
+ .on_click({
+ let click_path = project_path.clone();
+ move |_, window, cx| {
+ if let Some(workspace) =
+ window.root::<Workspace>().flatten()
+ {
+ workspace.update(cx, |workspace, cx| {
+ workspace
+ .open_path(
+ click_path.clone(),
+ None,
+ true,
+ window,
+ cx,
+ )
+ .detach_and_log_err(cx);
+ })
+ }
+ }
+ }),
+ )
+ });
+
+ builder.pop_div();
+ }
+
+ parsed_markdown
+ .languages_by_path
+ .get(&path_range.path)
+ .cloned()
+ }
+ _ => None,
};
// This is a parent container that we can position the copy button inside.
@@ -7,8 +7,11 @@ use pulldown_cmark::{
use std::{
collections::HashSet,
ops::{Deref, Range},
+ path::PathBuf,
};
+use crate::path_range::PathRange;
+
const PARSE_OPTIONS: Options = Options::ENABLE_TABLES
.union(Options::ENABLE_FOOTNOTES)
.union(Options::ENABLE_STRIKETHROUGH)
@@ -19,9 +22,16 @@ const PARSE_OPTIONS: Options = Options::ENABLE_TABLES
.union(Options::ENABLE_OLD_FOOTNOTES)
.union(Options::ENABLE_GFM);
-pub fn parse_markdown(text: &str) -> (Vec<(Range<usize>, MarkdownEvent)>, HashSet<SharedString>) {
+pub fn parse_markdown(
+ text: &str,
+) -> (
+ Vec<(Range<usize>, MarkdownEvent)>,
+ HashSet<SharedString>,
+ HashSet<PathBuf>,
+) {
let mut events = Vec::new();
- let mut languages = HashSet::new();
+ let mut language_names = HashSet::new();
+ let mut language_paths = HashSet::new();
let mut within_link = false;
let mut within_metadata = false;
for (pulldown_event, mut range) in Parser::new_ext(text, PARSE_OPTIONS).into_offset_iter() {
@@ -35,17 +45,46 @@ pub fn parse_markdown(text: &str) -> (Vec<(Range<usize>, MarkdownEvent)>, HashSe
}
match pulldown_event {
pulldown_cmark::Event::Start(tag) => {
- match tag {
- pulldown_cmark::Tag::Link { .. } => within_link = true,
- pulldown_cmark::Tag::MetadataBlock { .. } => within_metadata = true,
+ let tag = match tag {
+ pulldown_cmark::Tag::Link {
+ link_type,
+ dest_url,
+ title,
+ id,
+ } => {
+ within_link = true;
+ MarkdownTag::Link {
+ link_type,
+ dest_url: SharedString::from(dest_url.into_string()),
+ title: SharedString::from(title.into_string()),
+ id: SharedString::from(id.into_string()),
+ }
+ }
+ pulldown_cmark::Tag::MetadataBlock(kind) => {
+ within_metadata = true;
+ MarkdownTag::MetadataBlock(kind)
+ }
pulldown_cmark::Tag::CodeBlock(pulldown_cmark::CodeBlockKind::Fenced(
- ref language,
+ ref info,
)) => {
- languages.insert(SharedString::from(language.to_string()));
+ let info = info.trim();
+ MarkdownTag::CodeBlock(if info.is_empty() {
+ CodeBlockKind::Fenced
+ // Languages should never contain a slash, and PathRanges always should.
+ // (Models are told to specify them relative to a workspace root.)
+ } else if info.contains('/') {
+ let path_range = PathRange::new(info);
+ language_paths.insert(path_range.path.clone());
+ CodeBlockKind::FencedSrc(path_range)
+ } else {
+ let language = SharedString::from(info.to_string());
+ language_names.insert(language.clone());
+ CodeBlockKind::FencedLang(language)
+ })
}
- _ => {}
- }
- events.push((range, MarkdownEvent::Start(tag.into())))
+ tag => tag.into(),
+ };
+ events.push((range, MarkdownEvent::Start(tag)))
}
pulldown_cmark::Event::End(tag) => {
if let pulldown_cmark::TagEnd::Link = tag {
@@ -129,7 +168,7 @@ pub fn parse_markdown(text: &str) -> (Vec<(Range<usize>, MarkdownEvent)>, HashSe
pulldown_cmark::Event::InlineMath(_) | pulldown_cmark::Event::DisplayMath(_) => {}
}
}
- (events, languages)
+ (events, language_names, language_paths)
}
pub fn parse_links_only(text: &str) -> Vec<(Range<usize>, MarkdownEvent)> {
@@ -287,8 +326,13 @@ pub enum MarkdownTag {
#[derive(Clone, Debug, PartialEq)]
pub enum CodeBlockKind {
Indented,
- /// The value contained in the tag describes the language of the code, which may be empty.
- Fenced(SharedString),
+ /// "Fenced" means "surrounded by triple backticks."
+ /// There can optionally be either a language after the backticks (like in traditional Markdown)
+ /// or, if an agent is specifying a path for a source location in the project, it can be a PathRange,
+ /// e.g. ```path/to/foo.rs#L123-456 instead of ```rust
+ Fenced,
+ FencedLang(SharedString),
+ FencedSrc(PathRange),
}
impl From<pulldown_cmark::Tag<'_>> for MarkdownTag {
@@ -327,9 +371,18 @@ impl From<pulldown_cmark::Tag<'_>> for MarkdownTag {
pulldown_cmark::CodeBlockKind::Indented => {
MarkdownTag::CodeBlock(CodeBlockKind::Indented)
}
- pulldown_cmark::CodeBlockKind::Fenced(info) => MarkdownTag::CodeBlock(
- CodeBlockKind::Fenced(SharedString::from(info.into_string())),
- ),
+ pulldown_cmark::CodeBlockKind::Fenced(info) => {
+ let info = info.trim();
+ MarkdownTag::CodeBlock(if info.is_empty() {
+ CodeBlockKind::Fenced
+ } else if info.contains('/') {
+ // Languages should never contain a slash, and PathRanges always should.
+ // (Models are told to specify them relative to a workspace root.)
+ CodeBlockKind::FencedSrc(PathRange::new(info))
+ } else {
+ CodeBlockKind::FencedLang(SharedString::from(info.to_string()))
+ })
+ }
},
pulldown_cmark::Tag::List(start_number) => MarkdownTag::List(start_number),
pulldown_cmark::Tag::Item => MarkdownTag::Item,
@@ -488,6 +541,7 @@ mod tests {
(46..51, Text),
(0..51, End(MarkdownTagEnd::Paragraph))
],
+ HashSet::new(),
HashSet::new()
)
);
@@ -515,6 +569,7 @@ mod tests {
(41..42, SubstitutedText("β".into())),
(0..42, End(MarkdownTagEnd::Paragraph))
],
+ HashSet::new(),
HashSet::new()
)
)
@@ -0,0 +1,222 @@
+use std::{ops::Range, path::PathBuf};
+
+#[derive(Debug, Clone, PartialEq)]
+pub struct PathRange {
+ pub path: PathBuf,
+ pub range: Option<Range<LineCol>>,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct LineCol {
+ pub line: u32,
+ pub col: Option<u32>,
+}
+
+impl LineCol {
+ pub fn new(str: impl AsRef<str>) -> Option<Self> {
+ let str = str.as_ref();
+ match str.split_once(':') {
+ Some((line, col)) => match (line.parse::<u32>(), col.parse::<u32>()) {
+ (Ok(line), Ok(col)) => Some(Self {
+ line,
+ col: Some(col),
+ }),
+ _ => None,
+ },
+ None => match str.parse::<u32>() {
+ Ok(line) => Some(Self { line, col: None }),
+ Err(_) => None,
+ },
+ }
+ }
+}
+
+impl PathRange {
+ pub fn new(str: impl AsRef<str>) -> Self {
+ let str = str.as_ref();
+ // Sometimes the model will include a language at the start,
+ // e.g. "```rust zed/crates/markdown/src/markdown.rs#L1"
+ // We just discard that.
+ let str = match str.trim_end().rfind(' ') {
+ Some(space) => &str[space + 1..],
+ None => str.trim_start(),
+ };
+
+ match str.rsplit_once('#') {
+ Some((path, after_hash)) => {
+ // Be tolerant to the model omitting the "L" prefix, lowercasing it,
+ // or including it more than once.
+ let after_hash = after_hash.replace(['L', 'l'], "");
+
+ let range = {
+ let mut iter = after_hash.split('-').flat_map(LineCol::new);
+ iter.next()
+ .map(|start| iter.next().map(|end| start..end).unwrap_or(start..start))
+ };
+
+ Self {
+ path: PathBuf::from(path),
+ range,
+ }
+ }
+ None => Self {
+ path: str.into(),
+ range: None,
+ },
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_linecol_parsing() {
+ let line_col = LineCol::new("10:5");
+ assert_eq!(
+ line_col,
+ Some(LineCol {
+ line: 10,
+ col: Some(5)
+ })
+ );
+
+ let line_only = LineCol::new("42");
+ assert_eq!(
+ line_only,
+ Some(LineCol {
+ line: 42,
+ col: None
+ })
+ );
+
+ assert_eq!(LineCol::new(""), None);
+ assert_eq!(LineCol::new("not a number"), None);
+ assert_eq!(LineCol::new("10:not a number"), None);
+ assert_eq!(LineCol::new("not:5"), None);
+ }
+
+ #[test]
+ fn test_pathrange_parsing() {
+ let path_range = PathRange::new("file.rs#L10-L20");
+ assert_eq!(path_range.path, PathBuf::from("file.rs"));
+ assert!(path_range.range.is_some());
+ if let Some(range) = path_range.range {
+ assert_eq!(range.start.line, 10);
+ assert_eq!(range.start.col, None);
+ assert_eq!(range.end.line, 20);
+ assert_eq!(range.end.col, None);
+ }
+
+ let single_line = PathRange::new("file.rs#L15");
+ assert_eq!(single_line.path, PathBuf::from("file.rs"));
+ assert!(single_line.range.is_some());
+ if let Some(range) = single_line.range {
+ assert_eq!(range.start.line, 15);
+ assert_eq!(range.end.line, 15);
+ }
+
+ let no_range = PathRange::new("file.rs");
+ assert_eq!(no_range.path, PathBuf::from("file.rs"));
+ assert!(no_range.range.is_none());
+
+ let lowercase = PathRange::new("file.rs#l5-l10");
+ assert_eq!(lowercase.path, PathBuf::from("file.rs"));
+ assert!(lowercase.range.is_some());
+ if let Some(range) = lowercase.range {
+ assert_eq!(range.start.line, 5);
+ assert_eq!(range.end.line, 10);
+ }
+
+ let complex = PathRange::new("src/path/to/file.rs#L100");
+ assert_eq!(complex.path, PathBuf::from("src/path/to/file.rs"));
+ assert!(complex.range.is_some());
+ }
+
+ #[test]
+ fn test_pathrange_from_str() {
+ let with_range = PathRange::new("file.rs#L10-L20");
+ assert!(with_range.range.is_some());
+ assert_eq!(with_range.path, PathBuf::from("file.rs"));
+
+ let without_range = PathRange::new("file.rs");
+ assert!(without_range.range.is_none());
+
+ let single_line = PathRange::new("file.rs#L15");
+ assert!(single_line.range.is_some());
+ }
+
+ #[test]
+ fn test_pathrange_leading_text_trimming() {
+ let with_language = PathRange::new("```rust file.rs#L10");
+ assert_eq!(with_language.path, PathBuf::from("file.rs"));
+ assert!(with_language.range.is_some());
+ if let Some(range) = with_language.range {
+ assert_eq!(range.start.line, 10);
+ }
+
+ let with_spaces = PathRange::new("``` file.rs#L10-L20");
+ assert_eq!(with_spaces.path, PathBuf::from("file.rs"));
+ assert!(with_spaces.range.is_some());
+
+ let with_words = PathRange::new("```rust code example file.rs#L15:10");
+ assert_eq!(with_words.path, PathBuf::from("file.rs"));
+ assert!(with_words.range.is_some());
+ if let Some(range) = with_words.range {
+ assert_eq!(range.start.line, 15);
+ assert_eq!(range.start.col, Some(10));
+ }
+
+ let with_whitespace = PathRange::new(" file.rs#L5");
+ assert_eq!(with_whitespace.path, PathBuf::from("file.rs"));
+ assert!(with_whitespace.range.is_some());
+
+ let no_leading = PathRange::new("file.rs#L10");
+ assert_eq!(no_leading.path, PathBuf::from("file.rs"));
+ assert!(no_leading.range.is_some());
+ }
+
+ #[test]
+ fn test_pathrange_with_line_and_column() {
+ let line_and_col = PathRange::new("file.rs#L10:5");
+ assert_eq!(line_and_col.path, PathBuf::from("file.rs"));
+ assert!(line_and_col.range.is_some());
+ if let Some(range) = line_and_col.range {
+ assert_eq!(range.start.line, 10);
+ assert_eq!(range.start.col, Some(5));
+ assert_eq!(range.end.line, 10);
+ assert_eq!(range.end.col, Some(5));
+ }
+
+ let full_range = PathRange::new("file.rs#L10:5-L20:15");
+ assert_eq!(full_range.path, PathBuf::from("file.rs"));
+ assert!(full_range.range.is_some());
+ if let Some(range) = full_range.range {
+ assert_eq!(range.start.line, 10);
+ assert_eq!(range.start.col, Some(5));
+ assert_eq!(range.end.line, 20);
+ assert_eq!(range.end.col, Some(15));
+ }
+
+ let mixed_range1 = PathRange::new("file.rs#L10:5-L20");
+ assert_eq!(mixed_range1.path, PathBuf::from("file.rs"));
+ assert!(mixed_range1.range.is_some());
+ if let Some(range) = mixed_range1.range {
+ assert_eq!(range.start.line, 10);
+ assert_eq!(range.start.col, Some(5));
+ assert_eq!(range.end.line, 20);
+ assert_eq!(range.end.col, None);
+ }
+
+ let mixed_range2 = PathRange::new("file.rs#L10-L20:15");
+ assert_eq!(mixed_range2.path, PathBuf::from("file.rs"));
+ assert!(mixed_range2.range.is_some());
+ if let Some(range) = mixed_range2.range {
+ assert_eq!(range.start.line, 10);
+ assert_eq!(range.start.col, None);
+ assert_eq!(range.end.line, 20);
+ assert_eq!(range.end.col, Some(15));
+ }
+ }
+}