//! This module defines the format in which the memory of a debuggee is represented.
//!
//! Each byte in memory can either be mapped or unmapped. We try to mimic that with two assumptions:
//! - The memory is divided into pages of a fixed size.
//! - Each page can be either mapped or unmapped.
//!
//! These two assumptions drive the shape of the memory representation.
//! In particular, we want unmapped pages to be represented without allocating any memory, as *most*
//! of the memory in a program's address space is usually unmapped.
//! Note that per DAP we don't know what the address space layout is, so we can't optimize based on it.
//! Note also that while we optimize for a paged layout, we want to be able to represent memory that is not paged;
//! this use case is relevant to embedded folks. Furthermore, we default to a 4 KiB page size,
//! picked arbitrarily as a ubiquitous default. Other than that, the underlying format of Zed's memory
//! storage should not be relevant to the users of this module.
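//!
//! For example, with the default 4 KiB pages, the page containing a given address is found by
//! rounding the address down to a multiple of `PAGE_SIZE`; this is the same computation that
//! `Memory::memory_range_to_page_range` performs for both endpoints of a queried range:
//!
//! ```ignore
//! let page_start = (address / PAGE_SIZE) * PAGE_SIZE; // e.g. 0x1234 -> 0x1000
//! ```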

use std::{collections::BTreeMap, ops::RangeInclusive, sync::Arc};

use gpui::BackgroundExecutor;
use smallvec::SmallVec;

const PAGE_SIZE: u64 = 4096;

/// Represents the contents of a single page. We special-case unmapped pages to be allocation-free,
/// since they make up the majority of the memory in a program's address space (even though the user might never get to see them).
#[derive(Clone, Debug)]
pub(super) enum PageContents {
    /// The whole page is unreadable.
    Unmapped,
    /// The page's contents, possibly with unreadable chunks interleaved.
    Mapped(Arc<MappedPageContents>),
}

impl PageContents {
    #[cfg(test)]
    fn mapped(contents: Vec<u8>) -> Self {
        PageContents::Mapped(Arc::new(MappedPageContents(
            vec![PageChunk::Mapped(contents.into())].into(),
        )))
    }
}

#[derive(Clone, Debug)]
enum PageChunk {
    /// A contiguous run of readable bytes.
    Mapped(Arc<[u8]>),
    /// A contiguous run of unreadable bytes; the payload is the length of the run.
    Unmapped(u64),
}

impl PageChunk {
    fn len(&self) -> u64 {
        match self {
            PageChunk::Mapped(contents) => contents.len() as u64,
            PageChunk::Unmapped(size) => *size,
        }
    }
}

/// We hope for the whole page to be mapped in a single chunk, but we leave open the possibility
/// of having interleaved read permissions within a single page; the debuggee's execution environment might
/// have a different page size, or it might not have a paged memory layout at all
/// (which might be relevant to embedded systems).
///
/// As stated previously, the concept of a page in this module has more to do
/// with optimizing the fetching of memory than with the underlying layout
/// of the debuggee's memory.
#[derive(Default, Debug)]
pub(super) struct MappedPageContents(
    /// Most of the time there should be only one chunk (either mapped or unmapped),
    /// but we leave open the possibility of having multiple regions of memory in a single page.
    SmallVec<[PageChunk; 1]>,
);

impl MappedPageContents {
    /// The total length, in bytes, of this page's chunks.
    fn len(&self) -> u64 {
        self.0.iter().map(|chunk| chunk.len()).sum()
    }
}

/// A raw address in the debuggee's address space.
type MemoryAddress = u64;

/// The address of the first byte of a page.
#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Ord, Eq)]
#[repr(transparent)]
pub(super) struct PageAddress(u64);

impl PageAddress {
    /// Iterates over the page addresses in `range`, stepping by `PAGE_SIZE`.
    pub(super) fn iter_range(
        range: RangeInclusive<PageAddress>,
    ) -> impl Iterator<Item = PageAddress> {
        let mut current = range.start().0;
        let end = range.end().0;

        std::iter::from_fn(move || {
            if current > end {
                None
            } else {
                let addr = PageAddress(current);
                current += PAGE_SIZE;
                Some(addr)
            }
        })
    }
}

/// A sparse map of the debuggee's memory, keyed by page address.
pub(super) struct Memory {
    pages: BTreeMap<PageAddress, PageContents>,
}

/// Represents a single memory cell (or `None` if a given cell is unmapped/unknown).
#[derive(Copy, Clone, Debug, PartialEq, PartialOrd, Ord, Eq)]
#[repr(transparent)]
pub struct MemoryCell(pub Option<u8>);

impl Memory {
    pub(super) fn new() -> Self {
        Self {
            pages: Default::default(),
        }
    }

    /// Rounds both endpoints of an address range down to the addresses of the pages containing them.
    pub(super) fn memory_range_to_page_range(
        range: RangeInclusive<MemoryAddress>,
    ) -> RangeInclusive<PageAddress> {
        let start_page = (range.start() / PAGE_SIZE) * PAGE_SIZE;
        let end_page = (range.end() / PAGE_SIZE) * PAGE_SIZE;
        PageAddress(start_page)..=PageAddress(end_page)
    }

    pub(super) fn build_page(&self, page_address: PageAddress) -> Option<MemoryPageBuilder> {
        if self.pages.contains_key(&page_address) {
            // We already know the state of this page.
            None
        } else {
            Some(MemoryPageBuilder::new(page_address))
        }
    }

    pub(super) fn insert_page(&mut self, address: PageAddress, page: PageContents) {
        self.pages.insert(address, page);
    }

    pub(super) fn memory_range(&self, range: RangeInclusive<MemoryAddress>) -> MemoryIterator {
        let pages = Self::memory_range_to_page_range(range.clone());
        let pages = self
            .pages
            .range(pages)
            .map(|(address, page)| (*address, page.clone()))
            .collect::<Vec<_>>();
        MemoryIterator::new(range, pages.into_iter())
    }

    pub(crate) fn clear(&mut self, background_executor: &BackgroundExecutor) {
        // Hand the (potentially large) page map off to the background executor
        // so that dropping it does not block the caller.
        let memory = std::mem::take(&mut self.pages);
        background_executor
            .spawn(async move {
                drop(memory);
            })
            .detach();
    }
}

/// Builder for memory pages.
///
/// Memory reads in DAP are sequential (or at least we make them so).
/// A `ReadMemory` response includes an `unreadableBytes` property indicating the number of bytes
/// that could not be read after the last successfully read byte.
///
/// We use it as follows:
/// - We start off with a "large" 1-page `ReadMemory` request.
/// - If it succeeds or fails wholesale, cool; we have no unknown memory regions in this page.
/// - If it succeeds partially, we know the number of mapped bytes,
///   and we might also know the number of unmapped bytes that follow them.
///   However, we're still unsure about what's *after* the unreadable region.
///
/// This is where this builder comes in: it tracks the state of figuring out the contents of a single page.
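///
/// A sketch of the intended flow, where `read_memory` is a stand-in for whatever issues the
/// actual `ReadMemory` request (it is not part of this module):
///
/// ```ignore
/// let mut builder = memory.build_page(page_address)?;
/// while let Some(request) = builder.next_request() {
///     match read_memory(request.address, request.size) {
///         Ok(bytes) => builder.known(bytes),
///         Err(unreadable_bytes) => builder.unknown(unreadable_bytes),
///     }
/// }
/// let (address, contents) = builder.build();
/// memory.insert_page(address, contents);
/// ```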
pub(super) struct MemoryPageBuilder {
    chunks: MappedPageContents,
    base_address: PageAddress,
    left_to_read: u64,
}

/// Represents a chunk of memory that we don't yet know to be mapped or unmapped; we need
/// to issue a request to figure out its state.
pub(super) struct UnknownMemory {
    pub(super) address: MemoryAddress,
    pub(super) size: u64,
}

impl MemoryPageBuilder {
    fn new(base_address: PageAddress) -> Self {
        Self {
            chunks: Default::default(),
            base_address,
            left_to_read: PAGE_SIZE,
        }
    }

    pub(super) fn build(self) -> (PageAddress, PageContents) {
        debug_assert_eq!(self.left_to_read, 0);
        debug_assert_eq!(
            self.chunks.len(),
            PAGE_SIZE,
            "Expected `build` to be called on a fully-fetched page"
        );
        // A page made up of a single page-sized unmapped chunk is represented
        // as `PageContents::Unmapped`, which requires no allocation.
        let contents = if let Some(first) = self.chunks.0.first()
            && self.chunks.0.len() == 1
            && matches!(first, PageChunk::Unmapped(PAGE_SIZE))
        {
            PageContents::Unmapped
        } else {
            PageContents::Mapped(Arc::new(MappedPageContents(self.chunks.0)))
        };
        (self.base_address, contents)
    }

    /// Drives the fetching of memory in an iterator-esque style.
    pub(super) fn next_request(&self) -> Option<UnknownMemory> {
        if self.left_to_read == 0 {
            None
        } else {
            let offset_in_current_page = PAGE_SIZE - self.left_to_read;
            Some(UnknownMemory {
                address: self.base_address.0 + offset_in_current_page,
                size: self.left_to_read,
            })
        }
    }

    pub(super) fn unknown(&mut self, bytes: u64) {
        if bytes == 0 {
            return;
        }
        self.left_to_read -= bytes;
        self.chunks.0.push(PageChunk::Unmapped(bytes));
    }

    pub(super) fn known(&mut self, data: Arc<[u8]>) {
        if data.is_empty() {
            return;
        }
        self.left_to_read -= data.len() as u64;
        self.chunks.0.push(PageChunk::Mapped(data));
    }
}

/// Flattens a page's chunks into per-byte cells; unmapped chunks yield `MemoryCell(None)` for each byte.
fn page_contents_into_iter(data: Arc<MappedPageContents>) -> Box<dyn Iterator<Item = MemoryCell>> {
    let mut chunk_indices = 0..data.0.len();
    let iter = std::iter::from_fn(move || {
        let data = data.clone();
        chunk_indices.next().map(move |index| match &data.0[index] {
            PageChunk::Mapped(items) => {
                let items = items.clone();
                Box::new((0..items.len()).map(move |ix| MemoryCell(Some(items[ix]))))
                    as Box<dyn Iterator<Item = MemoryCell>>
            }
            PageChunk::Unmapped(len) => {
                Box::new(std::iter::repeat_n(MemoryCell(None), *len as usize))
            }
        })
    })
    .flatten();

    Box::new(iter)
}
/// Defines an iteration over a range of memory. Some of this memory might be unmapped or missing altogether.
/// Thus, this iterator alternates between synthesizing placeholder values and yielding known memory.
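///
/// A sketch of the expected behavior (`memory` is assumed to hold a single mapped byte at
/// address 5; see `iterate_over_partially_mapped_memory` in the tests below for a full example):
///
/// ```ignore
/// let cells: Vec<MemoryCell> = memory.memory_range(0..=7).collect();
/// assert!(cells[5].0.is_some()); // the known byte
/// assert!(cells.iter().enumerate().all(|(ix, cell)| ix == 5 || cell.0.is_none()));
/// ```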
pub struct MemoryIterator {
    start: MemoryAddress,
    end: MemoryAddress,
    current_known_page: Option<(PageAddress, Box<dyn Iterator<Item = MemoryCell>>)>,
    pages: std::vec::IntoIter<(PageAddress, PageContents)>,
}

impl MemoryIterator {
    fn new(
        range: RangeInclusive<MemoryAddress>,
        pages: std::vec::IntoIter<(PageAddress, PageContents)>,
    ) -> Self {
        Self {
            start: *range.start(),
            end: *range.end(),
            current_known_page: None,
            pages,
        }
    }

    fn fetch_next_page(&mut self) -> bool {
        if let Some((mut address, chunk)) = self.pages.next() {
            let mut contents = match chunk {
                PageContents::Unmapped => None,
                PageContents::Mapped(mapped_page_contents) => {
                    Some(page_contents_into_iter(mapped_page_contents))
                }
            };

            if address.0 < self.start {
                // The page starts before the queried range; skip ahead until the
                // page's iterator is aligned with the start of the range.
                let to_skip = self.start - address.0;
                address.0 += to_skip;
                if let Some(contents) = &mut contents {
                    contents.nth(to_skip as usize - 1);
                }
            }
            self.current_known_page = contents.map(|contents| (address, contents));
            true
        } else {
            false
        }
    }
}

impl Iterator for MemoryIterator {
    type Item = MemoryCell;

    fn next(&mut self) -> Option<Self::Item> {
        if self.start > self.end {
            return None;
        }
        if let Some((current_page_address, current_memory_chunk)) = self.current_known_page.as_mut()
            && current_page_address.0 <= self.start
        {
            if let Some(next_cell) = current_memory_chunk.next() {
                self.start += 1;
                return Some(next_cell);
            } else {
                self.current_known_page.take();
            }
        }
        if !self.fetch_next_page() {
            // The list of known pages is exhausted; synthesize an unmapped cell.
            self.start += 1;
            Some(MemoryCell(None))
        } else {
            self.next()
        }
    }
}

#[cfg(test)]
mod tests {
    use crate::debugger::{
        MemoryCell,
        memory::{MemoryIterator, PageAddress, PageContents},
    };

    #[test]
    fn iterate_over_unmapped_memory() {
        let empty_iterator = MemoryIterator::new(0..=127, Default::default());
        let actual = empty_iterator.collect::<Vec<_>>();
        let expected = vec![MemoryCell(None); 128];
        assert_eq!(actual.len(), expected.len());
        assert_eq!(actual, expected);
    }

    #[test]
    fn iterate_over_partially_mapped_memory() {
        let it = MemoryIterator::new(
            0..=127,
            vec![(PageAddress(5), PageContents::mapped(vec![1]))].into_iter(),
        );
        let actual = it.collect::<Vec<_>>();
        let expected = std::iter::repeat_n(MemoryCell(None), 5)
            .chain(std::iter::once(MemoryCell(Some(1))))
            .chain(std::iter::repeat_n(MemoryCell(None), 122))
            .collect::<Vec<_>>();
        assert_eq!(actual.len(), expected.len());
        assert_eq!(actual, expected);
    }

    #[test]
    fn reads_from_the_middle_of_a_page() {
        let partial_iter = MemoryIterator::new(
            20..=30,
            vec![(PageAddress(0), PageContents::mapped((0..255).collect()))].into_iter(),
        );
        let actual = partial_iter.collect::<Vec<_>>();
        let expected = (20..=30)
            .map(|val| MemoryCell(Some(val)))
            .collect::<Vec<_>>();
        assert_eq!(actual.len(), expected.len());
        assert_eq!(actual, expected);
    }
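
    #[test]
    fn memory_range_rounds_to_page_boundaries() {
        // Illustrates the page-boundary rounding performed by
        // `memory_range_to_page_range`: both endpoints of the queried range
        // are rounded down to a multiple of PAGE_SIZE.
        let range = super::Memory::memory_range_to_page_range(5000..=10000);
        assert_eq!(*range.start(), PageAddress(super::PAGE_SIZE));
        assert_eq!(*range.end(), PageAddress(2 * super::PAGE_SIZE));
    }

    #[test]
    fn page_addresses_step_by_page_size() {
        // Illustrates `PageAddress::iter_range`: page addresses are visited in
        // PAGE_SIZE increments, inclusive of both endpoints.
        let pages =
            super::PageAddress::iter_range(PageAddress(0)..=PageAddress(2 * super::PAGE_SIZE))
                .collect::<Vec<_>>();
        assert_eq!(
            pages,
            vec![
                PageAddress(0),
                PageAddress(super::PAGE_SIZE),
                PageAddress(2 * super::PAGE_SIZE)
            ]
        );
    }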
}