// memory.rs

  1//! This module defines the format in which memory of debuggee is represented.
  2//!
  3//! Each byte in memory can either be mapped or unmapped. We try to mimic that twofold:
  4//! - We assume that the memory is divided into pages of a fixed size.
  5//! - We assume that each page can be either mapped or unmapped.
  6//! These two assumptions drive the shape of the memory representation.
  7//! In particular, we want the unmapped pages to be represented without allocating any memory, as *most*
  8//! of the memory in a program space is usually unmapped.
  9//! Note that per DAP we don't know what the address space layout is, so we can't optimize off of it.
 10//! Note that while we optimize for a paged layout, we also want to be able to represent memory that is not paged.
 11//! This use case is relevant to embedded folks. Furthermore, we cater to default 4k page size.
//! It is picked arbitrarily as a ubiquitous default - other than that, the underlying format of Zed's memory storage should not be relevant
 13//! to the users of this module.
 14
 15use std::{collections::BTreeMap, ops::RangeInclusive, sync::Arc};
 16
 17use gpui::BackgroundExecutor;
 18use smallvec::SmallVec;
 19
 20const PAGE_SIZE: u64 = 4096;
 21
/// Represents the contents of a single page. We special-case unmapped pages to be allocation-free,
/// since they're going to make up the majority of the memory in a program space (even though the user might not even get to see them - ever).
#[derive(Clone, Debug)]
pub(super) enum PageContents {
    /// Whole page is unreadable.
    Unmapped,
    /// At least part of the page has been read; the chunk layout lives in [`MappedPageContents`].
    Mapped(Arc<MappedPageContents>),
}
 30
 31impl PageContents {
 32    #[cfg(test)]
 33    fn mapped(contents: Vec<u8>) -> Self {
 34        PageContents::Mapped(Arc::new(MappedPageContents(
 35            vec![PageChunk::Mapped(contents.into())].into(),
 36        )))
 37    }
 38}
 39
/// A contiguous run of bytes within a single page that is either fully readable or fully unreadable.
#[derive(Clone, Debug)]
enum PageChunk {
    /// Bytes successfully read from the debuggee.
    Mapped(Arc<[u8]>),
    /// A run of unreadable bytes; the payload is the length of the run in bytes.
    Unmapped(u64),
}
 45
 46impl PageChunk {
 47    fn len(&self) -> u64 {
 48        match self {
 49            PageChunk::Mapped(contents) => contents.len() as u64,
 50            PageChunk::Unmapped(size) => *size,
 51        }
 52    }
 53}
 54
 55impl MappedPageContents {
 56    fn len(&self) -> u64 {
 57        self.0.iter().map(|chunk| chunk.len()).sum()
 58    }
 59}
/// We hope for the whole page to be mapped in a single chunk, but we do leave the possibility open
/// of having interleaved read permissions in a single page; debuggee's execution environment might either
/// have a different page size OR it might not have paged memory layout altogether
/// (which might be relevant to embedded systems).
///
/// As stated previously, the concept of a page in this module has to do more
/// with optimizing fetching of the memory and not with the underlying bits and pieces
/// of the memory of a debuggee.
#[derive(Default, Debug)]
pub(super) struct MappedPageContents(
    /// Most of the time there should be only one chunk (either mapped or unmapped),
    /// but we do leave the possibility open of having multiple regions of memory in a single page.
    /// Chunks are stored in address order; their lengths sum to the page's byte count.
    SmallVec<[PageChunk; 1]>,
);
 75
/// A raw address within the debuggee's address space.
type MemoryAddress = u64;
/// Address used as the key for a page.
/// NOTE(review): produced page-aligned by `Memory::memory_range_to_page_range`,
/// but nothing enforces alignment at construction (tests build unaligned values).
#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Ord, Eq)]
#[repr(transparent)]
pub(super) struct PageAddress(u64);
 80
 81impl PageAddress {
 82    pub(super) fn iter_range(
 83        range: RangeInclusive<PageAddress>,
 84    ) -> impl Iterator<Item = PageAddress> {
 85        let mut current = range.start().0;
 86        let end = range.end().0;
 87
 88        std::iter::from_fn(move || {
 89            if current > end {
 90                None
 91            } else {
 92                let addr = PageAddress(current);
 93                current += PAGE_SIZE;
 94                Some(addr)
 95            }
 96        })
 97    }
 98}
 99
/// Sparse store of the debuggee's memory: only pages whose state has been
/// determined (mapped or unmapped) are present; absent pages are simply unknown.
pub(super) struct Memory {
    // Keyed by page base address; BTreeMap keeps pages ordered for range queries.
    pages: BTreeMap<PageAddress, PageContents>,
}
103
/// Represents a single memory cell (or None if a given cell is unmapped/unknown).
///
/// `Some(byte)` is a successfully-read byte; `None` means unreadable or not-yet-fetched.
#[derive(Copy, Clone, Debug, PartialEq, PartialOrd, Ord, Eq)]
#[repr(transparent)]
pub struct MemoryCell(pub Option<u8>);
108
109impl Memory {
110    pub(super) fn new() -> Self {
111        Self {
112            pages: Default::default(),
113        }
114    }
115
116    pub(super) fn memory_range_to_page_range(
117        range: RangeInclusive<MemoryAddress>,
118    ) -> RangeInclusive<PageAddress> {
119        let start_page = (range.start() / PAGE_SIZE) * PAGE_SIZE;
120        let end_page = (range.end() / PAGE_SIZE) * PAGE_SIZE;
121        PageAddress(start_page)..=PageAddress(end_page)
122    }
123
124    pub(super) fn build_page(&self, page_address: PageAddress) -> Option<MemoryPageBuilder> {
125        if self.pages.contains_key(&page_address) {
126            // We already know the state of this page.
127            None
128        } else {
129            Some(MemoryPageBuilder::new(page_address))
130        }
131    }
132
133    pub(super) fn insert_page(&mut self, address: PageAddress, page: PageContents) {
134        self.pages.insert(address, page);
135    }
136
137    pub(super) fn memory_range(&self, range: RangeInclusive<MemoryAddress>) -> MemoryIterator {
138        let pages = Self::memory_range_to_page_range(range.clone());
139        let pages = self
140            .pages
141            .range(pages)
142            .map(|(address, page)| (*address, page.clone()))
143            .collect::<Vec<_>>();
144        MemoryIterator::new(range, pages.into_iter())
145    }
146
147    pub(crate) fn clear(&mut self, background_executor: &BackgroundExecutor) {
148        let memory = std::mem::take(&mut self.pages);
149        background_executor
150            .spawn(async move {
151                drop(memory);
152            })
153            .detach();
154    }
155}
156
/// Builder for memory pages.
///
/// Memory reads in DAP are sequential (or at least we make them so).
/// ReadMemory response includes `unreadableBytes` property indicating the number of bytes
/// that could not be read after the last successfully read byte.
///
/// We use it as follows:
/// - We start off with a "large" 1-page ReadMemory request.
/// - If it succeeds/fails wholesale, cool; we have no unknown memory regions in this page.
/// - If it succeeds partially, we know # of mapped bytes.
///   We might also know the # of unmapped bytes.
/// However, we're still unsure about what's *after* the unreadable region.
///
/// This is where this builder comes in. It lets us track the state of figuring out contents of a single page.
pub(super) struct MemoryPageBuilder {
    // Chunks gathered so far, in address order.
    chunks: MappedPageContents,
    // Address of the first byte of the page being built.
    base_address: PageAddress,
    // Bytes of this page not yet accounted for; starts at PAGE_SIZE, reaches 0 when done.
    left_to_read: u64,
}

/// Represents a chunk of memory of which we don't know if it's mapped or unmapped; thus we need
/// to issue a request to figure out its state.
pub(super) struct UnknownMemory {
    pub(super) address: MemoryAddress,
    pub(super) size: u64,
}
183
184impl MemoryPageBuilder {
185    fn new(base_address: PageAddress) -> Self {
186        Self {
187            chunks: Default::default(),
188            base_address,
189            left_to_read: PAGE_SIZE,
190        }
191    }
192
193    pub(super) fn build(self) -> (PageAddress, PageContents) {
194        debug_assert_eq!(self.left_to_read, 0);
195        debug_assert_eq!(
196            self.chunks.len(),
197            PAGE_SIZE,
198            "Expected `build` to be called on a fully-fetched page"
199        );
200        let contents = if let Some(first) = self.chunks.0.first()
201            && self.chunks.len() == 1
202            && matches!(first, PageChunk::Unmapped(PAGE_SIZE))
203        {
204            PageContents::Unmapped
205        } else {
206            PageContents::Mapped(Arc::new(MappedPageContents(self.chunks.0)))
207        };
208        (self.base_address, contents)
209    }
210    /// Drives the fetching of memory, in an iterator-esque style.
211    pub(super) fn next_request(&self) -> Option<UnknownMemory> {
212        if self.left_to_read == 0 {
213            None
214        } else {
215            let offset_in_current_page = PAGE_SIZE - self.left_to_read;
216            Some(UnknownMemory {
217                address: self.base_address.0 + offset_in_current_page,
218                size: self.left_to_read,
219            })
220        }
221    }
222    pub(super) fn unknown(&mut self, bytes: u64) {
223        if bytes == 0 {
224            return;
225        }
226        self.left_to_read -= bytes;
227        self.chunks.0.push(PageChunk::Unmapped(bytes));
228    }
229    pub(super) fn known(&mut self, data: Arc<[u8]>) {
230        if data.is_empty() {
231            return;
232        }
233        self.left_to_read -= data.len() as u64;
234        self.chunks.0.push(PageChunk::Mapped(data));
235    }
236}
237
238fn page_contents_into_iter(data: Arc<MappedPageContents>) -> Box<dyn Iterator<Item = MemoryCell>> {
239    let mut data_range = 0..data.0.len();
240    let iter = std::iter::from_fn(move || {
241        let data = &data;
242        let data_ref = data.clone();
243        data_range.next().map(move |index| {
244            let contents = &data_ref.0[index];
245            match contents {
246                PageChunk::Mapped(items) => {
247                    let chunk_range = 0..items.len();
248                    let items = items.clone();
249                    Box::new(
250                        chunk_range
251                            .into_iter()
252                            .map(move |ix| MemoryCell(Some(items[ix]))),
253                    ) as Box<dyn Iterator<Item = MemoryCell>>
254                }
255                PageChunk::Unmapped(len) => {
256                    Box::new(std::iter::repeat_n(MemoryCell(None), *len as usize))
257                }
258            }
259        })
260    })
261    .flatten();
262
263    Box::new(iter)
264}
/// Defines an iteration over a range of memory. Some of this memory might be unmapped or straight up missing.
/// Thus, this iterator alternates between synthesizing values and yielding known memory.
pub struct MemoryIterator {
    // Next address to yield; advances by one per returned cell.
    start: MemoryAddress,
    // Last address to yield (inclusive).
    end: MemoryAddress,
    // Page currently being drained, if any: (page address, iterator over its cells).
    current_known_page: Option<(PageAddress, Box<dyn Iterator<Item = MemoryCell>>)>,
    // Remaining known pages, in ascending address order (they come from a BTreeMap range).
    pages: std::vec::IntoIter<(PageAddress, PageContents)>,
}
273
impl MemoryIterator {
    /// Creates an iterator over `range`, backed by the known pages overlapping that range.
    fn new(
        range: RangeInclusive<MemoryAddress>,
        pages: std::vec::IntoIter<(PageAddress, PageContents)>,
    ) -> Self {
        Self {
            start: *range.start(),
            end: *range.end(),
            current_known_page: None,
            pages,
        }
    }
    /// Pulls the next known page off `pages` and installs its cell iterator as
    /// `current_known_page`. Returns `false` when there are no more known pages
    /// (in which case `current_known_page` is left untouched).
    fn fetch_next_page(&mut self) -> bool {
        if let Some((mut address, chunk)) = self.pages.next() {
            let mut contents = match chunk {
                PageContents::Unmapped => None,
                PageContents::Mapped(mapped_page_contents) => {
                    Some(page_contents_into_iter(mapped_page_contents))
                }
            };

            if address.0 < self.start {
                // The page begins before the requested range (e.g. page address 20,
                // range start 25); fast-forward the cell iterator so it starts
                // yielding at `self.start`.
                let to_skip = self.start - address.0;
                address.0 += to_skip;
                if let Some(contents) = &mut contents {
                    // `nth(k)` consumes k + 1 items, hence the `- 1` (to_skip >= 1 here).
                    contents.nth(to_skip as usize - 1);
                }
            }
            // An unmapped page installs `None`; its cells are synthesized by the caller.
            self.current_known_page = contents.map(|contents| (address, contents));
            true
        } else {
            false
        }
    }
}
313impl Iterator for MemoryIterator {
314    type Item = MemoryCell;
315
316    fn next(&mut self) -> Option<Self::Item> {
317        if self.start > self.end {
318            return None;
319        }
320        if let Some((current_page_address, current_memory_chunk)) = self.current_known_page.as_mut()
321        {
322            if current_page_address.0 <= self.start {
323                if let Some(next_cell) = current_memory_chunk.next() {
324                    self.start += 1;
325                    return Some(next_cell);
326                } else {
327                    self.current_known_page.take();
328                }
329            }
330        }
331        if !self.fetch_next_page() {
332            self.start += 1;
333            return Some(MemoryCell(None));
334        } else {
335            self.next()
336        }
337    }
338}
339
#[cfg(test)]
mod tests {
    use crate::debugger::{
        MemoryCell,
        memory::{MemoryIterator, PageAddress, PageContents},
    };

    #[test]
    fn iterate_over_unmapped_memory() {
        // With no known pages, every cell in the range is synthesized as unknown.
        let cells = MemoryIterator::new(0..=127, Default::default()).collect::<Vec<_>>();
        let expected = vec![MemoryCell(None); 128];
        assert_eq!(cells.len(), expected.len());
        assert_eq!(cells, expected);
    }

    #[test]
    fn iterate_over_partially_mapped_memory() {
        // A single known byte at address 5; the rest of the range is unknown.
        let pages = vec![(PageAddress(5), PageContents::mapped(vec![1]))];
        let cells = MemoryIterator::new(0..=127, pages.into_iter()).collect::<Vec<_>>();
        let mut expected = vec![MemoryCell(None); 128];
        expected[5] = MemoryCell(Some(1));
        assert_eq!(cells.len(), expected.len());
        assert_eq!(cells, expected);
    }

    #[test]
    fn reads_from_the_middle_of_a_page() {
        // The page starts before the requested range; iteration must skip ahead.
        let pages = vec![(PageAddress(0), PageContents::mapped((0..255).collect()))];
        let cells = MemoryIterator::new(20..=30, pages.into_iter()).collect::<Vec<_>>();
        let expected = (20..=30)
            .map(|byte| MemoryCell(Some(byte)))
            .collect::<Vec<_>>();
        assert_eq!(cells.len(), expected.len());
        assert_eq!(cells, expected);
    }
}