memory.rs

  1//! This module defines the format in which memory of debuggee is represented.
  2//!
  3//! Each byte in memory can either be mapped or unmapped. We try to mimic that twofold:
  4//! - We assume that the memory is divided into pages of a fixed size.
  5//! - We assume that each page can be either mapped or unmapped.
  6//!
  7//! These two assumptions drive the shape of the memory representation.
  8//! In particular, we want the unmapped pages to be represented without allocating any memory, as *most*
  9//! of the memory in a program space is usually unmapped.
 10//! Note that per DAP we don't know what the address space layout is, so we can't optimize off of it.
 11//! Note that while we optimize for a paged layout, we also want to be able to represent memory that is not paged.
 12//! This use case is relevant to embedded folks. Furthermore, we cater to default 4k page size.
 13//! It is picked arbitrarily as a ubiquous default - other than that, the underlying format of Zed's memory storage should not be relevant
 14//! to the users of this module.
 15
 16use std::{collections::BTreeMap, ops::RangeInclusive, sync::Arc};
 17
 18use gpui::BackgroundExecutor;
 19use smallvec::SmallVec;
 20
 21const PAGE_SIZE: u64 = 4096;
 22
 23/// Represents the contents of a single page. We special-case unmapped pages to be allocation-free,
 24/// since they're going to make up the majority of the memory in a program space (even though the user might not even get to see them - ever).
 25#[derive(Clone, Debug)]
 26pub(super) enum PageContents {
 27    /// Whole page is unreadable.
 28    Unmapped,
 29    Mapped(Arc<MappedPageContents>),
 30}
 31
 32impl PageContents {
 33    #[cfg(test)]
 34    fn mapped(contents: Vec<u8>) -> Self {
 35        PageContents::Mapped(Arc::new(MappedPageContents(
 36            vec![PageChunk::Mapped(contents.into())].into(),
 37        )))
 38    }
 39}
 40
 41#[derive(Clone, Debug)]
 42enum PageChunk {
 43    Mapped(Arc<[u8]>),
 44    Unmapped(u64),
 45}
 46
 47impl PageChunk {
 48    fn len(&self) -> u64 {
 49        match self {
 50            PageChunk::Mapped(contents) => contents.len() as u64,
 51            PageChunk::Unmapped(size) => *size,
 52        }
 53    }
 54}
 55
 56impl MappedPageContents {
 57    fn len(&self) -> u64 {
 58        self.0.iter().map(|chunk| chunk.len()).sum()
 59    }
 60}
 61/// We hope for the whole page to be mapped in a single chunk, but we do leave the possibility open
 62/// of having interleaved read permissions in a single page; debuggee's execution environment might either
 63/// have a different page size OR it might not have paged memory layout altogether
 64/// (which might be relevant to embedded systems).
 65///
 66/// As stated previously, the concept of a page in this module has to do more
 67/// with optimizing fetching of the memory and not with the underlying bits and pieces
 68/// of the memory of a debuggee.
 69
 70#[derive(Default, Debug)]
 71pub(super) struct MappedPageContents(
 72    /// Most of the time there should be only one chunk (either mapped or unmapped),
 73    /// but we do leave the possibility open of having multiple regions of memory in a single page.
 74    SmallVec<[PageChunk; 1]>,
 75);
 76
 77type MemoryAddress = u64;
 78#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Ord, Eq)]
 79#[repr(transparent)]
 80pub(super) struct PageAddress(u64);
 81
 82impl PageAddress {
 83    pub(super) fn iter_range(
 84        range: RangeInclusive<PageAddress>,
 85    ) -> impl Iterator<Item = PageAddress> {
 86        let mut current = range.start().0;
 87        let end = range.end().0;
 88
 89        std::iter::from_fn(move || {
 90            if current > end {
 91                None
 92            } else {
 93                let addr = PageAddress(current);
 94                current += PAGE_SIZE;
 95                Some(addr)
 96            }
 97        })
 98    }
 99}
100
101pub(super) struct Memory {
102    pages: BTreeMap<PageAddress, PageContents>,
103}
104
105/// Represents a single memory cell (or None if a given cell is unmapped/unknown).
106#[derive(Copy, Clone, Debug, PartialEq, PartialOrd, Ord, Eq)]
107#[repr(transparent)]
108pub struct MemoryCell(pub Option<u8>);
109
110impl Memory {
111    pub(super) fn new() -> Self {
112        Self {
113            pages: Default::default(),
114        }
115    }
116
117    pub(super) fn memory_range_to_page_range(
118        range: RangeInclusive<MemoryAddress>,
119    ) -> RangeInclusive<PageAddress> {
120        let start_page = (range.start() / PAGE_SIZE) * PAGE_SIZE;
121        let end_page = (range.end() / PAGE_SIZE) * PAGE_SIZE;
122        PageAddress(start_page)..=PageAddress(end_page)
123    }
124
125    pub(super) fn build_page(&self, page_address: PageAddress) -> Option<MemoryPageBuilder> {
126        if self.pages.contains_key(&page_address) {
127            // We already know the state of this page.
128            None
129        } else {
130            Some(MemoryPageBuilder::new(page_address))
131        }
132    }
133
134    pub(super) fn insert_page(&mut self, address: PageAddress, page: PageContents) {
135        self.pages.insert(address, page);
136    }
137
138    pub(super) fn memory_range(&self, range: RangeInclusive<MemoryAddress>) -> MemoryIterator {
139        let pages = Self::memory_range_to_page_range(range.clone());
140        let pages = self
141            .pages
142            .range(pages)
143            .map(|(address, page)| (*address, page.clone()))
144            .collect::<Vec<_>>();
145        MemoryIterator::new(range, pages.into_iter())
146    }
147
148    pub(crate) fn clear(&mut self, background_executor: &BackgroundExecutor) {
149        let memory = std::mem::take(&mut self.pages);
150        background_executor
151            .spawn(async move {
152                drop(memory);
153            })
154            .detach();
155    }
156}
157
158/// Builder for memory pages.
159///
160/// Memory reads in DAP are sequential (or at least we make them so).
161/// ReadMemory response includes `unreadableBytes` property indicating the number of bytes
162/// that could not be read after the last successfully read byte.
163///
164/// We use it as follows:
165/// - We start off with a "large" 1-page ReadMemory request.
166/// - If it succeeds/fails wholesale, cool; we have no unknown memory regions in this page.
167/// - If it succeeds partially, we know # of mapped bytes.
168///   We might also know the # of unmapped bytes.
169///
170/// However, we're still unsure about what's *after* the unreadable region.
171/// This is where this builder comes in. It lets us track the state of figuring out contents of a single page.
172pub(super) struct MemoryPageBuilder {
173    chunks: MappedPageContents,
174    base_address: PageAddress,
175    left_to_read: u64,
176}
177
178/// Represents a chunk of memory of which we don't know if it's mapped or unmapped; thus we need
179/// to issue a request to figure out it's state.
180pub(super) struct UnknownMemory {
181    pub(super) address: MemoryAddress,
182    pub(super) size: u64,
183}
184
185impl MemoryPageBuilder {
186    fn new(base_address: PageAddress) -> Self {
187        Self {
188            chunks: Default::default(),
189            base_address,
190            left_to_read: PAGE_SIZE,
191        }
192    }
193
194    pub(super) fn build(self) -> (PageAddress, PageContents) {
195        debug_assert_eq!(self.left_to_read, 0);
196        debug_assert_eq!(
197            self.chunks.len(),
198            PAGE_SIZE,
199            "Expected `build` to be called on a fully-fetched page"
200        );
201        let contents = if let Some(first) = self.chunks.0.first()
202            && self.chunks.len() == 1
203            && matches!(first, PageChunk::Unmapped(PAGE_SIZE))
204        {
205            PageContents::Unmapped
206        } else {
207            PageContents::Mapped(Arc::new(MappedPageContents(self.chunks.0)))
208        };
209        (self.base_address, contents)
210    }
211    /// Drives the fetching of memory, in an iterator-esque style.
212    pub(super) fn next_request(&self) -> Option<UnknownMemory> {
213        if self.left_to_read == 0 {
214            None
215        } else {
216            let offset_in_current_page = PAGE_SIZE - self.left_to_read;
217            Some(UnknownMemory {
218                address: self.base_address.0 + offset_in_current_page,
219                size: self.left_to_read,
220            })
221        }
222    }
223    pub(super) fn unknown(&mut self, bytes: u64) {
224        if bytes == 0 {
225            return;
226        }
227        self.left_to_read -= bytes;
228        self.chunks.0.push(PageChunk::Unmapped(bytes));
229    }
230    pub(super) fn known(&mut self, data: Arc<[u8]>) {
231        if data.is_empty() {
232            return;
233        }
234        self.left_to_read -= data.len() as u64;
235        self.chunks.0.push(PageChunk::Mapped(data));
236    }
237}
238
239fn page_contents_into_iter(data: Arc<MappedPageContents>) -> Box<dyn Iterator<Item = MemoryCell>> {
240    let mut data_range = 0..data.0.len();
241    let iter = std::iter::from_fn(move || {
242        let data = &data;
243        let data_ref = data.clone();
244        data_range.next().map(move |index| {
245            let contents = &data_ref.0[index];
246            match contents {
247                PageChunk::Mapped(items) => {
248                    let chunk_range = 0..items.len();
249                    let items = items.clone();
250                    Box::new(
251                        chunk_range
252                            .into_iter()
253                            .map(move |ix| MemoryCell(Some(items[ix]))),
254                    ) as Box<dyn Iterator<Item = MemoryCell>>
255                }
256                PageChunk::Unmapped(len) => {
257                    Box::new(std::iter::repeat_n(MemoryCell(None), *len as usize))
258                }
259            }
260        })
261    })
262    .flatten();
263
264    Box::new(iter)
265}
266/// Defines an iteration over a range of memory. Some of this memory might be unmapped or straight up missing.
267/// Thus, this iterator alternates between synthesizing values and yielding known memory.
268pub struct MemoryIterator {
269    start: MemoryAddress,
270    end: MemoryAddress,
271    current_known_page: Option<(PageAddress, Box<dyn Iterator<Item = MemoryCell>>)>,
272    pages: std::vec::IntoIter<(PageAddress, PageContents)>,
273}
274
275impl MemoryIterator {
276    fn new(
277        range: RangeInclusive<MemoryAddress>,
278        pages: std::vec::IntoIter<(PageAddress, PageContents)>,
279    ) -> Self {
280        Self {
281            start: *range.start(),
282            end: *range.end(),
283            current_known_page: None,
284            pages,
285        }
286    }
287    fn fetch_next_page(&mut self) -> bool {
288        if let Some((mut address, chunk)) = self.pages.next() {
289            let mut contents = match chunk {
290                PageContents::Unmapped => None,
291                PageContents::Mapped(mapped_page_contents) => {
292                    Some(page_contents_into_iter(mapped_page_contents))
293                }
294            };
295
296            if address.0 < self.start {
297                // Skip ahead till our iterator is at the start of the range
298
299                //address: 20, start: 25
300                //
301                let to_skip = self.start - address.0;
302                address.0 += to_skip;
303                if let Some(contents) = &mut contents {
304                    contents.nth(to_skip as usize - 1);
305                }
306            }
307            self.current_known_page = contents.map(|contents| (address, contents));
308            true
309        } else {
310            false
311        }
312    }
313}
314impl Iterator for MemoryIterator {
315    type Item = MemoryCell;
316
317    fn next(&mut self) -> Option<Self::Item> {
318        if self.start > self.end {
319            return None;
320        }
321        if let Some((current_page_address, current_memory_chunk)) = self.current_known_page.as_mut()
322            && current_page_address.0 <= self.start
323        {
324            if let Some(next_cell) = current_memory_chunk.next() {
325                self.start += 1;
326                return Some(next_cell);
327            } else {
328                self.current_known_page.take();
329            }
330        }
331        if !self.fetch_next_page() {
332            self.start += 1;
333            Some(MemoryCell(None))
334        } else {
335            self.next()
336        }
337    }
338}
339
340#[cfg(test)]
341mod tests {
342    use crate::debugger::{
343        MemoryCell,
344        memory::{MemoryIterator, PageAddress, PageContents},
345    };
346
347    #[test]
348    fn iterate_over_unmapped_memory() {
349        let empty_iterator = MemoryIterator::new(0..=127, Default::default());
350        let actual = empty_iterator.collect::<Vec<_>>();
351        let expected = vec![MemoryCell(None); 128];
352        assert_eq!(actual.len(), expected.len());
353        assert_eq!(actual, expected);
354    }
355
356    #[test]
357    fn iterate_over_partially_mapped_memory() {
358        let it = MemoryIterator::new(
359            0..=127,
360            vec![(PageAddress(5), PageContents::mapped(vec![1]))].into_iter(),
361        );
362        let actual = it.collect::<Vec<_>>();
363        let expected = std::iter::repeat_n(MemoryCell(None), 5)
364            .chain(std::iter::once(MemoryCell(Some(1))))
365            .chain(std::iter::repeat_n(MemoryCell(None), 122))
366            .collect::<Vec<_>>();
367        assert_eq!(actual.len(), expected.len());
368        assert_eq!(actual, expected);
369    }
370
371    #[test]
372    fn reads_from_the_middle_of_a_page() {
373        let partial_iter = MemoryIterator::new(
374            20..=30,
375            vec![(PageAddress(0), PageContents::mapped((0..255).collect()))].into_iter(),
376        );
377        let actual = partial_iter.collect::<Vec<_>>();
378        let expected = (20..=30)
379            .map(|val| MemoryCell(Some(val)))
380            .collect::<Vec<_>>();
381        assert_eq!(actual.len(), expected.len());
382        assert_eq!(actual, expected);
383    }
384}