Start on a new, more abstract `CompressionSession` primitive

Created by Antonio Scandurra and Nathan Sobo

Co-Authored-By: Nathan Sobo <nathan@zed.dev>

Change summary

Cargo.lock                                |   7 
crates/capture/Cargo.toml                 |   2 
crates/capture/src/compression_session.rs | 178 +++++++++++++++++++++++++
crates/capture/src/main.rs                |  55 ++++++-
crates/media/Cargo.toml                   |   1 
crates/media/src/media.rs                 |  54 ------
6 files changed, 236 insertions(+), 61 deletions(-)

Detailed changes

Cargo.lock

@@ -663,9 +663,9 @@ checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
 
 [[package]]
 name = "bytes"
-version = "1.1.0"
+version = "1.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8"
+checksum = "ec8a7b6a70fde80372154c65702f00a0f56f3e1c36abbc6c440484be248856db"
 
 [[package]]
 name = "bzip2-sys"
@@ -754,8 +754,10 @@ dependencies = [
 name = "capture"
 version = "0.1.0"
 dependencies = [
+ "anyhow",
  "bindgen",
  "block",
+ "bytes",
  "cocoa",
  "core-foundation",
  "core-graphics",
@@ -3036,6 +3038,7 @@ dependencies = [
  "anyhow",
  "bindgen",
  "block",
+ "bytes",
  "core-foundation",
  "foreign-types",
  "metal",

crates/capture/Cargo.toml

@@ -12,7 +12,9 @@ identifier = "dev.zed.Capture"
 gpui = { path = "../gpui" }
 media = { path = "../media" }
 
+anyhow = "1.0.38"
 block = "0.1"
+bytes = "1.2"
 cocoa = "0.24"
 core-foundation = "0.9.3"
 core-graphics = "0.22.3"

crates/capture/src/compression_session.rs

@@ -0,0 +1,178 @@
+use anyhow::Result;
+use core_foundation::base::{OSStatus, TCFType};
+use media::{
+    core_media::{CMSampleBufferRef, CMSampleTimingInfo, CMVideoCodecType},
+    core_video::CVImageBuffer,
+    video_toolbox::{VTCompressionSession, VTEncodeInfoFlags},
+};
+use std::ffi::c_void;
+
+pub struct CompressionSession<F> {
+    session: VTCompressionSession,
+    output_callback: Box<F>,
+}
+
+impl<F: 'static + Send + FnMut(OSStatus, VTEncodeInfoFlags, CMSampleBufferRef)>
+    CompressionSession<F>
+{
+    pub fn new(width: usize, height: usize, codec: CMVideoCodecType, callback: F) -> Result<Self> {
+        let callback = Box::new(callback);
+        let session = VTCompressionSession::new(
+            width,
+            height,
+            codec,
+            Some(Self::output_callback),
+            callback.as_ref() as *const _ as *const c_void,
+        )?;
+        Ok(Self {
+            session,
+            output_callback: callback,
+        })
+    }
+
+    pub fn encode_frame(&self, buffer: &CVImageBuffer, timing: CMSampleTimingInfo) -> Result<()> {
+        self.session.encode_frame(
+            buffer.as_concrete_TypeRef(),
+            timing.presentationTimeStamp,
+            timing.duration,
+        )
+    }
+
+    extern "C" fn output_callback(
+        output_callback_ref_con: *mut c_void,
+        _: *mut c_void,
+        status: OSStatus,
+        flags: VTEncodeInfoFlags,
+        sample_buffer: CMSampleBufferRef,
+    ) {
+        let callback = unsafe { &mut *(output_callback_ref_con as *mut F) };
+        callback(status, flags, sample_buffer);
+    }
+}
+
+// unsafe extern "C" fn output(
+//     output_callback_ref_con: *mut c_void,
+//     source_frame_ref_con: *mut c_void,
+//     status: OSStatus,
+//     info_flags: VTEncodeInfoFlags,
+//     sample_buffer: CMSampleBufferRef,
+// ) {
+//     if status != 0 {
+//         println!("error encoding frame, code: {}", status);
+//         return;
+//     }
+//     let sample_buffer = CMSampleBuffer::wrap_under_get_rule(sample_buffer);
+
+//     let mut is_iframe = false;
+//     let attachments = sample_buffer.attachments();
+//     if let Some(attachments) = attachments.first() {
+//         is_iframe = attachments
+//             .find(bindings::kCMSampleAttachmentKey_NotSync as CFStringRef)
+//             .map_or(true, |not_sync| {
+//                 CFBooleanGetValue(*not_sync as CFBooleanRef)
+//             });
+//     }
+
+//     const START_CODE: [u8; 4] = [0x00, 0x00, 0x00, 0x01];
+//     if is_iframe {
+//         let format_description = sample_buffer.format_description();
+//         for ix in 0..format_description.h264_parameter_set_count() {
+//             let parameter_set = format_description.h264_parameter_set_at_index(ix);
+//             stream.extend(START_CODE);
+//             stream.extend(parameter_set);
+//         }
+//     }
+
+//     println!("YO!");
+// }
+
+// static void videoFrameFinishedEncoding(void *outputCallbackRefCon,
+//                                        void *sourceFrameRefCon,
+//                                        OSStatus status,
+//                                        VTEncodeInfoFlags infoFlags,
+//                                        CMSampleBufferRef sampleBuffer) {
+//     // Check if there were any errors encoding
+//     if (status != noErr) {
+//         NSLog(@"Error encoding video, err=%lld", (int64_t)status);
+//         return;
+//     }
+
+//     // In this example we will use a NSMutableData object to store the
+//     // elementary stream.
+//     NSMutableData *elementaryStream = [NSMutableData data];
+
+//     // Find out if the sample buffer contains an I-Frame.
+//     // If so we will write the SPS and PPS NAL units to the elementary stream.
+//     BOOL isIFrame = NO;
+//     CFArrayRef attachmentsArray = CMSampleBufferGetSampleAttachmentsArray(sampleBuffer, 0);
+//     if (CFArrayGetCount(attachmentsArray)) {
+//         CFBooleanRef notSync;
+//         CFDictionaryRef dict = CFArrayGetValueAtIndex(attachmentsArray, 0);
+//         BOOL keyExists = CFDictionaryGetValueIfPresent(dict,
+//                                                        kCMSampleAttachmentKey_NotSync,
+//                                                        (const void **)&notSync);
+//         // An I-Frame is a sync frame
+//         isIFrame = !keyExists || !CFBooleanGetValue(notSync);
+//     }
+
+//     // This is the start code that we will write to
+//     // the elementary stream before every NAL unit
+//     static const size_t startCodeLength = 4;
+//     static const uint8_t startCode[] = {0x00, 0x00, 0x00, 0x01};
+
+//     // Write the SPS and PPS NAL units to the elementary stream before every I-Frame
+//     if (isIFrame) {
+//         CMFormatDescriptionRef description = CMSampleBufferGetFormatDescription(sampleBuffer);
+
+//         // Find out how many parameter sets there are
+//         size_t numberOfParameterSets;
+//         CMVideoFormatDescriptionGetH264ParameterSetAtIndex(description,
+//                                                            0, NULL, NULL,
+//                                                            &numberOfParameterSets,
+//                                                            NULL);
+
+//         // Write each parameter set to the elementary stream
+//         for (int i = 0; i < numberOfParameterSets; i++) {
+//             const uint8_t *parameterSetPointer;
+//             size_t parameterSetLength;
+//             CMVideoFormatDescriptionGetH264ParameterSetAtIndex(description,
+//                                                                i,
+//                                                                &parameterSetPointer,
+//                                                                &parameterSetLength,
+//                                                                NULL, NULL);
+
+//             // Write the parameter set to the elementary stream
+//             [elementaryStream appendBytes:startCode length:startCodeLength];
+//             [elementaryStream appendBytes:parameterSetPointer length:parameterSetLength];
+//         }
+//     }
+
+//     // Get a pointer to the raw AVCC NAL unit data in the sample buffer
+//     size_t blockBufferLength;
+//     uint8_t *bufferDataPointer = NULL;
+//     CMBlockBufferGetDataPointer(CMSampleBufferGetDataBuffer(sampleBuffer),
+//                                 0,
+//                                 NULL,
+//                                 &blockBufferLength,
+//                                 (char **)&bufferDataPointer);
+
+//     // Loop through all the NAL units in the block buffer
+//     // and write them to the elementary stream with
+//     // start codes instead of AVCC length headers
+//     size_t bufferOffset = 0;
+//     static const int AVCCHeaderLength = 4;
+//     while (bufferOffset < blockBufferLength - AVCCHeaderLength) {
+//         // Read the NAL unit length
+//         uint32_t NALUnitLength = 0;
+//         memcpy(&NALUnitLength, bufferDataPointer + bufferOffset, AVCCHeaderLength);
+//         // Convert the length value from Big-endian to Little-endian
+//         NALUnitLength = CFSwapInt32BigToHost(NALUnitLength);
+//         // Write start code to the elementary stream
+//         [elementaryStream appendBytes:startCode length:startCodeLength];
+//         // Write the NAL unit without the AVCC length header to the elementary stream
+//         [elementaryStream appendBytes:bufferDataPointer + bufferOffset + AVCCHeaderLength
+//                                length:NALUnitLength];
+//         // Move to the next NAL unit in the block buffer
+//         bufferOffset += AVCCHeaderLength + NALUnitLength;
+//     }
+// }

crates/capture/src/main.rs

@@ -1,12 +1,18 @@
 mod bindings;
+mod compression_session;
 
-use crate::bindings::SCStreamOutputType;
+use crate::{bindings::SCStreamOutputType, compression_session::CompressionSession};
 use block::ConcreteBlock;
+use bytes::BytesMut;
 use cocoa::{
     base::{id, nil, YES},
     foundation::{NSArray, NSString, NSUInteger},
 };
-use core_foundation::{base::TCFType, number::CFNumberRef, string::CFStringRef};
+use core_foundation::{
+    base::TCFType,
+    number::{CFBooleanGetValue, CFBooleanRef, CFNumberRef},
+    string::CFStringRef,
+};
 use futures::StreamExt;
 use gpui::{
     actions,
@@ -17,7 +23,10 @@ use gpui::{
 };
 use log::LevelFilter;
 use media::{
-    core_media::{kCMVideoCodecType_H264, CMSampleBuffer, CMSampleBufferRef, CMTimeMake},
+    core_media::{
+        kCMSampleAttachmentKey_NotSync, kCMVideoCodecType_H264, CMSampleBuffer, CMSampleBufferRef,
+        CMTimeMake,
+    },
     core_video::{self, CVImageBuffer},
     video_toolbox::VTCompressionSession,
 };
@@ -86,12 +95,40 @@ impl ScreenCaptureView {
                 let display: id = displays.objectAtIndex(0);
                 let display_width: usize = msg_send![display, width];
                 let display_height: usize = msg_send![display, height];
-                let compression_session = VTCompressionSession::new(
+                let mut compression_buffer = BytesMut::new();
+                let compression_session = CompressionSession::new(
                     display_width,
                     display_height,
                     kCMVideoCodecType_H264,
-                    None,
-                    ptr::null(),
+                    move |status, flags, sample_buffer| {
+                        if status != 0 {
+                            println!("error encoding frame, code: {}", status);
+                            return;
+                        }
+                        let sample_buffer = CMSampleBuffer::wrap_under_get_rule(sample_buffer);
+
+                        let mut is_iframe = false;
+                        let attachments = sample_buffer.attachments();
+                        if let Some(attachments) = attachments.first() {
+                            is_iframe = attachments
+                                .find(kCMSampleAttachmentKey_NotSync as CFStringRef)
+                                .map_or(true, |not_sync| {
+                                    CFBooleanGetValue(*not_sync as CFBooleanRef)
+                                });
+                        }
+
+                        const START_CODE: [u8; 4] = [0x00, 0x00, 0x00, 0x01];
+                        if is_iframe {
+                            let format_description = sample_buffer.format_description();
+                            for ix in 0..format_description.h264_parameter_set_count() {
+                                let parameter_set =
+                                    format_description.h264_parameter_set_at_index(ix).unwrap();
+                                compression_buffer.extend_from_slice(&START_CODE);
+                                compression_buffer.extend_from_slice(parameter_set);
+                                let nal_unit = compression_buffer.split();
+                            }
+                        }
+                    },
                 )
                 .unwrap();
 
@@ -126,11 +163,7 @@ impl ScreenCaptureView {
                     let timing_info = buffer.sample_timing_info(0).unwrap();
                     let image_buffer = buffer.image_buffer();
                     compression_session
-                        .encode_frame(
-                            image_buffer.as_concrete_TypeRef(),
-                            timing_info.presentationTimeStamp,
-                            timing_info.duration,
-                        )
+                        .encode_frame(&image_buffer, timing_info)
                         .unwrap();
                     *surface_tx.lock().borrow_mut() = Some(image_buffer);
                 }) as Box<dyn FnMut(CMSampleBufferRef)>;
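
So far the closure above only writes the H.264 SPS/PPS parameter sets for I-frames into compression_buffer; the commented-out Objective-C reference kept in compression_session.rs covers the remaining step of rewriting the AVCC payload as an Annex B elementary stream. A sketch of that conversion in Rust over a raw byte slice, assuming the sample buffer's block-buffer bytes have already been copied out (the append_annex_b name and the avcc argument are hypothetical; the media crate does not yet expose that data in this commit):

use bytes::BytesMut;

const START_CODE: [u8; 4] = [0x00, 0x00, 0x00, 0x01];
const AVCC_HEADER_LENGTH: usize = 4;

// Rewrite AVCC-framed NAL units (4-byte big-endian length prefixes) as an
// Annex B stream (start-code prefixes), appending them to `stream`.
fn append_annex_b(stream: &mut BytesMut, avcc: &[u8]) {
    let mut offset = 0;
    while offset + AVCC_HEADER_LENGTH <= avcc.len() {
        // Read the NAL unit length from the AVCC header.
        let mut len_bytes = [0u8; 4];
        len_bytes.copy_from_slice(&avcc[offset..offset + AVCC_HEADER_LENGTH]);
        let nal_len = u32::from_be_bytes(len_bytes) as usize;
        offset += AVCC_HEADER_LENGTH;
        if offset + nal_len > avcc.len() {
            break; // Truncated or corrupt buffer; stop rather than panic.
        }
        // Emit a start code followed by the NAL unit payload.
        stream.extend_from_slice(&START_CODE);
        stream.extend_from_slice(&avcc[offset..offset + nal_len]);
        offset += nal_len;
    }
}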

crates/media/Cargo.toml

@@ -10,6 +10,7 @@ doctest = false
 [dependencies]
 anyhow = "1.0"
 block = "0.1"
+bytes = "1.2"
 core-foundation = "0.9.3"
 foreign-types = "0.3"
 metal = "0.21.0"

crates/media/src/media.rs

@@ -195,8 +195,8 @@ pub mod core_media {
     #![allow(non_snake_case)]
 
     pub use crate::bindings::{
-        kCMTimeInvalid, kCMVideoCodecType_H264, CMItemIndex, CMSampleTimingInfo, CMTime,
-        CMTimeMake, CMVideoCodecType,
+        kCMSampleAttachmentKey_NotSync, kCMTimeInvalid, kCMVideoCodecType_H264, CMItemIndex,
+        CMSampleTimingInfo, CMTime, CMTimeMake, CMVideoCodecType,
     };
     use crate::core_video::{CVImageBuffer, CVImageBufferRef};
     use anyhow::{anyhow, Result};
@@ -357,18 +357,12 @@ pub mod video_toolbox {
 
     use super::*;
     use crate::{
-        core_media::{CMSampleBuffer, CMSampleBufferRef, CMTime, CMVideoCodecType},
+        core_media::{CMSampleBufferRef, CMTime, CMVideoCodecType},
         core_video::CVImageBufferRef,
     };
     use anyhow::{anyhow, Result};
-    use bindings::VTEncodeInfoFlags;
-    use core_foundation::{
-        base::OSStatus,
-        dictionary::CFDictionaryRef,
-        mach_port::CFAllocatorRef,
-        number::{CFBooleanGetValue, CFBooleanRef},
-        string::CFStringRef,
-    };
+    pub use bindings::VTEncodeInfoFlags;
+    use core_foundation::{base::OSStatus, dictionary::CFDictionaryRef, mach_port::CFAllocatorRef};
     use std::ptr;
 
     #[repr(C)]
@@ -402,7 +396,7 @@ pub mod video_toolbox {
                     ptr::null(),
                     ptr::null(),
                     ptr::null(),
-                    Some(Self::output),
+                    callback,
                     callback_data,
                     &mut this,
                 );
@@ -418,42 +412,6 @@ pub mod video_toolbox {
             }
         }
 
-        unsafe extern "C" fn output(
-            output_callback_ref_con: *mut c_void,
-            source_frame_ref_con: *mut c_void,
-            status: OSStatus,
-            info_flags: VTEncodeInfoFlags,
-            sample_buffer: CMSampleBufferRef,
-        ) {
-            if status != 0 {
-                println!("error encoding frame, code: {}", status);
-                return;
-            }
-            let sample_buffer = CMSampleBuffer::wrap_under_get_rule(sample_buffer);
-
-            let mut is_iframe = false;
-            let attachments = sample_buffer.attachments();
-            if let Some(attachments) = attachments.first() {
-                is_iframe = attachments
-                    .find(bindings::kCMSampleAttachmentKey_NotSync as CFStringRef)
-                    .map_or(true, |not_sync| {
-                        CFBooleanGetValue(*not_sync as CFBooleanRef)
-                    });
-            }
-
-            const START_CODE: [u8; 4] = [0x00, 0x00, 0x00, 0x01];
-            if is_iframe {
-                let format_description = sample_buffer.format_description();
-                for ix in 0..format_description.h264_parameter_set_count() {
-                    let parameter_set = format_description.h264_parameter_set_at_index(ix);
-                    stream.extend(START_CODE);
-                    stream.extend(parameter_set);
-                }
-            }
-
-            println!("YO!");
-        }
-
         pub fn encode_frame(
             &self,
             buffer: CVImageBufferRef,