1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
use std::{ffi::c_void, time::Duration};

use windows::{core::Interface, Win32::{Media::Audio::{eConsole, eRender, IAudioCaptureClient, IAudioClient, IMMDeviceEnumerator, MMDeviceEnumerator, AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_LOOPBACK, WAVEFORMATEX, WAVE_FORMAT_PCM}, System::Com::{CoCreateInstance, CoInitializeEx, CoUninitialize, CLSCTX_ALL, COINIT_MULTITHREADED}}};

use crate::prelude::{AudioCaptureConfig, AudioChannelCount, AudioSampleRate};

/// Handle to a running WASAPI loopback capture created by
/// [`WindowsAudioCaptureStream::new`].
///
/// Dropping the handle stops the audio client (see the `Drop` impl).
pub struct WindowsAudioCaptureStream {
    /// `true` when the `CoInitializeEx` call made in `new` succeeded; `Drop`
    /// then balances it with a `CoUninitialize` call.
    should_couninit: bool,
    /// The audio client that was initialised and started in `new`; used by
    /// `stop` and `Drop` to call `Stop`.
    audio_client: IAudioClient,
}

/// Reasons why [`WindowsAudioCaptureStream::new`] can fail.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`] so it can be
/// logged, boxed, and propagated with `?`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum WindowsAudioCaptureStreamCreateError {
    /// Any failure without a dedicated variant; carries a human-readable message.
    Other(String),
    /// The default render endpoint could not be obtained.
    EndpointEnumerationFailed,
    /// Activating an `IAudioClient` on the endpoint failed.
    AudioClientActivationFailed,
    /// `IAudioClient::Initialize` rejected the requested configuration.
    AudioClientInitializeFailed,
    /// The `IAudioCaptureClient` service could not be obtained.
    AudioCaptureCreationFailed,
    /// `IAudioClient::Start` failed.
    StreamStartFailed,
}

impl std::fmt::Display for WindowsAudioCaptureStreamCreateError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match self {
            Self::Other(message) => message.as_str(),
            Self::EndpointEnumerationFailed => "failed to get the default audio render endpoint",
            Self::AudioClientActivationFailed => "failed to activate the audio client",
            Self::AudioClientInitializeFailed => "failed to initialize the audio client",
            Self::AudioCaptureCreationFailed => "failed to create the audio capture client",
            Self::StreamStartFailed => "failed to start the audio stream",
        };
        f.write_str(message)
    }
}

impl std::error::Error for WindowsAudioCaptureStreamCreateError {}

/// Errors delivered to the capture callback while a stream is running.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`] so it can be
/// logged, boxed, and propagated with `?`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum WindowsAudioCaptureStreamError {
    /// Any failure without a dedicated variant; carries a human-readable message.
    Other(String),
    /// Reading the next packet from the capture client's buffer failed.
    GetBufferFailed,
}

impl std::fmt::Display for WindowsAudioCaptureStreamError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Other(message) => f.write_str(message),
            Self::GetBufferFailed => {
                f.write_str("failed to read the next packet from the capture buffer")
            }
        }
    }
}

impl std::error::Error for WindowsAudioCaptureStreamError {}

/// One packet of captured 16-bit audio handed to the capture callback.
///
/// The packet borrows its samples, so it is only valid for the duration of the
/// callback invocation. All fields are `Copy`, so the packet itself is cheap to
/// copy.
// NOTE(review): `allow(unused)` presumably silences dead-code warnings for
// fields that not every consumer reads — confirm it is still needed.
#[allow(unused)]
#[derive(Debug, Clone, Copy)]
pub struct WindowsAudioCaptureStreamPacket<'a> {
    /// The captured samples as signed 16-bit integers.
    pub(crate) data: &'a [i16],
    /// Number of channels in the stream this packet belongs to.
    pub(crate) channel_count: u32,
    /// Stream position of the packet, derived from the device position
    /// reported by the capture client.
    pub(crate) origin_time: Duration,
    /// Length of time attributed to this packet.
    pub(crate) duration: Duration,
    /// Running count of frames delivered before this packet.
    pub(crate) sample_index: u64,
}

/// Holds an `IAudioCaptureClient` as an untyped raw interface pointer so that
/// it can be moved into the capture thread's closure.
struct SendCaptureClient(*mut c_void);

// SAFETY / NOTE(review): these impls assert that the wrapped pointer may be
// moved to and shared with other threads. Both the creating thread and the
// capture thread initialise COM with `COINIT_MULTITHREADED`, which is
// presumably what makes the hand-off acceptable — confirm. Only `Send` is
// required for moving the wrapper into a spawned thread.
unsafe impl Send for SendCaptureClient {}
unsafe impl Sync for SendCaptureClient {}

impl SendCaptureClient {
    /// Wraps `client`, taking over its COM reference as a raw pointer.
    fn from_iaudiocaptureclient(client: IAudioCaptureClient) -> Self {
        SendCaptureClient(client.into_raw())
    }

    /// Converts the wrapper back into a typed `IAudioCaptureClient`, handing
    /// the COM reference to the returned interface.
    fn into_iaudiocaptureclient(self) -> IAudioCaptureClient {
        // The reference now belongs to the returned interface, so the wrapper's
        // own `Drop` must not release it a second time.
        let this = std::mem::ManuallyDrop::new(self);
        // SAFETY: the pointer came from `IAudioCaptureClient::into_raw` in
        // `from_iaudiocaptureclient` and has not been released since.
        unsafe { IAudioCaptureClient::from_raw(this.0) }
    }
}

impl Drop for SendCaptureClient {
    /// Releases the wrapped COM reference when the wrapper is discarded without
    /// being converted back; without this the reference would leak.
    fn drop(&mut self) {
        // SAFETY: the pointer came from `IAudioCaptureClient::into_raw`, and
        // `into_iaudiocaptureclient` suppresses this destructor, so the
        // reference is still owned by the wrapper here.
        drop(unsafe { IAudioCaptureClient::from_raw(self.0) });
    }
}

impl WindowsAudioCaptureStream {
    pub fn new(config: AudioCaptureConfig, mut callback: Box<dyn for <'a> FnMut(Result<WindowsAudioCaptureStreamPacket<'a>, WindowsAudioCaptureStreamError>) + Send + 'static>) -> Result<Self, WindowsAudioCaptureStreamCreateError> {
        unsafe {
            let should_couninit = CoInitializeEx(None, COINIT_MULTITHREADED).is_ok();

            let mm_device_enumerator: IMMDeviceEnumerator = CoCreateInstance(&MMDeviceEnumerator, None, CLSCTX_ALL)
                .map_err(|e| WindowsAudioCaptureStreamCreateError::Other(format!("Failed to create MMDeviceEnumerator: {}", e.to_string())))?;
            let device = mm_device_enumerator.GetDefaultAudioEndpoint(eRender, eConsole)
                .map_err(|_| WindowsAudioCaptureStreamCreateError::EndpointEnumerationFailed)?;
            
            let audio_client: IAudioClient = device.Activate(CLSCTX_ALL, None)
                .map_err(|_| WindowsAudioCaptureStreamCreateError::AudioClientActivationFailed)?;

            let mut format = WAVEFORMATEX::default();
            format.wFormatTag = WAVE_FORMAT_PCM as u16;
            format.nSamplesPerSec = match config.sample_rate {
                AudioSampleRate::Hz8000  =>  8000,
                AudioSampleRate::Hz16000 => 16000,
                AudioSampleRate::Hz24000 => 24000,
                AudioSampleRate::Hz48000 => 48000,
            };
            format.wBitsPerSample = 16;
            format.nChannels = match config.channel_count {
                AudioChannelCount::Mono   => 1,
                AudioChannelCount::Stereo => 2,
            };
            format.nBlockAlign = format.nChannels * 2;
            format.nAvgBytesPerSec = format.nSamplesPerSec * format.nBlockAlign as u32;
            format.cbSize = 0;

            let callback_format = format.clone();

            let buffer_size = 512;
            let buffer_time = buffer_size as i64 * 10000000i64 / format.nSamplesPerSec as i64;

            let buffer_duration = Duration::from_nanos(buffer_time as u64 * 100);
            let half_buffer_duration = buffer_duration / 2;

            audio_client.Initialize(AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_LOOPBACK, buffer_time, buffer_time, &format as *const _, None)
                .map_err(|_| WindowsAudioCaptureStreamCreateError::AudioClientInitializeFailed)?;

            let capture_client : IAudioCaptureClient = audio_client.GetService()
                .map_err(|_| WindowsAudioCaptureStreamCreateError::AudioCaptureCreationFailed)?;

            let capture_client_send = SendCaptureClient::from_iaudiocaptureclient(capture_client);

            std::thread::spawn(move || {
                {
                    let should_couninit = CoInitializeEx(None, COINIT_MULTITHREADED).is_ok();

                    let mut last_device_position = 0u64;
                    let mut sample_count = 0u64;

                    let capture_client = capture_client_send.into_iaudiocaptureclient();
                    loop {
                        std::thread::sleep(half_buffer_duration);

                        let _buffered_count = match capture_client.GetNextPacketSize() {
                            Ok(count) => count,
                            Err(_) => {
                                (callback)(Err(WindowsAudioCaptureStreamError::Other(format!("Stream failed - couldn't fetch packet size"))));
                                break;
                            }
                        };

                        let mut data_ptr: *mut u8 = std::ptr::null_mut();

                        let mut num_frames = 0u32;
                        let mut flags = 0u32;
                        let mut device_position = 0u64;

                        match capture_client.GetBuffer(&mut data_ptr as *mut _, &mut num_frames as *mut _, &mut flags as *mut _, Some(&mut device_position as *mut _), None) {
                            Ok(_) => {
                                let packet = WindowsAudioCaptureStreamPacket {
                                    data: std::slice::from_raw_parts(data_ptr as *const i16, num_frames as usize * 2),
                                    channel_count: callback_format.nChannels as u32,
                                    origin_time: Duration::from_nanos(device_position as u64 * 100),
                                    duration: Duration::from_nanos((device_position - last_device_position) as u64),
                                    sample_index: sample_count
                                };
                                (callback)(Ok(packet));
                                let _ = capture_client.ReleaseBuffer(num_frames);
                                last_device_position = device_position;
                                sample_count += num_frames as u64;
                            },
                            Err(_) => {
                                (callback)(Err(WindowsAudioCaptureStreamError::GetBufferFailed));
                                break;
                            }
                        }

                    }

                    if should_couninit {
                        CoUninitialize();
                    }
                }
            });

            audio_client.Start()
                .map_err(|_| WindowsAudioCaptureStreamCreateError::StreamStartFailed)?;

            Ok(WindowsAudioCaptureStream {
                should_couninit,
                audio_client
            })
        }
    }

    pub fn stop(&mut self) {
        unsafe {
            let _ = self.audio_client.Stop();
        }
    }
}

impl Drop for WindowsAudioCaptureStream {
    /// Stops the audio client (ignoring any error) and, when `new` initialised
    /// COM successfully, balances that with `CoUninitialize`.
    ///
    /// NOTE(review): struct fields are dropped after this body runs, so the
    /// `audio_client` interface is released after `CoUninitialize`. The call
    /// also runs on whichever thread drops the stream, which may not be the
    /// thread that called `CoInitializeEx` — confirm both are acceptable.
    fn drop(&mut self) {
        // SAFETY: `audio_client` was initialised in `new` and is still alive.
        let _ = unsafe { self.audio_client.Stop() };

        if !self.should_couninit {
            return;
        }

        // SAFETY: `should_couninit` is only true after a successful
        // `CoInitializeEx` in `new`.
        unsafe { CoUninitialize() };
    }
}