summaryrefslogtreecommitdiff
path: root/src/output/ffmpeg_output.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/output/ffmpeg_output.rs')
-rw-r--r--src/output/ffmpeg_output.rs417
1 file changed, 417 insertions(+), 0 deletions(-)
diff --git a/src/output/ffmpeg_output.rs b/src/output/ffmpeg_output.rs
new file mode 100644
index 0000000..2132394
--- /dev/null
+++ b/src/output/ffmpeg_output.rs
@@ -0,0 +1,417 @@
+//! This file implements the output to save an audio file using ffmpeg
+//! libraries.
+
+use super::*;
+
+#[allow(unused_imports)]
+use std::{fs::File, time::Duration};
+
+#[allow(unused_imports)]
+use ac_ffmpeg::{
+ codec::{
+ audio::{
+ self,
+ frame::{ChannelLayout, SampleFormat},
+ AudioEncoder, AudioFrameMut,
+ },
+ CodecParameters, Encoder,
+ },
+ format::{
+ io::IO,
+ muxer::{Muxer, OutputFormat},
+ },
+ time::{TimeBase, Timestamp},
+ Error as FFError,
+};
+
+impl From<FFError> for OutputError {
+ fn from(ff: FFError) -> Self {
+ OutputError::FFMpeg(ff.to_string())
+ }
+}
+
+/// Open a given output file.
+fn open_output(path: &str, elementary_streams: &[CodecParameters]) -> Result<Muxer<File>, FFError> {
+ let output_format = OutputFormat::guess_from_file_name(path)
+ .ok_or_else(|| FFError::new(format!("unable to guess output format for file: {}", path)))?;
+
+ let output = std::fs::OpenOptions::new()
+ .create(true)
+ .write(true)
+ .open(path)
+ .map_err(|err| FFError::new(format!("unable to create output file {}: {}", path, err)))?;
+
+ let io = IO::from_seekable_write_stream(output);
+
+ let mut muxer_builder = Muxer::builder();
+
+ for codec_parameters in elementary_streams {
+ muxer_builder.add_stream(codec_parameters)?;
+ }
+
+ muxer_builder.build(io, output_format)
+}
+
/// Stateless marker type whose [`Output`] impl saves a wave as an Opus
/// (`libopus`) file; construct it with `OpusOutput::default()`.
#[derive(Default)]
pub struct OpusOutput {}
+
+impl Output for OpusOutput {
+ fn save(&self, data: Wave, rate: Samples, name: &str) -> Result<(), OutputError> {
+ if data.is_empty() {
+ dbg!();
+ return Err(OutputError::EmptyWaves);
+ }
+
+ if std::fs::metadata(name).is_ok() {
+ std::fs::remove_file(name)?;
+ }
+
+ let sample_format: SampleFormat = "flt".parse().unwrap();
+
+ let channel_layout = ChannelLayout::from_channels(2u32).unwrap();
+
+ let encoder_builder = AudioEncoder::builder("libopus")?;
+
+ let time_base = TimeBase::new(1, 50);
+
+ let duration = Duration::from_nanos((data.len() * *rate as usize) as u64);
+
+ let mut encoder = encoder_builder
+ .sample_rate(*rate as u32)
+ .sample_format(sample_format)
+ .channel_layout(channel_layout)
+ .bit_rate(96000u64)
+ .time_base(time_base)
+ .build()?;
+
+ let codec_parameters = encoder.codec_parameters().into();
+
+ let mut muxer = open_output(name, &[codec_parameters])?;
+
+ // NOTE: FFMPEG assumes the bytes to be laid out in the
+ // platform-natural endianness, so this is actually portable,
+ // in the sense that this must be used with FFMPEG, which
+ // knows how to deal with this endianness.
+ let bytes: Vec<u8> = data
+ .iter()
+ .flat_map(|pulse| (**pulse as f32).to_ne_bytes())
+ .collect();
+
+ let mut frame_idx = 0;
+ let mut frame_timestamp = Timestamp::new(frame_idx, time_base);
+ let max_timestamp = Timestamp::from_millis(0) + duration;
+
+ // NOTE: Each frame occupies 20 milliseconds, as recommended
+ // in the official documentation page for FFMPEG.
+ let samples_per_frame = (*rate * 0.02f64) as usize;
+
+ let mut bytes_index = 0usize;
+
+ while frame_timestamp < max_timestamp && bytes_index < bytes.len() {
+ let mut frame = AudioFrameMut::silence(
+ channel_layout,
+ sample_format,
+ *rate as u32,
+ samples_per_frame,
+ )
+ .with_time_base(time_base)
+ .with_pts(frame_timestamp);
+
+ let mut planes = frame.planes_mut();
+
+ let data = planes[0].data_mut();
+
+ let number_of_bytes = std::cmp::min(samples_per_frame, (bytes.len() - bytes_index) / 4);
+
+ for i in 0..number_of_bytes {
+ data[i * 8] = bytes[bytes_index + 4 * i];
+ data[i * 8 + 1] = bytes[bytes_index + 4 * i + 1];
+ data[i * 8 + 2] = bytes[bytes_index + 4 * i + 2];
+ data[i * 8 + 3] = bytes[bytes_index + 4 * i + 3];
+ data[i * 8 + 4] = bytes[bytes_index + 4 * i];
+ data[i * 8 + 5] = bytes[bytes_index + 4 * i + 1];
+ data[i * 8 + 6] = bytes[bytes_index + 4 * i + 2];
+ data[i * 8 + 7] = bytes[bytes_index + 4 * i + 3];
+ }
+
+ let frame = frame.freeze();
+
+ encoder.push(frame)?;
+
+ while let Some(packet) = encoder.take()? {
+ muxer.push(packet.with_stream_index(0))?;
+ }
+
+ frame_idx += 1;
+
+ frame_timestamp = Timestamp::new(frame_idx, time_base);
+
+ bytes_index += samples_per_frame * 4;
+ }
+
+ encoder.flush()?;
+
+ while let Some(packet) = encoder.take()? {
+ muxer.push(packet.with_stream_index(0))?;
+ }
+
+ muxer.flush()?;
+
+ Ok(())
+ }
+}
+
/// Stateless marker type whose [`Output`] impl saves a wave as an MP3
/// (`libmp3lame`) file; construct it with `MP3Output::default()`.
#[derive(Default)]
pub struct MP3Output {}
+
+impl Output for MP3Output {
+ fn save(&self, data: Wave, rate: Samples, name: &str) -> Result<(), OutputError> {
+ if data.is_empty() {
+ dbg!();
+ return Err(OutputError::EmptyWaves);
+ }
+
+ if std::fs::metadata(name).is_ok() {
+ std::fs::remove_file(name)?;
+ }
+
+ let sample_format: SampleFormat = "fltp".parse().unwrap();
+
+ let channel_layout = ChannelLayout::from_channels(2u32).unwrap();
+
+ let encoder_builder = AudioEncoder::builder("libmp3lame")?;
+
+ let time_base = TimeBase::new(1, 50);
+
+ let duration = Duration::from_nanos((data.len() * *rate as usize) as u64);
+
+ let mut encoder = encoder_builder
+ .sample_rate(*rate as u32)
+ .sample_format(sample_format)
+ .channel_layout(channel_layout)
+ .bit_rate(96000u64)
+ .time_base(time_base)
+ .build()?;
+
+ let codec_parameters = encoder.codec_parameters().into();
+
+ let mut muxer = open_output(name, &[codec_parameters])?;
+
+ // NOTE: FFMPEG assumes the bytes to be laid out in the
+ // platform-natural endianness, so this is actually portable,
+ // in the sense that this must be used with FFMPEG, which
+ // knows how to deal with this endianness.
+ let bytes: Vec<u8> = data
+ .iter()
+ .flat_map(|pulse| (**pulse as f32).to_ne_bytes())
+ .collect();
+
+ let mut frame_idx = 0;
+ let mut frame_timestamp = Timestamp::new(frame_idx, time_base);
+ let max_timestamp = Timestamp::from_millis(0) + duration;
+
+ // NOTE: Each frame occupies 20 milliseconds, as recommended
+ // in the official documentation page for FFMPEG.
+ let samples_per_frame = (*rate * 0.02f64) as usize;
+
+ let mut bytes_index = 0usize;
+
+ while frame_timestamp < max_timestamp && bytes_index < bytes.len() {
+ let mut frame = AudioFrameMut::silence(
+ channel_layout,
+ sample_format,
+ *rate as u32,
+ samples_per_frame,
+ )
+ .with_time_base(time_base)
+ .with_pts(frame_timestamp);
+
+ let mut planes = frame.planes_mut();
+
+ let number_of_bytes = std::cmp::min(samples_per_frame, (bytes.len() - bytes_index) / 4);
+
+ for i in 0..number_of_bytes {
+ planes[0].data_mut()[i * 4] = bytes[bytes_index + 4 * i];
+ planes[0].data_mut()[i * 4 + 1] = bytes[bytes_index + 4 * i + 1];
+ planes[0].data_mut()[i * 4 + 2] = bytes[bytes_index + 4 * i + 2];
+ planes[0].data_mut()[i * 4 + 3] = bytes[bytes_index + 4 * i + 3];
+ planes[1].data_mut()[i * 4] = bytes[bytes_index + 4 * i];
+ planes[1].data_mut()[i * 4 + 1] = bytes[bytes_index + 4 * i + 1];
+ planes[1].data_mut()[i * 4 + 2] = bytes[bytes_index + 4 * i + 2];
+ planes[1].data_mut()[i * 4 + 3] = bytes[bytes_index + 4 * i + 3];
+ }
+
+ let frame = frame.freeze();
+
+ encoder.push(frame)?;
+
+ while let Some(packet) = encoder.take()? {
+ muxer.push(packet.with_stream_index(0))?;
+ }
+
+ frame_idx += 1;
+
+ frame_timestamp = Timestamp::new(frame_idx, time_base);
+
+ bytes_index += samples_per_frame * 4;
+ }
+
+ encoder.flush()?;
+
+ while let Some(packet) = encoder.take()? {
+ muxer.push(packet.with_stream_index(0))?;
+ }
+
+ muxer.flush()?;
+
+ Ok(())
+ }
+}
+
/// Stateless marker type whose [`Output`] impl saves a wave as an AAC file;
/// construct it with `AACOutput::default()`.
#[derive(Default)]
pub struct AACOutput {}
+
+impl Output for AACOutput {
+ fn save(&self, data: Wave, rate: Samples, name: &str) -> Result<(), OutputError> {
+ if data.is_empty() {
+ dbg!();
+ return Err(OutputError::EmptyWaves);
+ }
+
+ if std::fs::metadata(name).is_ok() {
+ std::fs::remove_file(name)?;
+ }
+
+ let sample_format: SampleFormat = "fltp".parse().unwrap();
+
+ let channel_layout = ChannelLayout::from_channels(2u32).unwrap();
+
+ let encoder_builder = AudioEncoder::builder("aac")?;
+
+ let time_base = TimeBase::new(1, 50);
+
+ let duration = Duration::from_nanos((data.len() * *rate as usize) as u64);
+
+ let mut encoder = encoder_builder
+ .sample_rate(*rate as u32)
+ .sample_format(sample_format)
+ .channel_layout(channel_layout)
+ .bit_rate(96000u64)
+ .time_base(time_base)
+ .build()?;
+
+ let codec_parameters = encoder.codec_parameters().into();
+
+ let mut muxer = open_output(name, &[codec_parameters])?;
+
+ // NOTE: FFMPEG assumes the bytes to be laid out in the
+ // platform-natural endianness, so this is actually portable,
+ // in the sense that this must be used with FFMPEG, which
+ // knows how to deal with this endianness.
+ let bytes: Vec<u8> = data
+ .iter()
+ .flat_map(|pulse| (**pulse as f32).to_ne_bytes())
+ .collect();
+
+ let mut frame_idx = 0;
+ let mut frame_timestamp = Timestamp::new(frame_idx, time_base);
+ let max_timestamp = Timestamp::from_millis(0) + duration;
+
+ // NOTE: Each frame occupies 20 milliseconds, as recommended
+ // in the official documentation page for FFMPEG.
+ let samples_per_frame = (*rate * 0.02f64) as usize;
+
+ let mut bytes_index = 0usize;
+
+ while frame_timestamp < max_timestamp && bytes_index < bytes.len() {
+ let mut frame = AudioFrameMut::silence(
+ channel_layout,
+ sample_format,
+ *rate as u32,
+ samples_per_frame,
+ )
+ .with_time_base(time_base)
+ .with_pts(frame_timestamp);
+
+ let mut planes = frame.planes_mut();
+
+ let number_of_bytes = std::cmp::min(samples_per_frame, (bytes.len() - bytes_index) / 4);
+
+ for i in 0..number_of_bytes {
+ planes[0].data_mut()[i * 4] = bytes[bytes_index + 4 * i];
+ planes[0].data_mut()[i * 4 + 1] = bytes[bytes_index + 4 * i + 1];
+ planes[0].data_mut()[i * 4 + 2] = bytes[bytes_index + 4 * i + 2];
+ planes[0].data_mut()[i * 4 + 3] = bytes[bytes_index + 4 * i + 3];
+ planes[1].data_mut()[i * 4] = bytes[bytes_index + 4 * i];
+ planes[1].data_mut()[i * 4 + 1] = bytes[bytes_index + 4 * i + 1];
+ planes[1].data_mut()[i * 4 + 2] = bytes[bytes_index + 4 * i + 2];
+ planes[1].data_mut()[i * 4 + 3] = bytes[bytes_index + 4 * i + 3];
+ }
+
+ let frame = frame.freeze();
+
+ encoder.push(frame)?;
+
+ while let Some(packet) = encoder.take()? {
+ muxer.push(packet.with_stream_index(0))?;
+ }
+
+ frame_idx += 1;
+
+ frame_timestamp = Timestamp::new(frame_idx, time_base);
+
+ bytes_index += samples_per_frame * 4;
+ }
+
+ encoder.flush()?;
+
+ while let Some(packet) = encoder.take()? {
+ muxer.push(packet.with_stream_index(0))?;
+ }
+
+ muxer.flush()?;
+
+ Ok(())
+ }
+}
+
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test for the ac_ffmpeg API assumptions the outputs rely on:
    /// an Opus encoder can be built with interleaved stereo "flt" at
    /// 48 kHz, the "flt" format is interleaved (not planar), and a silent
    /// frame exposes a writable data plane.
    #[test]
    fn test_ffmpeg_format() {
        // The container format must be inferable from the ".opus" extension,
        // as `open_output` depends on this.
        let _output_format = OutputFormat::guess_from_file_name("test.opus").unwrap();
        let encoder = AudioEncoder::builder("libopus").unwrap();
        let format: SampleFormat = "flt".parse().unwrap();
        let channel_layout = ChannelLayout::from_channels(2u32).unwrap();

        // Building the encoder validates the parameter combination used by
        // `OpusOutput::save` (the result itself is not needed here).
        encoder
            .sample_rate(48000u32)
            .sample_format(format)
            .channel_layout(channel_layout)
            .bit_rate(96000u64)
            .time_base(TimeBase::new(1, 25))
            .build()
            .unwrap();

        // "flt" is interleaved, so all channels live in a single plane.
        assert!(!format.is_planar());

        // 960 samples = 20 ms at 48 kHz, the frame size used by the outputs.
        let mut frame = AudioFrameMut::silence(channel_layout, format, 48000u32, 960);

        let mut planes = frame.planes_mut();

        println!("number of planes: {}", planes.len());

        let data = planes[0].data_mut();

        println!("frame size = {}", data.len());

        // Confirm the silent frame's buffer is directly writable.
        data[0] = 16u8;

        println!("Successfully changed a byte of the silent frame!");
    }
}