//! This file implements the output to save an audio file using ffmpeg
//! libraries.

use super::*;

#[allow(unused_imports)]
use std::{fs::File, time::Duration};

#[allow(unused_imports)]
use ac_ffmpeg::{
    codec::{
        audio::{
            self,
            frame::{ChannelLayout, SampleFormat},
            AudioEncoder, AudioFrameMut,
        },
        CodecParameters, Encoder,
    },
    format::{
        io::IO,
        muxer::{Muxer, OutputFormat},
    },
    time::{TimeBase, Timestamp},
    Error as FFError,
};

/// Convert an ffmpeg error into our output error type, keeping only
/// the rendered message.
impl From<FFError> for OutputError {
    fn from(ff: FFError) -> Self {
        OutputError::FFMpeg(ff.to_string())
    }
}

/// Open a given output file.
///
/// Guesses the container format from the file name, creates (or
/// overwrites) the file, and registers one stream per entry of
/// `elementary_streams`.
fn open_output(path: &str, elementary_streams: &[CodecParameters]) -> Result<Muxer<File>, FFError> {
    let output_format = OutputFormat::guess_from_file_name(path)
        .ok_or_else(|| FFError::new(format!("unable to guess output format for file: {}", path)))?;

    let output = std::fs::OpenOptions::new()
        .create(true)
        .write(true)
        .open(path)
        .map_err(|err| FFError::new(format!("unable to create output file {}: {}", path, err)))?;

    let io = IO::from_seekable_write_stream(output);

    let mut muxer_builder = Muxer::builder();
    for codec_parameters in elementary_streams {
        muxer_builder.add_stream(codec_parameters)?;
    }

    muxer_builder.build(io, output_format)
}

/// A dummy struct to hold a generic function for saving waves.
#[derive(Default)]
pub struct OpusOutput {}

impl Output for OpusOutput {
    /// Save `data` as an Opus-encoded audio file at `name`.
    ///
    /// The mono input wave is duplicated into both channels of a
    /// stereo stream encoded with `libopus`, using the interleaved
    /// (`flt`) 32-bit float sample format.
    ///
    /// # Errors
    /// Returns `OutputError::EmptyWaves` for empty input, and wraps
    /// any I/O or ffmpeg failure into `OutputError`.
    fn save(&self, data: Wave, rate: Samples, name: &str) -> Result<(), OutputError> {
        if data.is_empty() {
            return Err(OutputError::EmptyWaves);
        }
        // Overwrite any previous file with the same name.
        if std::fs::metadata(name).is_ok() {
            std::fs::remove_file(name)?;
        }

        // "flt" is packed (interleaved) 32-bit float: both channels in
        // a single plane.
        let sample_format: SampleFormat = "flt".parse().unwrap();
        let channel_layout = ChannelLayout::from_channels(2u32).unwrap();
        let encoder_builder = AudioEncoder::builder("libopus")?;
        // NOTE: Each frame occupies 20 milliseconds (1/50 s), as
        // recommended in the official documentation page for FFMPEG.
        let time_base = TimeBase::new(1, 50);
        // BUGFIX: the stream duration is samples / sample-rate seconds;
        // the previous `len * rate` nanosecond formula was dimensionally
        // wrong (it truncated audio for sample rates below ~31.6 kHz and
        // overshot above it).
        let duration = Duration::from_secs_f64(data.len() as f64 / *rate);
        let mut encoder = encoder_builder
            .sample_rate(*rate as u32)
            .sample_format(sample_format)
            .channel_layout(channel_layout)
            .bit_rate(96000u64)
            .time_base(time_base)
            .build()?;
        let codec_parameters = encoder.codec_parameters().into();
        let mut muxer = open_output(name, &[codec_parameters])?;

        // NOTE: FFMPEG assumes the bytes to be laid out in the
        // platform-natural endianness, so this is actually portable,
        // in the sense that this must be used with FFMPEG, which
        // knows how to deal with this endianness.
        let bytes: Vec<u8> = data
            .iter()
            .flat_map(|pulse| (**pulse as f32).to_ne_bytes())
            .collect();

        let mut frame_idx = 0;
        let mut frame_timestamp = Timestamp::new(frame_idx, time_base);
        let max_timestamp = Timestamp::from_millis(0) + duration;
        // 20 ms worth of samples per frame (see time_base above).
        let samples_per_frame = (*rate * 0.02f64) as usize;
        let mut bytes_index = 0usize;

        while frame_timestamp < max_timestamp && bytes_index < bytes.len() {
            let mut frame = AudioFrameMut::silence(
                channel_layout,
                sample_format,
                *rate as u32,
                samples_per_frame,
            )
            .with_time_base(time_base)
            .with_pts(frame_timestamp);

            let mut planes = frame.planes_mut();
            // Interleaved stereo: one plane, 8 bytes (two f32 samples)
            // per frame position.
            let plane = planes[0].data_mut();
            // The last frame may cover fewer samples than a full one.
            let samples_in_frame =
                std::cmp::min(samples_per_frame, (bytes.len() - bytes_index) / 4);
            for i in 0..samples_in_frame {
                // Duplicate the mono sample into both channels.
                for byte in 0..4 {
                    let b = bytes[bytes_index + 4 * i + byte];
                    plane[i * 8 + byte] = b;
                    plane[i * 8 + 4 + byte] = b;
                }
            }

            let frame = frame.freeze();
            encoder.push(frame)?;
            while let Some(packet) = encoder.take()? {
                muxer.push(packet.with_stream_index(0))?;
            }

            frame_idx += 1;
            frame_timestamp = Timestamp::new(frame_idx, time_base);
            bytes_index += samples_per_frame * 4;
        }

        // Drain any packets still buffered in the encoder, then
        // finalize the container.
        encoder.flush()?;
        while let Some(packet) = encoder.take()? {
            muxer.push(packet.with_stream_index(0))?;
        }
        muxer.flush()?;
        Ok(())
    }
}

/// A dummy struct to hold a generic function for saving waves.
#[derive(Default)]
pub struct MP3Output {}

impl Output for MP3Output {
    /// Save `data` as an MP3-encoded audio file at `name`.
    ///
    /// The mono input wave is duplicated into both channels of a
    /// stereo stream encoded with `libmp3lame`, using the planar
    /// (`fltp`) 32-bit float sample format: one plane per channel.
    ///
    /// # Errors
    /// Returns `OutputError::EmptyWaves` for empty input, and wraps
    /// any I/O or ffmpeg failure into `OutputError`.
    fn save(&self, data: Wave, rate: Samples, name: &str) -> Result<(), OutputError> {
        if data.is_empty() {
            return Err(OutputError::EmptyWaves);
        }
        // Overwrite any previous file with the same name.
        if std::fs::metadata(name).is_ok() {
            std::fs::remove_file(name)?;
        }

        // "fltp" is planar 32-bit float: each channel in its own plane.
        let sample_format: SampleFormat = "fltp".parse().unwrap();
        let channel_layout = ChannelLayout::from_channels(2u32).unwrap();
        let encoder_builder = AudioEncoder::builder("libmp3lame")?;
        // NOTE: Each frame occupies 20 milliseconds (1/50 s), as
        // recommended in the official documentation page for FFMPEG.
        let time_base = TimeBase::new(1, 50);
        // BUGFIX: the stream duration is samples / sample-rate seconds;
        // the previous `len * rate` nanosecond formula was dimensionally
        // wrong (it truncated audio for sample rates below ~31.6 kHz and
        // overshot above it).
        let duration = Duration::from_secs_f64(data.len() as f64 / *rate);
        let mut encoder = encoder_builder
            .sample_rate(*rate as u32)
            .sample_format(sample_format)
            .channel_layout(channel_layout)
            .bit_rate(96000u64)
            .time_base(time_base)
            .build()?;
        let codec_parameters = encoder.codec_parameters().into();
        let mut muxer = open_output(name, &[codec_parameters])?;

        // NOTE: FFMPEG assumes the bytes to be laid out in the
        // platform-natural endianness, so this is actually portable,
        // in the sense that this must be used with FFMPEG, which
        // knows how to deal with this endianness.
        let bytes: Vec<u8> = data
            .iter()
            .flat_map(|pulse| (**pulse as f32).to_ne_bytes())
            .collect();

        let mut frame_idx = 0;
        let mut frame_timestamp = Timestamp::new(frame_idx, time_base);
        let max_timestamp = Timestamp::from_millis(0) + duration;
        // 20 ms worth of samples per frame (see time_base above).
        let samples_per_frame = (*rate * 0.02f64) as usize;
        let mut bytes_index = 0usize;

        while frame_timestamp < max_timestamp && bytes_index < bytes.len() {
            let mut frame = AudioFrameMut::silence(
                channel_layout,
                sample_format,
                *rate as u32,
                samples_per_frame,
            )
            .with_time_base(time_base)
            .with_pts(frame_timestamp);

            let mut planes = frame.planes_mut();
            // The last frame may cover fewer samples than a full one.
            let samples_in_frame =
                std::cmp::min(samples_per_frame, (bytes.len() - bytes_index) / 4);
            // Duplicate the mono samples into both channel planes,
            // borrowing each plane once instead of re-borrowing per byte.
            for channel in 0..2 {
                let plane = planes[channel].data_mut();
                for i in 0..samples_in_frame {
                    for byte in 0..4 {
                        plane[i * 4 + byte] = bytes[bytes_index + 4 * i + byte];
                    }
                }
            }

            let frame = frame.freeze();
            encoder.push(frame)?;
            while let Some(packet) = encoder.take()? {
                muxer.push(packet.with_stream_index(0))?;
            }

            frame_idx += 1;
            frame_timestamp = Timestamp::new(frame_idx, time_base);
            bytes_index += samples_per_frame * 4;
        }

        // Drain any packets still buffered in the encoder, then
        // finalize the container.
        encoder.flush()?;
        while let Some(packet) = encoder.take()? {
            muxer.push(packet.with_stream_index(0))?;
        }
        muxer.flush()?;
        Ok(())
    }
}

/// A dummy struct to hold a generic function for saving waves.
#[derive(Default)] pub struct AACOutput {} impl Output for AACOutput { fn save(&self, data: Wave, rate: Samples, name: &str) -> Result<(), OutputError> { if data.is_empty() { dbg!(); return Err(OutputError::EmptyWaves); } if std::fs::metadata(name).is_ok() { std::fs::remove_file(name)?; } let sample_format: SampleFormat = "fltp".parse().unwrap(); let channel_layout = ChannelLayout::from_channels(2u32).unwrap(); let encoder_builder = AudioEncoder::builder("aac")?; let time_base = TimeBase::new(1, 50); let duration = Duration::from_nanos((data.len() * *rate as usize) as u64); let mut encoder = encoder_builder .sample_rate(*rate as u32) .sample_format(sample_format) .channel_layout(channel_layout) .bit_rate(96000u64) .time_base(time_base) .build()?; let codec_parameters = encoder.codec_parameters().into(); let mut muxer = open_output(name, &[codec_parameters])?; // NOTE: FFMPEG assumes the bytes to be laid out in the // platform-natural endianness, so this is actually portable, // in the sense that this must be used with FFMPEG, which // knows how to deal with this endianness. let bytes: Vec = data .iter() .flat_map(|pulse| (**pulse as f32).to_ne_bytes()) .collect(); let mut frame_idx = 0; let mut frame_timestamp = Timestamp::new(frame_idx, time_base); let max_timestamp = Timestamp::from_millis(0) + duration; // NOTE: Each frame occupies 20 milliseconds, as recommended // in the official documentation page for FFMPEG. 
let samples_per_frame = (*rate * 0.02f64) as usize; let mut bytes_index = 0usize; while frame_timestamp < max_timestamp && bytes_index < bytes.len() { let mut frame = AudioFrameMut::silence( channel_layout, sample_format, *rate as u32, samples_per_frame, ) .with_time_base(time_base) .with_pts(frame_timestamp); let mut planes = frame.planes_mut(); let number_of_bytes = std::cmp::min(samples_per_frame, (bytes.len() - bytes_index) / 4); for i in 0..number_of_bytes { planes[0].data_mut()[i * 4] = bytes[bytes_index + 4 * i]; planes[0].data_mut()[i * 4 + 1] = bytes[bytes_index + 4 * i + 1]; planes[0].data_mut()[i * 4 + 2] = bytes[bytes_index + 4 * i + 2]; planes[0].data_mut()[i * 4 + 3] = bytes[bytes_index + 4 * i + 3]; planes[1].data_mut()[i * 4] = bytes[bytes_index + 4 * i]; planes[1].data_mut()[i * 4 + 1] = bytes[bytes_index + 4 * i + 1]; planes[1].data_mut()[i * 4 + 2] = bytes[bytes_index + 4 * i + 2]; planes[1].data_mut()[i * 4 + 3] = bytes[bytes_index + 4 * i + 3]; } let frame = frame.freeze(); encoder.push(frame)?; while let Some(packet) = encoder.take()? { muxer.push(packet.with_stream_index(0))?; } frame_idx += 1; frame_timestamp = Timestamp::new(frame_idx, time_base); bytes_index += samples_per_frame * 4; } encoder.flush()?; while let Some(packet) = encoder.take()? 
{ muxer.push(packet.with_stream_index(0))?; } muxer.flush()?; Ok(()) } } #[cfg(test)] mod tests { use super::*; #[test] fn test_ffmpeg_format() { let _output_format = OutputFormat::guess_from_file_name("test.opus").unwrap(); let encoder = AudioEncoder::builder("libopus").unwrap(); let format: SampleFormat = "flt".parse().unwrap(); let channel_layout = ChannelLayout::from_channels(2u32).unwrap(); encoder .sample_rate(48000u32) .sample_format(format) .channel_layout(channel_layout) .bit_rate(96000u64) .time_base(TimeBase::new(1, 25)) .build() .unwrap(); assert!(!format.is_planar()); let mut frame = AudioFrameMut::silence(channel_layout, format, 48000u32, 960); let mut planes = frame.planes_mut(); println!("number of planes: {}", planes.len()); let data = planes[0].data_mut(); println!("frame size = {}", data.len()); data[0] = 16u8; println!("Successfully changed a byte of the silent frame!"); } }