From e954df3f896bd18494cd27d77b26bbb2005de8a7 Mon Sep 17 00:00:00 2001
From: JSDurand
Date: Wed, 24 Aug 2022 23:54:13 +0800
Subject: First commit

Now the project is in a somewhat complete state, ready for future enhancements.
---
 src/output/ffmpeg_output.rs | 417 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 417 insertions(+)
 create mode 100644 src/output/ffmpeg_output.rs

diff --git a/src/output/ffmpeg_output.rs b/src/output/ffmpeg_output.rs
new file mode 100644
index 0000000..2132394
--- /dev/null
+++ b/src/output/ffmpeg_output.rs
@@ -0,0 +1,417 @@
+//! This file implements the output to save an audio file using ffmpeg
+//! libraries.
+
+use super::*;
+
+#[allow(unused_imports)]
+use std::{fs::File, time::Duration};
+
+#[allow(unused_imports)]
+use ac_ffmpeg::{
+    codec::{
+        audio::{
+            self,
+            frame::{ChannelLayout, SampleFormat},
+            AudioEncoder, AudioFrameMut,
+        },
+        CodecParameters, Encoder,
+    },
+    format::{
+        io::IO,
+        muxer::{Muxer, OutputFormat},
+    },
+    time::{TimeBase, Timestamp},
+    Error as FFError,
+};
+
+impl From<FFError> for OutputError {
+    fn from(ff: FFError) -> Self {
+        OutputError::FFMpeg(ff.to_string())
+    }
+}
+
+/// Open a given output file.
+fn open_output(path: &str, elementary_streams: &[CodecParameters]) -> Result<Muxer<File>, FFError> {
+    let output_format = OutputFormat::guess_from_file_name(path)
+        .ok_or_else(|| FFError::new(format!("unable to guess output format for file: {}", path)))?;
+
+    let output = std::fs::OpenOptions::new()
+        .create(true)
+        .write(true)
+        .open(path)
+        .map_err(|err| FFError::new(format!("unable to create output file {}: {}", path, err)))?;
+
+    let io = IO::from_seekable_write_stream(output);
+
+    let mut muxer_builder = Muxer::builder();
+
+    for codec_parameters in elementary_streams {
+        muxer_builder.add_stream(codec_parameters)?;
+    }
+
+    muxer_builder.build(io, output_format)
+}
+
+/// A dummy struct to hold a generic function for saving waves.
+#[derive(Default)]
+pub struct OpusOutput {}
+
+impl Output for OpusOutput {
+    fn save(&self, data: Wave, rate: Samples, name: &str) -> Result<(), OutputError> {
+        if data.is_empty() {
+            dbg!();
+            return Err(OutputError::EmptyWaves);
+        }
+
+        if std::fs::metadata(name).is_ok() {
+            std::fs::remove_file(name)?;
+        }
+
+        let sample_format: SampleFormat = "flt".parse().unwrap();
+
+        let channel_layout = ChannelLayout::from_channels(2u32).unwrap();
+
+        let encoder_builder = AudioEncoder::builder("libopus")?;
+
+        let time_base = TimeBase::new(1, 50);
+
+        let duration = Duration::from_nanos((data.len() * *rate as usize) as u64);
+
+        let mut encoder = encoder_builder
+            .sample_rate(*rate as u32)
+            .sample_format(sample_format)
+            .channel_layout(channel_layout)
+            .bit_rate(96000u64)
+            .time_base(time_base)
+            .build()?;
+
+        let codec_parameters = encoder.codec_parameters().into();
+
+        let mut muxer = open_output(name, &[codec_parameters])?;
+
+        // NOTE: FFMPEG assumes the bytes to be laid out in the
+        // platform-natural endianness, so this is actually portable,
+        // in the sense that this must be used with FFMPEG, which
+        // knows how to deal with this endianness.
+        let bytes: Vec<u8> = data
+            .iter()
+            .flat_map(|pulse| (**pulse as f32).to_ne_bytes())
+            .collect();
+
+        let mut frame_idx = 0;
+        let mut frame_timestamp = Timestamp::new(frame_idx, time_base);
+        let max_timestamp = Timestamp::from_millis(0) + duration;
+
+        // NOTE: Each frame occupies 20 milliseconds, as recommended
+        // in the official documentation page for FFMPEG.
+        let samples_per_frame = (*rate * 0.02f64) as usize;
+
+        let mut bytes_index = 0usize;
+
+        while frame_timestamp < max_timestamp && bytes_index < bytes.len() {
+            let mut frame = AudioFrameMut::silence(
+                channel_layout,
+                sample_format,
+                *rate as u32,
+                samples_per_frame,
+            )
+            .with_time_base(time_base)
+            .with_pts(frame_timestamp);
+
+            let mut planes = frame.planes_mut();
+
+            let data = planes[0].data_mut();
+
+            let number_of_bytes = std::cmp::min(samples_per_frame, (bytes.len() - bytes_index) / 4);
+
+            for i in 0..number_of_bytes {
+                data[i * 8] = bytes[bytes_index + 4 * i];
+                data[i * 8 + 1] = bytes[bytes_index + 4 * i + 1];
+                data[i * 8 + 2] = bytes[bytes_index + 4 * i + 2];
+                data[i * 8 + 3] = bytes[bytes_index + 4 * i + 3];
+                data[i * 8 + 4] = bytes[bytes_index + 4 * i];
+                data[i * 8 + 5] = bytes[bytes_index + 4 * i + 1];
+                data[i * 8 + 6] = bytes[bytes_index + 4 * i + 2];
+                data[i * 8 + 7] = bytes[bytes_index + 4 * i + 3];
+            }
+
+            let frame = frame.freeze();
+
+            encoder.push(frame)?;
+
+            while let Some(packet) = encoder.take()? {
+                muxer.push(packet.with_stream_index(0))?;
+            }
+
+            frame_idx += 1;
+
+            frame_timestamp = Timestamp::new(frame_idx, time_base);
+
+            bytes_index += samples_per_frame * 4;
+        }
+
+        encoder.flush()?;
+
+        while let Some(packet) = encoder.take()? {
+            muxer.push(packet.with_stream_index(0))?;
+        }
+
+        muxer.flush()?;
+
+        Ok(())
+    }
+}
+
+/// A dummy struct to hold a generic function for saving waves.
+#[derive(Default)]
+pub struct MP3Output {}
+
+impl Output for MP3Output {
+    fn save(&self, data: Wave, rate: Samples, name: &str) -> Result<(), OutputError> {
+        if data.is_empty() {
+            dbg!();
+            return Err(OutputError::EmptyWaves);
+        }
+
+        if std::fs::metadata(name).is_ok() {
+            std::fs::remove_file(name)?;
+        }
+
+        let sample_format: SampleFormat = "fltp".parse().unwrap();
+
+        let channel_layout = ChannelLayout::from_channels(2u32).unwrap();
+
+        let encoder_builder = AudioEncoder::builder("libmp3lame")?;
+
+        let time_base = TimeBase::new(1, 50);
+
+        let duration = Duration::from_nanos((data.len() * *rate as usize) as u64);
+
+        let mut encoder = encoder_builder
+            .sample_rate(*rate as u32)
+            .sample_format(sample_format)
+            .channel_layout(channel_layout)
+            .bit_rate(96000u64)
+            .time_base(time_base)
+            .build()?;
+
+        let codec_parameters = encoder.codec_parameters().into();
+
+        let mut muxer = open_output(name, &[codec_parameters])?;
+
+        // NOTE: FFMPEG assumes the bytes to be laid out in the
+        // platform-natural endianness, so this is actually portable,
+        // in the sense that this must be used with FFMPEG, which
+        // knows how to deal with this endianness.
+        let bytes: Vec<u8> = data
+            .iter()
+            .flat_map(|pulse| (**pulse as f32).to_ne_bytes())
+            .collect();
+
+        let mut frame_idx = 0;
+        let mut frame_timestamp = Timestamp::new(frame_idx, time_base);
+        let max_timestamp = Timestamp::from_millis(0) + duration;
+
+        // NOTE: Each frame occupies 20 milliseconds, as recommended
+        // in the official documentation page for FFMPEG.
+        let samples_per_frame = (*rate * 0.02f64) as usize;
+
+        let mut bytes_index = 0usize;
+
+        while frame_timestamp < max_timestamp && bytes_index < bytes.len() {
+            let mut frame = AudioFrameMut::silence(
+                channel_layout,
+                sample_format,
+                *rate as u32,
+                samples_per_frame,
+            )
+            .with_time_base(time_base)
+            .with_pts(frame_timestamp);
+
+            let mut planes = frame.planes_mut();
+
+            let number_of_bytes = std::cmp::min(samples_per_frame, (bytes.len() - bytes_index) / 4);
+
+            for i in 0..number_of_bytes {
+                planes[0].data_mut()[i * 4] = bytes[bytes_index + 4 * i];
+                planes[0].data_mut()[i * 4 + 1] = bytes[bytes_index + 4 * i + 1];
+                planes[0].data_mut()[i * 4 + 2] = bytes[bytes_index + 4 * i + 2];
+                planes[0].data_mut()[i * 4 + 3] = bytes[bytes_index + 4 * i + 3];
+                planes[1].data_mut()[i * 4] = bytes[bytes_index + 4 * i];
+                planes[1].data_mut()[i * 4 + 1] = bytes[bytes_index + 4 * i + 1];
+                planes[1].data_mut()[i * 4 + 2] = bytes[bytes_index + 4 * i + 2];
+                planes[1].data_mut()[i * 4 + 3] = bytes[bytes_index + 4 * i + 3];
+            }
+
+            let frame = frame.freeze();
+
+            encoder.push(frame)?;
+
+            while let Some(packet) = encoder.take()? {
+                muxer.push(packet.with_stream_index(0))?;
+            }
+
+            frame_idx += 1;
+
+            frame_timestamp = Timestamp::new(frame_idx, time_base);
+
+            bytes_index += samples_per_frame * 4;
+        }
+
+        encoder.flush()?;
+
+        while let Some(packet) = encoder.take()? {
+            muxer.push(packet.with_stream_index(0))?;
+        }
+
+        muxer.flush()?;
+
+        Ok(())
+    }
+}
+
+/// A dummy struct to hold a generic function for saving waves.
+#[derive(Default)]
+pub struct AACOutput {}
+
+impl Output for AACOutput {
+    fn save(&self, data: Wave, rate: Samples, name: &str) -> Result<(), OutputError> {
+        if data.is_empty() {
+            dbg!();
+            return Err(OutputError::EmptyWaves);
+        }
+
+        if std::fs::metadata(name).is_ok() {
+            std::fs::remove_file(name)?;
+        }
+
+        let sample_format: SampleFormat = "fltp".parse().unwrap();
+
+        let channel_layout = ChannelLayout::from_channels(2u32).unwrap();
+
+        let encoder_builder = AudioEncoder::builder("aac")?;
+
+        let time_base = TimeBase::new(1, 50);
+
+        let duration = Duration::from_nanos((data.len() * *rate as usize) as u64);
+
+        let mut encoder = encoder_builder
+            .sample_rate(*rate as u32)
+            .sample_format(sample_format)
+            .channel_layout(channel_layout)
+            .bit_rate(96000u64)
+            .time_base(time_base)
+            .build()?;
+
+        let codec_parameters = encoder.codec_parameters().into();
+
+        let mut muxer = open_output(name, &[codec_parameters])?;
+
+        // NOTE: FFMPEG assumes the bytes to be laid out in the
+        // platform-natural endianness, so this is actually portable,
+        // in the sense that this must be used with FFMPEG, which
+        // knows how to deal with this endianness.
+        let bytes: Vec<u8> = data
+            .iter()
+            .flat_map(|pulse| (**pulse as f32).to_ne_bytes())
+            .collect();
+
+        let mut frame_idx = 0;
+        let mut frame_timestamp = Timestamp::new(frame_idx, time_base);
+        let max_timestamp = Timestamp::from_millis(0) + duration;
+
+        // NOTE: Each frame occupies 20 milliseconds, as recommended
+        // in the official documentation page for FFMPEG.
+        let samples_per_frame = (*rate * 0.02f64) as usize;
+
+        let mut bytes_index = 0usize;
+
+        while frame_timestamp < max_timestamp && bytes_index < bytes.len() {
+            let mut frame = AudioFrameMut::silence(
+                channel_layout,
+                sample_format,
+                *rate as u32,
+                samples_per_frame,
+            )
+            .with_time_base(time_base)
+            .with_pts(frame_timestamp);
+
+            let mut planes = frame.planes_mut();
+
+            let number_of_bytes = std::cmp::min(samples_per_frame, (bytes.len() - bytes_index) / 4);
+
+            for i in 0..number_of_bytes {
+                planes[0].data_mut()[i * 4] = bytes[bytes_index + 4 * i];
+                planes[0].data_mut()[i * 4 + 1] = bytes[bytes_index + 4 * i + 1];
+                planes[0].data_mut()[i * 4 + 2] = bytes[bytes_index + 4 * i + 2];
+                planes[0].data_mut()[i * 4 + 3] = bytes[bytes_index + 4 * i + 3];
+                planes[1].data_mut()[i * 4] = bytes[bytes_index + 4 * i];
+                planes[1].data_mut()[i * 4 + 1] = bytes[bytes_index + 4 * i + 1];
+                planes[1].data_mut()[i * 4 + 2] = bytes[bytes_index + 4 * i + 2];
+                planes[1].data_mut()[i * 4 + 3] = bytes[bytes_index + 4 * i + 3];
+            }
+
+            let frame = frame.freeze();
+
+            encoder.push(frame)?;
+
+            while let Some(packet) = encoder.take()? {
+                muxer.push(packet.with_stream_index(0))?;
+            }
+
+            frame_idx += 1;
+
+            frame_timestamp = Timestamp::new(frame_idx, time_base);
+
+            bytes_index += samples_per_frame * 4;
+        }
+
+        encoder.flush()?;
+
+        while let Some(packet) = encoder.take()? {
+            muxer.push(packet.with_stream_index(0))?;
+        }
+
+        muxer.flush()?;
+
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_ffmpeg_format() {
+        let _output_format = OutputFormat::guess_from_file_name("test.opus").unwrap();
+        let encoder = AudioEncoder::builder("libopus").unwrap();
+        let format: SampleFormat = "flt".parse().unwrap();
+        let channel_layout = ChannelLayout::from_channels(2u32).unwrap();
+
+        encoder
+            .sample_rate(48000u32)
+            .sample_format(format)
+            .channel_layout(channel_layout)
+            .bit_rate(96000u64)
+            .time_base(TimeBase::new(1, 25))
+            .build()
+            .unwrap();
+
+        assert!(!format.is_planar());
+
+        let mut frame = AudioFrameMut::silence(channel_layout, format, 48000u32, 960);
+
+        let mut planes = frame.planes_mut();
+
+        println!("number of planes: {}", planes.len());
+
+        let data = planes[0].data_mut();
+
+        println!("frame size = {}", data.len());
+
+        data[0] = 16u8;
+
+        println!("Successfully changed a byte of the silent frame!");
+    }
+}
-- 
cgit v1.2.3-18-g5258