Audio encoding using avcodec_fill_audio_frame() and memory leaks - ffmpeg

As a part of encoding decoded audio packets, I'm using avcodec_fill_audio_frame(). I'm passing allocated AVFrame pointer to along with buffer containing the decoded samples and other parameters number of channels, sample format, buffer size. Though the encoding is working fine I'm not able to completely eliminate the memory leaks. I've taken care of most of things but still I'm not able detect the leakage.
Below is the function which I'm using for encoding. Please suggest something.
AudioSample contains decoded data and it is completely managed in different class(free in class destructor). I'm freeing the AVFrame in FFmpegEncoder destructor and AVPacket is freed every time using av_free_packet() with av_packet_destruct enabled. What more do I need to free?
void FfmpegEncoder::WriteAudioSample(AudioSample *audS)
{
int num_audio_frame = 0;
AVCodecContext *c = NULL;
// AVFrame *frame;
AVPacket pkt;
av_init_packet(&pkt);
pkt.destruct = av_destruct_packet;
pkt.data = NULL;
pkt.size = 0;
int ret = 0, got_packet = 0;
c = m_out_aud_strm->codec;
static int64_t aud_pts_in = -1;
if((audS != NULL) && (audS->GetSampleLength() > 0) )
{
int byte_per_sample = av_get_bytes_per_sample(c->sample_fmt);
PRINT_VAL("Byte Per Sample ", byte_per_sample)
m_frame->nb_samples = (audS->GetSampleLength())/(c->channels*av_get_bytes_per_sample(c->sample_fmt));
if(m_frame->nb_samples == c->frame_size)
{
#if 1
if(m_need_resample && (c->channels >= 2))
{
uint8_t * t_buff1 = new uint8_t[audS->GetSampleLength()];
if(t_buff1 != NULL)
{
for(int64_t i = 0; i< m_frame->nb_samples; i++)
{
memcpy(t_buff1 + i*byte_per_sample, (uint8_t*)((uint8_t*)audS->GetAudioSampleData() + i*byte_per_sample*c->channels), byte_per_sample);
memcpy(t_buff1 + (audS->GetSampleLength())/2 + i*byte_per_sample, (uint8_t*)((uint8_t*)audS->GetAudioSampleData() + i*byte_per_sample*c->channels+ byte_per_sample), byte_per_sample);
}
audS->FillAudioSample(t_buff1, audS->GetSampleLength());
delete[] t_buff1;
}
}
#endif
ret = avcodec_fill_audio_frame(m_frame, c->channels, c->sample_fmt, (uint8_t*)audS->GetAudioSampleData(),m_frame->nb_samples*byte_per_sample*c->channels, 0);
//ret = avcodec_fill_audio_frame(&frame, c->channels, c->sample_fmt, t_buff,frame.nb_samples*byte_per_sample*c->channels, 0);
if(ret != 0)
{
PRINT_MSG("Avcodec Fill Audio Failed ")
}
else
{
got_packet = 0;
ret = avcodec_encode_audio2(c, &pkt, m_frame, &got_packet);
if(ret < 0 || got_packet == 0)
{
PRINT_MSG("failed to encode audio ")
}
else
{
PRINT_MSG("Audio Packet Encoded ");
aud_pts_in++;
pkt.pts = aud_pts_in;
pkt.dts = pkt.pts;
pkt.stream_index = m_out_aud_strm->index;
ret = av_interleaved_write_frame(oc, &pkt);
if(ret != 0)
{
PRINT_MSG("Error Write Audio PKT ")
}
else
{
PRINT_MSG("Audio PKT Writen ")
}
}
}
}
avcodec_flush_buffers(c);
// avcodec_free_frame(&frame);
}
av_free_packet(&pkt);
}
Thanks,
Pradeep

//================== SEND AUDIO OUTPUT =======================
void AVOutputStream::sendAudioOutput (AVFrame* inputFrame)
{
AVCodecContext *codecCtx = pOutputAudioStream->codec;
// set source data variables
sourceNumberOfChannels = inputFrame->channels;
sourceChannelLayout = inputFrame->channel_layout;
sourceSampleRate = inputFrame->sample_rate;
_sourceSampleFormat = (AVSampleFormat)inputFrame->format;
sourceNumberOfSamples = inputFrame->nb_samples;
// set destination data variables
destinationNumberOfChannels = codecCtx->channels;
destinationChannelLayout = codecCtx->channel_layout;
destinationSampleRate = codecCtx->sample_rate;
destinationSampleFormat = codecCtx->sample_fmt;//AV_SAMPLE_FMT_FLTP;//EncodecCtx->sample_fmt;
destinationLineSize = 0;
destinationData = NULL;
int returnVal = 0;
if (startDecode == false)
{
startDecode = true;
resamplerCtx = swr_alloc_set_opts(NULL,
destinationChannelLayout,
destinationSampleFormat,
destinationSampleRate,
sourceChannelLayout,
_sourceSampleFormat,
sourceSampleRate,
0,
NULL);
if (resamplerCtx == NULL)
{
std::cout << "Unable to create the resampler context for the audio frame";
isConnected = false;
}
// initialize the resampling context
returnVal = swr_init(resamplerCtx);
if (returnVal < 0)
{
std::cout << "Unable to init the resampler context, error:";
isConnected = false;
}
} //if (startDecode == false)
if (sourceSampleRate != 0)
destinationNumberOfSamples = destinationSampleRate/sourceSampleRate * sourceNumberOfSamples;
// allocate the destination samples buffer
returnVal = av_samples_alloc_array_and_samples(&destinationData,
&destinationLineSize,
destinationNumberOfChannels,
destinationNumberOfSamples,
destinationSampleFormat,
0);
if (returnVal < 0)
{
std::cout << "Unable to allocate destination samples, error";
isConnected = false;
}
// convert to destination format
returnVal = swr_convert(resamplerCtx,
destinationData,
destinationNumberOfSamples,
(const uint8_t **)inputFrame->data, //sourceData,
sourceNumberOfSamples);
if (returnVal < 0)
{
std::cout << "Resampling failed, error \n";
isConnected = false;
}
int bufferSize = av_samples_get_buffer_size(&destinationLineSize,
destinationNumberOfChannels,
destinationNumberOfSamples,
destinationSampleFormat,
0);
//whithout fifo
pOutputAudioFrame = av_frame_alloc();
pOutputAudioFrame->nb_samples = codecCtx->frame_size;//frameNumberOfSamples;
pOutputAudioFrame->format = codecCtx->sample_fmt;
pOutputAudioFrame->channel_layout = codecCtx->channel_layout;
pOutputAudioFrame->channels = codecCtx->channels;
pOutputAudioFrame->sample_rate = codecCtx->sample_rate;
returnVal = avcodec_fill_audio_frame(pOutputAudioFrame,
pOutputAudioFrame->channels,
(AVSampleFormat)pOutputAudioFrame->format,
(const uint8_t *)destinationData[0],
bufferSize,0);
pOutputAudioFrame->pts = inputFrame->pts;
if (returnVal < 0)
{
std::cout << "Unable to fill the audio frame wsampleIndexith captured audio data,error";
isConnected = false;
}
// encode the audio frame, fill a packet for streaming
av_init_packet(&outAudioPacket);
outAudioPacket.data = NULL;
outAudioPacket.size = 0;
outAudioPacket.dts = outAudioPacket.pts = 0;
int gotPacket;
// encoding
returnVal = avcodec_encode_audio2(codecCtx, &outAudioPacket, pOutputAudioFrame, &gotPacket);
// free buffers
av_freep(&destinationData[0]);
av_freep(&destinationData);
av_frame_free(&pOutputAudioFrame);
if (gotPacket)
{
outAudioPacket.stream_index = pOutputAudioStream->index;
outAudioPacket.flags |= AV_PKT_FLAG_KEY;
returnVal = av_interleaved_write_frame(pOutputFormatCtx, &outAudioPacket);
//returnVal = av_write_frame(pOutputFormatCtx, &outAudioPacket);
if (returnVal != 0)
{
std::cout << "Cannot write audio packet \n";
isConnected = false;
}
av_free_packet(&outAudioPacket);
} // if (gotPacket)
}
You can see after resample i free used buffers.
// free buffers
av_freep(&destinationData[0]);
av_freep(&destinationData);

Related

Output black when I decode h264 720p with ffmpeg

First, sorry for my english. When I decode h264 720p in ardrone2.0 my output is black and I cant see anything.
I have try to change the value of pCodecCtx->pix_fmt = AV_PIX_FMT_BGR24; to pCodecCtx->pix_fmt = AV_PIX_FMT_YUV420P; and the value of pCodecCtxH264->pix_fmt = AV_PIX_FMT_BGR24; to pCodecCtxH264->pix_fmt = AV_PIX_FMT_YUV420P; but my program crash. What am I doing wrong?. Thank you, see part of my code:
av_register_all();
avcodec_register_all();
avformat_network_init();
// 1.2. Open video file
if(avformat_open_input(&pFormatCtx, drone_addr, NULL, NULL) != 0) {
mexPrintf("No conecct with Drone");
EndVideo();
return;
}
pCodec = avcodec_find_decoder(AV_CODEC_ID_H264);
pCodecCtx = avcodec_alloc_context3(pCodec);
pCodecCtx->pix_fmt = AV_PIX_FMT_BGR24;
pCodecCtx->skip_frame = AVDISCARD_DEFAULT;
pCodecCtx->error_concealment = FF_EC_GUESS_MVS | FF_EC_DEBLOCK;
pCodecCtx->err_recognition = AV_EF_CAREFUL;
pCodecCtx->skip_loop_filter = AVDISCARD_DEFAULT;
pCodecCtx->workaround_bugs = FF_BUG_AUTODETECT;
pCodecCtx->codec_type = AVMEDIA_TYPE_VIDEO;
pCodecCtx->codec_id = AV_CODEC_ID_H264;
pCodecCtx->skip_idct = AVDISCARD_DEFAULT;
pCodecCtx->width = 1280;
pCodecCtx->height = 720;
pCodecH264 = avcodec_find_decoder(AV_CODEC_ID_H264);
pCodecCtxH264 = avcodec_alloc_context3(pCodecH264);
pCodecCtxH264->pix_fmt = AV_PIX_FMT_BGR24;
pCodecCtxH264->skip_frame = AVDISCARD_DEFAULT;
pCodecCtxH264->error_concealment = FF_EC_GUESS_MVS | FF_EC_DEBLOCK;
pCodecCtxH264->err_recognition = AV_EF_CAREFUL;
pCodecCtxH264->skip_loop_filter = AVDISCARD_DEFAULT;
pCodecCtxH264->workaround_bugs = FF_BUG_AUTODETECT;
pCodecCtxH264->codec_type = AVMEDIA_TYPE_VIDEO;
pCodecCtxH264->codec_id = AV_CODEC_ID_H264;
pCodecCtxH264->skip_idct = AVDISCARD_DEFAULT;
if(avcodec_open2(pCodecCtxH264, pCodecH264, &optionsDict) < 0)
{
mexPrintf("Error opening H264 codec");
return ;
}
pFrame_BGR24 = av_frame_alloc();
if(pFrame_BGR24 == NULL) {
mexPrintf("Could not allocate pFrame_BGR24\n");
return ;
}
// Determine required buffer size and allocate buffer
buffer_BGR24 =
(uint8_t *)av_mallocz(av_image_get_buffer_size(AV_PIX_FMT_BGR24,
pCodecCtx->width, ((pCodecCtx->height == 720) ? 720 : pCodecCtx->height) *
sizeof(uint8_t)*3,1));
// Assign buffer to image planes
av_image_fill_arrays(pFrame_BGR24->data, pFrame_BGR24->linesize,
buffer_BGR24,AV_PIX_FMT_BGR24, pCodecCtx->width, pCodecCtx->height,1);
// format conversion context
pConvertCtx_BGR24 = sws_getContext(pCodecCtx->width, pCodecCtx->height,
pCodecCtx->pix_fmt, pCodecCtx->width, pCodecCtx->height, AV_PIX_FMT_BGR24,
SWS_BILINEAR | SWS_ACCURATE_RND, 0, 0, 0);
// 1.6. get video frames
pFrame = av_frame_alloc();
av_init_packet(&packet);
packet.data = NULL;
packet.size = 0;
}
//Captura un frame
void video::capture(mxArray *plhs[]) {
if(av_read_frame(pFormatCtx, &packet) < 0){
mexPrintf("Error al leer frame");
return;
}
do {
do {
rest = avcodec_send_packet(pCodecCtxH264, &packet);
} while(rest == AVERROR(EAGAIN));
if(rest == AVERROR_EOF || rest == AVERROR(EINVAL)) {
printf("AVERROR(EAGAIN): %d, AVERROR_EOF: %d,
AVERROR(EINVAL): %d\n", AVERROR(EAGAIN), AVERROR_EOF,
AVERROR(EINVAL));
printf("fe_read_frame: Frame getting error (%d)!\n", rest);
return;
}
rest = avcodec_receive_frame(pCodecCtxH264, pFrame);
} while(rest == AVERROR(EAGAIN));
if(rest == AVERROR_EOF || rest == AVERROR(EINVAL)) {
// An error or EOF occured,index break out and return what
// we have so far.
printf("AVERROR(EAGAIN): %d, AVERROR_EOF: %d, AVERROR(EINVAL): %d\n",
AVERROR(EAGAIN), AVERROR_EOF, AVERROR(EINVAL));
printf("fe_read_frame: EOF or some othere decoding error (%d)!\n",
rest);
return;
}
// 2.1.1. convert frame to GRAYSCALE [or BGR] for OpenCV
sws_scale(pConvertCtx_BGR24, (const uint8_t* const*)pFrame->data,
pFrame->linesize, 0,pCodecCtx->height, pFrame_BGR24->data,
pFrame_BGR24->linesize);
//}
av_packet_unref(&packet);
av_init_packet(&packet);
mwSize dims[] = {(pCodecCtx->width)*((pCodecCtx->height == 720) ? 720 :
pCodecCtx->height)*sizeof(uint8_t)*3};
plhs[0] = mxCreateNumericArray(1,dims,mxUINT8_CLASS, mxREAL);
//plhs[0]=mxCreateDoubleMatrix(pCodecCtx->height,pCodecCtx-
>width,mxREAL);
point=mxGetPr(plhs[0]);
memcpy(point, pFrame_BGR24->data[0],(pCodecCtx->width)*(pCodecCtx-
>height)*sizeof(uint8_t)*3);
}
Go to debugger and see your memcpy. I am not sure if it works for all dimensions that you want. Also, there may be more memory problems. For example, try to see what is the value of buffer_BGR24 and pFrame. I bet that sometimes, they do not return right values. Check them out in the code.

FFMPEG AAC encoding causes audio to be lower in pitch

I built a sample application that encodes AAC (from PortAudio) into a MP4 container (no video stream).
The resulting audio is lower in pitch.
#include "stdafx.h"
#include "TestRecording.h"
#include "libffmpeg.h"
TestRecording::TestRecording()
{
}
TestRecording::~TestRecording()
{
}
struct RecordingContext
{
RecordingContext()
{
formatContext = NULL;
audioStream = NULL;
audioFrame = NULL;
audioFrameframeNumber = 0;
}
libffmpeg::AVFormatContext* formatContext;
libffmpeg::AVStream* audioStream;
libffmpeg::AVFrame* audioFrame;
int audioFrameframeNumber;
};
static int AudioRecordCallback(const void *inputBuffer, void *outputBuffer,
unsigned long framesPerBuffer,
const PaStreamCallbackTimeInfo* timeInfo,
PaStreamCallbackFlags statusFlags,
void *userData)
{
RecordingContext* recordingContext = (RecordingContext*)userData;
libffmpeg::avcodec_fill_audio_frame(recordingContext->audioFrame,
recordingContext->audioFrame->channels,
recordingContext->audioStream->codec->sample_fmt,
static_cast<const unsigned char*>(inputBuffer),
(framesPerBuffer * sizeof(float) * recordingContext->audioFrame->channels),
0);
libffmpeg::AVPacket pkt;
libffmpeg::av_init_packet(&pkt);
pkt.data = NULL;
pkt.size = 0;
int gotpacket;
int result = avcodec_encode_audio2(recordingContext->audioStream->codec, &pkt, recordingContext->audioFrame, &gotpacket);
if (result < 0)
{
LOGINT_WITH_MESSAGE(ERROR, result, "Couldn't encode the audio frame to acc");
return paContinue;
}
if (gotpacket)
{
pkt.stream_index = recordingContext->audioStream->index;
recordingContext->audioFrameframeNumber++;
// this codec requires no bitstream filter, just send it to the muxer!
result = libffmpeg::av_write_frame(recordingContext->formatContext, &pkt);
if (result < 0)
{
LOG(ERROR) << "Couldn't write the encoded audio frame";
libffmpeg::av_free_packet(&pkt);
return paContinue;
}
libffmpeg::av_free_packet(&pkt);
}
return paContinue;
}
static bool InitializeRecordingContext(RecordingContext* recordingContext)
{
int result = libffmpeg::avformat_alloc_output_context2(&recordingContext->formatContext, NULL, NULL, "C:\\Users\\Paul\\Desktop\\test.mp4");
if (result < 0)
{
LOGINT_WITH_MESSAGE(ERROR, result, "Couldn't create output format context");
return false;
}
libffmpeg::AVCodec *audioCodec;
audioCodec = libffmpeg::avcodec_find_encoder(libffmpeg::AV_CODEC_ID_AAC);
if (audioCodec == NULL)
{
LOG(ERROR) << "Couldn't find the encoder for AAC";
}
recordingContext->audioStream = libffmpeg::avformat_new_stream(recordingContext->formatContext, audioCodec);
if (!recordingContext->audioStream)
{
LOG(ERROR) << "Couldn't create the audio stream";
return false;
}
recordingContext->audioStream->codec->bit_rate = 64000;
recordingContext->audioStream->codec->sample_fmt = libffmpeg::AV_SAMPLE_FMT_FLTP;
recordingContext->audioStream->codec->sample_rate = 48000;
recordingContext->audioStream->codec->channel_layout = AV_CH_LAYOUT_STEREO;
recordingContext->audioStream->codec->channels = libffmpeg::av_get_channel_layout_nb_channels(recordingContext->audioStream->codec->channel_layout);
recordingContext->audioStream->codecpar->bit_rate = recordingContext->audioStream->codec->bit_rate;
recordingContext->audioStream->codecpar->format = recordingContext->audioStream->codec->sample_fmt;
recordingContext->audioStream->codecpar->sample_rate = recordingContext->audioStream->codec->sample_rate;
recordingContext->audioStream->codecpar->channel_layout = recordingContext->audioStream->codec->channel_layout;
recordingContext->audioStream->codecpar->channels = recordingContext->audioStream->codec->channels;
result = libffmpeg::avcodec_open2(recordingContext->audioStream->codec, audioCodec, NULL);
if (result < 0)
{
LOGINT_WITH_MESSAGE(ERROR, result, "Couldn't open the audio codec");
return false;
}
// create a new frame to store the audio samples
recordingContext->audioFrame = libffmpeg::av_frame_alloc();
if (!recordingContext->audioFrame)
{
LOG(ERROR) << "Couldn't alloce the output audio frame";
return false;
}
recordingContext->audioFrame->nb_samples = recordingContext->audioStream->codec->frame_size;
recordingContext->audioFrame->channel_layout = recordingContext->audioStream->codec->channel_layout;
recordingContext->audioFrame->channels = recordingContext->audioStream->codec->channels;
recordingContext->audioFrame->format = recordingContext->audioStream->codec->sample_fmt;
recordingContext->audioFrame->sample_rate = recordingContext->audioStream->codec->sample_rate;
result = libffmpeg::av_frame_get_buffer(recordingContext->audioFrame, 0);
if (result < 0)
{
LOG(ERROR) << "Coudln't initialize the output audio frame buffer";
return false;
}
// some formats want video_stream headers to be separate
if (!strcmp(recordingContext->formatContext->oformat->name, "mp4") || !strcmp(recordingContext->formatContext->oformat->name, "mov") || !strcmp(recordingContext->formatContext->oformat->name, "3gp"))
{
recordingContext->audioStream->codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
}
// open the ouput file
if (!(recordingContext->formatContext->oformat->flags & AVFMT_NOFILE))
{
result = libffmpeg::avio_open(&recordingContext->formatContext->pb, recordingContext->formatContext->filename, AVIO_FLAG_WRITE);
if (result < 0)
{
LOGINT_WITH_MESSAGE(ERROR, result, "Couldn't open the output file");
return false;
}
}
// write the stream headers
result = libffmpeg::avformat_write_header(recordingContext->formatContext, NULL);
if (result < 0)
{
LOGINT_WITH_MESSAGE(ERROR, result, "Couldn't write the headers to the file");
return false;
}
return true;
}
static bool FinalizeRecordingContext(RecordingContext* recordingContext)
{
int result = 0;
// write the trailing information
if (recordingContext->formatContext->pb)
{
result = libffmpeg::av_write_trailer(recordingContext->formatContext);
if (result < 0)
{
LOGINT_WITH_MESSAGE(ERROR, result, "Couldn't write the trailer information");
return false;
}
}
// close all the codes
for (int i = 0; i < (int)recordingContext->formatContext->nb_streams; i++)
{
result = libffmpeg::avcodec_close(recordingContext->formatContext->streams[i]->codec);
if (result < 0)
{
LOGINT_WITH_MESSAGE(ERROR, result, "Couldn't close the codec");
return false;
}
}
// close the output file
if (recordingContext->formatContext->pb)
{
if (!(recordingContext->formatContext->oformat->flags & AVFMT_NOFILE))
{
result = libffmpeg::avio_close(recordingContext->formatContext->pb);
if (result < 0)
{
LOGINT_WITH_MESSAGE(ERROR, result, "Couldn't close the output file");
return false;
}
}
}
// free the format context and all of its data
libffmpeg::avformat_free_context(recordingContext->formatContext);
recordingContext->formatContext = NULL;
recordingContext->audioStream = NULL;
if (recordingContext->audioFrame)
{
libffmpeg::av_frame_free(&recordingContext->audioFrame);
recordingContext->audioFrame = NULL;
}
return true;
}
int TestRecording::Test()
{
PaError result = paNoError;
result = Pa_Initialize();
if (result != paNoError) LOGINT_WITH_MESSAGE(ERROR, result, "Error initializing audio device framework");
RecordingContext recordingContext;
if (!InitializeRecordingContext(&recordingContext))
{
LOG(ERROR) << "Couldn't start recording file";
return 0;
}
auto defaultDevice = Pa_GetDefaultInputDevice();
auto deviceInfo = Pa_GetDeviceInfo(defaultDevice);
PaStreamParameters inputParameters;
inputParameters.device = defaultDevice;
inputParameters.channelCount = 2;
inputParameters.sampleFormat = paFloat32;
inputParameters.suggestedLatency = deviceInfo->defaultLowInputLatency;
inputParameters.hostApiSpecificStreamInfo = NULL;
PaStream* stream = NULL;
result = Pa_OpenStream(
&stream,
&inputParameters,
NULL,
48000,
1024,
paClipOff,
AudioRecordCallback,
&recordingContext);
if (result != paNoError)LOGINT_WITH_MESSAGE(ERROR, result, "Couldn't open the audio stream");
result = Pa_StartStream(stream);
if (result != paNoError)LOGINT_WITH_MESSAGE(ERROR, result, "Couldn't start the audio stream");
Sleep(1000 * 5);
result = Pa_StopStream(stream);
if (result != paNoError)LOGINT_WITH_MESSAGE(ERROR, result, "Couldn't stop the audio stream");
if (!FinalizeRecordingContext(&recordingContext)) LOG(ERROR) << "Couldn't stop recording file";
result = Pa_CloseStream(stream);
if (result != paNoError)LOGINT_WITH_MESSAGE(ERROR, result, "Couldn't stop the audio stream");
return 0;
}
Here is the stdout, in case it helps.
https://gist.github.com/pauldotknopf/9f24a604ce1f8a081aa68da1bf169e98
Why is the audio lower in pitch? I assume I am overlooking a parameter that needs to be configured between PortAudio and FFMPEG. Is there something super obvious that I am missing?

Recording audio in wav format on windows

I'm trying to record audio in a wav format on windows. However the recorded bytes are always equal to zero. Am I misinterpreting somethings from the windows API?
void RecordSound(void)
{
static int ChannelOn = 0;
int err;
int fp;
static WAVEFORMATEX fmt;
static WAVEHDR hdr;
HWAVEIN hwi = 1;
fmt.nSamplesPerSec = 44100;
fmt.wBitsPerSample = 16;
fmt.wFormatTag= WAVE_FORMAT_PCM;
fmt.nChannels= 2;
fmt.nBlockAlign = fmt.wBitsPerSample*fmt.nChannels/8;
fmt.nAvgBytesPerSec= fmt.nSamplesPerSec*fmt.nBlockAlign;
hdr.lpData = (LPSTR)buf;
hdr.dwBufferLength = BUF_SIZE;
if(!ChannelOn) {
err = waveInOpen(&hwi, 0, &fmt, 0, 0, 0);
printf("\nerr = %x\n", err);
if(!err) {
waveInAddBuffer(hwi, &hdr, 0);
waveInStart(hwi);
puts("Record channel on\n");
ChannelOn = 1;
}
} else {
waveInClose(hwi);
puts("Record channel off\n");
ChannelOn = 0;
puts("Writing wav to test.wav");
fp = open("test.wav" , "w");
write(fp, &fmt, sizeof(WAVEFORMATEX));
write(fp, &buf, hdr.dwBytesRecorded);
memset(&hdr, 0 , sizeof(WAVEHDR));
memset(&fmt, 0 , sizeof(WAVEFORMATEX));
}
}

Why am I getting blips when encoding a sound file using Java JNA?

I have implemented a hello world libavcodec using JNA to generate a wav file containing a pure 440Hz sine wave. But when I actually run the program the wav file contains annoying clicks and blips (compare to pure sin wav created from the C program). How am I calling avcodec_encode_audio2 wrong?
Here is my Java code. All the sources are also at github in case you want to try to compile it.
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.IntBuffer;
import java.util.Objects;
import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.DataLine;
import javax.sound.sampled.LineUnavailableException;
import javax.sound.sampled.TargetDataLine;
public class Sin {
/**
* Abstract class that allows you to put the initialization and cleanup
* code at the same place instead of separated by the big try block.
*/
public static abstract class SharedPtr<T> implements AutoCloseable {
public T ptr;
public SharedPtr(T ptr) {
this.ptr = ptr;
}
/**
* Abstract override forces method to throw no checked exceptions.
* Subclasses will call a C function that throws no exceptions.
*/
#Override public abstract void close();
}
/**
* #param args
* #throws IOException
* #throws LineUnavailableException
*/
public static void main(String[] args) throws IOException, LineUnavailableException {
final AvcodecLibrary avcodec = AvcodecLibrary.INSTANCE;
final AvformatLibrary avformat = AvformatLibrary.INSTANCE;
final AvutilLibrary avutil = AvutilLibrary.INSTANCE;
avcodec.avcodec_register_all();
avformat.av_register_all();
AVOutputFormat.ByReference format = null;
String format_name = "wav", file_url = "file:sinjava.wav";
for (AVOutputFormat.ByReference formatIter = avformat.av_oformat_next(null); formatIter != null; formatIter = avformat.av_oformat_next(formatIter)) {
formatIter.setAutoWrite(false);
String iterName = formatIter.name;
if (format_name.equals(iterName)) {
format = formatIter;
break;
}
}
Objects.requireNonNull(format);
System.out.format("Found format %s%n", format_name);
AVCodec codec = avcodec.avcodec_find_encoder(format.audio_codec); // one of AvcodecLibrary.CodecID
Objects.requireNonNull(codec);
codec.setAutoWrite(false);
try (
SharedPtr<AVFormatContext> fmtCtxPtr = new SharedPtr<AVFormatContext>(avformat.avformat_alloc_context()) {#Override public void close(){if (null!=ptr) avformat.avformat_free_context(ptr);}};
) {
AVFormatContext fmtCtx = Objects.requireNonNull(fmtCtxPtr.ptr);
fmtCtx.setAutoWrite(false);
fmtCtx.setAutoRead(false);
fmtCtx.oformat = format; fmtCtx.writeField("oformat");
AVStream st = avformat.avformat_new_stream(fmtCtx, codec);
if (null == st)
throw new IllegalStateException();
AVCodecContext c = st.codec;
if (null == c)
throw new IllegalStateException();
st.setAutoWrite(false);
fmtCtx.readField("nb_streams");
st.id = fmtCtx.nb_streams - 1; st.writeField("id");
assert st.id >= 0;
System.out.format("New stream: id=%d%n", st.id);
if (0 != (format.flags & AvformatLibrary.AVFMT_GLOBALHEADER)) {
c.flags |= AvcodecLibrary.CODEC_FLAG_GLOBAL_HEADER;
}
c.writeField("flags");
c.bit_rate = 64000; c.writeField("bit_rate");
int bestSampleRate;
if (null == codec.supported_samplerates) {
bestSampleRate = 44100;
} else {
bestSampleRate = 0;
for (int offset = 0, sample_rate = codec.supported_samplerates.getInt(offset); sample_rate != 0; codec.supported_samplerates.getInt(++offset)) {
bestSampleRate = Math.max(bestSampleRate, sample_rate);
}
assert bestSampleRate > 0;
}
c.sample_rate = bestSampleRate; c.writeField("sample_rate");
c.channel_layout = AvutilLibrary.AV_CH_LAYOUT_STEREO; c.writeField("channel_layout");
c.channels = avutil.av_get_channel_layout_nb_channels(c.channel_layout); c.writeField("channels");
assert 2 == c.channels;
c.sample_fmt = AvutilLibrary.AVSampleFormat.AV_SAMPLE_FMT_S16; c.writeField("sample_fmt");
c.time_base.num = 1;
c.time_base.den = bestSampleRate;
c.writeField("time_base");
c.setAutoWrite(false);
AudioFormat javaSoundFormat = new AudioFormat(bestSampleRate, Short.SIZE, c.channels, true, ByteOrder.nativeOrder() == ByteOrder.BIG_ENDIAN);
DataLine.Info javaDataLineInfo = new DataLine.Info(TargetDataLine.class, javaSoundFormat);
if (! AudioSystem.isLineSupported(javaDataLineInfo))
throw new IllegalStateException();
int err;
if ((err = avcodec.avcodec_open(c, codec)) < 0) {
throw new IllegalStateException();
}
assert c.channels != 0;
AVIOContext.ByReference[] ioCtxReference = new AVIOContext.ByReference[1];
if (0 != (err = avformat.avio_open(ioCtxReference, file_url, AvformatLibrary.AVIO_FLAG_WRITE))) {
throw new IllegalStateException("averror " + err);
}
try (
SharedPtr<AVIOContext.ByReference> ioCtxPtr = new SharedPtr<AVIOContext.ByReference>(ioCtxReference[0]) {#Override public void close(){if (null!=ptr) avutil.av_free(ptr.getPointer());}}
) {
AVIOContext.ByReference ioCtx = Objects.requireNonNull(ioCtxPtr.ptr);
fmtCtx.pb = ioCtx; fmtCtx.writeField("pb");
int averr = avformat.avformat_write_header(fmtCtx, null);
if (averr < 0) {
throw new IllegalStateException("" + averr);
}
st.read(); // it is modified by avformat_write_header
System.out.format("Wrote header. fmtCtx->nb_streams=%d, st->time_base=%d/%d; st->avg_frame_rate=%d/%d%n", fmtCtx.nb_streams, st.time_base.num, st.time_base.den, st.avg_frame_rate.num, st.avg_frame_rate.den);
avformat.avio_flush(ioCtx);
int frame_size = c.frame_size != 0 ? c.frame_size : 4096;
int expectedBufferSize = frame_size * c.channels * (Short.SIZE/8);
boolean supports_small_last_frame = c.frame_size == 0 ? true : 0 != (codec.capabilities & AvcodecLibrary.CODEC_CAP_SMALL_LAST_FRAME);
int bufferSize = avutil.av_samples_get_buffer_size((IntBuffer)null, c.channels, frame_size, c.sample_fmt, 1);
assert bufferSize == expectedBufferSize: String.format("expected %d; got %d", expectedBufferSize, bufferSize);
ByteBuffer samples = ByteBuffer.allocate(expectedBufferSize);
samples.order(ByteOrder.nativeOrder());
int audio_time = 0; // unit: (c.time_base) s = (1/c.sample_rate) s
int audio_sample_count = supports_small_last_frame ?
3 * c.sample_rate :
3 * c.sample_rate / frame_size * frame_size;
while (audio_time < audio_sample_count) {
int frame_audio_time = audio_time;
samples.clear();
int nb_samples_in_frame = 0;
// encode a single tone sound
for (; samples.hasRemaining() && audio_time < audio_sample_count; nb_samples_in_frame++, audio_time++) {
double x = 2*Math.PI*440/c.sample_rate * audio_time;
double y = 10000 * Math.sin(x);
samples.putShort((short) y);
samples.putShort((short) y);
}
samples.flip();
try (
SharedPtr<AVFrame> framePtr = new SharedPtr<AVFrame>(avcodec.avcodec_alloc_frame()) {#Override public void close() {if (null!=ptr) avutil.av_free(ptr.getPointer());}};
) {
AVFrame frame = Objects.requireNonNull(framePtr.ptr);
frame.setAutoRead(false); // will be an in param
frame.setAutoWrite(false);
frame.nb_samples = nb_samples_in_frame; frame.writeField("nb_samples"); // actually unused during encoding
// Presentation time, in AVStream.time_base units.
frame.pts = avutil.av_rescale_q(frame_audio_time, c.time_base, st.time_base); // i * codec_time_base / st_time_base
frame.writeField("pts");
assert c.channels > 0;
int bytesPerSample = avutil.av_get_bytes_per_sample(c.sample_fmt);
assert bytesPerSample > 0;
if (0 != (err = avcodec.avcodec_fill_audio_frame(frame, c.channels, c.sample_fmt, samples, samples.capacity(), 1))) {
throw new IllegalStateException(""+err);
}
AVPacket packet = new AVPacket(); // one of the few structs from ffmpeg with guaranteed size
avcodec.av_init_packet(packet);
packet.size = 0;
packet.data = null;
packet.stream_index = st.index; packet.writeField("stream_index");
// encode the samples
IntBuffer gotPacket = IntBuffer.allocate(1);
if (0 != (err = avcodec.avcodec_encode_audio2(c, packet, frame, gotPacket))) {
throw new IllegalStateException("" + err);
} else if (0 != gotPacket.get()) {
packet.read();
averr = avformat.av_write_frame(fmtCtx, packet);
if (averr < 0)
throw new IllegalStateException("" + averr);
}
System.out.format("encoded frame: codec time = %d; pts=%d = av_rescale_q(%d,%d/%d,%d/%d) (%.02fs) contains %d samples (%.02fs); got_packet=%d; packet.size=%d%n",
frame_audio_time,
frame.pts,
frame_audio_time, st.codec.time_base.num,st.codec.time_base.den,st.time_base.num,st.time_base.den,
1.*frame_audio_time/c.sample_rate, frame.nb_samples, 1.*frame.nb_samples/c.sample_rate, gotPacket.array()[0], packet.size);
}
}
if (0 != (err = avformat.av_write_trailer(fmtCtx))) {
throw new IllegalStateException();
}
avformat.avio_flush(ioCtx);
}
}
System.out.println("Done writing");
}
}
I also rewrote it in C, and the C version works fine without any blips. But I can’t figure out how I am using the library differently; all the library function calls should be identical!
//! gcc --std=c99 sin.c $(pkg-config --cflags --libs libavutil libavformat libavcodec) -o sin
// sudo apt-get install libswscale-dev
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <libavutil/opt.h>
#include <libavutil/mathematics.h>
#include <libavformat/avformat.h>
#include <libswscale/swscale.h>
#include <libavcodec/avcodec.h>
int main(int argc, char *argv[]) {
const char *format_name = "wav", *file_url = "file:sin.wav";
avcodec_register_all();
av_register_all();
AVOutputFormat *format = NULL;
for (AVOutputFormat *formatIter = av_oformat_next(NULL); formatIter != NULL; formatIter = av_oformat_next(formatIter)) {
int hasEncoder = NULL != avcodec_find_encoder(formatIter->audio_codec);
if (0 == strcmp(format_name, formatIter->name)) {
format = formatIter;
break;
}
}
printf("Found format %s\n", format->name);
AVCodec *codec = avcodec_find_encoder(format->audio_codec);
if (! codec) {
fprintf(stderr, "Could not find codec %d\n", format->audio_codec);
exit(1);
}
AVFormatContext *fmtCtx = avformat_alloc_context();
if (! fmtCtx) {
fprintf(stderr, "error allocating AVFormatContext\n");
exit(1);
}
fmtCtx->oformat = format;
AVStream *st = avformat_new_stream(fmtCtx, codec);
if (! st) {
fprintf(stderr, "error allocating AVStream\n");
exit(1);
}
if (fmtCtx->nb_streams != 1) {
fprintf(stderr, "avformat_new_stream should have incremented nb_streams, but it's still %d\n", fmtCtx->nb_streams);
exit(1);
}
AVCodecContext *c = st->codec;
if (! c) {
fprintf(stderr, "avformat_new_stream should have allocated a AVCodecContext for my stream\n");
exit(1);
}
st->id = fmtCtx->nb_streams - 1;
printf("Created stream %d\n", st->id);
if (0 != (format->flags & AVFMT_GLOBALHEADER)) {
c->flags |= CODEC_FLAG_GLOBAL_HEADER;
}
c->bit_rate = 64000;
int bestSampleRate;
if (NULL == codec->supported_samplerates) {
bestSampleRate = 44100;
printf("Setting sample rate: %d\n", bestSampleRate);
} else {
bestSampleRate = 0;
for (const int *sample_rate_iter = codec->supported_samplerates; *sample_rate_iter != 0; sample_rate_iter++) {
if (*sample_rate_iter >= bestSampleRate)
bestSampleRate = *sample_rate_iter;
}
printf("Using best supported sample rate: %d\n", bestSampleRate);
}
c->sample_rate = bestSampleRate;
c->channel_layout = AV_CH_LAYOUT_STEREO;
c->channels = av_get_channel_layout_nb_channels(c->channel_layout);
c->time_base.num = 1;
c->time_base.den = c->sample_rate;
if (c->channels != 2) {
fprintf(stderr, "av_get_channel_layout_nb_channels returned %d instead of 2\n", c->channels);
exit(1);
}
c->sample_fmt = AV_SAMPLE_FMT_S16;
int averr;
if ((averr = avcodec_open2(c, codec, NULL)) < 0) {
fprintf(stderr, "avcodec_open2 returned error %d\n", averr);
exit(1);
}
AVIOContext *ioCtx = NULL;
if (0 != (averr = avio_open(&ioCtx, file_url, AVIO_FLAG_WRITE))) {
fprintf(stderr, "avio_open returned error %d\n", averr);
exit(1);
}
if (ioCtx == NULL) {
fprintf(stderr, "AVIOContext should have been set by avio_open\n");
exit(1);
}
fmtCtx->pb = ioCtx;
if (0 != (averr = avformat_write_header(fmtCtx, NULL))) {
fprintf(stderr, "avformat_write_header returned error %d\n", averr);
exit(1);
}
printf("Wrote header. fmtCtx->nb_streams=%d, st->time_base=%d/%d; st->avg_frame_rate=%d/%d\n", fmtCtx->nb_streams, st->time_base.num, st->time_base.den, st->avg_frame_rate.num, st->avg_frame_rate.den);
int align = 1;
int sample_size = av_get_bytes_per_sample(c->sample_fmt);
if (sample_size != sizeof(int16_t)) {
fprintf(stderr, "expected sample size=%zu but got %d\n", sizeof(int16_t), sample_size);
exit(1);
}
int frame_size = c->frame_size != 0 ? c->frame_size : 4096;
int bufferSize = av_samples_get_buffer_size(NULL, c->channels, frame_size, c->sample_fmt, align);
int expectedBufferSize = frame_size * c->channels * sample_size;
int supports_small_last_frame = c->frame_size == 0 ? 1 : 0 != (codec->capabilities & CODEC_CAP_SMALL_LAST_FRAME);
if (bufferSize != expectedBufferSize) {
fprintf(stderr, "expected buffer size=%d but got %d\n", expectedBufferSize, bufferSize);
exit(1);
}
int16_t *samples = (int16_t*)malloc(bufferSize);
uint32_t audio_time = 0; // unit: (1/c->sample_rate) s
uint32_t audio_sample_count = supports_small_last_frame ?
3 * c->sample_rate :
3 * c->sample_rate / frame_size * frame_size;
while (audio_time < audio_sample_count) {
uint32_t frame_audio_time = audio_time; // unit: (1/c->sample_rate) s
AVFrame *frame = avcodec_alloc_frame();
if (frame == NULL) {
fprintf(stderr, "avcodec_alloc_frame failed\n");
exit(1);
}
for (uint32_t i = 0; i != frame_size && audio_time < audio_sample_count; i++, audio_time++) {
samples[2*i] = samples[2*i + 1] = 10000 * sin(2*M_PI*440/c->sample_rate * audio_time);
frame->nb_samples = i+1; // actually unused during encoding
}
// frame->format = c->sample_fmt; // unused during encoding
frame->pts = av_rescale_q(frame_audio_time, c->time_base, st->time_base);
if (0 != (averr = avcodec_fill_audio_frame(frame, c->channels, c->sample_fmt, (const uint8_t*)samples, bufferSize, align))) {
fprintf(stderr, "avcodec_fill_audio_frame returned error %d\n", averr);
exit(1);
}
AVPacket packet;
av_init_packet(&packet);
packet.data = NULL;
packet.size = 0;
int got_packet;
if (0 != (averr = avcodec_encode_audio2(c, &packet, frame, &got_packet))) {
fprintf(stderr, "avcodec_encode_audio2 returned error %d\n", averr);
exit(1);
}
if (got_packet) {
packet.stream_index = st->index;
if (0 < (averr = av_write_frame(fmtCtx, &packet))) {
fprintf(stderr, "av_write_frame returned error %d\n", averr);
exit(1);
} else if (averr == 1) {
// end of stream wanted.
}
}
printf("encoded frame: codec time = %u; format pts=%ld = av_rescale_q(%u,%d/%d,%d/%d) (%.02fs) contains %d samples (%.02fs); got_packet=%d; packet.size=%d\n",
frame_audio_time,
frame->pts,
frame_audio_time, c->time_base.num, c->time_base.den, st->time_base.num, st->time_base.den,
1.*frame_audio_time/c->sample_rate, frame->nb_samples, 1.*frame->nb_samples/c->sample_rate, got_packet, packet.size);
av_free(frame);
}
free(samples);
cleanupFile:
if (0 != (averr = av_write_trailer(fmtCtx))) {
fprintf(stderr, "av_write_trailer returned error %d\n", averr);
exit(1);
}
avio_flush(ioCtx);
avio_close(ioCtx);
avformat_free_context(fmtCtx);
}
The problem was that ByteBuffer.allocate(int) creates a buffer whose address is not stable across JNA function calls. Every time you call a native function, it copies the bytes into a temporary array just for that invocation. By contrast, ByteBuffer.allocateDirect(int) creates a buffer whose native pointers are stable. This is apparently a well-known pitfall of using ByteBuffer in JNA, but I didn’t notice it in the fine print of Using Pointers and Arrays.
So I just had to fix the samples creation to ByteBuffer samples = ByteBuffer.allocateDirect(expectedBufferSize);. The subsequent avcodec_fill_audio_frame call does not copy samples; it simply points the frame->data[0] to the uint8_t* address, so the samples array needs to have a stable address.
Without having done what you are doing, I suspect the garbage collector.
See How can I disable Java garbage collector? - it says you can't, so increase the memory.

The .mp4 video does not play, which is created from ffmpeg library (not command line)

I use ffmpeg library to encode frames to a .mp4 video. The program runs smoothly without error. But the output .mp4 video does not play. Properties of the file does not even show it is a video file, no any information of video stream.
The related code is:
const char* ouVideoFileName = "output.mp4";
AVCodecID ouCodec_id = CODEC_ID_H264;
But if I change it to:
const char* ouVideoFileName = "output.avi";
AVCodecID ouCodec_id = CODEC_ID_H264;
The .avi video plays correctly.
What's wrong with .mp4 video?
You will need to share more of your code to find exact issue. Normally once you specify the container format in output file name, you will need to use av_guess_format to get the output format. After that you can use av_find_encoder for suggest codec_id
You will need to do something like this
AVFormatContext *m_outformat = NULL;
AVOutputFormat *outfmt = NULL;
std::string outfile = "clip_out.mp4";
outfmt = av_guess_format(NULL,outfile.c_str(),NULL);
if(outfmt == NULL)
{
ret = -1;
return ret;
}
else
{
m_outformat = avformat_alloc_context();
if(m_outformat)
{
m_outformat->oformat = outfmt;
_snprintf(m_outformat->filename, sizeof(m_outformat->filename), "%s", outfile.c_str());
}
else
{
ret = -1;
return ret;
}
}
AVCodec *out_vid_codec,*out_aud_codec;
out_vid_codec = out_aud_codec = NULL;
if(outfmt->video_codec != AV_CODEC_ID_NONE && m_in_vid_strm != NULL)
{
out_vid_codec = avcodec_find_encoder(outfmt->video_codec);
if(NULL == out_vid_codec)
{
PRINT_MSG("Could Not Find Vid Encoder")
ret = -1;
return ret;
}
else
{
PRINT_MSG("Found Out Vid Encoder ")
m_out_vid_strm = avformat_new_stream(m_outformat, out_vid_codec);
if(NULL == m_out_vid_strm)
{
PRINT_MSG("Failed to Allocate Output Vid Strm ")
ret = -1;
return ret;
}
else
{
PRINT_MSG("Allocated Video Stream ")
if(avcodec_copy_context(m_out_vid_strm->codec, m_informat->streams[m_in_vid_strm_idx]->codec) != 0)
{
PRINT_MSG("Failed to Copy Context ")
ret = -1;
return ret;
}
else
{
m_out_vid_strm->sample_aspect_ratio.den = m_out_vid_strm->codec->sample_aspect_ratio.den;
m_out_vid_strm->sample_aspect_ratio.num = m_in_vid_strm->codec->sample_aspect_ratio.num;
PRINT_MSG("Copied Context ")
m_out_vid_strm->codec->codec_id = m_in_vid_strm->codec->codec_id;
m_out_vid_strm->codec->time_base.num = 1;
m_out_vid_strm->codec->time_base.den = m_fps*(m_in_vid_strm->codec->ticks_per_frame);
m_out_vid_strm->time_base.num = 1;
m_out_vid_strm->time_base.den = 1000;
m_out_vid_strm->r_frame_rate.num = m_fps;
m_out_vid_strm->r_frame_rate.den = 1;
m_out_vid_strm->avg_frame_rate.den = 1;
m_out_vid_strm->avg_frame_rate.num = m_fps;
m_out_vid_strm->duration = (m_out_end_time - m_out_start_time)*1000;
}
}
}
}
if(outfmt->audio_codec != AV_CODEC_ID_NONE && m_in_aud_strm != NULL)
{
out_aud_codec = avcodec_find_encoder(outfmt->audio_codec);
if(NULL == out_aud_codec)
{
PRINT_MSG("Could Not Find Out Aud Encoder ")
ret = -1;
return ret;
}
else
{
PRINT_MSG("Found Out Aud Encoder ")
m_out_aud_strm = avformat_new_stream(m_outformat, out_aud_codec);
if(NULL == m_out_aud_strm)
{
PRINT_MSG("Failed to Allocate Out Vid Strm ")
ret = -1;
return ret;
}
else
{
if(avcodec_copy_context(m_out_aud_strm->codec, m_informat->streams[m_in_aud_strm_idx]->codec) != 0)
{
PRINT_MSG("Failed to Copy Context ")
ret = -1;
return ret;
}
else
{
PRINT_MSG("Copied Context ")
m_out_aud_strm->codec->codec_id = m_in_aud_strm->codec->codec_id;
m_out_aud_strm->codec->codec_tag = 0;
m_out_aud_strm->pts = m_in_aud_strm->pts;
m_out_aud_strm->duration = m_in_aud_strm->duration;
m_out_aud_strm->time_base.num = m_in_aud_strm->time_base.num;
m_out_aud_strm->time_base.den = m_in_aud_strm->time_base.den;
}
}
}
}
if (!(outfmt->flags & AVFMT_NOFILE))
{
if (avio_open2(&m_outformat->pb, outfile.c_str(), AVIO_FLAG_WRITE,NULL, NULL) < 0)
{
PRINT_VAL("Could Not Open File ", outfile)
ret = -1;
return ret;
}
}
/* Write the stream header, if any. */
if (avformat_write_header(m_outformat, NULL) < 0)
{
PRINT_VAL("Error Occurred While Writing Header ", outfile)
ret = -1;
return ret;
}
else
{
PRINT_MSG("Written Output header ")
m_init_done = true;
}
Now you can start the encoding of frames
I previously followed the example of decoding_encoding.c in FFmpeg documentation.
Later on, I followed the example of muxing.c, now it works!

Resources