Transcoding audio using xuggler - ffmpeg

I am trying to convert an audio file with the header
Opening audio decoder: [pcm] Uncompressed PCM audio decoder
AUDIO: 44100 Hz, 2 ch, s16le, 1411.2 kbit/100.00% (ratio: 176400->176400)
Selected audio codec: [pcm] afm: pcm (Uncompressed PCM)
I want to transcode this file to mp3 format. I have following code snippet but its not working well. I have written it using XUGGLER code snippet for transcoding audio and video.
Audio decoder is
audioDecoder = IStreamCoder.make(IStreamCoder.Direction.DECODING, ICodec.findDecodingCodec(ICodec.ID.CODEC_ID_PCM_S16LE));
audioDecoder.setSampleRate(44100);
audioDecoder.setBitRate(176400);
audioDecoder.setChannels(2);
audioDecoder.setTimeBase(IRational.make(1,1000));
if (audioDecoder.open(IMetaData.make(), IMetaData.make()) < 0)
return false;
return true;
Audio encoder is
outContainer = IContainer.make();
outContainerFormat = IContainerFormat.make();
outContainerFormat.setOutputFormat("mp3", urlOut, null);
int retVal = outContainer.open(urlOut, IContainer.Type.WRITE, outContainerFormat);
if (retVal < 0) {
System.out.println("Could not open output container");
return false;
}
outAudioCoder = IStreamCoder.make(IStreamCoder.Direction.ENCODING, ICodec.findEncodingCodec(ICodec.ID.CODEC_ID_MP3));
outAudioStream = outContainer.addNewStream(outAudioCoder);
outAudioCoder.setSampleRate(new Integer(44100));
outAudioCoder.setChannels(2);
retVal = outAudioCoder.open(IMetaData.make(), IMetaData.make());
if (retVal < 0) {
System.out.println("Could not open audio coder");
return false;
}
retVal = outContainer.writeHeader();
if (retVal < 0) {
System.out.println("Could not write output FLV header: ");
return false;
}
return true;
And here is encode method where i send packets of 32 byte to transcode
public void encode(byte[] audioFrame){
//duration of 1 video frame
long lastVideoPts = 0;
IPacket packet_out = IPacket.make();
int lastPos = 0;
int lastPos_out = 0;
IAudioSamples audioSamples = IAudioSamples.make(48000, audioDecoder.getChannels());
IAudioSamples audioSamples_resampled = IAudioSamples.make(48000, audioDecoder.getChannels());
//we always have 32 bytes/sample
int pos = 0;
int audioFrameLength = audioFrame.length;
int audioFrameCnt = 1;
iBuffer = IBuffer.make(null, audioFrame, 0, audioFrameLength);
IPacket packet = IPacket.make(iBuffer);
//packet.setKeyPacket(true);
packet.setTimeBase(IRational.make(1,1000));
packet.setDuration(20);
packet.setDts(audioFrameCnt*20);
packet.setPts(audioFrameCnt*20);
packet.setStreamIndex(1);
packet.setPosition(lastPos);
lastPos+=audioFrameLength;
int pksz = packet.getSize();
packet.setComplete(true, pksz);
/*
* A packet can actually contain multiple samples
*/
int offset = 0;
int retVal;
while(offset < packet.getSize())
{
int bytesDecoded = audioDecoder.decodeAudio(audioSamples, packet, offset);
if (bytesDecoded < 0)
throw new RuntimeException("got error decoding audio ");
offset += bytesDecoded;
if (audioSamples.isComplete())
{
int samplesConsumed = 0;
while (samplesConsumed < audioSamples.getNumSamples()) {
retVal = outAudioCoder.encodeAudio(packet_out, audioSamples, samplesConsumed);
if (retVal <= 0)
throw new RuntimeException("Could not encode audio");
samplesConsumed += retVal;
if (packet_out.isComplete()) {
packet_out.setPosition(lastPos_out);
packet_out.setStreamIndex(1);
lastPos_out+=packet_out.getSize();
retVal = outContainer.writePacket(packet_out);
if(retVal < 0){
throw new RuntimeException("Could not write data packet");
}
}
}
}
}
}
I get an output file but it doesnt get played. I have very little experience of audio encoding and sampling. Thanks in advance.

Finally I am able to live transcode a pcm stream to mp3 stream.
There were couple of issues in the code :
I was trying to transcode only one thing i.e audio and the code snippet was transcoding audio as well as video so there was an issue in setting stream index.
packet_out.setStreamIndex(1); packet_out.setStreamIndex(0)
Second thing was calculations ffmpeg guid
channel * bits * sampling rate = bit rate
This thing was miscalculated at my end.
Number of samples in audio samples depends upon your sampling rate. That was wrong in my code.
NOTE : this is a pretty old code
byte[] data = new byte[418];
public void encode(byte[] audioFrame) {
IPacket packet_out = IPacket.make();
int lastPos_out = 0;
IAudioSamples audioSamples = IAudioSamples.make(11025, audioDecoder.getChannels());
//IAudioSamples audioSamples_resampled = IAudioSamples.make(48000, audioDecoder.getChannels());
//we always have 32 bytes/sample
int pos = 0;
int audioFrameLength = audioFrame.length;
int audioFrameCnt = 1;
iBuffer = IBuffer.make(null, audioFrame, 0, audioFrameLength);
IPacket packet = IPacket.make(iBuffer);
//packet.setKeyPacket(true);
packet.setTimeBase(IRational.make(1, 1000));
packet.setStreamIndex(0);
int pksz = packet.getSize();
packet.setComplete(true, pksz);
/*
* A packet can actually contain multiple samples
*/
int offset = 0;
int retVal;
while (offset < packet.getSize()) {
int bytesDecoded = audioDecoder.decodeAudio(audioSamples, packet, offset);
if (bytesDecoded < 0)
throw new RuntimeException("got error decoding audio ");
offset += bytesDecoded;
if (audioSamples.isComplete()) {
/*audioResampler.resample(audioSamples_resampled, audioSamples, audioSamples.getNumSamples());
audioSamples_resampled.setPts(Global.NO_PTS);*/
int samplesConsumed = 0;
while (samplesConsumed < audioSamples.getNumSamples()) {
retVal = outAudioCoder.encodeAudio(packet_out, audioSamples, samplesConsumed);
if (retVal <= 0)
throw new RuntimeException("Could not encode audio");
samplesConsumed += retVal;
if (packet_out.isComplete()) {
packet_out.setPosition(lastPos_out);
packet_out.setStreamIndex(0);
lastPos_out += packet_out.getSize();
System.out.println("size" + packet_out.getSize());
packet_out.getByteBuffer().get(data,0,packet_out.getSize());
try {
fo.write(data);
} catch (IOException e) {
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
}
packet_out.reset();
//retVal = outContainer.writePacket(packet_out);
if (retVal < 0) {
throw new RuntimeException("Could not write data packet");
}
}
}
}
}
}

Related

Request for ffmpeg raw data to mp4 container example

I have a binary file with raw h264 data which is arranged like that
NAL(SPS), NAL(PPS), NAL(Frame), NAL(SPS), NAL(PPS)....
and i want to mux it (without encode) into a mp4 container.
The muxing.c in the ffmpeg example do the encoding of yuv data, but it is different from my case, and i have no ideas how to change the example to do what i want to do...
I knew the commaned ffmpeg -i h264file -c copy h264.mp4 can do what i want to do, but i have to do it in my program, so i need to know how to use the ffmpeg api to do the same thing, but so far, i cannot find any simple example to do it. Is there anyone has hint on how to do it?? Thanks
Updated, i have write the code as below from the reference, it seems can create the mp4 but the time is not correct, it lost the frame rate information and the time information, it play very fast.
av_register_all();
int ret;
AVDictionary *opt = NULL;
//bool is264 = true;
const char * inputFileName = "input.264";
const char * outputFileName = "output.mp4";
AVFormatContext *ic = avformat_alloc_context();
if((ret = avformat_open_input(&ic, inputFileName, NULL, NULL)) < 0)
return -1;//
// Get format info (retrieve stream information)
if ((ret = avformat_find_stream_info(ic, NULL)) < 0)
return ret; // Couldn't find stream information
for (int i = 0; i < ic->nb_streams; i++)
{
AVStream *stream;
AVCodecContext *codec_ctx;
stream = ic->streams[i];
codec_ctx = stream->codec;
/* Reencode video & audio and remux subtitles etc. */
if (codec_ctx->codec_type == AVMEDIA_TYPE_VIDEO
|| codec_ctx->codec_type == AVMEDIA_TYPE_AUDIO) {
/* Open decoder */
ret = avcodec_open2(codec_ctx, avcodec_find_decoder(codec_ctx->codec_id), NULL);
if (ret < 0) {
//av_log(NULL, AV_LOG_ERROR, "Failed to open decoder for stream #%u\n", i);
return ret;
}
}
}
// Dump information about file onto standard error
av_dump_format(ic, 0, inputFileName, 0);
AVFormatContext *oc;
avformat_alloc_output_context2(&oc, NULL, NULL, outputFileName);
if (!oc) {
//printf("Could not deduce output format from file extension: using MPEG.\n");
//avformat_alloc_output_context2(&oc, NULL, "mpeg", outputFileName);
return -1;
}
AVStream *ist = ic->streams[0];
AVCodec *out_vid_codec = avcodec_find_encoder(oc->oformat->video_codec);
if (NULL == out_vid_codec)
return -1; // Couldn't find video encoder
AVStream *out_vid_strm = avformat_new_stream(oc, out_vid_codec);
if (NULL == out_vid_strm)
return -1; // Couldn't output video stream
ret = avcodec_copy_context(out_vid_strm->codec, ist->codec);
if (ret < 0)
return ret; // Failed to copy context
ret = avio_open(&oc->pb, outputFileName, AVIO_FLAG_WRITE);
ret = avformat_write_header(oc, NULL);
AVPacket pkt;
while(1)
{
AVStream *in_stream, *out_stream;
ret = av_read_frame(ic, &pkt);
if (ret < 0)
break;
pkt.stream_index = 0;
in_stream = ic->streams[pkt.stream_index];
out_stream = oc->streams[pkt.stream_index];
pkt.pts = av_rescale_q_rnd(pkt.pts, in_stream->time_base, out_stream->time_base, (AVRounding)(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
pkt.dts = av_rescale_q_rnd(pkt.dts, in_stream->time_base, out_stream->time_base, (AVRounding)(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
pkt.duration = av_rescale_q(pkt.duration, in_stream->time_base, out_stream->time_base);
pkt.pos = -1;
//log_packet(ofmt_ctx, &pkt, "out");
ret = av_interleaved_write_frame(oc, &pkt);
if (ret < 0) {
fprintf(stderr, "Error muxing packet\n");
break;
}
av_packet_unref(&pkt);
}
av_write_trailer(oc);

record and play file using wrapper ffmpeg

I am using C# wrapper for ffmpeg from ffmpeg
I want record rtsp stream and play it but I can not decoder frame from file
using this code I write file test.avi
unsafe
{
AVFormatContext* context = FFmpegInvoke.avformat_alloc_context();
int video_stream_index=0;
FFmpegInvoke.av_register_all();
FFmpegInvoke.avcodec_register_all();
FFmpegInvoke.avformat_network_init();
//open rtsp
if (FFmpegInvoke.avformat_open_input(&context, "rtsp://admin:admin#192.168.0.71:554", null, null) != 0)
{
return ;
}
if (FFmpegInvoke.avformat_find_stream_info(context, null) < 0)
{
return ;
}
//search video stream
for (int i = 0; i < context->nb_streams; i++)
{
if (context->streams[i]->codec->codec_type == AVMediaType.AVMEDIA_TYPE_VIDEO)
video_stream_index = i;
}
AVPacket packet;
FFmpegInvoke.av_init_packet(&packet);
//open output file
AVOutputFormat* fmt = FFmpegInvoke.av_guess_format("h264", null, null);
AVFormatContext* oc = FFmpegInvoke.avformat_alloc_context();
oc->oformat = fmt;
FFmpegInvoke.avio_open2(&oc->pb, "test.mkv", FFmpegInvoke.AVIO_FLAG_WRITE, null, null);
AVStream* stream = null;
int cnt = 0;
//start reading packets from stream and write them to file
/// FFmpegInvoke.av_read_play(context);//play RTSP
while (FFmpegInvoke.av_read_frame(context, &packet) >= 0 && cnt < 1000)
{//read 100 frames
if (packet.stream_index == video_stream_index)
{//packet is video
if (stream == null)
{//create stream in file
stream = FFmpegInvoke.avformat_new_stream(oc, context->streams[video_stream_index]->codec->codec);
FFmpegInvoke.avcodec_copy_context(stream->codec, context->streams[video_stream_index]->codec);
stream->sample_aspect_ratio = context->streams[video_stream_index]->codec->sample_aspect_ratio;
FFmpegInvoke.avformat_write_header(oc, null);
}
packet.stream_index = stream->id;
var p1 = new FileInfo("test.mkv").Length;
FFmpegInvoke.av_write_frame(oc, &packet);
cnt++;
}
FFmpegInvoke.av_free_packet(&packet);
FFmpegInvoke.av_init_packet(&packet);
}
FFmpegInvoke.av_read_pause(context);
FFmpegInvoke.av_write_trailer(oc);
FFmpegInvoke.avio_close(oc->pb);
FFmpegInvoke.avformat_free_context(oc);
}
and using this code I want to play me file
unsafe{
string url = "test.mkv";
FFmpegInvoke.av_register_all();
FFmpegInvoke.avcodec_register_all();
FFmpegInvoke.avformat_network_init();
AVFormatContext* pFormatContext = FFmpegInvoke.avformat_alloc_context();
if (FFmpegInvoke.avformat_open_input(&pFormatContext, url, null, null) != 0)
throw new Exception("Could not open file");
if (FFmpegInvoke.avformat_find_stream_info(pFormatContext, null) != 0)
throw new Exception("Could not find stream info");
AVStream* pStream = null;
for (int i = 0; i < pFormatContext->nb_streams; i++)
{
if (pFormatContext->streams[i]->codec->codec_type == AVMediaType.AVMEDIA_TYPE_VIDEO)
{
pStream = pFormatContext->streams[i];
break;
}
}
var packet = new AVPacket();
AVPacket* pPacket = &packet;
FFmpegInvoke.av_init_packet(pPacket);
AVCodecContext codecContext = *(pStream->codec);
int width = codecContext.width;
int height = codecContext.height;
AVPixelFormat sourcePixFmt = codecContext.pix_fmt;
AVCodecID codecId = codecContext.codec_id;
var convertToPixFmt = AVPixelFormat.PIX_FMT_BGR24;
SwsContext* pConvertContext = FFmpegInvoke.sws_getContext(width, height, sourcePixFmt,
width, height, convertToPixFmt,
FFmpegInvoke.SWS_FAST_BILINEAR, null, null, null);
if (pConvertContext == null)
throw new Exception("Could not initialize the conversion context");
var pConvertedFrame = (AVPicture*)FFmpegInvoke.avcodec_alloc_frame();
int convertedFrameBufferSize = FFmpegInvoke.avpicture_get_size(convertToPixFmt, width, height);
var pConvertedFrameBuffer = (byte*)FFmpegInvoke.av_malloc((uint)convertedFrameBufferSize);
FFmpegInvoke.avpicture_fill(pConvertedFrame, pConvertedFrameBuffer, convertToPixFmt, width, height);
AVCodec* pCodec = FFmpegInvoke.avcodec_find_decoder(codecId);
if (pCodec == null)
throw new Exception("Unsupported codec");
// Reusing codec context from stream info,
// as an alternative way it could look like this: (but it works not for all kind of codecs)
// AVCodecContext* pCodecContext = FFmpegInvoke.avcodec_alloc_context3(pCodec);
AVCodecContext* pCodecContext = &codecContext;
if ((pCodec->capabilities & FFmpegInvoke.CODEC_CAP_TRUNCATED) == FFmpegInvoke.CODEC_CAP_TRUNCATED)
pCodecContext->flags |= FFmpegInvoke.CODEC_FLAG_TRUNCATED;
AVFrame* pDecodedFrame = FFmpegInvoke.avcodec_alloc_frame();
if (FFmpegInvoke.av_read_frame(pFormatContext, pPacket) < 0)
throw new System.IO.EndOfStreamException();
int gotPicture = 0;
int size = FFmpegInvoke.avcodec_decode_video2(pCodecContext, pDecodedFrame, &gotPicture, pPacket);
if (size < 0)
throw new Exception(string.Format("Error while decoding frame "));
if (gotPicture == 1)
{
}
size =-22.Why? what is wrong?How play my file using ffmpeg?

Muxing with libav

I have a program which is supposed to demux input mpeg-ts, transcode the mpeg2 into h264 and then mux the audio alongside the transcoded video. When I open the resulting muxed file with VLC I neither get audio nor video. Here is the relevant code.
My main worker loop is as follows:
void
*writer_thread(void *thread_ctx) {
struct transcoder_ctx_t *ctx = (struct transcoder_ctx_t *) thread_ctx;
AVStream *video_stream = NULL, *audio_stream = NULL;
AVFormatContext *output_context = init_output_context(ctx, &video_stream, &audio_stream);
struct mux_state_t mux_state = {0};
//from omxtx
mux_state.pts_offset = av_rescale_q(ctx->input_context->start_time, AV_TIME_BASE_Q, output_context->streams[ctx->video_stream_index]->time_base);
//write stream header if any
avformat_write_header(output_context, NULL);
//do not start doing anything until we get an encoded packet
pthread_mutex_lock(&ctx->pipeline.video_encode.is_running_mutex);
while (!ctx->pipeline.video_encode.is_running) {
pthread_cond_wait(&ctx->pipeline.video_encode.is_running_cv, &ctx->pipeline.video_encode.is_running_mutex);
}
while (!ctx->pipeline.video_encode.eos || !ctx->processed_audio_queue->queue_finished) {
//FIXME a memory barrier is required here so that we don't race
//on above variables
//fill a buffer with video data
OERR(OMX_FillThisBuffer(ctx->pipeline.video_encode.h, omx_get_next_output_buffer(&ctx->pipeline.video_encode)));
write_audio_frame(output_context, audio_stream, ctx); //write full audio frame
//FIXME no guarantee that we have a full frame per packet?
write_video_frame(output_context, video_stream, ctx, &mux_state); //write full video frame
//encoded_video_queue is being filled by the previous command
}
av_write_trailer(output_context);
//free all the resources
avcodec_close(video_stream->codec);
avcodec_close(audio_stream->codec);
/* Free the streams. */
for (int i = 0; i < output_context->nb_streams; i++) {
av_freep(&output_context->streams[i]->codec);
av_freep(&output_context->streams[i]);
}
if (!(output_context->oformat->flags & AVFMT_NOFILE)) {
/* Close the output file. */
avio_close(output_context->pb);
}
/* free the stream */
av_free(output_context);
free(mux_state.pps);
free(mux_state.sps);
}
The code for initialising libav output context is this:
static
AVFormatContext *
init_output_context(const struct transcoder_ctx_t *ctx, AVStream **video_stream, AVStream **audio_stream) {
AVFormatContext *oc;
AVOutputFormat *fmt;
AVStream *input_stream, *output_stream;
AVCodec *c;
AVCodecContext *cc;
int audio_copied = 0; //copy just 1 stream
fmt = av_guess_format("mpegts", NULL, NULL);
if (!fmt) {
fprintf(stderr, "[DEBUG] Error guessing format, dying\n");
exit(199);
}
oc = avformat_alloc_context();
if (!oc) {
fprintf(stderr, "[DEBUG] Error allocating context, dying\n");
exit(200);
}
oc->oformat = fmt;
snprintf(oc->filename, sizeof(oc->filename), "%s", ctx->output_filename);
oc->debug = 1;
oc->start_time_realtime = ctx->input_context->start_time;
oc->start_time = ctx->input_context->start_time;
oc->duration = 0;
oc->bit_rate = 0;
for (int i = 0; i < ctx->input_context->nb_streams; i++) {
input_stream = ctx->input_context->streams[i];
output_stream = NULL;
if (input_stream->index == ctx->video_stream_index) {
//copy stuff from input video index
c = avcodec_find_encoder(CODEC_ID_H264);
output_stream = avformat_new_stream(oc, c);
*video_stream = output_stream;
cc = output_stream->codec;
cc->width = input_stream->codec->width;
cc->height = input_stream->codec->height;
cc->codec_id = CODEC_ID_H264;
cc->codec_type = AVMEDIA_TYPE_VIDEO;
cc->bit_rate = ENCODED_BITRATE;
cc->time_base = input_stream->codec->time_base;
output_stream->avg_frame_rate = input_stream->avg_frame_rate;
output_stream->r_frame_rate = input_stream->r_frame_rate;
output_stream->start_time = AV_NOPTS_VALUE;
} else if ((input_stream->codec->codec_type == AVMEDIA_TYPE_AUDIO) && !audio_copied) {
/* i care only about audio */
c = avcodec_find_encoder(input_stream->codec->codec_id);
output_stream = avformat_new_stream(oc, c);
*audio_stream = output_stream;
avcodec_copy_context(output_stream->codec, input_stream->codec);
/* Apparently fixes a crash on .mkvs with attachments: */
av_dict_copy(&output_stream->metadata, input_stream->metadata, 0);
/* Reset the codec tag so as not to cause problems with output format */
output_stream->codec->codec_tag = 0;
audio_copied = 1;
}
}
for (int i = 0; i < oc->nb_streams; i++) {
if (oc->oformat->flags & AVFMT_GLOBALHEADER)
oc->streams[i]->codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
if (oc->streams[i]->codec->sample_rate == 0)
oc->streams[i]->codec->sample_rate = 48000; /* ish */
}
if (!(fmt->flags & AVFMT_NOFILE)) {
fprintf(stderr, "[DEBUG] AVFMT_NOFILE set, allocating output container\n");
if (avio_open(&oc->pb, ctx->output_filename, AVIO_FLAG_WRITE) < 0) {
fprintf(stderr, "[DEBUG] error creating the output context\n");
exit(1);
}
}
return oc;
}
Finally this is the code for writing audio:
static
void
write_audio_frame(AVFormatContext *oc, AVStream *st, struct transcoder_ctx_t *ctx) {
AVPacket pkt = {0}; // data and size must be 0;
struct packet_t *source_audio;
av_init_packet(&pkt);
if (!(source_audio = packet_queue_get_next_item_asynch(ctx->processed_audio_queue))) {
return;
}
pkt.stream_index = st->index;
pkt.size = source_audio->data_length;
pkt.data = source_audio->data;
pkt.pts = source_audio->PTS;
pkt.dts = source_audio->DTS;
pkt.duration = source_audio->duration;
pkt.destruct = avpacket_destruct;
/* Write the compressed frame to the media file. */
if (av_interleaved_write_frame(oc, &pkt) != 0) {
fprintf(stderr, "[DEBUG] Error while writing audio frame\n");
}
packet_queue_free_packet(source_audio, 0);
}
A resulting mpeg4 file can be obtained from here: http://87.120.131.41/dl/mpeg4.h264
I have ommited the write_video_frame code since it is a lot more complicated and I might be making something wrong there as I'm doing timebase conversation etc. For audio however I'm doing 1:1 copy. Each packet_t packet contains data from av_read_frame from the input mpegts container. In the worst case I'd expect that my audio is working and not my video. However I cannot get either of those to work. Seems the documentation is rather vague on making things like that - I've tried both libav and ffmpeg irc channels to no avail. Any information regarding how I can debug the issue will be greatly appreciated.
When different containers yield different results in libav it is almost always a timebase issue. All containers have a time_base that they like, and some will accept custom values... sometimes.
You must rescale the time base before putting it in the container. Generally tinkering with the mux state struct isn't something you want to do and I think what you did there doesn't do what you think. Try printing out all of the timebases to find out what they are.
Each frame you must recalculate PTS at least. If you do it before you call encode the encoder will produce the proper DTS. Do the same for the audio, but generally set the DTS it to AV_NO_PTS and sometimes you can get away with setting the audio PTS to that as well. To rescale easily use the av_rescale(...) functions.
Be careful assuming that you have MPEG-2 data in a MPEG-TS container, that is not always true.

Audio encoding using avcodec_fill_audio_frame() and memory leaks

As a part of encoding decoded audio packets, I'm using avcodec_fill_audio_frame(). I'm passing allocated AVFrame pointer to along with buffer containing the decoded samples and other parameters number of channels, sample format, buffer size. Though the encoding is working fine I'm not able to completely eliminate the memory leaks. I've taken care of most of things but still I'm not able detect the leakage.
Below is the function which I'm using for encoding. Please suggest something.
AudioSample contains decoded data and it is completely managed in different class(free in class destructor). I'm freeing the AVFrame in FFmpegEncoder destructor and AVPacket is freed every time using av_free_packet() with av_packet_destruct enabled. What more do I need to free?
void FfmpegEncoder::WriteAudioSample(AudioSample *audS)
{
int num_audio_frame = 0;
AVCodecContext *c = NULL;
// AVFrame *frame;
AVPacket pkt;
av_init_packet(&pkt);
pkt.destruct = av_destruct_packet;
pkt.data = NULL;
pkt.size = 0;
int ret = 0, got_packet = 0;
c = m_out_aud_strm->codec;
static int64_t aud_pts_in = -1;
if((audS != NULL) && (audS->GetSampleLength() > 0) )
{
int byte_per_sample = av_get_bytes_per_sample(c->sample_fmt);
PRINT_VAL("Byte Per Sample ", byte_per_sample)
m_frame->nb_samples = (audS->GetSampleLength())/(c->channels*av_get_bytes_per_sample(c->sample_fmt));
if(m_frame->nb_samples == c->frame_size)
{
#if 1
if(m_need_resample && (c->channels >= 2))
{
uint8_t * t_buff1 = new uint8_t[audS->GetSampleLength()];
if(t_buff1 != NULL)
{
for(int64_t i = 0; i< m_frame->nb_samples; i++)
{
memcpy(t_buff1 + i*byte_per_sample, (uint8_t*)((uint8_t*)audS->GetAudioSampleData() + i*byte_per_sample*c->channels), byte_per_sample);
memcpy(t_buff1 + (audS->GetSampleLength())/2 + i*byte_per_sample, (uint8_t*)((uint8_t*)audS->GetAudioSampleData() + i*byte_per_sample*c->channels+ byte_per_sample), byte_per_sample);
}
audS->FillAudioSample(t_buff1, audS->GetSampleLength());
delete[] t_buff1;
}
}
#endif
ret = avcodec_fill_audio_frame(m_frame, c->channels, c->sample_fmt, (uint8_t*)audS->GetAudioSampleData(),m_frame->nb_samples*byte_per_sample*c->channels, 0);
//ret = avcodec_fill_audio_frame(&frame, c->channels, c->sample_fmt, t_buff,frame.nb_samples*byte_per_sample*c->channels, 0);
if(ret != 0)
{
PRINT_MSG("Avcodec Fill Audio Failed ")
}
else
{
got_packet = 0;
ret = avcodec_encode_audio2(c, &pkt, m_frame, &got_packet);
if(ret < 0 || got_packet == 0)
{
PRINT_MSG("failed to encode audio ")
}
else
{
PRINT_MSG("Audio Packet Encoded ");
aud_pts_in++;
pkt.pts = aud_pts_in;
pkt.dts = pkt.pts;
pkt.stream_index = m_out_aud_strm->index;
ret = av_interleaved_write_frame(oc, &pkt);
if(ret != 0)
{
PRINT_MSG("Error Write Audio PKT ")
}
else
{
PRINT_MSG("Audio PKT Writen ")
}
}
}
}
avcodec_flush_buffers(c);
// avcodec_free_frame(&frame);
}
av_free_packet(&pkt);
}
Thanks,
Pradeep
//================== SEND AUDIO OUTPUT =======================
void AVOutputStream::sendAudioOutput (AVFrame* inputFrame)
{
AVCodecContext *codecCtx = pOutputAudioStream->codec;
// set source data variables
sourceNumberOfChannels = inputFrame->channels;
sourceChannelLayout = inputFrame->channel_layout;
sourceSampleRate = inputFrame->sample_rate;
_sourceSampleFormat = (AVSampleFormat)inputFrame->format;
sourceNumberOfSamples = inputFrame->nb_samples;
// set destination data variables
destinationNumberOfChannels = codecCtx->channels;
destinationChannelLayout = codecCtx->channel_layout;
destinationSampleRate = codecCtx->sample_rate;
destinationSampleFormat = codecCtx->sample_fmt;//AV_SAMPLE_FMT_FLTP;//EncodecCtx->sample_fmt;
destinationLineSize = 0;
destinationData = NULL;
int returnVal = 0;
if (startDecode == false)
{
startDecode = true;
resamplerCtx = swr_alloc_set_opts(NULL,
destinationChannelLayout,
destinationSampleFormat,
destinationSampleRate,
sourceChannelLayout,
_sourceSampleFormat,
sourceSampleRate,
0,
NULL);
if (resamplerCtx == NULL)
{
std::cout << "Unable to create the resampler context for the audio frame";
isConnected = false;
}
// initialize the resampling context
returnVal = swr_init(resamplerCtx);
if (returnVal < 0)
{
std::cout << "Unable to init the resampler context, error:";
isConnected = false;
}
} //if (startDecode == false)
if (sourceSampleRate != 0)
destinationNumberOfSamples = destinationSampleRate/sourceSampleRate * sourceNumberOfSamples;
// allocate the destination samples buffer
returnVal = av_samples_alloc_array_and_samples(&destinationData,
&destinationLineSize,
destinationNumberOfChannels,
destinationNumberOfSamples,
destinationSampleFormat,
0);
if (returnVal < 0)
{
std::cout << "Unable to allocate destination samples, error";
isConnected = false;
}
// convert to destination format
returnVal = swr_convert(resamplerCtx,
destinationData,
destinationNumberOfSamples,
(const uint8_t **)inputFrame->data, //sourceData,
sourceNumberOfSamples);
if (returnVal < 0)
{
std::cout << "Resampling failed, error \n";
isConnected = false;
}
int bufferSize = av_samples_get_buffer_size(&destinationLineSize,
destinationNumberOfChannels,
destinationNumberOfSamples,
destinationSampleFormat,
0);
//whithout fifo
pOutputAudioFrame = av_frame_alloc();
pOutputAudioFrame->nb_samples = codecCtx->frame_size;//frameNumberOfSamples;
pOutputAudioFrame->format = codecCtx->sample_fmt;
pOutputAudioFrame->channel_layout = codecCtx->channel_layout;
pOutputAudioFrame->channels = codecCtx->channels;
pOutputAudioFrame->sample_rate = codecCtx->sample_rate;
returnVal = avcodec_fill_audio_frame(pOutputAudioFrame,
pOutputAudioFrame->channels,
(AVSampleFormat)pOutputAudioFrame->format,
(const uint8_t *)destinationData[0],
bufferSize,0);
pOutputAudioFrame->pts = inputFrame->pts;
if (returnVal < 0)
{
std::cout << "Unable to fill the audio frame wsampleIndexith captured audio data,error";
isConnected = false;
}
// encode the audio frame, fill a packet for streaming
av_init_packet(&outAudioPacket);
outAudioPacket.data = NULL;
outAudioPacket.size = 0;
outAudioPacket.dts = outAudioPacket.pts = 0;
int gotPacket;
// encoding
returnVal = avcodec_encode_audio2(codecCtx, &outAudioPacket, pOutputAudioFrame, &gotPacket);
// free buffers
av_freep(&destinationData[0]);
av_freep(&destinationData);
av_frame_free(&pOutputAudioFrame);
if (gotPacket)
{
outAudioPacket.stream_index = pOutputAudioStream->index;
outAudioPacket.flags |= AV_PKT_FLAG_KEY;
returnVal = av_interleaved_write_frame(pOutputFormatCtx, &outAudioPacket);
//returnVal = av_write_frame(pOutputFormatCtx, &outAudioPacket);
if (returnVal != 0)
{
std::cout << "Cannot write audio packet \n";
isConnected = false;
}
av_free_packet(&outAudioPacket);
} // if (gotPacket)
}
You can see after resample i free used buffers.
// free buffers
av_freep(&destinationData[0]);
av_freep(&destinationData);

x264 & libavcodec

After some considerable amount of time while trying to build the ffmpeg static library with the x264 encoder on Windows, I have spent some more time for writing some example with it.
Of course, there are tons of "instructions" on how to build, how to use, bla bla... But, non of them works on Windows. I guess the Linux guys are in better position here. Now, the zillion dollars question is "What's the purpose of all that?". Not only that this is useless on Windows, but I could have bought some third party library that actually works.
If somebody is about to say "But, it works!". I must say, give me a working proof. I don't care about 200x100 at 10fps. I don't need H264 for that. Show me how to compress a single second of 1080i footage. It's H264, it's crossplatform (sounds funny if you ask me), Google is using it (it has to be perfect, right?), some more hipe here...
Firstly don't try and build on windows - especially if you use VS - get it from here
Then the sequence is something like:
// in ctor
ffmpeg::avcodec_register_all();
ffmpeg::avcodec_init();
ffmpeg::av_register_all();
bool createFile(const String &fileName,unsigned int width,unsigned int height,unsigned int fps)
{
close();
pFormatCtx=ffmpeg::avformat_alloc_context();
if(!pFormatCtx)
{
printf("Error allocating format context\n");
return false;
}
pOutputFormat = ffmpeg::av_guess_format( "mp4", filename,NULL);
pFormatCtx->oformat = pOutputFormat;
_snprintf(pFormatCtx->filename, sizeof(pFormatCtx->filename), "%s",filename);
// Add the video stream
pVideoStream = av_new_stream(pFormatCtx,0);
if(!pVideoStream )
{
printf("Could not allocate stream\n");
return false;
}
pCodecCtx=pVideoStream->codec;
pCodecCtx->codec_id = pOutputFormat->video_codec;
pCodecCtx->codec_type = ffmpeg::AVMEDIA_TYPE_VIDEO;
pCodecCtx->width = Width = width;
pCodecCtx->height = Height = height;
pCodecCtx->time_base.num = 1;
pCodecCtx->time_base.den = Fps = fps;
pCodecCtx->pix_fmt = ffmpeg::PIX_FMT_YUV420P;
// needed for x264 to work
pCodecCtx->me_range = 16;
pCodecCtx->max_qdiff = 4;
pCodecCtx->qmin = 10;
pCodecCtx->qmax = 51;
pCodecCtx->qcompress = 0.6;
pCodecCtx->gop_size = 12;
avcodec_thread_init(pCodecCtx, 10);
// some formats want stream headers to be separate
if(pFormatCtx->oformat->flags & AVFMT_GLOBALHEADER)
pCodecCtx->flags |= CODEC_FLAG_GLOBAL_HEADER;
if (av_set_parameters(pFormatCtx, NULL) < 0)
{
printf("Invalid output format parameters\n");
return false;
}
ffmpeg::dump_format(pFormatCtx, 0, pFormatCtx->filename, 1);
// open_video
// find the video encoder
pCodec = avcodec_find_encoder(pCodecCtx->codec_id);
if (!pCodec)
{
printf("codec not found\n");
return false;
}
// open the codec
if (avcodec_open(pCodecCtx, pCodec) < 0)
{
printf("could not open codec\n");
return false;
}
// Allocate memory for output
if(!initOutputBuf())
{
printf("Can't allocate memory for output bitstream\n");
return false;
}
// Allocate the YUV frame
if(!initFrame())
{
printf("Can't init frame\n");
return false;
}
if (url_fopen(&pFormatCtx->pb,pFormatCtx->filename, URL_WRONLY) < 0)
{
printf( "Could not open '%s'\n", pFormatCtx->filename);
return false;
}
av_write_header(pFormatCtx);
return true;
}
Then for each frame
int encodeImage(const QImage &img)
{
if (!convertImage_sws(img)) { // SWS conversion
return false;
}
ppicture->pts=pCodecCtx->frame_number;
//memset(outbuf,0,outbuf_size);
int out_size = ffmpeg::avcodec_encode_video(pCodecCtx,outbuf,outbuf_size,ppicture);
if (out_size > 0) {
ffmpeg::AVPacket pkt;
av_init_packet(&pkt);
if (pCodecCtx->coded_frame->pts != (0x8000000000000000LL))
pkt.pts= av_rescale_q(pCodecCtx->coded_frame->pts, pCodecCtx->time_base, pVideoStream->time_base);
if(pCodecCtx->coded_frame->key_frame)
pkt.flags |= AV_PKT_FLAG_KEY;
pkt.stream_index= pVideoStream->index;
pkt.data= outbuf;
pkt.size= out_size;
if (av_write_frame(pFormatCtx, &pkt) < 0 ) {
return 0;
}
}
return out_size;
}
void close()
{
av_write_trailer(pFormatCtx);
// close_video
avcodec_close(pVideoStream->codec);
freeFrame();
freeOutputBuf();
/* free the streams */
for(int i = 0; i < pFormatCtx->nb_streams; i++)
{
av_freep(&pFormatCtx->streams[i]->codec);
av_freep(&pFormatCtx->streams[i]);
}
// Close file
url_fclose(pFormatCtx->pb);
// Free the stream
av_free(pFormatCtx);
}
bool initFrame()
{
ppicture = ffmpeg::avcodec_alloc_frame();
if(ppicture==0)
return false;
int size = avpicture_get_size(pCodecCtx->pix_fmt, pCodecCtx->width, pCodecCtx->height);
picture_buf = new uint8_t[size];
if(picture_buf==0)
{
av_free(ppicture);
ppicture=0;
return false;
}
// Setup the planes
avpicture_fill((ffmpeg::AVPicture *)ppicture, picture_buf,pCodecCtx->pix_fmt, pCodecCtx->width, pCodecCtx->height);
ppicture->pts = 0;
return true;
}
If you want to encode with libavcodec+libx264 in interlaced mode, use CODEC_FLAG_INTERLACED_DCT. If possible you should use libx264 or the CLI program directly though, it's less work.

Resources