FFmpeg - MJPEG decoding gives inconsistent values - ffmpeg

I have a set of JPEG frames which I am muxing into an avi, which gives me a mjpeg video. This is the command I run on the console:
ffmpeg -y -start_number 0 -i %06d.JPEG -codec copy vid.avi
When I try to demux the video using ffmpeg C api, I get frames which are slightly different in values. Demuxing code looks something like this:
AVFormatContext* fmt_ctx = NULL;
AVCodecContext* cdc_ctx = NULL;
AVCodec* vid_cdc = NULL;
int ret;
unsigned int height, width;
....
// read_nframes is the number of frames to read
output_arr = new unsigned char [height * width * 3 *
sizeof(unsigned char) * read_nframes];
avcodec_open2(cdc_ctx, vid_cdc, NULL);
int num_bytes;
uint8_t* buffer = NULL;
const AVPixelFormat out_format = AV_PIX_FMT_RGB24;
num_bytes = av_image_get_buffer_size(out_format, width, height, 1);
buffer = (uint8_t*)av_malloc(num_bytes * sizeof(uint8_t));
AVFrame* vid_frame = NULL;
vid_frame = av_frame_alloc();
AVFrame* conv_frame = NULL;
conv_frame = av_frame_alloc();
av_image_fill_arrays(conv_frame->data, conv_frame->linesize, buffer,
out_format, width, height, 1);
struct SwsContext *sws_ctx = NULL;
sws_ctx = sws_getContext(width, height, cdc_ctx->pix_fmt,
width, height, out_format,
SWS_BILINEAR, NULL,NULL,NULL);
int frame_num = 0;
AVPacket vid_pckt;
while (av_read_frame(fmt_ctx, &vid_pckt) >=0) {
ret = avcodec_send_packet(cdc_ctx, &vid_pckt);
if (ret < 0)
break;
ret = avcodec_receive_frame(cdc_ctx, vid_frame);
if (ret < 0 && ret != AVERROR(EAGAIN) && ret != AVERROR_EOF)
break;
if (ret >= 0) {
// convert image from native format to planar GBR
sws_scale(sws_ctx, vid_frame->data,
vid_frame->linesize, 0, vid_frame->height,
conv_frame->data, conv_frame->linesize);
unsigned char* r_ptr = output_arr +
(height * width * sizeof(unsigned char) * 3 * frame_num);
unsigned char* g_ptr = r_ptr + (height * width * sizeof(unsigned char));
unsigned char* b_ptr = g_ptr + (height * width * sizeof(unsigned char));
unsigned int pxl_i = 0;
for (unsigned int r = 0; r < height; ++r) {
uint8_t* avframe_r = conv_frame->data[0] + r*conv_frame->linesize[0];
for (unsigned int c = 0; c < width; ++c) {
r_ptr[pxl_i] = avframe_r[0];
g_ptr[pxl_i] = avframe_r[1];
b_ptr[pxl_i] = avframe_r[2];
avframe_r += 3;
++pxl_i;
}
}
++frame_num;
if (frame_num >= read_nframes)
break;
}
}
...
In my experience around two-thirds of the pixel values are different, each by +-1 (in a range of [0,255]). I am wondering is it due to some decoding scheme FFmpeg uses for reading JPEG frames? I tried encoding and decoding png frames, and it works perfectly fine. I am sure this is something to do with the libav decoding process because the MD5 values are consistent between the images and the video:
ffmpeg -i %06d.JPEG -f framemd5 -
ffmpeg -i vid.avi -f framemd5 -
In short my goal is to get the same pixel by pixel values for each JPEG frame as I would I have gotten if I was reading the JPEG images directly. Here is the stand-alone bitbucket code I used. It includes cmake files to build code, and a couple of jpeg frames with the converted avi file to test this problem. (give '--filetype png' to test the png decoding).

Related

FFmpeg: how to encode last audio frame?

Considering the following snippet: ( from https://ffmpeg.org/doxygen/trunk/encode_audio_8c-example.html )
for (i = 0; i < 200; i++) {
/* make sure the frame is writable -- makes a copy if the encoder
* kept a reference internally */
ret = av_frame_make_writable(frame);
if (ret < 0)
exit(1);
samples = (uint16_t*)frame->data[0];
for (j = 0; j < c->frame_size; j++) {
samples[2*j] = (int)(sin(t) * 10000);
for (k = 1; k < c->channels; k++)
samples[2*j + k] = samples[2*j];
t += tincr;
}
encode(c, frame, pkt, f);
}
If I understand the example correctly, the generated audio stream consists exactly of 200 frames of size c->frame_size which are encoded and saved to disk.
However, if I want to encode a generic stream of data of size soundsize, I will have a certain number of frames of fixed size c->frame_size, i.e. size_t nframes = soundsize / c->frame_size; plus one last frame of size: size_t rem_lastframe = soundsize % c->frame_size;
Can you explain me how to process this last frame? The frame_size seems to be fixed and chosen by the codec.
This is what ffmpeg does.
if (src->nb_samples < avctx->frame_size) {
ret = pad_last_frame(avctx, dst, src);
...
You can use apad filter or mimic what libavcodec does
/**
* Pad last frame with silence.
*/
static int pad_last_frame(AVCodecContext *s, AVFrame *frame, const AVFrame *src)
{
int ret;
frame->format = src->format;
frame->channel_layout = src->channel_layout;
frame->channels = src->channels;
frame->nb_samples = s->frame_size;
ret = av_frame_get_buffer(frame, 0);
if (ret < 0)
goto fail;
ret = av_frame_copy_props(frame, src);
if (ret < 0)
goto fail;
if ((ret = av_samples_copy(frame->extended_data, src->extended_data, 0, 0,
src->nb_samples, s->channels, s->sample_fmt)) < 0)
goto fail;
if ((ret = av_samples_set_silence(frame->extended_data, src->nb_samples,
frame->nb_samples - src->nb_samples,
s->channels, s->sample_fmt)) < 0)
goto fail;
return 0;
fail:
av_frame_unref(frame);
return ret;
}

how to copy other frame buffer with ffmpeg

I have a question for frame copy with other position.
I got a 1280640 decoded frame (YUV420) from h264/mp4 and I wanna merge 4 frame to 25601280 frame buffer . as below.
enter image description here
The code is like this but does not working.
enter code here
/* pCodecCtx and pFrame for decoded frame information */
int Encoding2JPEG( AVCodecContext *pCodecCtx, AVFrame *pFrame, int FrameNo, int Quality)
{
int gotFrame;
/* New Context for JPEG encoding */
AVCodec *jpeg_codec = NULL;
AVCodecContext *jpeg_ctx =NULL;
int ret;
AVPacket *packet;
packet = av_packet_alloc();
if(!packet)
{
printf("alloc fail!\n");
return -1;
}
if(frame)
printf("received frame for encoding %3"PRId64"\n", frame->pts);
pFrame_gather = av_frame_alloc();
if(!pFrame_gather)
{
printf("Could not allocate gater frame!\n");
return -1;
}
pFrame_gather->format = pCodecCtx->pix_fmt;
pFrame_gather->width = pCodecCtx->width*2;
pFrame_gather->height = pCodecCtx->height*2;
if ( av_frame_get_buffer(pFrame_gather, 0) < 0)
printf("Could not allocate the gather frame data!\n");
/* Buffer Clear */
ptrdiff_t linesize[4] = { frame->linesize[0], 0, 0, 0 };
if( av_image_fill_black ( pFrame_gather->data, linesize, pCodecCtx->pix_fmt, \
AVCOL_RANGE_JPEG , pFrame_gather->width, pFrame_gather->height) != 0 )
printf("image clear error!\n");
ret = av_frame_make_writable(pFrame_gather);
if( ret <0)
printf("writeable set fail!\n");
/* set position of destination */
int top_band =0;
int left_band=200;
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pCodecCtx->pix_fmt);
int y_shift;
int x_shift;
int max_step[4];
if (pix_fmt < 0 || pix_fmt >= AV_PIX_FMT_NB)
return -1;
y_shift = desc->log2_chroma_h;
x_shift = desc->log2_chroma_w;
av_image_fill_max_pixsteps(max_step, NULL, desc);
/* Y Data */
memcpy( pFrame_gather->data[0] + (top_band * pFrame->linesize[0]) + left_band,\
frame->data[0] , pCodecCtx->width*pCodecCtx->height);
/* U Data */
memcpy ( pFrame_gather->data[1] + ((top_band >> y_shift) * pFrame->linesize[1]) + \
(left_band >> x_shift), frame->data[1], pCodecCtx->width/2*pCodecCtx->height/2);
/* V DAta */
memcpy ( pFrame_gather->data[2] + ((top_band >> y_shift) * pFrame->linesize[2]) \
+ (left_band >> x_shift), frame->data[2], pCodecCtx->width/2*pCodecCtx->height/2);
/* unref variable .... */
....
}
Upper source code is something wring. It does not working.
How to memory copy with position?

FFMPEG Stream series of pictures over network

I would like to generate video frames on one computer and stream them to another computer to be manipulated and displayed. I want to do this programatically, because the source images are generated from a 3rd party c++ library and on the receiving end I want to manipulate the images before displaying them. I have been pulling my hair out trying FFMPEG. I got the encoding example to work, but am not sure what to do after that. I have just about googled everything i could think of but cannot figure out which FFMPEG library/function to call once I have a stuffed AVPacket. It seems that I need to use either the AVIO or AVFormat or the muxer but it is not clear how to initialize and get them working. I would appreciate any help you could offer.
To provide some context I have the example 'decoding_encoding.c' which is provided with ffmpeg working. Here is the spot that I am struggling with:
/* encode the image */
ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
if (ret < 0) {
fprintf(stderr, "Error encoding frame\n");
exit(1);
}
if (got_output) {
printf("Write frame %3d (size=%5d)\n", i, pkt.size);
fwrite(pkt.data, 1, pkt.size, f);
//
// instead of writing to a file, i want to stream to a network
// What do I need to do with pkt to do that?
//
av_free_packet(&pkt);
}
Most documentation (and the Dranger tutorial) focus on reading and writing files. I do not need to use a file. I want to stream video over a network. It seems like I need AVFormat and AVio but I jsut can't figure out how they fit together.
The key was to use avio_open2() to open the socket connection and then pass the data and size fields of pkt to avio_write(). He is an example working program (derived from decode_encode.c in the doc/examples directory of ffmpeg):
#include "stdafx.h"
/*
* Copyright (c) 2001 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/**
* #file
* libavcodec API use example.
*
* #example decoding_encoding.c
* Note that libavcodec only handles codecs (mpeg, mpeg4, etc...),
* not file formats (avi, vob, mp4, mov, mkv, mxf, flv, mpegts, mpegps, etc...). See library 'libavformat' for the
* format handling
*/
#include <math.h>
#include <stdio.h>
#include <cstdio>
#include <winsock.h>
extern "C"
{
#include <libavutil/opt.h>
#include <libavcodec/avcodec.h>
#include <libavutil/channel_layout.h>
#include <libavutil/common.h>
#include <libavutil/imgutils.h>
#include <libavutil/mathematics.h>
#include <libavutil/samplefmt.h>
#include <libavformat\avio.h>
#include <libavformat\avformat.h>
}
#define INBUF_SIZE 4096
#define AUDIO_INBUF_SIZE 20480
#define AUDIO_REFILL_THRESH 4096
static void video_encode_example(const char *filename, AVCodecID codec_id)
{
AVCodec *codec;
AVCodecContext *c = NULL;
int i, ret, x, y, got_output;
FILE *f;
AVFrame *frame;
AVPacket pkt;
uint8_t endcode[] = { 0, 0, 1, 0xb7 };
errno_t err;
int errval;
printf("Encode video file %s\n", filename);
/* find the mpeg1 video encoder */
codec = avcodec_find_encoder(codec_id);
if (!codec) {
fprintf(stderr, "Codec not found\n");
exit(1);
}
c = avcodec_alloc_context3(codec);
if (!c) {
fprintf(stderr, "Could not allocate video codec context\n");
exit(1);
}
/* put sample parameters */
c->bit_rate = 4000000;
/* resolution must be a multiple of two */
c->width = 1024;
c->height = 768;
/* frames per second */
c->time_base.den = 25;
c->time_base.num = 1;
/* emit one intra frame every ten frames
* check frame pict_type before passing frame
* to encoder, if frame->pict_type is AV_PICTURE_TYPE_I
* then gop_size is ignored and the output of encoder
* will always be I frame irrespective to gop_size
*/
c->gop_size = 12;
c->max_b_frames = 0;
c->pix_fmt = AV_PIX_FMT_YUV420P;
if (codec_id == AV_CODEC_ID_H264)
{
av_opt_set(c->priv_data, "preset", "slow", 0);
av_opt_set(c->priv_data, "tune", "zerolatency", 0);
}
/* open it */
if (avcodec_open2(c, codec, NULL) < 0) {
fprintf(stderr, "Could not open codec\n");
exit(1);
}
AVFormatContext* format = avformat_alloc_context();
AVDictionary *options = NULL;
av_dict_set(&options, "pkt_size", "1300", 0);
av_dict_set(&options, "buffer_size", "65535", 0);
AVIOContext * server = NULL;
avio_open2(&server, "udp://192.168.0.13:5555", AVIO_FLAG_WRITE, NULL, &options);
frame = av_frame_alloc();
if (!frame) {
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
frame->format = c->pix_fmt;
frame->width = c->width;
frame->height = c->height;
/* the image can be allocated by any means and av_image_alloc() is
* just the most convenient way if av_malloc() is to be used */
ret = av_image_alloc(frame->data, frame->linesize, c->width, c->height,
c->pix_fmt, 32);
if (ret < 0) {
fprintf(stderr, "Could not allocate raw picture buffer\n");
exit(1);
}
// Cycle through the test pattern for 1 hour
for (int j = 0; j < 3600; j++)
{
/* encode 1 second of video */
for (i = 0; i < 25; i++) {
Sleep(40);
av_init_packet(&pkt);
pkt.data = NULL; // packet data will be allocated by the encoder
pkt.size = 1300;
fflush(stdout);
/* prepare a dummy image */
/* Y */
for (y = 0; y < c->height; y++) {
for (x = 0; x < c->width; x++) {
frame->data[0][y * frame->linesize[0] + x] = x + y + i * 3;
}
}
/* Cb and Cr */
for (y = 0; y < c->height / 2; y++) {
for (x = 0; x < c->width / 2; x++) {
frame->data[1][y * frame->linesize[1] + x] = 128 + y + i * 2;
frame->data[2][y * frame->linesize[2] + x] = 64 + x + i * 5;
}
}
frame->pts = i+j*25;
/* encode the image */
ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
if (ret < 0) {
fprintf(stderr, "Error encoding frame\n");
exit(1);
}
if (got_output) {
printf("Write frame %3d (size=%5d)\n", i, pkt.size);
//fwrite(pkt.data, 1, pkt.size, f);
avio_write(server, pkt.data, pkt.size);
av_free_packet(&pkt);
}
}
}
/* get the delayed frames */
for (got_output = 1; got_output; i++) {
fflush(stdout);
ret = avcodec_encode_video2(c, &pkt, NULL, &got_output);
if (ret < 0) {
fprintf(stderr, "Error encoding frame\n");
exit(1);
}
if (got_output) {
printf("Write frame %3d (size=%5d)\n", i, pkt.size);
avio_write(server, pkt.data, pkt.size);
av_free_packet(&pkt);
}
}
avcodec_close(c);
av_free(c);
av_freep(&frame->data[0]);
av_frame_free(&frame);
printf("\n");
}
int main(int argc, char **argv)
{
const char *output_type;
/* register all the codecs */
av_register_all();
avcodec_register_all();
avformat_network_init();
video_encode_example("test.h264", AV_CODEC_ID_H264);
return 0;
}

How to encode resampled PCM-audio to AAC using ffmpeg-API when input pcm samples count not equal 1024

I am working on capturing and streaming audio to RTMP server at a moment. I work under MacOS (in Xcode), so for capturing audio sample-buffer I use AVFoundation-framework. But for encoding and streaming I need to use ffmpeg-API and libfaac encoder. So output format must be AAC (for supporting stream playback on iOS-devices).
And I faced with such problem: audio-capturing device (in my case logitech camera) gives me sample-buffer with 512 LPCM samples, and I can select input sample-rate from 16000, 24000, 36000 or 48000 Hz. When I give these 512 samples to AAC-encoder (configured for appropriate sample-rate), I hear a slow and jerking audio (seems as like pice of silence after each frame).
I figured out (maybe I am wrong), that libfaac encoder accepts audio frames only with 1024 samples. When I set input samplerate to 24000 and resample input sample-buffer to 48000 before encoding, I obtain 1024 resampled samples. After encoding these 1024 sampels to AAC, I hear proper sound on output. But my web-cam produce 512 samples in buffer for any input samplerate, when output sample-rate must be 48000 Hz. So I need to do resampling in any case, and I will not obtain exactly 1024 samples in buffer after resampling.
Is there a way to solve this problem within ffmpeg-API functionality?
I would be grateful for any help.
PS:
I guess that I can accumulate resampled buffers until count of samples become 1024, and then encode it, but this is stream so there will be troubles with resulting timestamps and with other input devices, and such solution is not suitable.
The current issue came out of the problem described in [question]: How to fill audio AVFrame (ffmpeg) with the data obtained from CMSampleBufferRef (AVFoundation)?
Here is a code with audio-codec configs (there also was video stream but video work fine):
/*global variables*/
static AVFrame *aframe;
static AVFrame *frame;
AVOutputFormat *fmt;
AVFormatContext *oc;
AVStream *audio_st, *video_st;
Init ()
{
AVCodec *audio_codec, *video_codec;
int ret;
avcodec_register_all();
av_register_all();
avformat_network_init();
avformat_alloc_output_context2(&oc, NULL, "flv", filename);
fmt = oc->oformat;
oc->oformat->video_codec = AV_CODEC_ID_H264;
oc->oformat->audio_codec = AV_CODEC_ID_AAC;
video_st = NULL;
audio_st = NULL;
if (fmt->video_codec != AV_CODEC_ID_NONE)
{ //… /*init video codec*/}
if (fmt->audio_codec != AV_CODEC_ID_NONE) {
audio_codec= avcodec_find_encoder(fmt->audio_codec);
if (!(audio_codec)) {
fprintf(stderr, "Could not find encoder for '%s'\n",
avcodec_get_name(fmt->audio_codec));
exit(1);
}
audio_st= avformat_new_stream(oc, audio_codec);
if (!audio_st) {
fprintf(stderr, "Could not allocate stream\n");
exit(1);
}
audio_st->id = oc->nb_streams-1;
//AAC:
audio_st->codec->sample_fmt = AV_SAMPLE_FMT_S16;
audio_st->codec->bit_rate = 32000;
audio_st->codec->sample_rate = 48000;
audio_st->codec->profile=FF_PROFILE_AAC_LOW;
audio_st->time_base = (AVRational){1, audio_st->codec->sample_rate };
audio_st->codec->channels = 1;
audio_st->codec->channel_layout = AV_CH_LAYOUT_MONO;
if (oc->oformat->flags & AVFMT_GLOBALHEADER)
audio_st->codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
}
if (video_st)
{
// …
/*prepare video*/
}
if (audio_st)
{
aframe = avcodec_alloc_frame();
if (!aframe) {
fprintf(stderr, "Could not allocate audio frame\n");
exit(1);
}
AVCodecContext *c;
int ret;
c = audio_st->codec;
ret = avcodec_open2(c, audio_codec, 0);
if (ret < 0) {
fprintf(stderr, "Could not open audio codec: %s\n", av_err2str(ret));
exit(1);
}
//…
}
And resampling and encoding audio:
if (mType == kCMMediaType_Audio)
{
CMSampleTimingInfo timing_info;
CMSampleBufferGetSampleTimingInfo(sampleBuffer, 0, &timing_info);
double pts=0;
double dts=0;
AVCodecContext *c;
AVPacket pkt = { 0 }; // data and size must be 0;
int got_packet, ret;
av_init_packet(&pkt);
c = audio_st->codec;
CMItemCount numSamples = CMSampleBufferGetNumSamples(sampleBuffer);
NSUInteger channelIndex = 0;
CMBlockBufferRef audioBlockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer);
size_t audioBlockBufferOffset = (channelIndex * numSamples * sizeof(SInt16));
size_t lengthAtOffset = 0;
size_t totalLength = 0;
SInt16 *samples = NULL;
CMBlockBufferGetDataPointer(audioBlockBuffer, audioBlockBufferOffset, &lengthAtOffset, &totalLength, (char **)(&samples));
const AudioStreamBasicDescription *audioDescription = CMAudioFormatDescriptionGetStreamBasicDescription(CMSampleBufferGetFormatDescription(sampleBuffer));
SwrContext *swr = swr_alloc();
int in_smprt = (int)audioDescription->mSampleRate;
av_opt_set_int(swr, "in_channel_layout", AV_CH_LAYOUT_MONO, 0);
av_opt_set_int(swr, "out_channel_layout", audio_st->codec->channel_layout, 0);
av_opt_set_int(swr, "in_channel_count", audioDescription->mChannelsPerFrame, 0);
av_opt_set_int(swr, "out_channel_count", audio_st->codec->channels, 0);
av_opt_set_int(swr, "out_channel_layout", audio_st->codec->channel_layout, 0);
av_opt_set_int(swr, "in_sample_rate", audioDescription->mSampleRate,0);
av_opt_set_int(swr, "out_sample_rate", audio_st->codec->sample_rate,0);
av_opt_set_sample_fmt(swr, "in_sample_fmt", AV_SAMPLE_FMT_S16, 0);
av_opt_set_sample_fmt(swr, "out_sample_fmt", audio_st->codec->sample_fmt, 0);
swr_init(swr);
uint8_t **input = NULL;
int src_linesize;
int in_samples = (int)numSamples;
ret = av_samples_alloc_array_and_samples(&input, &src_linesize, audioDescription->mChannelsPerFrame,
in_samples, AV_SAMPLE_FMT_S16P, 0);
*input=(uint8_t*)samples;
uint8_t *output=NULL;
int out_samples = av_rescale_rnd(swr_get_delay(swr, in_smprt) +in_samples, (int)audio_st->codec->sample_rate, in_smprt, AV_ROUND_UP);
av_samples_alloc(&output, NULL, audio_st->codec->channels, out_samples, audio_st->codec->sample_fmt, 0);
in_samples = (int)numSamples;
out_samples = swr_convert(swr, &output, out_samples, (const uint8_t **)input, in_samples);
aframe->nb_samples =(int) out_samples;
ret = avcodec_fill_audio_frame(aframe, audio_st->codec->channels, audio_st->codec->sample_fmt,
(uint8_t *)output,
(int) out_samples *
av_get_bytes_per_sample(audio_st->codec->sample_fmt) *
audio_st->codec->channels, 1);
aframe->channel_layout = audio_st->codec->channel_layout;
aframe->channels=audio_st->codec->channels;
aframe->sample_rate= audio_st->codec->sample_rate;
if (timing_info.presentationTimeStamp.timescale!=0)
pts=(double) timing_info.presentationTimeStamp.value/timing_info.presentationTimeStamp.timescale;
aframe->pts=pts*audio_st->time_base.den;
aframe->pts = av_rescale_q(aframe->pts, audio_st->time_base, audio_st->codec->time_base);
ret = avcodec_encode_audio2(c, &pkt, aframe, &got_packet);
if (ret < 0) {
fprintf(stderr, "Error encoding audio frame: %s\n", av_err2str(ret));
exit(1);
}
swr_free(&swr);
if (got_packet)
{
pkt.stream_index = audio_st->index;
pkt.pts = av_rescale_q(pkt.pts, audio_st->codec->time_base, audio_st->time_base);
pkt.dts = av_rescale_q(pkt.dts, audio_st->codec->time_base, audio_st->time_base);
// Write the compressed frame to the media file.
ret = av_interleaved_write_frame(oc, &pkt);
if (ret != 0) {
fprintf(stderr, "Error while writing audio frame: %s\n",
av_err2str(ret));
exit(1);
}
}
I also ended up here after having a similar problem. I'm reading audio and video from a Blackmagic Decklink SDI card in 720p50 meaning I had 960 samples per videoframe (48k/50fps) I wanted to encode together with the video. Got really weird audio when only sending 960 samples to aacenc and it didn't really complain about this fact either.
Started to use AVAudioFifo (see ffmpeg/doc/examples/transcode_aac.c) and kept adding frames to it until I had enough frames to satisfy aacenc. This will mean I have samples playing too late I guess, since pts will be set on 1024 samples when the first 960 should really have another value. But, it's not really noticeable as far as I can hear/see.
I got a similar problem. I was encoding PCM packets to AAC while the length of PCM packets are sometimes smaller than 1024.
If I encode the packet that's smaller than 1024, the audio will be slow. On the other hand, if I throw it away, the audio will get faster. swr_convert function didn't have any automatic buffering from my observation.
I ended up with a buffer scheme that packets was filled to a 1024 buffer and the buffer gets encoded and cleaned everytime it's full.
The function to fill buffer is below:
// put frame data into buffer of fixed size
bool ffmpegHelper::putAudioBuffer(const AVFrame *pAvFrameIn, AVFrame **pAvFrameBuffer, AVCodecContext *dec_ctx, int frame_size, int &k0) {
// prepare pFrameAudio
if (!(*pAvFrameBuffer)) {
if (!(*pAvFrameBuffer = av_frame_alloc())) {
av_log(NULL, AV_LOG_ERROR, "Alloc frame failed\n");
return false;
} else {
(*pAvFrameBuffer)->format = dec_ctx->sample_fmt;
(*pAvFrameBuffer)->channels = dec_ctx->channels;
(*pAvFrameBuffer)->sample_rate = dec_ctx->sample_rate;
(*pAvFrameBuffer)->nb_samples = frame_size;
int ret = av_frame_get_buffer(*pAvFrameBuffer, 0);
if (ret < 0) {
char err[500];
av_log(NULL, AV_LOG_ERROR, "get audio buffer failed: %s\n",
av_make_error_string(err, AV_ERROR_MAX_STRING_SIZE, ret));
return false;
}
(*pAvFrameBuffer)->nb_samples = 0;
(*pAvFrameBuffer)->pts = pAvFrameIn->pts;
}
}
// copy input data to buffer
int n_channels = pAvFrameIn->channels;
int new_samples = min(pAvFrameIn->nb_samples - k0, frame_size - (*pAvFrameBuffer)->nb_samples);
int k1 = (*pAvFrameBuffer)->nb_samples;
if (pAvFrameIn->format == AV_SAMPLE_FMT_S16) {
int16_t *d_in = (int16_t *)pAvFrameIn->data[0];
d_in += n_channels * k0;
int16_t *d_out = (int16_t *)(*pAvFrameBuffer)->data[0];
d_out += n_channels * k1;
for (int i = 0; i < new_samples; ++i) {
for (int j = 0; j < pAvFrameIn->channels; ++j) {
*d_out++ = *d_in++;
}
}
} else {
printf("not handled format for audio buffer\n");
return false;
}
(*pAvFrameBuffer)->nb_samples += new_samples;
k0 += new_samples;
return true;
}
And the loop for fill buffer and encode is below:
// transcoding needed
int got_frame;
AVMediaType stream_type;
// decode the packet (do it your self)
decodePacket(packet, dec_ctx, &pAvFrame_, got_frame);
if (enc_ctx->codec_type == AVMEDIA_TYPE_AUDIO) {
ret = 0;
// break audio packet down to buffer
if (enc_ctx->frame_size > 0) {
int k = 0;
while (k < pAvFrame_->nb_samples) {
if (!putAudioBuffer(pAvFrame_, &pFrameAudio_, dec_ctx, enc_ctx->frame_size, k))
return false;
if (pFrameAudio_->nb_samples == enc_ctx->frame_size) {
// the buffer is full, encode it (do it yourself)
ret = encodeFrame(pFrameAudio_, stream_index, got_frame, false);
if (ret < 0)
return false;
pFrameAudio_->pts += enc_ctx->frame_size;
pFrameAudio_->nb_samples = 0;
}
}
} else {
ret = encodeFrame(pAvFrame_, stream_index, got_frame, false);
}
} else {
// encode packet directly
ret = encodeFrame(pAvFrame_, stream_index, got_frame, false);
}
You have to break sample buffer into chunks of size 1024, i did for recording mp3 in android for more info follow these links link1,links2
If anyone ended up here, I had the same issue, and just as #Mohit pointed out for AAC each audio frame has to be broken down into 1024 bytes chunks.
example:
uint8_t *buffer = (uint8_t*) malloc(1024);
AVFrame *frame = av_frame_alloc();
while((fread(buffer, 1024, 1, fp)) == 1) {
frame->data[0] = buffer;
}
A possible solution is to use asetnsamples filter which sets the number of samples for each output audio frame :
https://ffmpeg.org/ffmpeg-filters.html#asetnsamples
You can feed the filter with your input frames and the resulting output frames each have the desired number of samples. The value for the number of samples in filter should be equal to frame_size of the encoder AVCodecContext.

How to write a video encoder with ffmpeg that explicitly controls the position of keyframes?

I want to write an encoder with ffmpeg which can put iFrames (keyframes) at positions I want. Where can I found tutorials or reference material for it?
P.S
Is it possible to do this with mencoder or any opensource encoder. I want to encode H263 file. I am writing under & for linux.
You'll need to look at the libavcodec documentation - specifically, at avcodec_encode_video(). I found that the best available documentation is in the ffmpeg header files and the API sample source code that's provided with the ffmpeg source. Specifically, look at libavcodec/api-example.c or even ffmpeg.c.
To force an I frame, you'll need to set the pict_type member of the picture you're encoding to 1: 1 is an I frame, 2 is a P frame, and I don't remember what's the code for a B frame off the top of my head... Also, the key_frame member needs to be set to 1.
Some introductory material is available here and here, but I don't really know how good it is.
You'll need to be careful how you allocate the frame objects that the API calls require. api-example.c is your best bet as far as that goes, in my opinion. Look for the function video_encode_example() - it's concise and illustrates all the important things you need to worry about - pay special attention to the second call to avcodec_encode_video() that passes a NULL picture argument - it's required to get the last frames of video since MPEG video is encoded out of sequence and you may end up with a delay of a few frames.
An up-to-date version of api-example.c can be found at http://ffmpeg.org/doxygen/trunk/doc_2examples_2decoding_encoding_8c-example.html
It does the entire video encoding in a single and relatively short function. So this is probably a good place to start. Compile and run it. And then start modifying it until it does what you want.
It also has audio encoding and audio & video decoding examples.
GStreamer has decent documentation, has bindings for a number of languages (although the native API is C), and supports any video format you can find plugins for, including H.263 via gstreamer-ffmpeg.
you will need libavcodec library, For the first step I think you can learn about its use in ffplay.c file inside ffmpeg source code. It would tell you a lot. You can check my project also about video at rtstegvideo.sourceforge.net.
Hope this help.
If you're Java programmer then use Xuggler.
Minimal runnable example on FFmpeg 2.7
Based on Ori Pessach's answer, below is a minimal example that generates frames of form.
I
P
B
P
...
The key parts of the code that control frame type are:
c = avcodec_alloc_context3(codec);
/* Minimal distance of I-frames. This is the maximum value allowed,
or else we get a warning at runtime. */
c->keyint_min = 600;
/* Or else it defaults to 0 b-frames are not allowed. */
c->max_b_frames = 1;
and:
frame->key_frame = 0;
switch (frame->pts % 4) {
case 0:
frame->key_frame = 1;
frame->pict_type = AV_PICTURE_TYPE_I;
break;
case 1:
case 3:
frame->pict_type = AV_PICTURE_TYPE_P;
break;
case 2:
frame->pict_type = AV_PICTURE_TYPE_B;
break;
}
We can then verify the frame type with:
ffprobe -select_streams v \
-show_frames \
-show_entries frame=pict_type \
-of csv \
tmp.h264
as mentioned at: https://superuser.com/questions/885452/extracting-the-index-of-key-frames-from-a-video-using-ffmpeg
Some rules were enforced by FFmpeg even if I try to overcome them:
the first frame is an I-frame
cannot place a B0frame before an I-frame (TODO why?)
Preview of generated output.
#include <libavcodec/avcodec.h>
#include <libavutil/imgutils.h>
#include <libavutil/opt.h>
#include <libswscale/swscale.h>
static AVCodecContext *c = NULL;
static AVFrame *frame;
static AVPacket pkt;
static FILE *file;
struct SwsContext *sws_context = NULL;
/*
Convert RGB24 array to YUV. Save directly to the `frame`,
modifying its `data` and `linesize` fields
*/
static void ffmpeg_encoder_set_frame_yuv_from_rgb(uint8_t *rgb) {
const int in_linesize[1] = { 3 * c->width };
sws_context = sws_getCachedContext(sws_context,
c->width, c->height, AV_PIX_FMT_RGB24,
c->width, c->height, AV_PIX_FMT_YUV420P,
0, 0, 0, 0);
sws_scale(sws_context, (const uint8_t * const *)&rgb, in_linesize, 0,
c->height, frame->data, frame->linesize);
}
/*
Generate 2 different images with four colored rectangles, each 25 frames long:
Image 1:
black | red
------+-----
green | blue
Image 2:
yellow | red
-------+-----
green | white
*/
uint8_t* generate_rgb(int width, int height, int pts, uint8_t *rgb) {
int x, y, cur;
rgb = realloc(rgb, 3 * sizeof(uint8_t) * height * width);
for (y = 0; y < height; y++) {
for (x = 0; x < width; x++) {
cur = 3 * (y * width + x);
rgb[cur + 0] = 0;
rgb[cur + 1] = 0;
rgb[cur + 2] = 0;
if ((frame->pts / 25) % 2 == 0) {
if (y < height / 2) {
if (x < width / 2) {
/* Black. */
} else {
rgb[cur + 0] = 255;
}
} else {
if (x < width / 2) {
rgb[cur + 1] = 255;
} else {
rgb[cur + 2] = 255;
}
}
} else {
if (y < height / 2) {
rgb[cur + 0] = 255;
if (x < width / 2) {
rgb[cur + 1] = 255;
} else {
rgb[cur + 2] = 255;
}
} else {
if (x < width / 2) {
rgb[cur + 1] = 255;
rgb[cur + 2] = 255;
} else {
rgb[cur + 0] = 255;
rgb[cur + 1] = 255;
rgb[cur + 2] = 255;
}
}
}
}
}
return rgb;
}
/* Allocate resources and write header data to the output file. */
void ffmpeg_encoder_start(const char *filename, int codec_id, int fps, int width, int height) {
AVCodec *codec;
int ret;
codec = avcodec_find_encoder(codec_id);
if (!codec) {
fprintf(stderr, "Codec not found\n");
exit(1);
}
c = avcodec_alloc_context3(codec);
if (!c) {
fprintf(stderr, "Could not allocate video codec context\n");
exit(1);
}
c->bit_rate = 400000;
c->width = width;
c->height = height;
c->time_base.num = 1;
c->time_base.den = fps;
/* I, P, B frame placement parameters. */
c->gop_size = 600;
c->max_b_frames = 1;
c->keyint_min = 600;
c->pix_fmt = AV_PIX_FMT_YUV420P;
if (codec_id == AV_CODEC_ID_H264)
av_opt_set(c->priv_data, "preset", "slow", 0);
if (avcodec_open2(c, codec, NULL) < 0) {
fprintf(stderr, "Could not open codec\n");
exit(1);
}
file = fopen(filename, "wb");
if (!file) {
fprintf(stderr, "Could not open %s\n", filename);
exit(1);
}
frame = av_frame_alloc();
if (!frame) {
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
frame->format = c->pix_fmt;
frame->width = c->width;
frame->height = c->height;
ret = av_image_alloc(frame->data, frame->linesize, c->width, c->height, c->pix_fmt, 32);
if (ret < 0) {
fprintf(stderr, "Could not allocate raw picture buffer\n");
exit(1);
}
}
/*
Write trailing data to the output file
and free resources allocated by ffmpeg_encoder_start.
*/
void ffmpeg_encoder_finish(void) {
uint8_t endcode[] = { 0, 0, 1, 0xb7 };
int got_output, ret;
do {
fflush(stdout);
ret = avcodec_encode_video2(c, &pkt, NULL, &got_output);
if (ret < 0) {
fprintf(stderr, "Error encoding frame\n");
exit(1);
}
if (got_output) {
fwrite(pkt.data, 1, pkt.size, file);
av_packet_unref(&pkt);
}
} while (got_output);
fwrite(endcode, 1, sizeof(endcode), file);
fclose(file);
avcodec_close(c);
av_free(c);
av_freep(&frame->data[0]);
av_frame_free(&frame);
}
/*
Encode one frame from an RGB24 input and save it to the output file.
Must be called after ffmpeg_encoder_start, and ffmpeg_encoder_finish
must be called after the last call to this function.
*/
void ffmpeg_encoder_encode_frame(uint8_t *rgb) {
int ret, got_output;
ffmpeg_encoder_set_frame_yuv_from_rgb(rgb);
av_init_packet(&pkt);
pkt.data = NULL;
pkt.size = 0;
switch (frame->pts % 4) {
case 0:
frame->key_frame = 1;
frame->pict_type = AV_PICTURE_TYPE_I;
break;
case 1:
case 3:
frame->key_frame = 0;
frame->pict_type = AV_PICTURE_TYPE_P;
break;
case 2:
frame->key_frame = 0;
frame->pict_type = AV_PICTURE_TYPE_B;
break;
}
ret = avcodec_encode_video2(c, &pkt, frame, &got_output);
if (ret < 0) {
fprintf(stderr, "Error encoding frame\n");
exit(1);
}
if (got_output) {
fwrite(pkt.data, 1, pkt.size, file);
av_packet_unref(&pkt);
}
}
/* Represents the main loop of an application which generates one frame per loop. */
static void encode_example(const char *filename, int codec_id) {
int pts;
int width = 320;
int height = 240;
uint8_t *rgb = NULL;
ffmpeg_encoder_start(filename, codec_id, 25, width, height);
for (pts = 0; pts < 100; pts++) {
frame->pts = pts;
rgb = generate_rgb(width, height, pts, rgb);
ffmpeg_encoder_encode_frame(rgb);
}
ffmpeg_encoder_finish();
}
int main(void) {
avcodec_register_all();
encode_example("tmp.h264", AV_CODEC_ID_H264);
encode_example("tmp.mpg", AV_CODEC_ID_MPEG1VIDEO);
/* TODO: is this encoded correctly? Possible to view it without container? */
/*encode_example("tmp.vp8", AV_CODEC_ID_VP8);*/
return 0;
}
Tested on Ubuntu 15.10. GitHub upstream.
Do you really want to do this?
In most cases, you are better off just controlling the global parameters of AVCodecContext.
FFmpeg does smart things like using a keyframe if the new frame is completely different from the previous one, and not much would be gained from differential encoding.
For example, if we set just:
c->keyint_min = 600;
then we get exactly 4 key-frames on the above example, which is logical since there are 4 abrupt frame changes on the generated video.

Resources