Convert from NV12 to RGB/YUV420P using libswscale - ffmpeg

I'm developing an application which needs to transform NV12 frames from the h264_cuvid decoder to RGB in order to modify those frames. I checked this question, but I don't know the 'Stride' value.
My code is the following:
uint8_t *inData[2] = { videoFrame->data[0], videoFrame->data[0] + videoFrame->width * videoFrame->height };
int inLinesize[2] = { videoFrame->width, videoFrame->width };
sws_scale(convert_yuv_to_rgb, inData, inLinesize, 0, videoFrame->height, aux_frame->data, aux_frame->linesize);
But it does not work. The problem seems to be only in the colours, since I can see the luminance plane correctly.
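For NV12 specifically, the usual swscale fix is to pass the decoder's own plane pointers and linesize values instead of deriving them from the width, because hardware decoders pad their strides. A minimal sketch, assuming convert_yuv_to_rgb was created with AV_PIX_FMT_NV12 as the source format:
// NV12 has two planes: Y in data[0] and interleaved UV in data[1];
// linesize[] already includes any padding the decoder added.
sws_scale(convert_yuv_to_rgb,
          (const uint8_t * const *)videoFrame->data,
          videoFrame->linesize,
          0, videoFrame->height,
          aux_frame->data, aux_frame->linesize);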

I ended up using a video filter based on this example.
char args[512];
int ret;
AVFilter *buffersrc = avfilter_get_by_name("buffer");
AVFilter *buffersink = avfilter_get_by_name("buffersink");
AVFilterInOut *outputs = avfilter_inout_alloc();
AVFilterInOut *inputs = avfilter_inout_alloc();
AVFilterGraph *filter_graph = avfilter_graph_alloc();
AVBufferSinkParams *buffersink_params;
enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_RGB32, AV_PIX_FMT_NONE };

/* Buffer video source: the decoded frames from the decoder will be inserted here. */
snprintf(args, sizeof(args),
         "video_size=%dx%d:pix_fmt=%d:time_base=%d/%d:pixel_aspect=%d/%d",
         inStream.width, inStream.height, inStream.pix_fmt,
         inStream.time_base.num, inStream.time_base.den,
         inStream.sample_aspect_ratio.num, inStream.sample_aspect_ratio.den);
ret = avfilter_graph_create_filter(&buffersrc_ctx_to_rgb_, buffersrc, "in", args, NULL, filter_graph);
if (ret < 0) {
    throw SVSException(QString("Could not create filter graph, error: %1").arg(svsAvErrorToFormattedString(ret)));
}

/* Buffer video sink: to terminate the filter chain. */
buffersink_params = av_buffersink_params_alloc();
buffersink_params->pixel_fmts = pix_fmts;
ret = avfilter_graph_create_filter(&buffersink_ctx_to_rgb_, buffersink, "out", NULL, buffersink_params, filter_graph);
if (ret < 0) {
    throw SVSException(QString("Cannot create buffer sink, error: %1").arg(svsAvErrorToFormattedString(ret)));
}

/* Endpoints for the filter graph. */
outputs->name = av_strdup("in");
outputs->filter_ctx = buffersrc_ctx_to_rgb_;
outputs->pad_idx = 0;
outputs->next = NULL;

inputs->name = av_strdup("out");
inputs->filter_ctx = buffersink_ctx_to_rgb_;
inputs->pad_idx = 0;
inputs->next = NULL;

QString filter_description = "format=pix_fmts=rgb32";
if ((ret = avfilter_graph_parse_ptr(filter_graph, filter_description.toStdString().c_str(), &inputs, &outputs, NULL)) < 0) {
    svsCritical("", QString("Could not add the filter to graph, error: %1").arg(svsAvErrorToFormattedString(ret)))
}
if ((ret = avfilter_graph_config(filter_graph, NULL)) < 0) {
    svsCritical("", QString("Could not configure the graph, error: %1").arg(svsAvErrorToFormattedString(ret)))
}
return;
I created another one to convert from RGB to YUV420P before encoding in a similar way.
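To actually run frames through such a graph (not shown above), the push/pull calls would look roughly like this sketch, assuming the buffersrc_ctx_to_rgb_ and buffersink_ctx_to_rgb_ contexts created earlier:
// Feed a decoded frame in, pull the filtered RGB32 frame out.
ret = av_buffersrc_add_frame_flags(buffersrc_ctx_to_rgb_, videoFrame, AV_BUFFERSRC_FLAG_KEEP_REF);
if (ret >= 0) {
    AVFrame *rgb_frame = av_frame_alloc();
    if (av_buffersink_get_frame(buffersink_ctx_to_rgb_, rgb_frame) >= 0) {
        // rgb_frame now holds the converted picture; modify it here.
    }
    av_frame_free(&rgb_frame);
}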

Extending Media Foundation Encoder to support 10-bit video encoding

The HEVC Media Foundation Encoder in Windows will only encode 8-bit video. My NVidia graphics card also supports 10-bit HDR and alpha-mode video encoding, so I decided to create my own IMFTransform that uses the NVidia SDK.
I've registered my DLL using MFTRegister:
MFT_REGISTER_TYPE_INFO aMediaTypesIn[] =
{
{ MFMediaType_Video, MFVideoFormat_ARGB32 },
{ MFMediaType_Video, MFVideoFormat_RGB32 },
{ MFMediaType_Video, MFVideoFormat_RGB32 },
{ MFMediaType_Video, MFVideoFormat_RGB10 },
{ MFMediaType_Video, MyFakeFmt },
};
MFT_REGISTER_TYPE_INFO aMediaTypesOut[] =
{
{ MFMediaType_Video, MFVideoFormat_H264 },
{ MFMediaType_Video, MFVideoFormat_H265 },
{ MFMediaType_Video, MFVideoFormat_HEVC },
};
// Size of the array.
const DWORD cNumMediaTypesI = ARRAY_SIZE(aMediaTypesIn);
const DWORD cNumMediaTypesO = ARRAY_SIZE(aMediaTypesOut);
hr = MFTRegister(
GUID_NVidiaEncoder, // CLSID.
MFT_CATEGORY_VIDEO_ENCODER, // Category.
MyFakeFmt is a non-existent input type used to fool the Sink Writer into picking my encoder, instead of Microsoft's predefined transform, when SetInputMediaType is called. This works OK.
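For anyone reproducing the trick: the fake format is just a GUID that no real subtype uses. The value below is a made-up placeholder, not a real constant:
// Hypothetical placeholder: any freshly generated GUID works, as long
// as no genuine pixel format matches it, so only this MFT accepts it.
static const GUID MyFakeFmt =
{ 0x1a2b3c4d, 0x0001, 0x0002, { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77 } };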
int wi = 1920;
int he = 1080;
int fps = 30;
int br = 4000;
auto fmt = MFVideoFormat_H264;
bool Our = 1;
const wchar_t* fil = L"r:\\1.mp4";
std::vector<DWORD> frame;
frame.resize(wi * he);
// Test
CComPtr<IMFSinkWriter> wr;
DeleteFile(fil);
CComPtr<IMFAttributes> attrs;
MFCreateAttributes(&attrs, 0);
auto hr = MFCreateSinkWriterFromURL(fil, 0, attrs, &wr);
DWORD str = (DWORD)-1;
CComPtr<IMFMediaType> mt2;
MFCreateMediaType(&mt2);
mt2->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
mt2->SetGUID(MF_MT_SUBTYPE, fmt);
MFSetAttributeRatio(mt2, MF_MT_FRAME_RATE, fps, 1);
hr = MFSetAttributeSize(mt2, MF_MT_FRAME_SIZE, wi, he);
MFSetAttributeRatio(mt2, MF_MT_PIXEL_ASPECT_RATIO, 1, 1);
mt2->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive);
mt2->SetUINT32(MF_MT_VIDEO_NOMINAL_RANGE, MFNominalRange_Normal);
mt2->SetUINT32(MF_MT_AVG_BITRATE, br*1000);
hr = wr->AddStream(mt2, &str);
CComPtr<IMFMediaType> mt1;
MFCreateMediaType(&mt1);
mt1->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
mt1->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_ARGB32);
hr = MFSetAttributeSize(mt1, MF_MT_FRAME_SIZE, wi, he);
// Force our selection
if (Our)
{
    mt1->SetGUID(MF_MT_SUBTYPE, MyFakeFmt);
    hr = wr->SetInputMediaType(str, mt1, 0);
}
mt1->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_ARGB32);
hr = wr->SetInputMediaType(str, mt1, 0);
hr = wr->BeginWriting();
for (int i = 0; i < 15; i++)
{
    auto i2 = i % 5;
    if (i2 == 0) Frm(frame, wi, he, 0xFFFFFFFF);
    if (i2 == 1 || i2 == 4) Frm(frame, wi, he, 0xFF0000FF); // some colors
    if (i2 == 2) Frm(frame, wi, he, 0xFFFF00FF);
    if (i2 == 3) Frm(frame, wi, he, 0xFF00FFFF);
    CComPtr<IMFSample> s;
    MFCreateSample(&s);
    int secs = 1;
    // Sample time/duration are in 100-ns units: 10,000,000 per second.
    hr = s->SetSampleDuration(10 * 1000 * 1000 * secs);
    hr = s->SetSampleTime(10 * 1000 * 1000 * i);
    CComPtr<IMFMediaBuffer> b;
    MFCreateMemoryBuffer((DWORD)(frame.size() * 4), &b);
    b->SetCurrentLength((DWORD)(frame.size() * 4));
    BYTE* by = 0;
    DWORD ml = 0, cl = 0;
    b->Lock(&by, &ml, &cl);
    memcpy(by, frame.data(), frame.size() * 4);
    b->Unlock();
    hr = s->AddBuffer(b);
    b = 0;
    hr = wr->WriteSample(str, s);
}
hr = wr->Finalize();
wr = 0;
The problems start with the call to Finalize to end the writing. Up to that point, everything seems to work normally. Note that I have tested the NVidia IMFTransform I've created with input frames, and it encodes and outputs them correctly as raw data.
When I call Finalize and the type is MFVideoFormat_H264, the call succeeds. However, the generated mp4 plays back weirdly.
For some reason, MediaInfo also shows 1 FPS. Why?
When the output is MFVideoFormat_HEVC, Finalize fails with 0xC00D4A45: "Sink could not create valid output file because required headers were not provided to the sink."
I've also tried converting the raw .h264 file I'm saving to mp4 with ffmpeg, and that works; the resulting mp4 plays correctly.
Adding an MF_MT_MPEG_SEQUENCE_HEADER didn't help (besides, I think this is only needed for H.264):
const char* bl4 = "\x00\x00\x00\x01\x67\x42\xC0\x28\x95\xB0\x1E\x00\x89\xF9\x70\x16\xC8\x00\x00\x03\x00\x08\x00\x00\x03\x01\xE0\x6D\x04\x42\x37\x00\x00\x00\x01\x68\xCA\x8F\x20";
mt2->SetBlob(MF_MT_MPEG_SEQUENCE_HEADER, (UINT8*)bl4, 39);
What do you make of all that?
Thanks

libx264 Input picture width (640) is greater than stride (0)

I'm trying to encode a series of Cairo surfaces by using libav. Here I initialize AV stuff:
AVStream* video_stream;
AVCodec* vcodec;
gint ret;
/* Setup video container */
avformat_alloc_output_context2(&img->video_format_context, NULL, NULL, filename);
if (img->video_format_context == NULL)
{
img_message(img, TRUE, _("Failed to find a suitable container for %s\n"),filename);
return FALSE;
}
ret = avio_open(&img->video_format_context->pb, filename, AVIO_FLAG_WRITE);
if (ret < 0)
{
img_message(img, TRUE, _("Couldn't write output file %s\n"),filename);
return FALSE;
}
/* Setup video codec */
vcodec = avcodec_find_encoder(codec_id);
if (!vcodec)
{
img_message(img, TRUE, _("Couldn't find any encoder for %s\n"),filename);
return FALSE;
}
/* Create video stream */
video_stream = avformat_new_stream(img->video_format_context, vcodec);
if (! video_stream)
{
img_message(img, TRUE, _("Couldn't allocate video stream\n"));
return FALSE;
}
video_stream->id = 0;
/* Allocate video encoding context */
img->codec_context = avcodec_alloc_context3(vcodec);
if (! img->codec_context)
{
img_message(img, TRUE, _("Couldn't allocate video enconding context\n"));
return FALSE;
}
/* Setup video encoding context parameters */
img->codec_context->codec_id = codec_id;
img->codec_context->codec_type = AVMEDIA_TYPE_VIDEO;
img->codec_context->width = img->video_size[0];
img->codec_context->height = img->video_size[1];
img->codec_context->sample_aspect_ratio = (struct AVRational) {1, 1};
img->codec_context->pix_fmt = vcodec->pix_fmts[0];
img->codec_context->framerate = av_d2q(frame_rate, INT_MAX);
if (codec_id == AV_CODEC_ID_VP8 || codec_id == AV_CODEC_ID_VP9 || codec_id == AV_CODEC_ID_THEORA || codec_id == AV_CODEC_ID_FLV1 ||
codec_id == AV_CODEC_ID_MPEG1VIDEO || codec_id == AV_CODEC_ID_MPEG2VIDEO)
img->codec_context->bit_rate = round(bitrate_crf * 1000000);
img->codec_context->time_base = av_inv_q(img->codec_context->framerate);
video_stream->time_base = img->codec_context->time_base;
if (img->video_format_context->oformat->flags & AVFMT_GLOBALHEADER)
img->codec_context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
/* Some codecs require the CRF value */
if (codec_id == AV_CODEC_ID_H264 || codec_id == AV_CODEC_ID_H265)
{
gchar *crf = g_strdup_printf("%i", bitrate_crf);
av_opt_set(img->codec_context->priv_data, "crf", crf, AV_OPT_SEARCH_CHILDREN);
g_free(crf);
}
/* Set exporting stage to be multithreaded */
AVDictionary* opts = NULL;
av_dict_set(&opts, "threads", "auto", 0);
/* Open video encoder */
ret = avcodec_open2(img->codec_context, vcodec, &opts);
if (ret < 0)
{
img_message(img, TRUE, _("Failed to open the video encoder\n"));
return FALSE;
}
/* Copy video encoder parameters to output stream */
ret = avcodec_parameters_from_context(video_stream->codecpar, img->codec_context);
if (ret < 0)
{
img_message(img, TRUE, _("Failed to copy video encoder parameters to output stream\n"));
return FALSE;
}
/* AVFRAME stuff */
img->video_frame = av_frame_alloc();
img->video_frame->format = AV_PIX_FMT_RGBA;
img->video_frame->width = img->video_size[0];
img->video_frame->height = img->video_size[1];
ret = av_frame_get_buffer(img->video_frame, 1);
if (ret < 0)
img_message(img,TRUE, _("Could not allocate the video frame data\n"));
av_frame_make_writable(img->video_frame);
img->video_packet = av_packet_alloc();
And here is the function that is called repeatedly (from somewhere else); avcodec_send_frame() throws the error in the subject:
gint width, height, stride, row, col, offset;
uint8_t *pix;
/* Image info and pixel data */
width = cairo_image_surface_get_width( surface );
height = cairo_image_surface_get_height( surface );
stride = cairo_image_surface_get_stride( surface );
pix = cairo_image_surface_get_data( surface );
for( row = 0; row < height; row++ )
{
for( col = 0; col < width; col++ )
{
offset = 3 * col + row * img->video_frame->linesize[0];
img->video_frame->data[0][offset + 0] = pix[0];
img->video_frame->data[0][offset + 1] = pix[1];
img->video_frame->data[0][offset + 2] = pix[2];
}
}
img_export_encode_av_frame(img->video_frame, img->video_format_context, img->codec_context, img->video_packet);
return TRUE;
}
void img_export_encode_av_frame(AVFrame *frame, AVFormatContext *fmt, AVCodecContext *ctx, AVPacket *pkt)
{
gint ret;
/* send the frame to the encoder */
ret = avcodec_send_frame(ctx, frame);
if (ret < 0)
g_print("Error sending a frame for encoding\n");
while (ret >= 0)
{
ret = avcodec_receive_packet(ctx, pkt);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
return;
else if (ret < 0)
g_print("Error during encoding\n");
av_interleaved_write_frame(fmt, pkt);
av_packet_unref(pkt);
}
}
I googled, but with no luck; it seems I'm the only one encoding a Cairo surface. Grepping for the error message in the ffmpeg sources didn't help either. How do I set the stride? I read that ffmpeg does it for me once I allocate the frame's buffer, but in my case it seems it doesn't. Where am I wrong?
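Two things stand out, as a hedged reading: the frame is allocated as AV_PIX_FMT_RGBA while the encoder was opened with vcodec->pix_fmts[0] (normally yuv420p for libx264), so the chroma planes x264 expects have linesize 0, which is likely what triggers the stride complaint; and the copy loop writes 3 bytes per pixel without ever advancing pix or honouring Cairo's stride. A sketch of one way to fix both at once, letting libswscale do the conversion (it assumes the surface is CAIRO_FORMAT_ARGB32, i.e. BGRA on little-endian machines):
/* Convert the Cairo surface straight into the encoder's pixel format;
   sws_scale consumes Cairo's stride directly. */
const uint8_t *src_data[1] = { pix };
int src_linesize[1] = { stride };
struct SwsContext *sws = sws_getContext(
    width, height, AV_PIX_FMT_BGRA,
    width, height, img->codec_context->pix_fmt,
    SWS_BILINEAR, NULL, NULL, NULL);
av_frame_make_writable(img->video_frame); /* after av_frame_get_buffer() */
sws_scale(sws, src_data, src_linesize, 0, height,
          img->video_frame->data, img->video_frame->linesize);
sws_freeContext(sws);
For this to work, img->video_frame->format has to be set to img->codec_context->pix_fmt (not AV_PIX_FMT_RGBA) before av_frame_get_buffer() is called.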

Output black when I decode h264 720p with ffmpeg

First, sorry for my English. When I decode h264 720p video on the ardrone2.0, my output is black and I can't see anything.
I have tried changing pCodecCtx->pix_fmt = AV_PIX_FMT_BGR24; to pCodecCtx->pix_fmt = AV_PIX_FMT_YUV420P;, and likewise pCodecCtxH264->pix_fmt = AV_PIX_FMT_BGR24; to pCodecCtxH264->pix_fmt = AV_PIX_FMT_YUV420P;, but my program crashes. What am I doing wrong? Thank you; see part of my code:
av_register_all();
avcodec_register_all();
avformat_network_init();
// 1.2. Open video file
if(avformat_open_input(&pFormatCtx, drone_addr, NULL, NULL) != 0) {
mexPrintf("No conecct with Drone");
EndVideo();
return;
}
pCodec = avcodec_find_decoder(AV_CODEC_ID_H264);
pCodecCtx = avcodec_alloc_context3(pCodec);
pCodecCtx->pix_fmt = AV_PIX_FMT_BGR24;
pCodecCtx->skip_frame = AVDISCARD_DEFAULT;
pCodecCtx->error_concealment = FF_EC_GUESS_MVS | FF_EC_DEBLOCK;
pCodecCtx->err_recognition = AV_EF_CAREFUL;
pCodecCtx->skip_loop_filter = AVDISCARD_DEFAULT;
pCodecCtx->workaround_bugs = FF_BUG_AUTODETECT;
pCodecCtx->codec_type = AVMEDIA_TYPE_VIDEO;
pCodecCtx->codec_id = AV_CODEC_ID_H264;
pCodecCtx->skip_idct = AVDISCARD_DEFAULT;
pCodecCtx->width = 1280;
pCodecCtx->height = 720;
pCodecH264 = avcodec_find_decoder(AV_CODEC_ID_H264);
pCodecCtxH264 = avcodec_alloc_context3(pCodecH264);
pCodecCtxH264->pix_fmt = AV_PIX_FMT_BGR24;
pCodecCtxH264->skip_frame = AVDISCARD_DEFAULT;
pCodecCtxH264->error_concealment = FF_EC_GUESS_MVS | FF_EC_DEBLOCK;
pCodecCtxH264->err_recognition = AV_EF_CAREFUL;
pCodecCtxH264->skip_loop_filter = AVDISCARD_DEFAULT;
pCodecCtxH264->workaround_bugs = FF_BUG_AUTODETECT;
pCodecCtxH264->codec_type = AVMEDIA_TYPE_VIDEO;
pCodecCtxH264->codec_id = AV_CODEC_ID_H264;
pCodecCtxH264->skip_idct = AVDISCARD_DEFAULT;
if(avcodec_open2(pCodecCtxH264, pCodecH264, &optionsDict) < 0)
{
mexPrintf("Error opening H264 codec");
return ;
}
pFrame_BGR24 = av_frame_alloc();
if(pFrame_BGR24 == NULL) {
mexPrintf("Could not allocate pFrame_BGR24\n");
return ;
}
// Determine required buffer size and allocate buffer
buffer_BGR24 = (uint8_t *)av_mallocz(
av_image_get_buffer_size(AV_PIX_FMT_BGR24, pCodecCtx->width,
((pCodecCtx->height == 720) ? 720 : pCodecCtx->height) * sizeof(uint8_t) * 3, 1));
// Assign buffer to image planes
av_image_fill_arrays(pFrame_BGR24->data, pFrame_BGR24->linesize,
buffer_BGR24,AV_PIX_FMT_BGR24, pCodecCtx->width, pCodecCtx->height,1);
// format conversion context
pConvertCtx_BGR24 = sws_getContext(pCodecCtx->width, pCodecCtx->height,
pCodecCtx->pix_fmt, pCodecCtx->width, pCodecCtx->height, AV_PIX_FMT_BGR24,
SWS_BILINEAR | SWS_ACCURATE_RND, 0, 0, 0);
// 1.6. get video frames
pFrame = av_frame_alloc();
av_init_packet(&packet);
packet.data = NULL;
packet.size = 0;
}
// Capture a frame
void video::capture(mxArray *plhs[]) {
if(av_read_frame(pFormatCtx, &packet) < 0){
mexPrintf("Error al leer frame");
return;
}
do {
do {
rest = avcodec_send_packet(pCodecCtxH264, &packet);
} while(rest == AVERROR(EAGAIN));
if(rest == AVERROR_EOF || rest == AVERROR(EINVAL)) {
printf("AVERROR(EAGAIN): %d, AVERROR_EOF: %d,
AVERROR(EINVAL): %d\n", AVERROR(EAGAIN), AVERROR_EOF,
AVERROR(EINVAL));
printf("fe_read_frame: Frame getting error (%d)!\n", rest);
return;
}
rest = avcodec_receive_frame(pCodecCtxH264, pFrame);
} while(rest == AVERROR(EAGAIN));
if(rest == AVERROR_EOF || rest == AVERROR(EINVAL)) {
// An error or EOF occurred; break out and return what we have so far.
printf("AVERROR(EAGAIN): %d, AVERROR_EOF: %d, AVERROR(EINVAL): %d\n",
AVERROR(EAGAIN), AVERROR_EOF, AVERROR(EINVAL));
printf("fe_read_frame: EOF or some other decoding error (%d)!\n", rest);
return;
}
// 2.1.1. convert frame to GRAYSCALE [or BGR] for OpenCV
sws_scale(pConvertCtx_BGR24, (const uint8_t* const*)pFrame->data,
pFrame->linesize, 0,pCodecCtx->height, pFrame_BGR24->data,
pFrame_BGR24->linesize);
//}
av_packet_unref(&packet);
av_init_packet(&packet);
mwSize dims[] = {(pCodecCtx->width) * ((pCodecCtx->height == 720) ? 720 : pCodecCtx->height) * sizeof(uint8_t) * 3};
plhs[0] = mxCreateNumericArray(1, dims, mxUINT8_CLASS, mxREAL);
//plhs[0] = mxCreateDoubleMatrix(pCodecCtx->height, pCodecCtx->width, mxREAL);
point = mxGetPr(plhs[0]);
memcpy(point, pFrame_BGR24->data[0], (pCodecCtx->width) * (pCodecCtx->height) * sizeof(uint8_t) * 3);
}
Step through your memcpy in a debugger; I am not sure it works for all the dimensions you want. There may also be more memory problems. For example, check the values of buffer_BGR24 and pFrame; I bet they sometimes do not hold the right values. Check them out in the code.
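One more hedged pointer: setting pix_fmt = AV_PIX_FMT_BGR24 on an H.264 decoder context has no effect, since the decoder outputs whatever the stream contains (normally YUV420P), yet pConvertCtx_BGR24 is created with pCodecCtx->pix_fmt as the source format. Building the converter from the format the decoder actually reports avoids that mismatch, e.g. after the first successful avcodec_receive_frame():
// Sketch: create the converter from the real decoded format.
pConvertCtx_BGR24 = sws_getContext(
    pCodecCtxH264->width, pCodecCtxH264->height,
    (enum AVPixelFormat)pFrame->format,   // usually AV_PIX_FMT_YUV420P
    pCodecCtxH264->width, pCodecCtxH264->height,
    AV_PIX_FMT_BGR24,
    SWS_BILINEAR | SWS_ACCURATE_RND, NULL, NULL, NULL);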

AV_PIX_FMT_YUVJ422P to jpeg conversion

I am able to convert an image from AV_PIX_FMT_YUVJ422P to JPEG (code below), but the resulting image has a green shade over the complete bottom half. Please suggest where I am going wrong.
I have taken the following steps:
Initially I had an AV_PIX_FMT_UYVY422 image from the camera. I converted it to AV_PIX_FMT_YUVJ422P and was able to view it on http://rawpixels.net/; the parameters shown by the website are size 2448x2050, Bpp1 = 8, Bpp2 = 8, Bpp3 = 8, alignment 1, SubSampling H = 2, SubSampling V = 1, format YUV422P.
So the input image is in the correct AV_PIX_FMT_YUVJ422P format; it is also viewable in the "YUV image viewer" software using the YUV422 format.
Now I am trying to convert it to JPEG using the code below; the resulting encoded image has the green shade over the complete bottom half.
AVFormatContext* pFormatCtx;
AVOutputFormat* fmt;
AVStream* video_st;
AVCodecContext* pCodecCtx;
AVCodec* pCodec;
uint8_t* picture_buf;
AVFrame* picture;
AVPacket pkt;
int y_size;
int size;
int got_picture=0;
int ret=0;
int main( int argc, char* argv[] )
{
FILE *in_file = NULL;
unsigned int in_width = 2448;
unsigned int in_height = 2050;
const char* out_file = "encoded_pic.jpg";
in_file = fopen("c:\\test_Planar.yuv","rb");
if(in_file == NULL) { printf("\n\tFile Opening error...!!"); exit(1); }
else printf("\n\tYUV File Open Successfully...!!\n\n");
av_register_all(); // Loads the whole database of available codecs and formats.
pFormatCtx = avformat_alloc_context();
fmt = NULL;
fmt = av_guess_format("mjpeg",NULL,NULL);
pFormatCtx->oformat = fmt;
//------Output URL-------------------------
if (avio_open(&pFormatCtx->pb,out_file, AVIO_FLAG_READ_WRITE) < 0)
{
printf("Couldn't open output file.");
return -1;
}
video_st = avformat_new_stream(pFormatCtx, 0);
if (video_st==NULL) return -1;
pCodecCtx = video_st->codec;
pCodecCtx->codec_id = fmt->video_codec;
pCodecCtx->codec_type = AVMEDIA_TYPE_VIDEO;
pCodecCtx->pix_fmt = AV_PIX_FMT_YUVJ422P;
//--------------------------MY SOURCE PIXEL FORMAT--------------
pCodecCtx->width = in_width;
pCodecCtx->height = in_height;
pCodecCtx->time_base.num = 1;
pCodecCtx->time_base.den = 1;//25;
//Output some information
av_dump_format(pFormatCtx, 0, out_file, 1);
// Determine if desired video encoder is installed
pCodec = avcodec_find_encoder(pCodecCtx->codec_id);
if (!pCodec)
{
printf("Codec not found.");
return -1;
}
printf("\nCodec Identified done\n");
if (avcodec_open2(pCodecCtx, pCodec,NULL) < 0){
printf("Could not open codec.\n");
return -1;
}
picture = av_frame_alloc();
size = avpicture_get_size(pCodecCtx->pix_fmt, pCodecCtx->width, pCodecCtx->height);
picture_buf = (uint8_t *)av_malloc(size);
if (!picture_buf) return -1;
avpicture_fill((AVPicture *)picture, picture_buf, pCodecCtx->pix_fmt, pCodecCtx->width, pCodecCtx->height);
printf("\t\nWrite Header..");
avformat_write_header(pFormatCtx,NULL);
y_size = pCodecCtx->width * pCodecCtx->height;
av_new_packet(&pkt,y_size*3);
//Read YUV
if (fread(picture_buf, 1, y_size*3/2, in_file) <=0)
{
printf("Could not read input file.");
return -1;
}
//--------------------------------------------input image format UYVY
picture->data[0] = picture_buf; // Y
picture->data[1] = picture_buf+ y_size; // U
picture->data[2] = picture_buf+ y_size*5/4; // V
//-----------------------------------------------
printf("\t\n Encode the image..\n");
ret = avcodec_encode_video2(pCodecCtx, &pkt,picture, &got_picture);
if(ret < 0)
{
printf("Encode Error.\n");
return -1;
}
if (got_picture==1)
{
pkt.stream_index = video_st->index;
ret = av_write_frame(pFormatCtx, &pkt);
}
av_free_packet(&pkt);
//Write Trailer
av_write_trailer(pFormatCtx);
printf("Encode Successful.\n");
if (video_st)
{
avcodec_close(video_st->codec);
av_free(picture);
av_free(picture_buf);
}
avio_close(pFormatCtx->pb);
avformat_free_context(pFormatCtx);
fclose(in_file);
printf("\n\tYUV File Close Sucessfully...!!");
}
(Resultant JPEG encoded from the YUVJ422P image, showing the green shade over the bottom half.)
Changing the input pixel format from AV_PIX_FMT_YUVJ422P to AV_PIX_FMT_YUVJ420P resolved the issue.
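That fix makes sense because the read size and plane offsets in the code above (y_size*3/2 bytes, chroma planes at y_size and y_size*5/4) describe a 4:2:0 layout, so declaring the frame 4:2:2 left part of the chroma uninitialized, hence the green band. If the input really were planar 4:2:2, the numbers would look like this sketch instead:
// Planar 4:2:2: each chroma plane is half the luma width but full
// height (y_size/2 bytes each); a whole frame is y_size*2 bytes.
fread(picture_buf, 1, y_size * 2, in_file);
picture->data[0] = picture_buf;                  // Y: y_size bytes
picture->data[1] = picture_buf + y_size;         // U: y_size/2 bytes
picture->data[2] = picture_buf + y_size * 3 / 2; // V: y_size/2 bytes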

Transcoding audio using xuggler

I am trying to convert an audio file with the following header:
Opening audio decoder: [pcm] Uncompressed PCM audio decoder
AUDIO: 44100 Hz, 2 ch, s16le, 1411.2 kbit/100.00% (ratio: 176400->176400)
Selected audio codec: [pcm] afm: pcm (Uncompressed PCM)
I want to transcode this file to mp3 format. I have the following code snippet, but it's not working well. I wrote it based on the Xuggler code snippet for transcoding audio and video.
Audio decoder is
audioDecoder = IStreamCoder.make(IStreamCoder.Direction.DECODING, ICodec.findDecodingCodec(ICodec.ID.CODEC_ID_PCM_S16LE));
audioDecoder.setSampleRate(44100);
audioDecoder.setBitRate(176400);
audioDecoder.setChannels(2);
audioDecoder.setTimeBase(IRational.make(1,1000));
if (audioDecoder.open(IMetaData.make(), IMetaData.make()) < 0)
return false;
return true;
Audio encoder is
outContainer = IContainer.make();
outContainerFormat = IContainerFormat.make();
outContainerFormat.setOutputFormat("mp3", urlOut, null);
int retVal = outContainer.open(urlOut, IContainer.Type.WRITE, outContainerFormat);
if (retVal < 0) {
System.out.println("Could not open output container");
return false;
}
outAudioCoder = IStreamCoder.make(IStreamCoder.Direction.ENCODING, ICodec.findEncodingCodec(ICodec.ID.CODEC_ID_MP3));
outAudioStream = outContainer.addNewStream(outAudioCoder);
outAudioCoder.setSampleRate(new Integer(44100));
outAudioCoder.setChannels(2);
retVal = outAudioCoder.open(IMetaData.make(), IMetaData.make());
if (retVal < 0) {
System.out.println("Could not open audio coder");
return false;
}
retVal = outContainer.writeHeader();
if (retVal < 0) {
System.out.println("Could not write output FLV header: ");
return false;
}
return true;
And here is the encode method, where I send packets of 32 bytes to transcode:
public void encode(byte[] audioFrame){
//duration of 1 video frame
long lastVideoPts = 0;
IPacket packet_out = IPacket.make();
int lastPos = 0;
int lastPos_out = 0;
IAudioSamples audioSamples = IAudioSamples.make(48000, audioDecoder.getChannels());
IAudioSamples audioSamples_resampled = IAudioSamples.make(48000, audioDecoder.getChannels());
//we always have 32 bytes/sample
int pos = 0;
int audioFrameLength = audioFrame.length;
int audioFrameCnt = 1;
iBuffer = IBuffer.make(null, audioFrame, 0, audioFrameLength);
IPacket packet = IPacket.make(iBuffer);
//packet.setKeyPacket(true);
packet.setTimeBase(IRational.make(1,1000));
packet.setDuration(20);
packet.setDts(audioFrameCnt*20);
packet.setPts(audioFrameCnt*20);
packet.setStreamIndex(1);
packet.setPosition(lastPos);
lastPos+=audioFrameLength;
int pksz = packet.getSize();
packet.setComplete(true, pksz);
/*
* A packet can actually contain multiple samples
*/
int offset = 0;
int retVal;
while(offset < packet.getSize())
{
int bytesDecoded = audioDecoder.decodeAudio(audioSamples, packet, offset);
if (bytesDecoded < 0)
throw new RuntimeException("got error decoding audio ");
offset += bytesDecoded;
if (audioSamples.isComplete())
{
int samplesConsumed = 0;
while (samplesConsumed < audioSamples.getNumSamples()) {
retVal = outAudioCoder.encodeAudio(packet_out, audioSamples, samplesConsumed);
if (retVal <= 0)
throw new RuntimeException("Could not encode audio");
samplesConsumed += retVal;
if (packet_out.isComplete()) {
packet_out.setPosition(lastPos_out);
packet_out.setStreamIndex(1);
lastPos_out+=packet_out.getSize();
retVal = outContainer.writePacket(packet_out);
if(retVal < 0){
throw new RuntimeException("Could not write data packet");
}
}
}
}
}
}
I get an output file, but it doesn't play. I have very little experience with audio encoding and sampling. Thanks in advance.
Finally I am able to live-transcode a PCM stream to an mp3 stream.
There were a couple of issues in the code:
First, I was transcoding only one thing (audio), while the snippet I based this on transcoded audio as well as video, so the stream index was set wrongly: packet_out.setStreamIndex(1) had to become packet_out.setStreamIndex(0).
Second, there is the calculation from the ffmpeg guide:
channels * bits per sample * sampling rate = bit rate
I had miscalculated this at my end.
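For the stream above, that works out to 2 channels * 16 bits * 44100 Hz = 1,411,200 bit/s, matching the 1411.2 kbit/s reported in the header.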
Third, the number of samples in an IAudioSamples buffer depends on your sampling rate; that was also wrong in my code.
NOTE: this is pretty old code.
byte[] data = new byte[418];
public void encode(byte[] audioFrame) {
IPacket packet_out = IPacket.make();
int lastPos_out = 0;
IAudioSamples audioSamples = IAudioSamples.make(11025, audioDecoder.getChannels());
//IAudioSamples audioSamples_resampled = IAudioSamples.make(48000, audioDecoder.getChannels());
//we always have 32 bytes/sample
int pos = 0;
int audioFrameLength = audioFrame.length;
int audioFrameCnt = 1;
iBuffer = IBuffer.make(null, audioFrame, 0, audioFrameLength);
IPacket packet = IPacket.make(iBuffer);
//packet.setKeyPacket(true);
packet.setTimeBase(IRational.make(1, 1000));
packet.setStreamIndex(0);
int pksz = packet.getSize();
packet.setComplete(true, pksz);
/*
* A packet can actually contain multiple samples
*/
int offset = 0;
int retVal;
while (offset < packet.getSize()) {
int bytesDecoded = audioDecoder.decodeAudio(audioSamples, packet, offset);
if (bytesDecoded < 0)
throw new RuntimeException("got error decoding audio ");
offset += bytesDecoded;
if (audioSamples.isComplete()) {
/*audioResampler.resample(audioSamples_resampled, audioSamples, audioSamples.getNumSamples());
audioSamples_resampled.setPts(Global.NO_PTS);*/
int samplesConsumed = 0;
while (samplesConsumed < audioSamples.getNumSamples()) {
retVal = outAudioCoder.encodeAudio(packet_out, audioSamples, samplesConsumed);
if (retVal <= 0)
throw new RuntimeException("Could not encode audio");
samplesConsumed += retVal;
if (packet_out.isComplete()) {
packet_out.setPosition(lastPos_out);
packet_out.setStreamIndex(0);
lastPos_out += packet_out.getSize();
System.out.println("size" + packet_out.getSize());
packet_out.getByteBuffer().get(data,0,packet_out.getSize());
try {
fo.write(data);
} catch (IOException e) {
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
}
packet_out.reset();
//retVal = outContainer.writePacket(packet_out);
if (retVal < 0) {
throw new RuntimeException("Could not write data packet");
}
}
}
}
}
}
