Converting 3gp (amr) to mp3 using ffmpeg api calls
I try to use libavformat (ffmpeg) to build my own function that converts 3gp audio files (recorded with an android mobile device) into mp3 files.
I use av_read_frame() to read a frame from the input file and use avcodec_decode_audio3() to decode the data
into a buffer and use this buffer to encode the data into mp3 with avcodec_encode_audio.
This seems to give me a correct result for converting wav to mp3 and mp3 to wav (Or decode one mp3 and encode to another mp3) but not for amr to mp3.
My resulting mp3 file seems to has the right length but only consists of noise.
In another post I read that amr-decoder does not use the same sample format than mp3 does.
AMR uses FLT and mp3 S16 or S32 und that I have to do resampling.
So I call av_audio_resample_init() and audio_resample for each frame that has been decoded.
But that does not solve my problem completely. Now I can hear my recorded voice and unsterstand what I was saying, but the quality is very low and there is still a lot of noise.
I am not sure if I set the parameters of av_audio_resample correctly, especially the last 4 parameters (I think not) or if I miss something else.
ReSampleContext* reSampleContext = av_audio_resample_init(1, 1, 44100, 8000, AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT, 0, 0, 0, 0.0);
while(1)
{
if(av_read_frame(ic, &avpkt) < 0)
{
break;
}
out_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
int count;
count = avcodec_decode_audio3(audio_stream->codec, (short *)decodedBuffer, &out_size, &avpkt);
if(count < 0)
{
break;
}
if((audio_resample(reSampleContext, (short *)resampledBuffer, (short *)decodedBuffer, out_size / 4)) < 0)
{
fprintf(stderr, "Error\n");
exit(1);
}
out_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
pktOut.size = avcodec_encode_audio(c, outbuf, out_size, (short *)resampledBuffer);
if(c->coded_frame && c->coded_frame->pts != AV_NOPTS_VALUE)
{
pktOut.pts = av_rescale_q(c->coded_frame->pts, c->time_base, outStream->time_base);
//av_res
}
pktOut.pts = AV_NOPTS_VALUE;
pktOut.dts = AV_NOPTS_VALUE;
pktOut.flags |= AV_PKT_FLAG_KEY;
pktOut.stream_index = audio_stream->index;
pktOut.data = outbuf;
if(av_write_frame(oc, &pktOut) != 0)
{
fprintf(stderr, "Error while writing audio frame\n");
exit(1);
}
}
Related
I have managed to create a rtsp stream using libav* and directX texture (which I am obtaining from GDI API using Bitblit method). Here's my approach for creating live rtsp stream:
Create output context and stream (skipping the checks here)
avformat_alloc_output_context2(&ofmt_ctx, NULL, "rtsp", rtsp_url); //RTSP
vid_codec = avcodec_find_encoder(ofmt_ctx->oformat->video_codec);
vid_stream = avformat_new_stream(ofmt_ctx,vid_codec);
vid_codec_ctx = avcodec_alloc_context3(vid_codec);
Set codec params
codec_ctx->codec_tag = 0;
codec_ctx->codec_id = ofmt_ctx->oformat->video_codec;
//codec_ctx->codec_type = AVMEDIA_TYPE_VIDEO;
codec_ctx->width = width; codec_ctx->height = height;
codec_ctx->gop_size = 12;
//codec_ctx->gop_size = 40;
//codec_ctx->max_b_frames = 3;
codec_ctx->pix_fmt = target_pix_fmt; // AV_PIX_FMT_YUV420P
codec_ctx->framerate = { stream_fps, 1 };
codec_ctx->time_base = { 1, stream_fps};
if (fctx->oformat->flags & AVFMT_GLOBALHEADER)
{
codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
Initialize video stream
if (avcodec_parameters_from_context(stream->codecpar, codec_ctx) < 0)
{
Debug::Error("Could not initialize stream codec parameters!");
return false;
}
AVDictionary* codec_options = nullptr;
if (codec->id == AV_CODEC_ID_H264) {
av_dict_set(&codec_options, "profile", "high", 0);
av_dict_set(&codec_options, "preset", "fast", 0);
av_dict_set(&codec_options, "tune", "zerolatency", 0);
}
// open video encoder
int ret = avcodec_open2(codec_ctx, codec, &codec_options);
if (ret<0) {
Debug::Error("Could not open video encoder: ", avcodec_get_name(codec->id), " error ret: ", AVERROR(ret));
return false;
}
stream->codecpar->extradata = codec_ctx->extradata;
stream->codecpar->extradata_size = codec_ctx->extradata_size;
Start streaming
// Create new frame and allocate buffer
AVFrame* AllocateFrameBuffer(AVCodecContext* codec_ctx, double width, double height)
{
AVFrame* frame = av_frame_alloc();
std::vector<uint8_t> framebuf(av_image_get_buffer_size(codec_ctx->pix_fmt, width, height, 1));
av_image_fill_arrays(frame->data, frame->linesize, framebuf.data(), codec_ctx->pix_fmt, width, height, 1);
frame->width = width;
frame->height = height;
frame->format = static_cast<int>(codec_ctx->pix_fmt);
//Debug::Log("framebuf size: ", framebuf.size(), " frame format: ", frame->format);
return frame;
}
void RtspStream(AVFormatContext* ofmt_ctx, AVStream* vid_stream, AVCodecContext* vid_codec_ctx, char* rtsp_url)
{
printf("Output stream info:\n");
av_dump_format(ofmt_ctx, 0, rtsp_url, 1);
const int width = WindowManager::Get().GetWindow(RtspStreaming::WindowId())->GetTextureWidth();
const int height = WindowManager::Get().GetWindow(RtspStreaming::WindowId())->GetTextureHeight();
//DirectX BGRA to h264 YUV420p
SwsContext* conversion_ctx = sws_getContext(width, height, src_pix_fmt,
vid_stream->codecpar->width, vid_stream->codecpar->height, target_pix_fmt,
SWS_BICUBIC | SWS_BITEXACT, nullptr, nullptr, nullptr);
if (!conversion_ctx)
{
Debug::Error("Could not initialize sample scaler!");
return;
}
AVFrame* frame = AllocateFrameBuffer(vid_codec_ctx,vid_codec_ctx->width,vid_codec_ctx->height);
if (!frame) {
Debug::Error("Could not allocate video frame\n");
return;
}
if (avformat_write_header(ofmt_ctx, NULL) < 0) {
Debug::Error("Error occurred when writing header");
return;
}
if (av_frame_get_buffer(frame, 0) < 0) {
Debug::Error("Could not allocate the video frame data\n");
return;
}
int frame_cnt = 0;
//av start time in microseconds
int64_t start_time_av = av_gettime();
AVRational time_base = vid_stream->time_base;
AVRational time_base_q = { 1, AV_TIME_BASE };
// frame pixel data info
int data_size = width * height * 4;
uint8_t* data = new uint8_t[data_size];
// AVPacket* pkt = av_packet_alloc();
while (RtspStreaming::IsStreaming())
{
/* make sure the frame data is writable */
if (av_frame_make_writable(frame) < 0)
{
Debug::Error("Can't make frame writable");
break;
}
//get copy/ref of the texture
//uint8_t* data = WindowManager::Get().GetWindow(RtspStreaming::WindowId())->GetBuffer();
if (!WindowManager::Get().GetWindow(RtspStreaming::WindowId())->GetPixels(data, 0, 0, width, height))
{
Debug::Error("Failed to get frame buffer. ID: ", RtspStreaming::WindowId());
std::this_thread::sleep_for (std::chrono::seconds(2));
continue;
}
//printf("got pixels data\n");
// convert BGRA to yuv420 pixel format
int srcStrides[1] = { 4 * width };
if (sws_scale(conversion_ctx, &data, srcStrides, 0, height, frame->data, frame->linesize) < 0)
{
Debug::Error("Unable to scale d3d11 texture to frame. ", frame_cnt);
break;
}
//Debug::Log("frame pts: ", frame->pts, " time_base:", av_rescale_q(1, vid_codec_ctx->time_base, vid_stream->time_base));
frame->pts = frame_cnt++;
//frame_cnt++;
//printf("scale conversion done\n");
//encode to the video stream
int ret = avcodec_send_frame(vid_codec_ctx, frame);
if (ret < 0)
{
Debug::Error("Error sending frame to codec context! ",frame_cnt);
break;
}
AVPacket* pkt = av_packet_alloc();
//av_init_packet(pkt);
ret = avcodec_receive_packet(vid_codec_ctx, pkt);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
{
//av_packet_unref(pkt);
av_packet_free(&pkt);
continue;
}
else if (ret < 0)
{
Debug::Error("Error during receiving packet: ",AVERROR(ret));
//av_packet_unref(pkt);
av_packet_free(&pkt);
break;
}
if (pkt->pts == AV_NOPTS_VALUE)
{
//Write PTS
//Duration between 2 frames (us)
int64_t calc_duration = (double)AV_TIME_BASE / av_q2d(vid_stream->r_frame_rate);
//Parameters
pkt->pts = (double)(frame_cnt * calc_duration) / (double)(av_q2d(time_base) * AV_TIME_BASE);
pkt->dts = pkt->pts;
pkt->duration = (double)calc_duration / (double)(av_q2d(time_base) * AV_TIME_BASE);
}
int64_t pts_time = av_rescale_q(pkt->dts, time_base, time_base_q);
int64_t now_time = av_gettime() - start_time_av;
if (pts_time > now_time)
av_usleep(pts_time - now_time);
//pkt.pts = av_rescale_q_rnd(pkt.pts, in_stream->time_base, out_stream->time_base, (AVRounding)(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
//pkt.dts = av_rescale_q_rnd(pkt.dts, in_stream->time_base, out_stream->time_base, (AVRounding)(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
//pkt.duration = av_rescale_q(pkt.duration, in_stream->time_base, out_stream->time_base);
//pkt->pos = -1;
//write frame and send
if (av_interleaved_write_frame(ofmt_ctx, pkt)<0)
{
Debug::Error("Error muxing packet, frame number:",frame_cnt);
break;
}
//Debug::Log("RTSP streaming...");
//sstd::this_thread::sleep_for(std::chrono::milliseconds(1000/20));
//av_packet_unref(pkt);
av_packet_free(&pkt);
}
//av_free_packet(pkt);
delete[] data;
/* Write the trailer, if any. The trailer must be written before you
* close the CodecContexts open when you wrote the header; otherwise
* av_write_trailer() may try to use memory that was freed on
* av_codec_close(). */
av_write_trailer(ofmt_ctx);
av_frame_unref(frame);
av_frame_free(&frame);
printf("streaming thread CLOSED!\n");
}
Now, this allows me to connect to my rtsp server and maintain the connection. However, on the rtsp client side I am getting either gray or single static frame as shown below:
Would appreciate if you can help with following questions:
Firstly, why the stream is not working in spite of continued connection to the server and updating frames?
Video codec. By default rtsp format uses Mpeg4 codec, is it possible to use h264? When I manually set it to AV_CODEC_ID_H264 the program fails at avcodec_open2 with return value of -22.
Do I need to create and allocate new "AVFrame" and "AVPacket" for every frame? Or can I just reuse global variable for this?
Do I need to explicitly define some code for real-time streaming? (Like in ffmpeg we use "-re" flag).
Would be great if you can point out some example code for creating livestream. I have checked following resources:
https://github.com/FFmpeg/FFmpeg/blob/master/doc/examples/encode_video.c
streaming FLV to RTMP with FFMpeg using H264 codec and C++ API to flv.js
https://medium.com/swlh/streaming-video-with-ffmpeg-and-directx-11-7395fcb372c4
Update
While test I found that I am able to play the stream using ffplay, while it's getting stuck on VLC player. Here is snapshot on the ffplay log
The basic construct and initialization seems to be okay. Find below responses to your questions
why the stream is not working in spite of continued connection to the server and updating frames?
If you're getting an error or broken stream, you might wanna check into your presentation and decompression timestamps (pts/dts) of your packet.
In your code, I notice that you're taking time_base from video stream object which is not guranteed to be same as codec->time_base value and usually varies depending upon active stream.
AVRational time_base = vid_stream->time_base;
AVRational time_base_q = { 1, AV_TIME_BASE };
Video codec. By default rtsp format uses Mpeg4 codec, is it possible to use h264?
I don't see why not... RTSP is just a protocol for carrying your packets over the network. So you should be able use AV_CODEC_ID_H264 for encoding the stream.
Do I need to create and allocate new "AVFrame" and "AVPacket" for every frame? Or can I just reuse global variable for this?
In libav during encoding process a single packet is used for encoding a video frame, while there can be multiple audio frames in a single packet. I should reference this, but can't seem to find any source at the moment. But anyways the point is you would need to create new packet every time.
Do I need to explicitly define some code for real-time streaming? (Like in ffmpeg we use "-re" flag).
You don't need to add anything else for real time streaming. Although you might wanna implement it to limit the number of frame updates that you pass to encoder and save some performance.
for me the difference between ffplay good capture and VLC bad capture (for UDP packets) was pkt_size=xxx attribute (ffmpeg -re -i test.mp4 -f mpegts udp://127.0.0.1:23000?pkt_size=1316) (VLC open media network tab udp://#:23000:pkt_size=1316). So only if pkt_size is defined (and equal) VLC is able to capture.
I'm trying to remake the doc/examples/transcoding.c so that in encodes opus audio. How can I do that?
This is what I have right now:
encoder = avcodec_find_encoder(AV_CODEC_ID_OPUS);
if (!encoder) {
av_log(NULL, AV_LOG_FATAL, "Necessary encoder not found\n");
return AVERROR_INVALIDDATA;
}
enc_ctx = avcodec_alloc_context3(encoder);
if (!enc_ctx) {
av_log(NULL, AV_LOG_FATAL, "Failed to allocate the encoder context\n");
return AVERROR(ENOMEM);
}
enc_ctx->thread_count = 1;
/* enc_ctx->sample_rate = dec_ctx->sample_rate; */
enc_ctx->sample_rate = 48000;
/* enc_ctx->channel_layout = dec_ctx->channel_layout; */
enc_ctx->channel_layout = AV_CH_LAYOUT_MONO;
enc_ctx->channels = av_get_channel_layout_nb_channels(enc_ctx->channel_layout);
/* take first format from list of supported formats */
enc_ctx->sample_fmt = encoder->sample_fmts[0];
enc_ctx->time_base = (AVRational){1, enc_ctx->sample_rate};
enc_ctx->bit_rate = 32000;
Should I convert to 48000 Hz and to mono with filters, or encoder will figure it out by itself?
How to copy audio from one packet to another? (It looks like each packet should contain exactly 20ms of audio, that's 960 samples). This page https://ffmpeg.org/doxygen/trunk/structAVFrame.html says something about AVFrame.buf, how to copy all samples from there?
[libopus # 02ec1d60] more samples than frame size (avcodec_encode_audio2)
I’m trying to transcode a video on my iOS app using FFMpeg/LibAv.
What I’m trying to accomplish is to transcode a video in order to resize each frame and possibly lower the bitrate in order to save valuable MB in the device.
The resulting video must be playable on all iPhone5+ devices.
After reading the documentation I found out that:
I do not need to encode/decode the audio stream -> I’ll copy as-is to the output file
I need to encode the video using the h264 codec (LibX264) with a profile supported by iOS (baseline profile with level 3.0 - https://trac.ffmpeg.org/wiki/Encode/H.264#Compatibility)
I’m also setting the picture format to YUV planar since it’s the only one supported by iOS
For the sake of testing I’m not using any filter (I’m using a dummy/passthrough) at all or even trying to lower the bitrate, I’m just trying to decode the video stream and encode it again
Most of the code is based on the transcoding.c and filtering.c available on the FFMpeg examples directory
FFMpeg-wise what I’m trying to achieve with LibAv is:
ffmpeg -i INPUT.MOV -c:v libx264 -preset ultrafast -profile:v baseline -level 3.0 -c:a copy output.MOV
(the resulting file - which can be found below - is playable on QuickTime if it’s generated by FFMpeg through the command line)
The original video was generated with a regular iPhone using iOS 8.2 but the problem is not device specific or iOS specific, it occurs on all videos generated with LibAv.
Although both resulting files are playable by VideoLan (VLC) the one I generated through LibAv is not playable by QuickTime even though I can’t find anything wrong with it.
As you can see below, I create the video stream with the proper video codec on the call to avformat_new_stream:
AVStream *out_stream; // output stream
AVStream *in_stream; // input stream
AVCodecContext *dec_ctx, *enc_ctx; // codec context for the stream
AVCodec *encoder; // codec used
int ret;
unsigned int i;
ofmt_ctx = NULL;
// Allocate an AVFormatContext for an output format. This will be the file header (similar to avformat_open_input but with an zero'ed memory)
avformat_alloc_output_context2(&ofmt_ctx, NULL, NULL, filename);
if (!ofmt_ctx) {
av_log(NULL, AV_LOG_ERROR, "Could not create output context\n");
[self errorWith:kErrorCreatingOutputContext and:#"Could not create output context"];
return AVERROR_UNKNOWN;
}
// we must not use the AVCodecContext from the video stream directly! So we have to use avcodec_copy_context() to copy the context to a new location (after allocating memory for it, of course).
// iterate over all input streams
for (i = 0; i < ifmt_ctx->nb_streams; i++) {
in_stream = ifmt_ctx->streams[i]; // input stream
dec_ctx = in_stream->codec; // get the codec context for the decoder
if (dec_ctx->codec_type == AVMEDIA_TYPE_VIDEO) {
// lets use h264
encoder = avcodec_find_encoder(AV_CODEC_ID_H264);
if (!encoder) {
[self errorWith:kErrorCodecNotFound and:#"H264 Codec Not Found"];
return AVERROR_UNKNOWN;
}
out_stream = avformat_new_stream(ofmt_ctx, encoder); // create a new stream with h264 codec
if (!out_stream) {
av_log(NULL, AV_LOG_ERROR, "Failed allocating output stream\n");
[self errorWith:kErrorAllocateOutputStream and:#"Failed allocating output stream"];
return AVERROR_UNKNOWN;
}
enc_ctx = out_stream->codec; // pointer to the stream codec context
/* we transcode to same properties (picture size,
* sample rate etc.). These properties can be changed for output
* streams easily using filters */
if (dec_ctx->codec_type == AVMEDIA_TYPE_VIDEO) {
enc_ctx->width = dec_ctx->width;
enc_ctx->height = dec_ctx->height;
enc_ctx->sample_aspect_ratio = dec_ctx->sample_aspect_ratio;
enc_ctx->pix_fmt = AV_PIX_FMT_YUV420P;
enc_ctx->time_base = dec_ctx->time_base;
av_opt_set(enc_ctx->priv_data, "preset", "ultrafast", 0);
av_opt_set(enc_ctx->priv_data, "profile", "baseline", 0);
av_opt_set(enc_ctx->priv_data, "level", "3.0", 0);
}
out_stream->time_base = in_stream->time_base;
AVDictionaryEntry *tag = NULL;
while ((tag = av_dict_get(in_stream->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
printf("%s=%s\n", tag->key, tag->value);
char *k = av_strdup(tag->key); // if your strings are already allocated,
char *v = av_strdup(tag->value); // you can avoid copying them like this
av_dict_set(&out_stream->metadata, k, v, 0);
}
ret = avcodec_open2(enc_ctx, encoder, NULL);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot open video encoder for stream #%u\n", i);
[self errorWith:kErrorCantOpenOutputFile and:[NSString stringWithFormat:#"Cannot open video encoder for stream #%u",i]];
return ret;
}
}
else if(dec_ctx->codec_type == AVMEDIA_TYPE_UNKNOWN) {
// if we cant figure out the stream type, fail
av_log(NULL, AV_LOG_FATAL, "Elementary stream #%d is of unknown type, cannot proceed\n", i);
[self errorWith:kErrorUnknownStream and:[NSString stringWithFormat:#"Elementary stream #%d is of unknown type, cannot proceed",i]];
return AVERROR_INVALIDDATA;
}
else {
out_stream = avformat_new_stream(ofmt_ctx, NULL);
if (!out_stream) {
av_log(NULL, AV_LOG_ERROR, "Failed allocating output stream\n");
[self errorWith:kErrorAllocateOutputStream and:#"Failed allocating output stream"];
return AVERROR_UNKNOWN;
}
enc_ctx = out_stream->codec;
/* this stream must be remuxed */
// copies ifmt_ctx->streams[i]->codec into ofmt_ctx->streams[i]->codec - Copy the settings of the source AVCodecContext into the destination AVCodecContext.
ret = avcodec_copy_context(ofmt_ctx->streams[i]->codec,
ifmt_ctx->streams[i]->codec);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Copying stream context failed\n");
[self errorWith:kErrorCopyStreamFailed and:#"Copying stream context failed"];
return ret;
}
}
// dunno what this is for
if (ofmt_ctx->oformat->flags & AVFMT_GLOBALHEADER)
enc_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
if (!(ofmt_ctx->oformat->flags & AVFMT_NOFILE)) {
// Create and initialize a AVIOContext for accessing the
// resource indicated by url.
ret = avio_open(&ofmt_ctx->pb, filename, AVIO_FLAG_WRITE);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Could not open output file '%s'", filename);
[self errorWith:kErrorCantOpenOutputFile and:[NSString stringWithFormat:#"Could not open output file '%s'", filename]];
return ret;
}
}
/* init muxer, write output file header */
// Allocate the stream private data and write the stream header to an output media file.
ret = avformat_write_header(ofmt_ctx, NULL);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Error occurred when opening output file\n");
[self errorWith:kErrorOutFileCantWriteHeader and:#"Error occurred when opening output file"];
return ret;
}
return 0;
You can find the files here:
Original final: https://www.dropbox.com/s/2jjs1uy2pu2veyy/IMG_5705.MOV?dl=0
File generated with FFMpeg - https://www.dropbox.com/s/9hfmq3fcifgpfqc/local-ffmpeg.MOV?dl=0
File generated by code - https://www.dropbox.com/s/rttvny39rj7ejpf/generated-by-Ze.MOV?dl=0
Thank you so much,
Ze
I am using FFMpeg To decode live video and stream it using Live555.i am able to decode video and getting the output AVPackets.
1. Convert the BGR Image to YUV422P format using FFMpeg's SWScale
// initilize a BGR To RGB converter using FFMpeg
ctx = sws_getContext(codecContext->width, codecContext->height, AV_PIX_FMT_BGR24, codecContext->width, codecContext->height, AV_PIX_FMT_YUV422P, SWS_BICUBIC, 0, 0, 0);
tempFrame = av_frame_alloc();
int num_bytes = avpicture_get_size(PIX_FMT_BGR24, codecContext->width, codecContext->height);
uint8_t* frame2_buffer = (uint8_t*)av_malloc(num_bytes*sizeof(uint8_t));
avpicture_fill((AVPicture*)tempFrame, frame2_buffer, PIX_FMT_BGR24, codecContext->width, codecContext->height);
// inside the loop of where frames are being encoded where rawFrame is a BGR image
tempFrame->data[0] = reinterpret_cast<uint8_t*>(rawFrame->_data);
sws_scale(ctx, tempFrame->data, tempFrame->linesize, 0, frame->height, frame->data, frame->linesize);
For decoding each Frame
ret = avcodec_encode_video2(codecContext, &packet, frame, &got_output);
if(ret < 0)
{
fprintf(stderr, "Error in encoding frame\n");
exit(1);
}
if(got_output)
{
//printf("Received frame! pushing to queue\n");
OutputFrame *outFrame = new OutputFrame();
outFrame->_data = packet.buf->data;
outFrame->_bufferSize = packet.buf->size;
outputQueue.push_back(outFrame);
}
Till here it works fine. i am able to write these frames to file and play it using VLC. after this i have to pass the output frame to Live555.i think AVPackets i am getting here doesn't need to be a single H264 Nal unit which is required by Live555.
How to break a AVPacket into Nal units which can be passed to Live555?
H264VideoStreamDiscreateFramer expect data without the start code '\x00\x00\x00\x01'.
It is needed to remove the 4 first bytes either in your LiveDeviceSource or inserting a FramedFilter to do this job.
Perhaps you can tried to use an H264VideoStreamFramer, like the testH264VideoStreamer test program.
If it could help, you can find one of my tries with live555 implementing an RTSP server feed from V4L2 capture https://github.com/mpromonet/h264_v4l2_rtspserver
I want to realize an application that firstly decode a multi-media file(such as test.mp4 file, video codec id is H264), get a video stream and an audio stream, then make some different in the audio stream, at last encode the video stream(use libx264) and audio stream into a result file(result.mp4). To promote the efficiency, i omitted the decode and encode of video stream, i get the video packet via function "av_read_frame", then output it directly into the result file via function "av_write_frame". But there is no picture in the output file, and the size of output file is fairly small.
I tracked the ffmpeg code and found that in the function "av_write_frame->mov_write_packet->ff_mov_write_packet", it will call function "ff_avc_parse_nal_units" to obtain the size of nal unit, but the return value is very small(such as 208 bytes).
I find that the H264 stream in the MP4 file is not stored in Annex-B format, so it can't find start code(0x000001), now my problem is how can I change the H264 stream to Annex-B format, and make it work?
I added start code at the beginning of every frame manually, but it still not work.
Anyone can give me any hint?Thanks very much.
Following is the codes similar with my:
// write the stream header, if any
av_write_header(pFormatCtxEnc);
.........
/**
* Init of Encoder and Decoder
*/
bool KeyFlag = false;
bool KeyFlagEx = false;
// Read frames and save frames to disk
int iPts = 1;
av_init_packet(&packet);
while(av_read_frame(pFormatCtxDec, &packet)>=0)
{
if (packet.flags == 1)
KeyFlag = true;
if (!KeyFlag)
continue;
if (m_bStop)
{
break;
}
// Is this a packet from the video stream?
if(packet.stream_index == videoStream)
{
currentframeNum ++;
if (progressCB != NULL && currentframeNum%20 == 0)
{
float fpercent = (float)currentframeNum/frameNum;
progressCB(fpercent,m_pUser);
}
if (currentframeNum >= beginFrame && currentframeNum <= endFrane)
{
if (packet.flags == 1)
KeyFlagEx = true;
if (!KeyFlagEx)
continue;
packet.dts = iPts ++;
av_write_frame(pFormatCtxEnc, &packet);
}
}
// Free the packet that was allocated by av_read_frame
}
// write the trailer, if any
av_write_trailer(pFormatCtxEnc);
/**
* Release of encoder and decoder
*/
return true;
You might try this: libavcodec/h264_mp4toannexb_bsf.c. It converts bitstream without start codes to bitstream with start codes.
Using your source file, does ffmpeg -i src.mp4 -vcodec copy -an dst.mp4 work? Does it work if you add -bsf h264_mp4toannexb? (all using the same version/build of ffmpeg as you are trying to use programmatically of course)