Record and Playback Video in a byte format - media

As the title says, I need an application that uses Media Foundation to capture a stream of video data in a raw format, save it without any container, and then play it back.
I need the easiest solution for that; I don't want to customize any plugin.

The sample below uses the MF H264 encoder MFT to demonstrate how to access the raw bytes from the encoder (full project here). The sample does use a SinkWriter to save the encoded samples to an MP4 file. The SinkWriter stage could be removed and replaced by writing to a file directly, but then you'll need to come up with your own mechanism for delimiting frames and keeping track of frame durations and timestamps.
#include <stdio.h>
#include <tchar.h>
#include <evr.h>
#include <mfapi.h>
#include <mfplay.h>
#include <mfreadwrite.h>
#include <mferror.h>
#include <wmcodecdsp.h>
#include "..\Common\MFUtility.h"
#pragma comment(lib, "mf.lib")
#pragma comment(lib, "mfplat.lib")
#pragma comment(lib, "mfplay.lib")
#pragma comment(lib, "mfreadwrite.lib")
#pragma comment(lib, "mfuuid.lib")
#pragma comment(lib, "wmcodecdspuuid.lib")
int _tmain(int argc, _TCHAR* argv[])
{
const int WEBCAM_DEVICE_INDEX = 1; // <--- Set to 0 to use default system webcam.
const WCHAR *CAPTURE_FILENAME = L"sample.mp4";
const int SAMPLE_COUNT = 25;
IMFMediaSource *videoSource = NULL;
UINT32 videoDeviceCount = 0;
IMFAttributes *videoConfig = NULL;
IMFActivate **videoDevices = NULL;
IMFSourceReader *videoReader = NULL;
WCHAR *webcamFriendlyName;
IMFMediaType *videoSourceOutputType = NULL, *pSrcOutMediaType = NULL;
IUnknown *spTransformUnk = NULL;
IMFTransform *pTransform = NULL; //< this is H264 Encoder MFT
IWMResamplerProps *spResamplerProps = NULL;
IMFMediaType *pMFTInputMediaType = NULL, *pMFTOutputMediaType = NULL;
IMFSinkWriter *pWriter;
IMFMediaType *pVideoOutType = NULL;
DWORD writerVideoStreamIndex = 0;
DWORD mftStatus = 0;
UINT8 blob[] = { 0x00, 0x00, 0x00, 0x01, 0x67, 0x42, 0xc0, 0x1e, 0x96, 0x54, 0x05, 0x01,
0xe9, 0x80, 0x80, 0x40, 0x00, 0x00, 0x00, 0x01, 0x68, 0xce, 0x3c, 0x80 };
CoInitializeEx(NULL, COINIT_APARTMENTTHREADED | COINIT_DISABLE_OLE1DDE);
MFStartup(MF_VERSION);
// Get the first available webcam.
CHECK_HR(MFCreateAttributes(&videoConfig, 1), "Error creating video configuation.\n");
// Request video capture devices.
CHECK_HR(videoConfig->SetGUID(
MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE,
MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_VIDCAP_GUID), "Error initialising video configuration object.");
CHECK_HR(MFEnumDeviceSources(videoConfig, &videoDevices, &videoDeviceCount), "Error enumerating video devices.\n");
CHECK_HR(videoDevices[WEBCAM_DEVICE_INDEX]->GetAllocatedString(MF_DEVSOURCE_ATTRIBUTE_FRIENDLY_NAME, &webcamFriendlyName, NULL), "Error retrieving vide device friendly name.\n");
wprintf(L"First available webcam: %s\n", webcamFriendlyName);
CHECK_HR(videoDevices[WEBCAM_DEVICE_INDEX]->ActivateObject(IID_PPV_ARGS(&videoSource)), "Error activating video device.\n");
// Create a source reader.
CHECK_HR(MFCreateSourceReaderFromMediaSource(
videoSource,
videoConfig,
&videoReader), "Error creating video source reader.\n");
CHECK_HR(videoReader->GetCurrentMediaType(
(DWORD)MF_SOURCE_READER_FIRST_VIDEO_STREAM,
&videoSourceOutputType), "Error retrieving current media type from first video stream.\n");
// Note the webcam needs to support this media type. The list of media types supported can be obtained using the ListTypes function in MFUtility.h.
MFCreateMediaType(&pSrcOutMediaType);
pSrcOutMediaType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
pSrcOutMediaType->SetGUID(MF_MT_SUBTYPE, WMMEDIASUBTYPE_I420);
MFSetAttributeSize(pSrcOutMediaType, MF_MT_FRAME_SIZE, 640, 480);
CHECK_HR(videoReader->SetCurrentMediaType(0, NULL, pSrcOutMediaType), "Failed to set media type on source reader.\n");
// Create H.264 encoder.
CHECK_HR(CoCreateInstance(CLSID_CMSH264EncoderMFT, NULL, CLSCTX_INPROC_SERVER,
IID_IUnknown, (void**)&spTransformUnk), "Failed to create H264 encoder MFT.\n");
CHECK_HR(spTransformUnk->QueryInterface(IID_PPV_ARGS(&pTransform)), "Failed to get IMFTransform interface from H264 encoder MFT object.\n");
MFCreateMediaType(&pMFTOutputMediaType);
pMFTOutputMediaType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
pMFTOutputMediaType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_H264);
pMFTOutputMediaType->SetUINT32(MF_MT_AVG_BITRATE, 240000);
CHECK_HR(MFSetAttributeSize(pMFTOutputMediaType, MF_MT_FRAME_SIZE, 640, 480), "Failed to set frame size on H264 MFT out type.\n");
CHECK_HR(MFSetAttributeRatio(pMFTOutputMediaType, MF_MT_FRAME_RATE, 30, 1), "Failed to set frame rate on H264 MFT out type.\n");
CHECK_HR(MFSetAttributeRatio(pMFTOutputMediaType, MF_MT_PIXEL_ASPECT_RATIO, 1, 1), "Failed to set aspect ratio on H264 MFT out type.\n");
pMFTOutputMediaType->SetUINT32(MF_MT_INTERLACE_MODE, 2);
pMFTOutputMediaType->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, TRUE);
CHECK_HR(pTransform->SetOutputType(0, pMFTOutputMediaType, 0), "Failed to set output media type on H.264 encoder MFT.\n");
MFCreateMediaType(&pMFTInputMediaType);
pMFTInputMediaType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
pMFTInputMediaType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_IYUV);
CHECK_HR(MFSetAttributeSize(pMFTInputMediaType, MF_MT_FRAME_SIZE, 640, 480), "Failed to set frame size on H264 MFT out type.\n");
CHECK_HR(MFSetAttributeRatio(pMFTInputMediaType, MF_MT_FRAME_RATE, 30, 1), "Failed to set frame rate on H264 MFT out type.\n");
CHECK_HR(MFSetAttributeRatio(pMFTInputMediaType, MF_MT_PIXEL_ASPECT_RATIO, 1, 1), "Failed to set aspect ratio on H264 MFT out type.\n");
pMFTInputMediaType->SetUINT32(MF_MT_INTERLACE_MODE, 2);
CHECK_HR(pTransform->SetInputType(0, pMFTInputMediaType, 0), "Failed to set input media type on H.264 encoder MFT.\n");
CHECK_HR(pTransform->GetInputStatus(0, &mftStatus), "Failed to get input status from H.264 MFT.\n");
if (MFT_INPUT_STATUS_ACCEPT_DATA != mftStatus) {
printf("E: ApplyTransform() pTransform->GetInputStatus() not accept data.\n");
goto done;
}
CHECK_HR(pTransform->ProcessMessage(MFT_MESSAGE_COMMAND_FLUSH, NULL), "Failed to process FLUSH command on H.264 MFT.\n");
CHECK_HR(pTransform->ProcessMessage(MFT_MESSAGE_NOTIFY_BEGIN_STREAMING, NULL), "Failed to process BEGIN_STREAMING command on H.264 MFT.\n");
CHECK_HR(pTransform->ProcessMessage(MFT_MESSAGE_NOTIFY_START_OF_STREAM, NULL), "Failed to process START_OF_STREAM command on H.264 MFT.\n");
// Create the MP4 sink writer.
CHECK_HR(MFCreateSinkWriterFromURL(
CAPTURE_FILENAME,
NULL,
NULL,
&pWriter), "Error creating mp4 sink writer.");
CHECK_HR(MFTRegisterLocalByCLSID(
__uuidof(CColorConvertDMO),
MFT_CATEGORY_VIDEO_PROCESSOR,
L"",
MFT_ENUM_FLAG_SYNCMFT,
0,
NULL,
0,
NULL
), "Error registering colour converter DSP.\n");
// Configure the output video type on the sink writer.
CHECK_HR(MFCreateMediaType(&pVideoOutType), "Configure encoder failed to create media type for video output sink.");
CHECK_HR(pVideoOutType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video), "Failed to set video writer attribute, media type.");
CHECK_HR(pVideoOutType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_H264), "Failed to set video writer attribute, video format (H.264).");
CHECK_HR(pVideoOutType->SetUINT32(MF_MT_AVG_BITRATE, 240 * 1000), "Failed to set video writer attribute, bit rate.");
CHECK_HR(CopyAttribute(videoSourceOutputType, pVideoOutType, MF_MT_FRAME_SIZE), "Failed to set video writer attribute, frame size.");
CHECK_HR(CopyAttribute(videoSourceOutputType, pVideoOutType, MF_MT_FRAME_RATE), "Failed to set video writer attribute, frame rate.");
CHECK_HR(CopyAttribute(videoSourceOutputType, pVideoOutType, MF_MT_PIXEL_ASPECT_RATIO), "Failed to set video writer attribute, aspect ratio.");
CHECK_HR(CopyAttribute(videoSourceOutputType, pVideoOutType, MF_MT_INTERLACE_MODE), "Failed to set video writer attribute, interlace mode.");
// See http://stackoverflow.com/questions/24411737/media-foundation-imfsinkwriterfinalize-method-fails-under-windows-7-when-mux
CHECK_HR(pVideoOutType->SetBlob(MF_MT_MPEG_SEQUENCE_HEADER, blob, 24), "Failed to set MF_MT_MPEG_SEQUENCE_HEADER.\n");
CHECK_HR(pWriter->AddStream(pVideoOutType, &writerVideoStreamIndex), "Failed to add the video stream to the sink writer.");
pVideoOutType->Release();
//CHECK_HR(pWriter->SetInputMediaType(writerVideoStreamIndex, videoSourceOutputType, NULL), "Error setting the sink writer video input type.\n");
// Ready to go.
CHECK_HR(pWriter->BeginWriting(), "Sink writer begin writing call failed.\n");
printf("Reading video samples from webcam.\n");
MFT_OUTPUT_DATA_BUFFER outputDataBuffer;
DWORD processOutputStatus = 0;
IMFSample *videoSample = NULL;
DWORD streamIndex, flags;
LONGLONG llVideoTimeStamp, llSampleDuration;
HRESULT mftProcessInput = S_OK;
HRESULT mftProcessOutput = S_OK;
MFT_OUTPUT_STREAM_INFO StreamInfo;
IMFSample *mftOutSample = NULL;
IMFMediaBuffer *pBuffer = NULL;
//DWORD cbOutBytes = 0;
int sampleCount = 0;
DWORD mftOutFlags;
memset(&outputDataBuffer, 0, sizeof outputDataBuffer);
while (sampleCount <= SAMPLE_COUNT)
{
CHECK_HR(videoReader->ReadSample(
MF_SOURCE_READER_FIRST_VIDEO_STREAM,
0, // Flags.
&streamIndex, // Receives the actual stream index.
&flags, // Receives status flags.
&llVideoTimeStamp, // Receives the time stamp.
&videoSample // Receives the sample or NULL.
), "Error reading video sample.");
if (flags & MF_SOURCE_READERF_STREAMTICK)
{
printf("Stream tick.\n");
pWriter->SendStreamTick(0, llVideoTimeStamp);
}
if (videoSample)
{
CHECK_HR(videoSample->SetSampleTime(llVideoTimeStamp), "Error setting the video sample time.\n");
CHECK_HR(videoSample->GetSampleDuration(&llSampleDuration), "Error getting video sample duration.\n");
// Pass the video sample to the H.264 transform.
CHECK_HR(pTransform->ProcessInput(0, videoSample, 0), "The resampler H264 ProcessInput call failed.\n");
CHECK_HR(pTransform->GetOutputStatus(&mftOutFlags), "H264 MFT GetOutputStatus failed.\n");
if (mftOutFlags == MFT_OUTPUT_STATUS_SAMPLE_READY)
{
CHECK_HR(pTransform->GetOutputStreamInfo(0, &StreamInfo), "Failed to get output stream info from H264 MFT.\n");
while (true)
{
CHECK_HR(MFCreateSample(&mftOutSample), "Failed to create MF sample.\n");
CHECK_HR(MFCreateMemoryBuffer(StreamInfo.cbSize, &pBuffer), "Failed to create memory buffer.\n");
CHECK_HR(mftOutSample->AddBuffer(pBuffer), "Failed to add sample to buffer.\n");
outputDataBuffer.dwStreamID = 0;
outputDataBuffer.dwStatus = 0;
outputDataBuffer.pEvents = NULL;
outputDataBuffer.pSample = mftOutSample;
mftProcessOutput = pTransform->ProcessOutput(0, 1, &outputDataBuffer, &processOutputStatus);
if (mftProcessOutput != MF_E_TRANSFORM_NEED_MORE_INPUT)
{
CHECK_HR(outputDataBuffer.pSample->SetSampleTime(llVideoTimeStamp), "Error setting MFT sample time.\n");
CHECK_HR(outputDataBuffer.pSample->SetSampleDuration(llSampleDuration), "Error setting MFT sample duration.\n");
IMFMediaBuffer *buf = NULL;
DWORD bufLength;
CHECK_HR(mftOutSample->ConvertToContiguousBuffer(&buf), "ConvertToContiguousBuffer failed.\n");
CHECK_HR(buf->GetCurrentLength(&bufLength), "Get buffer length failed.\n");
printf("Writing sample %i, sample time %I64d, sample duration %I64d, sample size %i.\n", sampleCount, llVideoTimeStamp, llSampleDuration, bufLength);
CHECK_HR(pWriter->WriteSample(writerVideoStreamIndex, outputDataBuffer.pSample), "The stream sink writer was not happy with the sample.\n");
}
else {
break;
}
pBuffer->Release();
mftOutSample->Release();
}
}
}
sampleCount++;
}
printf("Finalising the capture.\n");
if (pWriter)
{
// See http://stackoverflow.com/questions/24411737/media-foundation-imfsinkwriterfinalize-method-fails-under-windows-7-when-mux for why the Finalize call can fail with MF_E_ATTRIBUTENOTFOUND .
CHECK_HR(pWriter->Finalize(), "Error finalising H.264 sink writer.\n");
}
done:
printf("finished.\n");
getchar();
return 0;
}

Related

Encoding of raw frames (D3D11Texture2D) to an rtsp stream using libav*

I have managed to create a rtsp stream using libav* and directX texture (which I am obtaining from GDI API using Bitblit method). Here's my approach for creating live rtsp stream:
Create output context and stream (skipping the checks here)
// Allocate the output (muxer) context for the "rtsp" format, targeting rtsp_url.
// None of the results below are checked in this fragment.
avformat_alloc_output_context2(&ofmt_ctx, NULL, "rtsp", rtsp_url); //RTSP
// Look up an encoder for the output format's default video codec.
vid_codec = avcodec_find_encoder(ofmt_ctx->oformat->video_codec);
// Add a new stream to the output context.
vid_stream = avformat_new_stream(ofmt_ctx,vid_codec);
// Allocate a codec context for that encoder.
vid_codec_ctx = avcodec_alloc_context3(vid_codec);
Set codec params
// Configure the encoder context before it is opened.
codec_ctx->codec_tag = 0;
// Use the output format's default video codec.
codec_ctx->codec_id = ofmt_ctx->oformat->video_codec;
//codec_ctx->codec_type = AVMEDIA_TYPE_VIDEO;
codec_ctx->width = width; codec_ctx->height = height;
codec_ctx->gop_size = 12;
//codec_ctx->gop_size = 40;
//codec_ctx->max_b_frames = 3;
codec_ctx->pix_fmt = target_pix_fmt; // AV_PIX_FMT_YUV420P
// framerate and time_base are reciprocals: one time_base tick per frame.
codec_ctx->framerate = { stream_fps, 1 };
codec_ctx->time_base = { 1, stream_fps};
// Request global headers from the encoder when the output format asks for them.
// NOTE(review): this reads `fctx` while the rest of the fragment uses `ofmt_ctx` -
// presumably the same format context; confirm.
if (fctx->oformat->flags & AVFMT_GLOBALHEADER)
{
codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
Initialize video stream
if (avcodec_parameters_from_context(stream->codecpar, codec_ctx) < 0)
{
Debug::Error("Could not initialize stream codec parameters!");
return false;
}
AVDictionary* codec_options = nullptr;
if (codec->id == AV_CODEC_ID_H264) {
av_dict_set(&codec_options, "profile", "high", 0);
av_dict_set(&codec_options, "preset", "fast", 0);
av_dict_set(&codec_options, "tune", "zerolatency", 0);
}
// open video encoder
int ret = avcodec_open2(codec_ctx, codec, &codec_options);
if (ret<0) {
Debug::Error("Could not open video encoder: ", avcodec_get_name(codec->id), " error ret: ", AVERROR(ret));
return false;
}
stream->codecpar->extradata = codec_ctx->extradata;
stream->codecpar->extradata_size = codec_ctx->extradata_size;
Start streaming
// Creates an AVFrame describing a width x height picture in the encoder's
// pixel format.
//
// The frame is returned WITHOUT pixel buffers. The caller attaches them with
// av_frame_get_buffer(), as RtspStream() does. Pointing frame->data at a
// function-local buffer would leave the frame referencing memory that is
// destroyed on return.
//
// Returns nullptr when the frame cannot be allocated. The caller frees the
// frame with av_frame_free().
AVFrame* AllocateFrameBuffer(AVCodecContext* codec_ctx, double width, double height)
{
    AVFrame* frame = av_frame_alloc();
    if (!frame)
    {
        return nullptr;
    }
    // The dimensions arrive as double; AVFrame stores them as int.
    frame->width = static_cast<int>(width);
    frame->height = static_cast<int>(height);
    frame->format = static_cast<int>(codec_ctx->pix_fmt);
    return frame;
}
// Streaming loop: grabs the window's pixels, converts them to the encoder's
// pixel format, encodes each frame and muxes the resulting packets to the
// output context until RtspStreaming::IsStreaming() returns false or an error
// occurs.
//
//   ofmt_ctx      output (muxer) context; the header and trailer are written here
//   vid_stream    output video stream the packets belong to
//   vid_codec_ctx opened video encoder
//   rtsp_url      only used for av_dump_format()
//
// The function owns the scaler, the frame and the pixel staging buffer it
// creates and frees them on every exit path. It does not free its arguments.
void RtspStream(AVFormatContext* ofmt_ctx, AVStream* vid_stream, AVCodecContext* vid_codec_ctx, char* rtsp_url)
{
    printf("Output stream info:\n");
    av_dump_format(ofmt_ctx, 0, rtsp_url, 1);

    const int width = WindowManager::Get().GetWindow(RtspStreaming::WindowId())->GetTextureWidth();
    const int height = WindowManager::Get().GetWindow(RtspStreaming::WindowId())->GetTextureHeight();

    //DirectX BGRA to h264 YUV420p
    SwsContext* conversion_ctx = sws_getContext(width, height, src_pix_fmt,
        vid_stream->codecpar->width, vid_stream->codecpar->height, target_pix_fmt,
        SWS_BICUBIC | SWS_BITEXACT, nullptr, nullptr, nullptr);
    if (!conversion_ctx)
    {
        Debug::Error("Could not initialize sample scaler!");
        return;
    }

    AVFrame* frame = AllocateFrameBuffer(vid_codec_ctx,vid_codec_ctx->width,vid_codec_ctx->height);
    if (!frame) {
        Debug::Error("Could not allocate video frame\n");
        sws_freeContext(conversion_ctx);
        return;
    }

    if (avformat_write_header(ofmt_ctx, NULL) < 0) {
        Debug::Error("Error occurred when writing header");
        av_frame_free(&frame);
        sws_freeContext(conversion_ctx);
        return;
    }

    // Attach reference-counted pixel buffers to the frame.
    if (av_frame_get_buffer(frame, 0) < 0) {
        Debug::Error("Could not allocate the video frame data\n");
        av_frame_free(&frame);
        sws_freeContext(conversion_ctx);
        return;
    }

    int frame_cnt = 0;
    //av start time in microseconds
    int64_t start_time_av = av_gettime();
    // Read after avformat_write_header(), so this is the stream time base in
    // effect once the header has been written.
    AVRational time_base = vid_stream->time_base;
    AVRational time_base_q = { 1, AV_TIME_BASE };

    // frame pixel data info (4 bytes per source pixel)
    int data_size = width * height * 4;
    uint8_t* data = new uint8_t[data_size];

    while (RtspStreaming::IsStreaming())
    {
        /* make sure the frame data is writable */
        if (av_frame_make_writable(frame) < 0)
        {
            Debug::Error("Can't make frame writable");
            break;
        }

        //get copy/ref of the texture
        if (!WindowManager::Get().GetWindow(RtspStreaming::WindowId())->GetPixels(data, 0, 0, width, height))
        {
            Debug::Error("Failed to get frame buffer. ID: ", RtspStreaming::WindowId());
            std::this_thread::sleep_for (std::chrono::seconds(2));
            continue;
        }

        // convert BGRA to yuv420 pixel format
        int srcStrides[1] = { 4 * width };
        if (sws_scale(conversion_ctx, &data, srcStrides, 0, height, frame->data, frame->linesize) < 0)
        {
            Debug::Error("Unable to scale d3d11 texture to frame. ", frame_cnt);
            break;
        }

        // Frame timestamps are counted in encoder time_base units.
        frame->pts = frame_cnt++;

        //encode to the video stream
        int ret = avcodec_send_frame(vid_codec_ctx, frame);
        if (ret < 0)
        {
            Debug::Error("Error sending frame to codec context! ",frame_cnt);
            break;
        }

        AVPacket* pkt = av_packet_alloc();
        if (!pkt)
        {
            Debug::Error("Could not allocate packet");
            break;
        }

        ret = avcodec_receive_packet(vid_codec_ctx, pkt);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
        {
            // The encoder needs more input before it can emit a packet.
            av_packet_free(&pkt);
            continue;
        }
        else if (ret < 0)
        {
            // ret is already a negative AVERROR code; log it unchanged.
            Debug::Error("Error during receiving packet: ", ret);
            av_packet_free(&pkt);
            break;
        }

        pkt->stream_index = vid_stream->index;

        if (pkt->pts == AV_NOPTS_VALUE)
        {
            //Write PTS
            //Duration between 2 frames (us)
            int64_t calc_duration = (double)AV_TIME_BASE / av_q2d(vid_stream->r_frame_rate);
            //Parameters (computed directly in stream time_base units)
            pkt->pts = (double)(frame_cnt * calc_duration) / (double)(av_q2d(time_base) * AV_TIME_BASE);
            pkt->dts = pkt->pts;
            pkt->duration = (double)calc_duration / (double)(av_q2d(time_base) * AV_TIME_BASE);
        }
        else
        {
            // The encoder stamps packets in its own time base, while the
            // muxer expects the stream's. Convert pts/dts/duration first.
            av_packet_rescale_ts(pkt, vid_codec_ctx->time_base, time_base);
        }

        // Pace the output to wall-clock time.
        int64_t pts_time = av_rescale_q(pkt->dts, time_base, time_base_q);
        int64_t now_time = av_gettime() - start_time_av;
        if (pts_time > now_time)
            av_usleep(pts_time - now_time);

        //write frame and send
        if (av_interleaved_write_frame(ofmt_ctx, pkt)<0)
        {
            Debug::Error("Error muxing packet, frame number:",frame_cnt);
            // The AVPacket struct itself still has to be freed here.
            av_packet_free(&pkt);
            break;
        }

        av_packet_free(&pkt);
    }

    delete[] data;

    /* Write the trailer, if any. The trailer must be written before you
     * close the CodecContexts open when you wrote the header; otherwise
     * av_write_trailer() may try to use memory that was freed on
     * av_codec_close(). */
    av_write_trailer(ofmt_ctx);

    // av_frame_free() also unreferences the frame's buffers.
    av_frame_free(&frame);
    sws_freeContext(conversion_ctx);
    printf("streaming thread CLOSED!\n");
}
Now, this allows me to connect to my RTSP server and maintain the connection. However, on the RTSP client side I am getting either a gray frame or a single static frame, as shown below:
Would appreciate if you can help with following questions:
Firstly, why the stream is not working in spite of continued connection to the server and updating frames?
Video codec. By default rtsp format uses Mpeg4 codec, is it possible to use h264? When I manually set it to AV_CODEC_ID_H264 the program fails at avcodec_open2 with return value of -22.
Do I need to create and allocate new "AVFrame" and "AVPacket" for every frame? Or can I just reuse global variable for this?
Do I need to explicitly define some code for real-time streaming? (Like in ffmpeg we use "-re" flag).
Would be great if you can point out some example code for creating livestream. I have checked following resources:
https://github.com/FFmpeg/FFmpeg/blob/master/doc/examples/encode_video.c
streaming FLV to RTMP with FFMpeg using H264 codec and C++ API to flv.js
https://medium.com/swlh/streaming-video-with-ffmpeg-and-directx-11-7395fcb372c4
Update
While testing I found that I am able to play the stream using ffplay, while it gets stuck in VLC player. Here is a snapshot of the ffplay log:
The basic construct and initialization seems to be okay. Find below responses to your questions
why the stream is not working in spite of continued connection to the server and updating frames?
If you're getting an error or broken stream, you might wanna check into your presentation and decompression timestamps (pts/dts) of your packet.
In your code, I notice that you're taking time_base from the video stream object, which is not guaranteed to be the same as the codec->time_base value and usually varies depending on the active stream.
AVRational time_base = vid_stream->time_base;
AVRational time_base_q = { 1, AV_TIME_BASE };
Video codec. By default rtsp format uses Mpeg4 codec, is it possible to use h264?
I don't see why not... RTSP is just a protocol for carrying your packets over the network. So you should be able use AV_CODEC_ID_H264 for encoding the stream.
Do I need to create and allocate new "AVFrame" and "AVPacket" for every frame? Or can I just reuse global variable for this?
In libav, during the encoding process a single packet is used for encoding one video frame, while there can be multiple audio frames in a single packet. I should cite a reference for this, but can't seem to find a source at the moment. Anyway, the point is that you would need to create a new packet every time.
Do I need to explicitly define some code for real-time streaming? (Like in ffmpeg we use "-re" flag).
You don't need to add anything else for real time streaming. Although you might wanna implement it to limit the number of frame updates that you pass to encoder and save some performance.
for me the difference between ffplay good capture and VLC bad capture (for UDP packets) was pkt_size=xxx attribute (ffmpeg -re -i test.mp4 -f mpegts udp://127.0.0.1:23000?pkt_size=1316) (VLC open media network tab udp://#:23000:pkt_size=1316). So only if pkt_size is defined (and equal) VLC is able to capture.

FFmpeg avcodec_decode_video2 decode RTSP H264 HD-video packet to video picture with error

I used the FFmpeg library version 4.0 to write a simple C++ program, in which there is a thread that receives RTSP H264 video data from an IP camera and displays it in the program window.
Code of this thread is follow:
// Grabbing thread.
//
// Opens the stream named by nameVideoStream, finds its first video stream,
// opens a decoder for it and then, while fGrabb is 1 or 100, reads packets,
// decodes video frames, converts them to YUV420P and RGB24 and draws the RGB
// image into the hWndM window. Setup failures are recorded as negative fGrabb
// values (-1 .. -6), which also keep the grab loop from running.
//
// lpParam is not used. On exit the thread reports its state in the window
// title, resets fGrabb to 0 and ends via ExitThread() with the last fGrabb
// value as the exit code.
//
// All state except the loop counters lives in variables declared outside this
// function.
//
// NOTE(review): picture corruption on high-bitrate RTSP streams is commonly
// caused by lost UDP packets; requesting TCP transport through the options
// dictionary of avformat_open_input() is worth testing - confirm on the camera.
DWORD WINAPI GrabbProcess(LPVOID lpParam)
{
    DWORD i;
    int nPacket=0;

    // Open video file
    pFormatCtx = avformat_alloc_context();
    if(avformat_open_input(&pFormatCtx, nameVideoStream, NULL, NULL)!=0)
        fGrabb=-1; // Couldn't open file
    else
        // Retrieve stream information
        if(avformat_find_stream_info(pFormatCtx, NULL)<0)
            fGrabb=-2; // Couldn't find stream information
        else
        {
            // Find the first video stream
            videoStream=-1;
            for(i=0; i<pFormatCtx->nb_streams; i++)
                if(pFormatCtx->streams[i]->codec->codec_type==AVMEDIA_TYPE_VIDEO)
                {
                    videoStream=i;
                    break;
                }
            if(videoStream==-1)
                fGrabb=-3; // Didn't find a video stream
            else
            {
                // Get a pointer to the codec context for the video stream
                pCodecCtxOrig=pFormatCtx->streams[videoStream]->codec;
                // Find the decoder for the video stream
                pCodec=avcodec_find_decoder(pCodecCtxOrig->codec_id);
                if(pCodec==NULL)
                    fGrabb=-4; // Codec not found
                else
                {
                    // Copy context
                    pCodecCtx = avcodec_alloc_context3(pCodec);
                    if(avcodec_copy_context(pCodecCtx, pCodecCtxOrig) != 0)
                        fGrabb=-5; // Error copying codec context
                    else
                    {
                        // Open codec
                        if(avcodec_open2(pCodecCtx, pCodec, NULL)<0)
                            fGrabb=-6; // Could not open codec
                        else
                        {
                            // Everything below needs an opened codec and an
                            // allocated pFrame, so it is all kept inside this
                            // branch. Without the braces only the
                            // av_frame_alloc() call belonged to the else, and
                            // avpicture_fill() ran on an unallocated pFrame
                            // after a failed open.

                            // Allocate video frame for input
                            pFrame=av_frame_alloc();
                            // Determine required buffer size and allocate buffer
                            numBytes=avpicture_get_size(pCodecCtx->pix_fmt, pCodecCtx->width,
                                pCodecCtx->height);
                            buffer=(uint8_t *)av_malloc(numBytes*sizeof(uint8_t));
                            // Assign appropriate parts of buffer to image planes in pFrame
                            // Note that pFrame is an AVFrame, but AVFrame is a superset
                            // of AVPicture
                            avpicture_fill((AVPicture *)pFrame, buffer, pCodecCtx->pix_fmt,
                                pCodecCtx->width, pCodecCtx->height);

                            // Allocate video frame for display
                            pFrameRGB=av_frame_alloc();
                            // Determine required buffer size and allocate buffer
                            numBytes=avpicture_get_size(AV_PIX_FMT_RGB24, pCodecCtx->width,
                                pCodecCtx->height);
                            bufferRGB=(uint8_t *)av_malloc(numBytes*sizeof(uint8_t));
                            // Assign appropriate parts of buffer to image planes in pFrameRGB
                            // Note that pFrameRGB is an AVFrame, but AVFrame is a superset
                            // of AVPicture
                            avpicture_fill((AVPicture *)pFrameRGB, bufferRGB, AV_PIX_FMT_RGB24,
                                pCodecCtx->width, pCodecCtx->height);
                            // initialize SWS context for software scaling to FMT_RGB24
                            sws_ctx_to_RGB = sws_getContext(pCodecCtx->width,
                                pCodecCtx->height,
                                pCodecCtx->pix_fmt,
                                pCodecCtx->width,
                                pCodecCtx->height,
                                AV_PIX_FMT_RGB24,
                                SWS_BILINEAR,
                                NULL,
                                NULL,
                                NULL);

                            // Allocate video frame (grayscale YUV420P) for processing
                            pFrameYUV=av_frame_alloc();
                            // Determine required buffer size and allocate buffer
                            numBytes=avpicture_get_size(AV_PIX_FMT_YUV420P, pCodecCtx->width,
                                pCodecCtx->height);
                            bufferYUV=(uint8_t *)av_malloc(numBytes*sizeof(uint8_t));
                            // Assign appropriate parts of buffer to image planes in pFrameYUV
                            // Note that pFrameYUV is an AVFrame, but AVFrame is a superset
                            // of AVPicture
                            avpicture_fill((AVPicture *)pFrameYUV, bufferYUV, AV_PIX_FMT_YUV420P,
                                pCodecCtx->width, pCodecCtx->height);
                            // initialize SWS context for software scaling to FMT_YUV420P
                            sws_ctx_to_YUV = sws_getContext(pCodecCtx->width,
                                pCodecCtx->height,
                                pCodecCtx->pix_fmt,
                                pCodecCtx->width,
                                pCodecCtx->height,
                                AV_PIX_FMT_YUV420P,
                                SWS_BILINEAR,
                                NULL,
                                NULL,
                                NULL);

                            // Negative height is stored in the bitmap header
                            // used by SetDIBitsToDevice below.
                            RealBsqHdr.biWidth = pCodecCtx->width;
                            RealBsqHdr.biHeight = -pCodecCtx->height;
                        }
                    }
                }
            }
        }

    while ((fGrabb==1)||(fGrabb==100))
    {
        // Grabb a frame
        if (av_read_frame(pFormatCtx, &packet) >= 0)
        {
            // Is this a packet from the video stream?
            if(packet.stream_index==videoStream)
            {
                // Decode video frame
                avcodec_decode_video2(pCodecCtx, pFrame, &frameFinished, &packet);
                nPacket++;
                // Did we get a video frame?
                if(frameFinished)
                {
                    // Convert the image from its native format to YUV
                    sws_scale(sws_ctx_to_YUV, (uint8_t const * const *)pFrame->data,
                        pFrame->linesize, 0, pCodecCtx->height,
                        pFrameYUV->data, pFrameYUV->linesize);
                    // Convert the image from its native format to RGB
                    sws_scale(sws_ctx_to_RGB, (uint8_t const * const *)pFrame->data,
                        pFrame->linesize, 0, pCodecCtx->height,
                        pFrameRGB->data, pFrameRGB->linesize);
                    HDC hdc=GetDC(hWndM);
                    SetDIBitsToDevice(hdc, 0, 0, pCodecCtx->width, pCodecCtx->height,
                        0, 0, 0, pCodecCtx->height,pFrameRGB->data[0], (LPBITMAPINFO)&RealBsqHdr, DIB_RGB_COLORS);
                    ReleaseDC(hWndM,hdc);
                    av_frame_unref(pFrame);
                }
            }
            // Free the packet that was allocated by av_read_frame
            av_free_packet(&packet);
        }
    }

    // Free the org frame
    av_frame_free(&pFrame);
    // Free the RGB frame
    av_frame_free(&pFrameRGB);
    // Free the YUV frame
    av_frame_free(&pFrameYUV);
    // Close the codec
    avcodec_close(pCodecCtx);
    avcodec_close(pCodecCtxOrig);
    // Close the video file
    avformat_close_input(&pFormatCtx);
    avformat_free_context(pFormatCtx);

    // Report why the thread stopped in the window title.
    if (fGrabb==1)
        sprintf(tmpstr,"Grabbing Completed %d frames", nCntTotal);
    else if (fGrabb==2)
        sprintf(tmpstr,"User break on %d frames", nCntTotal);
    else if (fGrabb==3)
        sprintf(tmpstr,"Can't Grabb at frame %d", nCntTotal);
    else if (fGrabb==-1)
        sprintf(tmpstr,"Couldn't open file");
    else if (fGrabb==-2)
        sprintf(tmpstr,"Couldn't find stream information");
    else if (fGrabb==-3)
        sprintf(tmpstr,"Didn't find a video stream");
    else if (fGrabb==-4)
        sprintf(tmpstr,"Codec not found");
    else if (fGrabb==-5)
        sprintf(tmpstr,"Error copying codec context");
    else if (fGrabb==-6)
        sprintf(tmpstr,"Could not open codec");

    // The last fGrabb value becomes the thread exit code.
    i=(UINT) fGrabb;
    fGrabb=0;
    SetWindowText(hWndM,tmpstr);
    ExitThread(i);
    return 0;
}
// End Grabbing thread
When the program receives RTSP H264 video data with a resolution of 704x576, the decoded video pictures are OK. When it receives RTSP H264 HD video data with a resolution of 1280x720, it looks like the first video picture is decoded correctly, but all subsequent pictures are decoded with some errors.
Please help me to fix this problem!
Here is problems brief :
I have an IP camera model HI3518E_50H10L_S39 (product of China).
Camera can provide H264 video stream both at resolution 704x576 (with RTSP URI "rtsp://192.168.1.18:554/user=admin_password=tlJwpbo6_channel=1_stream=1.sdp?real_stream") or 1280x720 (with RTSP URI "rtsp://192.168.1.18:554/user=admin_password=tlJwpbo6_channel=1_stream=0.sdp?real_stream").
Using FFplay utility I can access and display them with good picture quality.
For testing of grabbing from this camera, I have a simple (above mentioned) program in VC-2005. In "Grabbing thread" program use FFmpeg library version 4.0 for opening camera RTSP stream, retrieve stream information, find the first video stream... and prepare some variables.
Center of this thread is loop: Grab a frame (function av_read_frame) - Decode it if it's video (function avcodec_decode_video2) - Convert to RGB format (function sws_scale) - Display to program window (GDI function SetDIBitsToDevice).
When the program runs with the camera RTSP stream at resolution 704x576, I get a good video picture. Here is a sample:
704x576 sample
When program run with camera RTSP stream at resolution 1280x720, first video picture is good:
First good at res.1280x720
but then not good:
not good at res.1280x720
It seems that my call to the FFmpeg function avcodec_decode_video2 can't fully decode certain packets for some reason.

Transcoding videos with LibAvFormat for playback on iOS devices

I’m trying to transcode a video on my iOS app using FFMpeg/LibAv.
What I’m trying to accomplish is to transcode a video in order to resize each frame and possibly lower the bitrate in order to save valuable MB in the device.
The resulting video must be playable on all iPhone5+ devices.
After reading the documentation I found out that:
I do not need to encode/decode the audio stream -> I’ll copy as-is to the output file
I need to encode the video using the h264 codec (LibX264) with a profile supported by iOS (baseline profile with level 3.0 - https://trac.ffmpeg.org/wiki/Encode/H.264#Compatibility)
I’m also setting the picture format to YUV planar since it’s the only one supported by iOS
For the sake of testing I’m not using any filter (I’m using a dummy/passthrough) at all or even trying to lower the bitrate, I’m just trying to decode the video stream and encode it again
Most of the code is based on the transcoding.c and filtering.c available on the FFMpeg examples directory
FFMpeg-wise what I’m trying to achieve with LibAv is:
ffmpeg -i INPUT.MOV -c:v libx264 -preset ultrafast -profile:v baseline -level 3.0 -c:a copy output.MOV
(the resulting file - which can be found below - is playable on QuickTime if it’s generated by FFMpeg through the command line)
The original video was generated with a regular iPhone using iOS 8.2 but the problem is not device specific or iOS specific, it occurs on all videos generated with LibAv.
Although both resulting files are playable by VideoLan (VLC) the one I generated through LibAv is not playable by QuickTime even though I can’t find anything wrong with it.
As you can see below, I create the video stream with the proper video codec on the call to avformat_new_stream:
// Builds the output AVFormatContext (ofmt_ctx) for `filename`:
//  - every video stream is re-encoded with H.264 (ultrafast / baseline / level 3.0),
//  - every other stream of a known type is remuxed with its codec settings copied,
//  - a stream of unknown type aborts the whole operation,
// and finally the output file is opened and the container header is written.
// Returns 0 on success or a negative AVERROR code; every failure is also
// reported through -errorWith:and:.
AVStream *out_stream; // output stream
AVStream *in_stream; // input stream
AVCodecContext *dec_ctx, *enc_ctx; // codec context for the stream
AVCodec *encoder; // codec used
int ret;
unsigned int i;
ofmt_ctx = NULL;
// Allocate an AVFormatContext for the output; the muxer is guessed from the file name.
avformat_alloc_output_context2(&ofmt_ctx, NULL, NULL, filename);
if (!ofmt_ctx) {
    av_log(NULL, AV_LOG_ERROR, "Could not create output context\n");
    [self errorWith:kErrorCreatingOutputContext and:@"Could not create output context"];
    return AVERROR_UNKNOWN;
}
// The AVCodecContext of an input stream must not be used directly for output,
// so each output stream gets its own context (configured or copied below).
for (i = 0; i < ifmt_ctx->nb_streams; i++) {
    in_stream = ifmt_ctx->streams[i]; // input stream
    dec_ctx = in_stream->codec; // codec context of the decoder
    if (dec_ctx->codec_type == AVMEDIA_TYPE_VIDEO) {
        // Video is always re-encoded with H.264.
        encoder = avcodec_find_encoder(AV_CODEC_ID_H264);
        if (!encoder) {
            [self errorWith:kErrorCodecNotFound and:@"H264 Codec Not Found"];
            return AVERROR_UNKNOWN;
        }
        out_stream = avformat_new_stream(ofmt_ctx, encoder);
        if (!out_stream) {
            av_log(NULL, AV_LOG_ERROR, "Failed allocating output stream\n");
            [self errorWith:kErrorAllocateOutputStream and:@"Failed allocating output stream"];
            return AVERROR_UNKNOWN;
        }
        enc_ctx = out_stream->codec; // codec context of the new stream
        /* Transcode to the same picture properties as the input. */
        enc_ctx->width = dec_ctx->width;
        enc_ctx->height = dec_ctx->height;
        enc_ctx->sample_aspect_ratio = dec_ctx->sample_aspect_ratio;
        enc_ctx->pix_fmt = AV_PIX_FMT_YUV420P;
        enc_ctx->time_base = dec_ctx->time_base;
        av_opt_set(enc_ctx->priv_data, "preset", "ultrafast", 0);
        av_opt_set(enc_ctx->priv_data, "profile", "baseline", 0);
        av_opt_set(enc_ctx->priv_data, "level", "3.0", 0);
        // Containers such as MOV/MP4 store the codec parameter sets in a global
        // header. The encoder only produces that extradata if the flag is set
        // BEFORE avcodec_open2(); setting it afterwards has no effect.
        if (ofmt_ctx->oformat->flags & AVFMT_GLOBALHEADER) {
            enc_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
        }
        out_stream->time_base = in_stream->time_base;
        // Carry the per-stream metadata over. With flags == 0 av_dict_set()
        // duplicates key and value itself, so no extra av_strdup() is needed
        // (the extra copies would leak).
        AVDictionaryEntry *tag = NULL;
        while ((tag = av_dict_get(in_stream->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
            printf("%s=%s\n", tag->key, tag->value);
            av_dict_set(&out_stream->metadata, tag->key, tag->value, 0);
        }
        ret = avcodec_open2(enc_ctx, encoder, NULL);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "Cannot open video encoder for stream #%u\n", i);
            [self errorWith:kErrorCantOpenOutputFile and:[NSString stringWithFormat:@"Cannot open video encoder for stream #%u",i]];
            return ret;
        }
    }
    else if (dec_ctx->codec_type == AVMEDIA_TYPE_UNKNOWN) {
        // If the stream type cannot be determined, fail. (`i` is unsigned, hence %u.)
        av_log(NULL, AV_LOG_FATAL, "Elementary stream #%u is of unknown type, cannot proceed\n", i);
        [self errorWith:kErrorUnknownStream and:[NSString stringWithFormat:@"Elementary stream #%u is of unknown type, cannot proceed",i]];
        return AVERROR_INVALIDDATA;
    }
    else {
        /* This stream is remuxed without re-encoding. */
        out_stream = avformat_new_stream(ofmt_ctx, NULL);
        if (!out_stream) {
            av_log(NULL, AV_LOG_ERROR, "Failed allocating output stream\n");
            [self errorWith:kErrorAllocateOutputStream and:@"Failed allocating output stream"];
            return AVERROR_UNKNOWN;
        }
        enc_ctx = out_stream->codec;
        // Copy the settings of the source AVCodecContext into the destination one.
        ret = avcodec_copy_context(ofmt_ctx->streams[i]->codec,
                                   ifmt_ctx->streams[i]->codec);
        if (ret < 0) {
            av_log(NULL, AV_LOG_ERROR, "Copying stream context failed\n");
            [self errorWith:kErrorCopyStreamFailed and:@"Copying stream context failed"];
            return ret;
        }
        // Set after the copy, because the copy overwrites the flags field.
        if (ofmt_ctx->oformat->flags & AVFMT_GLOBALHEADER) {
            enc_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
        }
    }
}
if (!(ofmt_ctx->oformat->flags & AVFMT_NOFILE)) {
    // Create and initialize an AVIOContext for writing to `filename`.
    ret = avio_open(&ofmt_ctx->pb, filename, AVIO_FLAG_WRITE);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Could not open output file '%s'", filename);
        [self errorWith:kErrorCantOpenOutputFile and:[NSString stringWithFormat:@"Could not open output file '%s'", filename]];
        return ret;
    }
}
/* Init the muxer: allocate the stream private data and write the file header. */
ret = avformat_write_header(ofmt_ctx, NULL);
if (ret < 0) {
    av_log(NULL, AV_LOG_ERROR, "Error occurred when opening output file\n");
    [self errorWith:kErrorOutFileCantWriteHeader and:@"Error occurred when opening output file"];
    return ret;
}
return 0;
You can find the files here:
Original file - https://www.dropbox.com/s/2jjs1uy2pu2veyy/IMG_5705.MOV?dl=0
File generated with FFMpeg - https://www.dropbox.com/s/9hfmq3fcifgpfqc/local-ffmpeg.MOV?dl=0
File generated by code - https://www.dropbox.com/s/rttvny39rj7ejpf/generated-by-Ze.MOV?dl=0
Thank you so much,
Ze

Convert YUV frames into RGBA frames with FFMPEG

I would like to develop an application which would be able to convert YUV frames into RGBA frames using the ffmpeg library.
I have begun writing this code:
/*
 * Sets up a video encoder for the codec identified by codec_id, opens
 * `filename` for binary writing and allocates the frame that will be fed to
 * the encoder. The process exits with a distinct status code (1-5) at each
 * failing step.
 *
 * filename: path of the output file
 * codec_id: value cast to enum AVCodecID to select the encoder
 *
 * NOTE: the snippet ends after the frame fields are filled in; the actual
 * encoding loop is not shown.
 */
void Decode::video_encode_example(const char *filename, int codec_id)
{
AVCodec *codec;
AVCodecContext *c= NULL;
int i, ret, x, y, got_output;
FILE *f;
AVFrame *frame;
AVPacket pkt;
uint8_t endcode[] = { 0, 0, 1, 0xb7 };
printf("Encode video file %s\n", filename);
/* find the encoder that matches the requested codec_id */
codec = avcodec_find_encoder((enum AVCodecID)codec_id);
if (!codec) {
fprintf(stderr, "Codec not found\n");
exit(1);
}
c = avcodec_alloc_context3(codec);
if (!c) {
fprintf(stderr, "Could not allocate video codec context\n");
exit(2);
}
/* put sample parameters */
c->bit_rate = 400000;
/* resolution must be a multiple of two */
c->width = 352; // previously 352x288
c->height = 288;
/* frames per second */
c->time_base = (AVRational){1,25};
/* emit one intra frame every ten frames
 * check frame pict_type before passing frame
 * to encoder, if frame->pict_type is AV_PICTURE_TYPE_I
 * then gop_size is ignored and the output of encoder
 * will always be I frame irrespective to gop_size
 */
c->gop_size = 10;
c->max_b_frames = 1;
printf("Avant\n");
// NOTE(review): the encoder must support this pixel format, otherwise
// avcodec_open2() below presumably fails - confirm against the chosen codec.
c->pix_fmt = PIX_FMT_RGBA;// previously AV_PIX_FMT_YUV420P
printf("Après\n");
/* the "preset" private option is only applied for H.264 */
if (codec_id == AV_CODEC_ID_H264)
av_opt_set(c->priv_data, "preset", "slow", 0);
/* open it */
if (avcodec_open2(c, codec, NULL) < 0) {
fprintf(stderr, "Could not open codec\n");
exit(3);
}
f = fopen(filename, "wb");
if (!f) {
fprintf(stderr, "Could not open %s\n", filename);
exit(4);
}
frame = avcodec_alloc_frame();// newer FFmpeg versions use av_frame_alloc instead
if (!frame) {
fprintf(stderr, "Could not allocate video frame\n");
exit(5);
}
/* the frame uses the same format and dimensions as the codec context */
frame->format = c->pix_fmt;
frame->width = c->width;
frame->height = c->height;
However, each time I run this application, the following error appears in my Linux terminal:
[mpeg2video @ 0x10c7040] Specified pix_fmt is not supported
Could you help me please ?
I'm not sure how you believe your code is relevant to your question; your question suggests you'd like to do a pixel format conversion from YUV to RGB, for which you could e.g. use ffmpeg's libswscale. However, your code is creating an MPEG-1/2 encoder object and tries to encode RGB input data into MPEG-1/2. This is not possible; ffmpeg's MPEG-1/2 encoders only support YUV420P. I'm not quite sure what to recommend other than to figure out whether you want to encode MPEG-1/2 video, in which case your input should be YUV420P, not RGBA, or whether you want to do pixel format conversion, in which case you should use libswscale...

How to fill audio AVFrame (ffmpeg) with the data obtained from CMSampleBufferRef (AVFoundation)?

I am writing a program for streaming live audio and video from a webcam to an RTMP server. I work on Mac OS X 10.8, so I use the AVFoundation framework to obtain audio and video frames from the input devices. These frames are delivered to the delegate method:
-(void) captureOutput:(AVCaptureOutput*)captureOutput didOutputSampleBuffer: (CMSampleBufferRef)sampleBuffer fromConnection:(AVCaptureConnection*)connection ,
where sampleBuffer contains audio or video data.
When I receive audio data in the sampleBuffer, I try to convert this data into an AVFrame and encode the AVFrame with libavcodec:
// Wraps the PCM bytes of the incoming CMSampleBuffer in an AVFrame and hands
// that frame to the audio encoder; exits the process if encoding fails.
aframe = avcodec_alloc_frame(); //AVFrame *aframe;
int got_packet, ret;
CMItemCount numSamples = CMSampleBufferGetNumSamples(sampleBuffer); //CMSampleBufferRef
NSUInteger channelIndex = 0;
CMBlockBufferRef audioBlockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer);
// Byte offset of the selected channel's samples; 0 here because channelIndex is 0.
size_t audioBlockBufferOffset = (channelIndex * numSamples * sizeof(SInt16));
size_t lengthAtOffset = 0;
size_t totalLength = 0;
SInt16 *samples = NULL;
// Obtain a pointer to the sample data inside the block buffer.
// NOTE(review): the return status is not checked, so `samples` may stay NULL.
CMBlockBufferGetDataPointer(audioBlockBuffer, audioBlockBufferOffset, &lengthAtOffset, &totalLength, (char **)(&samples));
const AudioStreamBasicDescription *audioDescription = CMAudioFormatDescriptionGetStreamBasicDescription(CMSampleBufferGetFormatDescription(sampleBuffer));
// Describe the frame using the buffer's sample count and its stream description.
aframe->nb_samples =(int) numSamples;
aframe->channels=audioDescription->mChannelsPerFrame;
aframe->sample_rate=(int)audioDescription->mSampleRate;
//my webCamera configured to produce 16bit 16kHz LPCM mono, so sample format hardcoded here, and seems to be correct
// Buffer size passed below = samples * bytes per S16 sample * channels.
// NOTE(review): the return value of avcodec_fill_audio_frame is ignored.
avcodec_fill_audio_frame(aframe, aframe->channels, AV_SAMPLE_FMT_S16,
(uint8_t *)samples,
aframe->nb_samples *
av_get_bytes_per_sample(AV_SAMPLE_FMT_S16) *
aframe->channels, 0);
//encoding audio
// NOTE(review): `c` and `pkt` are defined outside this snippet. Encoders with a
// fixed frame size presumably expect exactly c->frame_size samples per call -
// confirm that numSamples matches it.
ret = avcodec_encode_audio2(c, &pkt, aframe, &got_packet);
if (ret < 0) {
fprintf(stderr, "Error encoding audio frame: %s\n", av_err2str(ret));
exit(1);
}
The problem is that with frames formed this way I can hear the wanted sound, but it is slowed down and discontinuous (as if each data frame were followed by an equally long frame of silence). It seems that something is wrong in the conversion from CMSampleBuffer to AVFrame, because the preview from the microphone created with AVFoundation from the same sample buffers plays normally.
I would be grateful for your help.
UPD: Creating and initializing the AVCodecContext structure:
// Looks up the AAC encoder, adds a new audio stream to the output context `oc`
// and configures that stream's codec context; exits the process if the encoder
// is missing or the stream cannot be allocated.
audio_codec= avcodec_find_encoder(AV_CODEC_ID_AAC);
if (!(audio_codec)) {
fprintf(stderr, "Could not find encoder for '%s'\n",
avcodec_get_name(AV_CODEC_ID_AAC));
exit(1);
}
audio_st = avformat_new_stream(oc, audio_codec); //AVFormatContext *oc;
if (!audio_st) {
fprintf(stderr, "Could not allocate stream\n");
exit(1);
}
audio_st->id=1;
// Encoder input: signed 16-bit samples, 16000 Hz, one channel; target bit rate 64000.
// NOTE(review): whether the AAC encoder in use accepts AV_SAMPLE_FMT_S16 is not
// visible here - confirm against the encoder's supported sample formats.
audio_st->codec->sample_fmt= AV_SAMPLE_FMT_S16;
audio_st->codec->bit_rate = 64000;
audio_st->codec->sample_rate= 16000;
audio_st->codec->channels=1;
audio_st->codec->codec_type= AVMEDIA_TYPE_AUDIO;

Resources