I use FFMPEG to decode H264 stream. After I get decoded YUV420 frames I want to convert them into RGB24.
// Scaling/conversion context; it is NULL at this point, so the branch below creates it.
struct SwsContext * ctx = NULL;
// frame is AVFrame in YUV420 obtained from decoder. It has all three strides and seem to be valid.
if (ctx == NULL)
{
// Source and destination use the same dimensions; only the pixel format changes (to RGB24).
ctx = sws_getContext(frame->width, frame->height, frame->format, frame->width, frame->height,
AV_PIX_FMT_RGB24, SWS_BICUBIC, 0, 0, 0);
}
AVFrame* frame2 = av_frame_alloc();
// Buffer size for an RGB24 image with the decoder frame's dimensions (alignment argument 32).
int num_bytes = av_image_get_buffer_size(AV_PIX_FMT_RGB24, frame->width, frame->height, 32);
uint8_t* frame2_buffer = (uint8_t *)av_malloc(num_bytes * sizeof(uint8_t));
// Fills frame2->data and frame2->linesize so that they describe frame2_buffer.
// NOTE(review): nothing in this snippet assigns frame2->width, frame2->height or frame2->format,
// yet all three are read in the callbackFullRGB call below - presumably they still hold the
// values av_frame_alloc() left there; confirm.
int size = av_image_fill_arrays(frame2->data, frame2->linesize, frame2_buffer, AV_PIX_FMT_RGB24, frame->width, frame->height, 32);
// Converts the whole picture (slice starts at row 0 and spans frame->height rows).
int height_of_output = sws_scale(ctx, frame->data, frame->linesize, 0, frame->height, frame2->data, frame2->linesize);
callbackFullRGB(state, frameIndex, 0, frame2->data[0], num_bytes, (__int32)frame2->format, (__int32)frame2->width, (__int32)frame2->height);
// NOTE(review): frame2_buffer is not released anywhere in this snippet; presumably
// av_frame_free() does not free a buffer attached via av_image_fill_arrays - confirm.
av_frame_free(&frame2);
However frame2 has no resolution set, pixel format is -1 and data buffer is empty. I have 1280x720 input, stride length is set to 3840 for output frame which is correct. sws_scale also returns 720 as a result - no errors, no exceptions.
What might be wrong?
Related
I have to encode a series of frames from CAIRO_FORMAT_ARGB32 to AV_PIX_FMT_YUV420P with sws_scale. From the ffmpeg docs I came to know the AV equivalent of the source format is AV_PIX_FMT_ARGB so here is my code:
// Set up conversion context
// (Re)uses the cached context for an AV_PIX_FMT_ARGB -> AV_PIX_FMT_YUV420P conversion
// at img->video_size[0] x img->video_size[1], with no resizing.
// NOTE(review): CAIRO_FORMAT_ARGB32 is presumably stored as native-endian 32-bit words, which
// on little-endian hosts would correspond to AV_PIX_FMT_BGRA byte order - confirm.
img->sws_ctx = sws_getCachedContext(
img->sws_ctx,
img->video_size[0],
img->video_size[1],
AV_PIX_FMT_ARGB,
img->video_size[0],
img->video_size[1],
AV_PIX_FMT_YUV420P,
SWS_BILINEAR,
NULL,
NULL,
NULL);
// Query the cairo surface geometry and pixel pointer.
// NOTE(review): width and height are read here but the sws_scale call below uses
// img->video_size[1] as the slice height - confirm the surface matches img->video_size.
width = cairo_image_surface_get_width( surface );
height = cairo_image_surface_get_height( surface );
stride = cairo_image_surface_get_stride( surface );
pix = cairo_image_surface_get_data( surface );
// Packed input: a single plane, so a single stride entry taken from the surface.
const int in_linesize[1] = { stride };
sws_scale( img->sws_ctx, (const uint8_t * const *) &pix, in_linesize, 0,
img->video_size[1], img->video_frame->data, img->video_frame->linesize);
// Advance the presentation timestamp by one unit per converted frame.
img->video_frame->pts++;
Sadly the video doesn't play and VLC shows a bunch of these useless messages:
[h264 # 0x7f6ce0cbc1c0] mmco: unref short failure
[h264 # 0x7f6ce0c39a80] co located POCs unavailable
[h264 # 0x7f6ce0c82800] co located POCs unavailable
[h264 # 0x7f6ce0c9f400] mmco: unref short failure
The encoding process runs just fine. I also tried with const int in_linesize[1] = { 3 * width }; Where am I wrong?
The following answer shows how to use sws_scale for converting ARGB to YUV420p.
You will have to make some adaptations to integrate the conversion into your code.
The code sample is "stand alone" sws_scale example, that doesn't use CAIRO.
Create an ARGB input sample using FFmpeg (command line tool):
ffmpeg -y -f lavfi -i testsrc=size=192x108:rate=1 -vcodec rawvideo -pix_fmt argb -frames 1 -f rawvideo argb_image.bin
The following code sample applies the following stages:
Read ARGB (sample) input from binary file.
Allocate memory buffers for storing the YUV420p output.
Get SWS context.
Apply color conversion.
Write YUV420p output image to binary file (for testing).
Free allocated memory.
C++ code sample:
#include <stdio.h>
#include <string.h>
#include <stdint.h>
extern "C"
{
#include <libswscale/swscale.h>
#include <libavutil/imgutils.h>
}
//Write one image plane to a file row by row.
//Only plane_width bytes of every row are written, so any alignment padding between
//rows (linesize > plane_width) never ends up in the output file.
//Returns true when every row was written completely.
static bool write_plane(FILE* f, const uint8_t* plane, int linesize, int plane_width, int plane_height)
{
    for (int y = 0; y < plane_height; y++)
    {
        const uint8_t* row = plane + (size_t)y * (size_t)linesize;

        if (fwrite(row, 1, (size_t)plane_width, f) != (size_t)plane_width)
        {
            return false;
        }
    }

    return true;
}

//Stand-alone sws_scale example: reads a raw ARGB image from argb_image.bin, converts it
//to YUV420p and writes the three planes to yuv420p_image.bin.
//Returns 0 on success and -1 on any failure (missing/short input file, allocation,
//context creation, conversion or output errors).
int main()
{
    //Use FFmpeg for building raw ARGB image (used as input).
    //ffmpeg -y -f lavfi -i testsrc=size=192x108:rate=1 -vcodec rawvideo -pix_fmt argb -frames 1 -f rawvideo argb_image.bin
    const int width = 192;
    const int height = 108;
    const size_t argb_size = (size_t)width * height * 4; //4 bytes per pixel (applies ARGB)
    const enum AVPixelFormat out_pix_fmt = AV_PIX_FMT_YUV420P;

    //Everything that has to be released is declared up front, so the single cleanup
    //section at the end of the function covers every exit path.
    uint8_t* argb_in = new uint8_t[argb_size];
    int out_linesize[4] = {0, 0, 0, 0};
    uint8_t* out_planes[4] = { nullptr, nullptr, nullptr, nullptr };
    struct SwsContext* sws_context = nullptr;
    int exit_code = -1;

    do
    {
        //Read input image for binary file (for testing)
        ////////////////////////////////////////////////////////////////////////////
        FILE* f = fopen("argb_image.bin", "rb"); //For using fopen in Visual Studio, define: _CRT_SECURE_NO_WARNINGS (or use fopen_s).
        if (f == nullptr)
        {
            printf("Error: cannot open argb_image.bin\n");
            break;
        }

        const size_t bytes_read = fread(argb_in, 1, argb_size, f);
        fclose(f);

        if (bytes_read != argb_size)
        {
            printf("Error: argb_image.bin is too short (%zu of %zu bytes)\n", bytes_read, argb_size);
            break;
        }
        ////////////////////////////////////////////////////////////////////////////

        //Allocate output buffers:
        ////////////////////////////////////////////////////////////////////////////
        // YUV420p data is separated in three planes
        // 1. Y - intensity plane, resolution: width x height
        // 2. U - Color plane, resolution: width/2 x height/2
        // 3. V - Color plane, resolution: width/2 x height/2
        int sts = av_image_alloc(out_planes,    //uint8_t * pointers[4],
                                 out_linesize,  //int linesizes[4],
                                 width,         //int w,
                                 height,        //int h,
                                 out_pix_fmt,   //enum AVPixelFormat pix_fmt,
                                 32);           //int align);   //Align to 32 bytes address may result faster execution time compared to 1 byte aligenment.
        if (sts < 0)
        {
            printf("Error: av_image_alloc response = %d\n", sts);
            break;
        }
        ////////////////////////////////////////////////////////////////////////////

        //Get SWS context
        ////////////////////////////////////////////////////////////////////////////
        sws_context = sws_getCachedContext(sws_context,     //struct SwsContext *context,
                                           width,           //int srcW,
                                           height,          //int srcH,
                                           AV_PIX_FMT_ARGB, //enum AVPixelFormat srcFormat,
                                           width,           //int dstW,
                                           height,          //int dstH,
                                           out_pix_fmt,     //enum AVPixelFormat dstFormat,
                                           SWS_BILINEAR,    //int flags,
                                           nullptr,         //SwsFilter *srcFilter,
                                           nullptr,         //SwsFilter *dstFilter,
                                           nullptr);        //const double *param);
        if (sws_context == nullptr)
        {
            printf("Error: sws_getCachedContext returned nullptr\n");
            break;
        }
        ////////////////////////////////////////////////////////////////////////////

        //Apply color conversion
        ////////////////////////////////////////////////////////////////////////////
        const int in_linesize[1] = { 4 * width }; // ARGB stride (4 bytes per pixel - assume data is continuous).
        const uint8_t* in_planes[1] = { argb_in };
        int response = sws_scale(sws_context,   //struct SwsContext *c,
                                 in_planes,     //const uint8_t *const srcSlice[],
                                 in_linesize,   //const int srcStride[],
                                 0,             //int srcSliceY,
                                 height,        //int srcSliceH,
                                 out_planes,    //uint8_t *const dst[],
                                 out_linesize); //const int dstStride[]);
        if (response < 0)
        {
            printf("Error: sws_scale response = %d\n", response);
            break;
        }
        ////////////////////////////////////////////////////////////////////////////

        //Write YUV420p output image to binary file (for testing)
        //You may execute FFmpeg after conversion for testing the output:
        //ffmpeg -y -f rawvideo -s 192x108 -pixel_format yuv420p -i yuv420p_image.bin rgb.png
        ////////////////////////////////////////////////////////////////////////////
        f = fopen("yuv420p_image.bin", "wb");
        if (f == nullptr)
        {
            printf("Error: cannot open yuv420p_image.bin for writing\n");
            break;
        }

        //Chroma planes are subsampled by two in both directions (rounded up for odd sizes).
        const int chroma_width = (width + 1) / 2;
        const int chroma_height = (height + 1) / 2;

        bool written = write_plane(f, out_planes[0], out_linesize[0], width, height)
                    && write_plane(f, out_planes[1], out_linesize[1], chroma_width, chroma_height)
                    && write_plane(f, out_planes[2], out_linesize[2], chroma_width, chroma_height);

        //fclose flushes buffered data, so its result matters for an output file.
        if (fclose(f) != 0)
        {
            written = false;
        }

        if (!written)
        {
            printf("Error: failed writing yuv420p_image.bin\n");
            break;
        }
        ////////////////////////////////////////////////////////////////////////////

        exit_code = 0;
    } while (false);

    //Free allocated memory (each call below is safe when the resource was never created)
    ////////////////////////////////////////////////////////////////////////////
    av_freep(&out_planes[0]);
    sws_freeContext(sws_context);
    delete[] argb_in;
    ////////////////////////////////////////////////////////////////////////////

    return exit_code;
}
For testing the output, convert yuv420p_image.bin to PNG image using FFmpeg:
ffmpeg -y -f rawvideo -s 192x108 -pixel_format yuv420p -i yuv420p_image.bin rgb.png
rgb.png (result of FFmpeg conversion):
I'm making progress developing a '3d desktop' directx app that needs to display the current contents of a desktop window (e.g. "Calculator") as a 2D texture on a rectangular surface in directx (11). I'm sooo close but really struggling with the screenshot BMP -> Texture2D step. I do have screenshot->HBITMAP and DDSFile->rendered texture successfully working but can't complete the screenshot->rendered texture.
So far I have working the 'capture the window as a screenshot' bit:
// Capture the client area of the window titled "Calculator" into the bitmap hbmp.
RECT user_window_rectangle;
// NOTE(review): the result of FindWindow is not checked before it is used below.
HWND user_window = FindWindow(NULL, TEXT("Calculator"));
GetClientRect(user_window, &user_window_rectangle);
// Memory DC created from the screen DC; the window is rendered into it below.
HDC hdcScreen = GetDC(NULL);
HDC hdc = CreateCompatibleDC(hdcScreen);
// Bitmap size = size of the client rectangle.
UINT screenshot_width = user_window_rectangle.right - user_window_rectangle.left;
UINT screenshot_height = user_window_rectangle.bottom - user_window_rectangle.top;
hbmp = CreateCompatibleBitmap(hdcScreen, screenshot_width, screenshot_height);
SelectObject(hdc, hbmp);
// Ask the window to draw its client area into the memory DC (and so into hbmp).
PrintWindow(user_window, hdc, PW_CLIENTONLY);
At this point I have the window bitmap referenced by HBITMAP hbmp.
Also working is my code to render a DDS file as a texture on a directx/3d rectangle:
// Device and immediate context; they are initialized in code elided from this excerpt.
ID3D11Device *dev;
ID3D11DeviceContext *dev_context;
...
// Bind the texture view and the sampler to pixel-shader slot 0.
dev_context->PSSetShaderResources(0, 1, &shader_resource_view);
dev_context->PSSetSamplers(0, 1, &tex_sampler_state);
...
// Load Earth.dds into a ScratchImage, then create the shader resource view from its images.
DirectX::TexMetadata tex_metadata;
DirectX::ScratchImage image;
hr = LoadFromDDSFile(L"Earth.dds", DirectX::DDS_FLAGS_NONE, &tex_metadata, image);
hr = CreateShaderResourceView(dev, image.GetImages(), image.GetImageCount(), tex_metadata, &shader_resource_view);
Pixel shader is:
// Texture and sampler sampled by the pixel shader below.
Texture2D ObjTexture;
SamplerState ObjSamplerState;

// Pixel shader: returns the texel of ObjTexture at the interpolated texture coordinate.
// The pos and color inputs are accepted but not used.
float4 PShader(float4 pos : SV_POSITION, float4 color : COLOR, float2 tex : TEXCOORD) : SV_TARGET
{
    return ObjTexture.Sample( ObjSamplerState, tex );
}
The samplerstate (defaulting to linear) is:
// Build the sampler used by the pixel shader: wrap addressing on all three axes, full LOD range.
D3D11_SAMPLER_DESC sampler_desc;
// Every field that is not assigned below (including Filter) stays zero after this call.
// NOTE(review): confirm that a zero Filter value selects the filtering mode that is intended.
ZeroMemory(&sampler_desc, sizeof(sampler_desc));
sampler_desc.AddressU = D3D11_TEXTURE_ADDRESS_WRAP;
sampler_desc.AddressV = D3D11_TEXTURE_ADDRESS_WRAP;
sampler_desc.AddressW = D3D11_TEXTURE_ADDRESS_WRAP;
sampler_desc.MinLOD = 0;
sampler_desc.MaxLOD = D3D11_FLOAT32_MAX;
hr = dev->CreateSamplerState(&sampler_desc, &tex_sampler_state);
Question: how do I replace the LoadFromDDSFile bit with some equivalent that takes the HBITMAP from the windows screencapture and ends up with it on the graphics card as ObjTexture ?
Below is my best shot of bridging from the screenshot HBITMAP hbmp to the shader resource screenshot_texture, but it gives a memory access violation from the graphics driver (I think due to my "data.pSysmem = &bmp.bmBits", but no idea really):
// Attempt to upload the captured bitmap as a texture (this is the version that crashes).
// Reads the BITMAP description of hbmp into bmp.
GetObject(hbmp, sizeof(BITMAP), (LPSTR)&bmp)
// Single-mip, single-slice RGBA8 texture with the bitmap's dimensions.
D3D11_TEXTURE2D_DESC screenshot_desc = CD3D11_TEXTURE2D_DESC(DXGI_FORMAT_R8G8B8A8_UNORM, bmp.bmWidth, bmp.bmHeight, 1,
1,
D3D11_BIND_SHADER_RESOURCE
);
// Hard-coded assumption: 4 bytes per pixel; bmp.bmBitsPixel is not consulted.
int bytes_per_pixel = 4;
D3D11_SUBRESOURCE_DATA data;
ZeroMemory(&data, sizeof(D3D11_SUBRESOURCE_DATA));
// NOTE(review): this passes the address of the bmBits field itself, not the value stored in it.
data.pSysMem = &bmp.bmBits; //pixel buffer
// Pitch is derived from bmWidth rather than from bmp.bmWidthBytes.
data.SysMemPitch = bytes_per_pixel * bmp.bmWidth;// line size in byte
data.SysMemSlicePitch = bytes_per_pixel * bmp.bmWidth * bmp.bmHeight;// total buffer size in byte
hr = dev->CreateTexture2D(
&screenshot_desc, //texture format
&data, // pixel buffer use to fill the texture
&screenshot_texture // created texture
);
:::::::::::::::::::::::::SOLUTION::::::::::::::::::::::::::::::::::::::::::
The main issue was trying to use &bmp.bmBits directly as a pixel buffer caused memory conflicts within the graphics driver - this was resolved by using 'malloc' to allocate an appropriately sized block of memory to store the pixel data. Thanks to Chuck Walbourn for helping with my poking around in the dark to work out how the pixel data is actually stored (it was actually 32 bits/pixel by default). It's still possible/likely some of code is relying on luck to read the pixel data correctly, but it's been improved with Chuck's input.
My basic technique was;
FindWindow to get the client window on the desktop
CreateCompatibleBitmap and SelectObject and PrintWindow to get a HBITMAP to the snapshot
malloc to allocate the correct amount of space for a (byte*)pixel buffer
GetDIBits to populate the (byte*)pixel buffer from the HBITMAP
CreateTexture2D to build the texture buffer
CreateShaderResourceView to map the texture to the graphics pixel shader
So working code to screenshot a windows desktop window and pass that as a texture to a direct3d app is:
RECT user_window_rectangle;
HWND user_window = FindWindow(NULL, TEXT("Calculator")); //the window can't be min
if (user_window == NULL)
{
MessageBoxA(NULL, "Can't find Calculator", "Camvas", MB_OK);
return;
}
GetClientRect(user_window, &user_window_rectangle);
//create
HDC hdcScreen = GetDC(NULL);
HDC hdc = CreateCompatibleDC(hdcScreen);
UINT screenshot_width = user_window_rectangle.right - user_window_rectangle.left;
UINT screenshot_height = user_window_rectangle.bottom - user_window_rectangle.top;
hbmp = CreateCompatibleBitmap(hdcScreen, screenshot_width, screenshot_height);
SelectObject(hdc, hbmp);
//Print to memory hdc
PrintWindow(user_window, hdc, PW_CLIENTONLY);
BITMAPINFOHEADER bmih;
ZeroMemory(&bmih, sizeof(BITMAPINFOHEADER));
bmih.biSize = sizeof(BITMAPINFOHEADER);
bmih.biPlanes = 1;
bmih.biBitCount = 32;
bmih.biWidth = screenshot_width;
bmih.biHeight = 0-screenshot_height;
bmih.biCompression = BI_RGB;
bmih.biSizeImage = 0;
int bytes_per_pixel = bmih.biBitCount / 8;
BYTE *pixels = (BYTE*)malloc(bytes_per_pixel * screenshot_width * screenshot_height);
BITMAPINFO bmi = { 0 };
bmi.bmiHeader = bmih;
int row_count = GetDIBits(hdc, hbmp, 0, screenshot_height, pixels, &bmi, DIB_RGB_COLORS);
D3D11_TEXTURE2D_DESC screenshot_desc = CD3D11_TEXTURE2D_DESC(
DXGI_FORMAT_B8G8R8A8_UNORM, // format
screenshot_width, // width
screenshot_height, // height
1, // arraySize
1, // mipLevels
D3D11_BIND_SHADER_RESOURCE, // bindFlags
D3D11_USAGE_DYNAMIC, // usage
D3D11_CPU_ACCESS_WRITE, // cpuaccessFlags
1, // sampleCount
0, // sampleQuality
0 // miscFlags
);
D3D11_SUBRESOURCE_DATA data;
ZeroMemory(&data, sizeof(D3D11_SUBRESOURCE_DATA));
data.pSysMem = pixels; // texArray; // &bmp.bmBits; //pixel buffer
data.SysMemPitch = bytes_per_pixel * screenshot_width;// line size in byte
data.SysMemSlicePitch = bytes_per_pixel * screenshot_width * screenshot_height;
hr = dev->CreateTexture2D(
&screenshot_desc, //texture format
&data, // pixel buffer use to fill the texture
&screenshot_texture // created texture
);
D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc;
srvDesc.Format = screenshot_desc.Format;
srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
srvDesc.Texture2D.MostDetailedMip = 0;
srvDesc.Texture2D.MostDetailedMip = screenshot_desc.MipLevels;
dev->CreateShaderResourceView(screenshot_texture, NULL, &shader_resource_view);
You are making a lot of assumptions here that the BITMAP returned is actually in 32-bit RGBA form. It is likely not at all in that format, and in any case you need to validate the contents of bmPlanes to be 1 and bmBitsPixel to be 32 if you are assuming it is 4-bytes per pixel. You should read more about the BMP format.
BMPs uses BGRA order, so you can use DXGI_FORMAT_B8G8R8A8_UNORM for the case of bmBitsPixel being 32.
Secondly, you need to derive pitch from bmWidthBytes and not bmWidth.
// bmBits is itself the pointer to the pixel data, so it is passed as-is rather than by address.
// NOTE(review): GetObject presumably only fills bmBits for bitmaps created with CreateDIBSection;
// for other bitmaps it may be NULL and the pixels have to be read another way (e.g. GetDIBits) - confirm.
data.pSysMem = bmp.bmBits; //pixel buffer
data.SysMemPitch = bmp.bmWidthBytes;// line size in byte
data.SysMemSlicePitch = bmp.bmWidthBytes * bmp.bmHeight;// total buffer size in byte
If bmBitsPixel is 24, there is no DXGI format equivalent to that. You have to copy the data to a 32-bit format such as DXGI_FORMAT_B8G8R8X8_UNORM.
If bmBitsPixel is 15 or 16, you can use DXGI_FORMAT_B5G5R5A1_UNORM on a system with Direct3D 11.1, but remember that 16-bit DXGI formats are not always supported depending on the driver. Otherwise you'll have to convert this data to something else.
For bmBitsPixel values of 1, 2, 4, or 8 you have to convert them as there are no DXGI texture formats that are equivalent.
The main issue was trying to use &bmp.bmBits directly as a pixel buffer caused memory conflicts within the graphics driver - this was resolved by using 'malloc' to allocate an appropriately sized block of memory to store the pixel data. Thanks to Chuck Walbourn for helping with my poking around in the dark to work out how the pixel data is actually stored (it was actually 32 bits/pixel by default). It's still possible/likely some of code is relying on luck to read the pixel data correctly, but it's been improved with Chuck's input.
My basic technique was;
FindWindow to get the client window on the desktop
CreateCompatibleBitmap and SelectObject and PrintWindow to get a HBITMAP to the snapshot
malloc to allocate the correct amount of space for a (byte*)pixel buffer
GetDIBits to populate the (byte*)pixel buffer from the HBITMAP
CreateTexture2D to build the texture buffer
CreateShaderResourceView to map the texture to the graphics pixel shader
So working code to screenshot a windows desktop window and pass that as a texture to a direct3d app is:
RECT user_window_rectangle;
HWND user_window = FindWindow(NULL, TEXT("Calculator")); //the window can't be min
if (user_window == NULL)
{
MessageBoxA(NULL, "Can't find Calculator", "Camvas", MB_OK);
return;
}
GetClientRect(user_window, &user_window_rectangle);
//create
HDC hdcScreen = GetDC(NULL);
HDC hdc = CreateCompatibleDC(hdcScreen);
UINT screenshot_width = user_window_rectangle.right - user_window_rectangle.left;
UINT screenshot_height = user_window_rectangle.bottom - user_window_rectangle.top;
hbmp = CreateCompatibleBitmap(hdcScreen, screenshot_width, screenshot_height);
SelectObject(hdc, hbmp);
//Print to memory hdc
PrintWindow(user_window, hdc, PW_CLIENTONLY);
BITMAPINFOHEADER bmih;
ZeroMemory(&bmih, sizeof(BITMAPINFOHEADER));
bmih.biSize = sizeof(BITMAPINFOHEADER);
bmih.biPlanes = 1;
bmih.biBitCount = 32;
bmih.biWidth = screenshot_width;
bmih.biHeight = 0-screenshot_height;
bmih.biCompression = BI_RGB;
bmih.biSizeImage = 0;
int bytes_per_pixel = bmih.biBitCount / 8;
BYTE *pixels = (BYTE*)malloc(bytes_per_pixel * screenshot_width * screenshot_height);
BITMAPINFO bmi = { 0 };
bmi.bmiHeader = bmih;
int row_count = GetDIBits(hdc, hbmp, 0, screenshot_height, pixels, &bmi, DIB_RGB_COLORS);
D3D11_TEXTURE2D_DESC screenshot_desc = CD3D11_TEXTURE2D_DESC(
DXGI_FORMAT_B8G8R8A8_UNORM, // format
screenshot_width, // width
screenshot_height, // height
1, // arraySize
1, // mipLevels
D3D11_BIND_SHADER_RESOURCE, // bindFlags
D3D11_USAGE_DYNAMIC, // usage
D3D11_CPU_ACCESS_WRITE, // cpuaccessFlags
1, // sampleCount
0, // sampleQuality
0 // miscFlags
);
D3D11_SUBRESOURCE_DATA data;
ZeroMemory(&data, sizeof(D3D11_SUBRESOURCE_DATA));
data.pSysMem = pixels; // texArray; // &bmp.bmBits; //pixel buffer
data.SysMemPitch = bytes_per_pixel * screenshot_width;// line size in byte
data.SysMemSlicePitch = bytes_per_pixel * screenshot_width * screenshot_height;
hr = dev->CreateTexture2D(
&screenshot_desc, //texture format
&data, // pixel buffer use to fill the texture
&screenshot_texture // created texture
);
D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc;
srvDesc.Format = screenshot_desc.Format;
srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
srvDesc.Texture2D.MostDetailedMip = 0;
srvDesc.Texture2D.MostDetailedMip = screenshot_desc.MipLevels;
dev->CreateShaderResourceView(screenshot_texture, NULL, &shader_resource_view);
I am trying to convert RGB frames to YUV420P format in ffmpeg/libav. Following is the code for conversion and also the images before and after conversion. The converted image loses all color information and also the scale changes significantly. Does anybody have idea how to handle this? I am completely new to ffmpeg/libav!
// Did we get a video frame?
// Runs once per decoded frame: converts the decoded picture to pFrameRGB, then runs a
// second conversion into a freshly allocated frame and saves the result.
if(frameFinished)
{
i++;
// First conversion: decoded frame -> pFrameRGB, using the previously created img_convert_ctx.
sws_scale(img_convert_ctx, (const uint8_t * const *)pFrame->data,
pFrame->linesize, 0, pCodecCtx->height,
pFrameRGB->data, pFrameRGB->linesize);
//==============================================================
AVFrame *pFrameYUV = avcodec_alloc_frame();
// Determine required buffer size and allocate buffer
// NOTE(review): despite the pFrameYUV name, the buffer is sized and laid out for PIX_FMT_RGB24.
int numBytes2 = avpicture_get_size(PIX_FMT_RGB24, pCodecCtx->width,
pCodecCtx->height);
uint8_t *buffer = (uint8_t *)av_malloc(numBytes2*sizeof(uint8_t));
avpicture_fill((AVPicture *)pFrameYUV, buffer, PIX_FMT_RGB24,
pCodecCtx->width, pCodecCtx->height);
// NOTE(review): source and destination formats are both PIX_FMT_RGB24 here, so this context
// does not convert to YUV. It is also created and destroyed again for every frame.
rgb_to_yuv_ctx = sws_getContext(pCodecCtx->width, pCodecCtx->height,
PIX_FMT_RGB24,
pCodecCtx->width,pCodecCtx->height,
PIX_FMT_RGB24,
SWS_BICUBIC, NULL,NULL,NULL);
// Second conversion: pFrameRGB -> pFrameYUV through the context created just above.
sws_scale(rgb_to_yuv_ctx, pFrameRGB->data, pFrameRGB->linesize, 0,
pCodecCtx->height, pFrameYUV->data, pFrameYUV->linesize);
sws_freeContext(rgb_to_yuv_ctx);
SaveFrame(pFrameYUV, pCodecCtx->width, pCodecCtx->height, i);
// Release the per-frame pixel buffer and the frame structure.
av_free(buffer);
av_free(pFrameYUV);
}
Well for starters I will assume where you have:
// As posted: the source format and the destination format are both PIX_FMT_RGB24.
rgb_to_yuv_ctx = sws_getContext(pCodecCtx->width, pCodecCtx->height,
PIX_FMT_RGB24,
pCodecCtx->width,pCodecCtx->height,
PIX_FMT_RGB24,
SWS_BICUBIC, NULL,NULL,NULL);
You really intended:
// Intended: same dimensions, source PIX_FMT_RGB24, destination PIX_FMT_YUV420P.
rgb_to_yuv_ctx = sws_getContext(pCodecCtx->width, pCodecCtx->height,
PIX_FMT_RGB24,
pCodecCtx->width,pCodecCtx->height,
PIX_FMT_YUV420P,
SWS_BICUBIC, NULL,NULL,NULL);
I'm also not sure why you are calling swscale twice!
YUV420P is a planar format. This means all three channels are stored independently, whereas RGB24 is stored interleaved, like:
RGBRGBRGB
YUV420P is stored like:
YYYYYYYYYYYYYYYY..UUUUUUUUUU..VVVVVVVV
So swscale requires you to give it three pointers (one per plane).
Next, You want your line stride to be a multiple of 16, or 32 so the vector units of the processor can be used. And finally the dimensions of the Y plane need to be divisible by two (because the U and V planes are a quarter size of the Y plane).
So, lets rewrite this:
/* Round X down to the nearest multiple of 2 (clears the lowest bit). */
#define RNDTO2(X) ( (X) & 0xFFFFFFFE )
/* Round X up to the nearest multiple of 32; multiples of 32 are returned unchanged.
 * X is evaluated only once. */
#define RNDTO32(X) ( ( (X) + 31 ) & 0xFFFFFFE0 )
if(frameFinished)
{
    /* Kept across calls so sws_getCachedContext can reuse the context from one frame to the next. */
    static struct SwsContext *swsCtx = NULL;

    /* Output dimensions are rounded down to even values: the U and V planes
     * are half the width and half the height of the Y plane. */
    int width = RNDTO2 ( pCodecCtx->width );
    int height = RNDTO2 ( pCodecCtx->height );

    /* Row strides are rounded up to a multiple of 32 bytes. */
    int ystride = RNDTO32 ( width );
    int uvstride = RNDTO32 ( width / 2 );

    int ysize = ystride * height;
    int vusize = uvstride * ( height / 2 );
    int size = ysize + ( 2 * vusize );

    /* One allocation holds the three planes back to back: Y, then U, then V. */
    uint8_t *pFrameYUV = (uint8_t *) malloc( size );

    if ( pFrameYUV != NULL )
    {
        uint8_t *plane[] = { pFrameYUV, pFrameYUV + ysize, pFrameYUV + ysize + vusize, NULL };
        int stride[] = { ystride, uvstride, uvstride, 0 };

        swsCtx = sws_getCachedContext ( swsCtx, pCodecCtx->width, pCodecCtx->height,
                                        pCodecCtx->pix_fmt, width, height, AV_PIX_FMT_YUV420P,
                                        SWS_LANCZOS | SWS_ACCURATE_RND , NULL, NULL, NULL );

        if ( swsCtx != NULL )
        {
            /* The context's source side is described by pCodecCtx (size and pixel format),
             * so the decoded frame pFrame is what has to be fed in here. */
            sws_scale ( swsCtx, (const uint8_t * const *) pFrame->data, pFrame->linesize, 0,
                        pCodecCtx->height, plane, stride );

            /* ... consume plane[] / stride[] here (encode, save, ...) ... */
        }

        /* The buffer is local to this block, so it has to be released before leaving it. */
        free( pFrameYUV );
    }
}
I also switched your algorithm to use SWS_LANCZOS | SWS_ACCURATE_RND. This will give you better-looking images. Change it back if it is too slow. I also used the pixel format from the source frame instead of assuming it is RGB all the time.
I'm trying to dump a YUV420 data into the AVFrame structure of FFMPEG. From the below link:
http://ffmpeg.org/doxygen/trunk/structAVFrame.html, i can derive that i need to put my data into
data[AV_NUM_DATA_POINTERS]
using
linesize [AV_NUM_DATA_POINTERS].
The YUV data I'm trying to dump is YUV420 and the picture size is 416x240. So how do I dump/map this YUV data into the AVFrame structure's fields? I know that linesize represents the stride, i.e. (I suppose) the width of my picture. I have tried some combinations but do not get the expected output. I kindly request you to help me map the buffer. Thanks in advance.
AVFrame can be interpreted as an AVPicture to fill the data and linesize fields. The easiest way to fill these fields is to use the avpicture_fill function.
To fill in the AVFrame's Y U and V buffers, it depends on your input data and what you want to do with the frame (do you want to write into the AVFrame and erase the initial data? or keep a copy).
If the buffer is large enough (at least linesize[0] * height for Y data, linesize[1 or 2] * height/2 for U/V data), you can directly use input buffers:
// Variant 1: wrap existing Y/U/V buffers in an AVFrame without copying them.
// NOTE(review): avcodec_alloc_frame / avpicture_fill are presumably deprecated in current
// FFmpeg releases - confirm against the version in use.
// Initialize the AVFrame
AVFrame* frame = avcodec_alloc_frame();
frame->width = width;
frame->height = height;
frame->format = AV_PIX_FMT_YUV420P;
// Initialize frame->linesize
// (a NULL buffer is passed, so the data pointers are assigned separately below)
avpicture_fill((AVPicture*)frame, NULL, frame->format, frame->width, frame->height);
// Set frame->data pointers manually
// NOTE(review): the frame only borrows these buffers; presumably they must stay valid for as
// long as the frame is in use - confirm.
frame->data[0] = inputBufferY;
frame->data[1] = inputBufferU;
frame->data[2] = inputBufferV;
// Or if your Y, U, V buffers are contiguous and have the correct size, simply use:
// avpicture_fill((AVPicture*)frame, inputBufferYUV, frame->format, frame->width, frame->height);
If you want/need to manipulate a copy of input data, you need to compute the needed buffer size, and copy input data in it.
// Variant 2: allocate a buffer owned by this code and copy the input planes into it.
// Initialize the AVFrame
AVFrame* frame = avcodec_alloc_frame();
frame->width = width;
frame->height = height;
frame->format = AV_PIX_FMT_YUV420P;
// Allocate a buffer large enough for all data
int size = avpicture_get_size(frame->format, frame->width, frame->height);
uint8_t* buffer = (uint8_t*)av_malloc(size);
// Initialize frame->linesize and frame->data pointers
avpicture_fill((AVPicture*)frame, buffer, frame->format, frame->width, frame->height);
// Copy data from the 3 input buffers
// NOTE(review): each plane is copied in a single memcpy sized from frame->linesize, which
// assumes the input buffers use the same row stride as the frame - confirm.
memcpy(frame->data[0], inputBufferY, frame->linesize[0] * frame->height);
// Chroma planes are copied with half the luma row count (height / 2).
memcpy(frame->data[1], inputBufferU, frame->linesize[1] * frame->height / 2);
memcpy(frame->data[2], inputBufferV, frame->linesize[2] * frame->height / 2);
Once you are done with the AVFrame, do not forget to free it with av_frame_free (and any buffer allocated by av_malloc).
/**
 * Compute the size of one image plane as row count times bytes per row.
 *
 * @param fmt      pixel format, looked up with av_pix_fmt_desc_get()
 * @param plane    plane index; planes 1 and 2 use the chroma-subsampled row count
 * @param scanLine value multiplied by the row count (the plane's row size)
 * @param height   picture height in rows
 * @return rows * scanLine, or AVERROR(EINVAL) when no descriptor exists for fmt
 */
FF_API int ff_get_format_plane_size(int fmt, int plane, int scanLine, int height)
{
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
    int rows = height;

    if (!desc)
    {
        return AVERROR(EINVAL);
    }

    /* Planes 1 and 2 have their row count shifted right by log2_chroma_h, rounding up. */
    if (plane == 1 || plane == 2)
    {
        rows = FF_CEIL_RSHIFT(height, desc->log2_chroma_h);
    }

    return rows * scanLine;
}
How would you draw text on a IMFMediaBuffer object, and write it out to another IMFMediaBuffer object?
The context is that I'm building an MFT, and initially I tried using Direct2D and Direct3D11 to achieve this, but to no avail.
I was able to accomplish this with Windows GDI calls. The MFT I created has RGB32 input/output types, which allows me to copy to/from a Bitmap object. I copy the part of the frame I want to overlay with text into a Windows bitmap I create, draw the text, then copy back to the IMFMediaBuffer. Here's my code:
// Stores the HRESULT of `val` in the local `hr` and leaves the enclosing loop on failure.
#define BREAK_ON_FAIL(val) { if ( FAILED(hr = (val)) ) { break; } }

// Draws the text "Hello World!" onto a 400x32 region of the first buffer of pSample.
// The region is copied into a GDI DIB section, the text is drawn there, and the
// result is copied back into the media buffer.
//
// pSample: sample whose buffer 0 holds an RGB32 frame described by m_pInputType.
// Returns S_OK on success, E_INVALIDARG when the frame is too small to contain the
// overlay region, E_FAIL when the GDI objects cannot be created, or the failing
// HRESULT of the Media Foundation call that went wrong.
HRESULT AddOverlay(IMFSample* pSample)
{
    HRESULT hr = S_OK;
    // Initialized to NULL so the cleanup code after the loop is safe on every early exit.
    IMFMediaBuffer * pBuffer = NULL;
    HDC hDC = NULL, hMemDC = NULL;
    HBITMAP hBitmap = NULL, hOldBitmap = NULL;

    do
    {
        BYTE * pBufferData = NULL;
        UINT32 nWidth = 0, nHeight = 0;
        BITMAPV5HEADER bi;
        BYTE * pBitmapData = NULL;
        UINT32 nXOffset, nYOffset;
        RECT OverlayRect;

        OverlayRect.left = 0;
        OverlayRect.top = 0;
        OverlayRect.right = 400;
        OverlayRect.bottom = 32;

        DWORD nOverlayWidth = OverlayRect.right - OverlayRect.left;
        DWORD nOverlayHeight = OverlayRect.bottom - OverlayRect.top;
        LONG lFrameStride = 0;

        // Get the frame dimensions and stride
        BREAK_ON_FAIL( MFGetAttributeSize(m_pInputType, MF_MT_FRAME_SIZE, &nWidth, &nHeight) );
        BREAK_ON_FAIL( m_pInputType->GetUINT32(MF_MT_DEFAULT_STRIDE, (UINT32*)&lFrameStride) );

        // Setup offset for the overlay area into the video frame
        nYOffset = nOverlayHeight-1;

        // The offsets are unsigned: refuse frames that cannot contain the overlay region
        // instead of letting the subtraction below wrap around.
        if (nWidth < nOverlayWidth || nHeight < nYOffset + nOverlayHeight)
        {
            hr = E_INVALIDARG;
            break;
        }
        nXOffset = (nWidth - nOverlayWidth) / 2;

        // Set up the bitmap header
        ZeroMemory(&bi, sizeof(BITMAPV5HEADER));
        bi.bV5Size = sizeof(BITMAPV5HEADER);
        bi.bV5Width = nOverlayWidth;
        // The sign of the DIB height is chosen from the sign of the frame stride.
        // NOTE(review): in a DIB header a positive height means bottom-up and a negative
        // height top-down; confirm this mapping matches the frame's row order.
        bi.bV5Height = (lFrameStride > 0) ? nOverlayHeight : -(LONG)nOverlayHeight;
        bi.bV5Planes = 1;
        bi.bV5BitCount = 32;
        bi.bV5Compression = BI_RGB;
        // The following mask specification specifies a supported 32 BPP
        // alpha format for Windows XP.
        bi.bV5RedMask = 0x00FF0000;
        bi.bV5GreenMask = 0x0000FF00;
        bi.bV5BlueMask = 0x000000FF;
        bi.bV5AlphaMask = 0xFF000000;

        // Create a DIB section with an alpha channel, along with
        // a memory device context
        hDC = GetDC(NULL);
        hBitmap = CreateDIBSection(hDC, (BITMAPINFO*)&bi, DIB_RGB_COLORS, (void**)&pBitmapData, NULL, 0);
        hMemDC = CreateCompatibleDC(hDC);
        ReleaseDC(NULL, hDC);

        if (hBitmap == NULL || hMemDC == NULL || pBitmapData == NULL)
        {
            hr = E_FAIL;
            break;
        }

        // Lock the media buffer for our use
        BREAK_ON_FAIL( pSample->GetBufferByIndex(0, &pBuffer) );
        BREAK_ON_FAIL( pBuffer->Lock(&pBufferData, NULL, NULL) );

        // First byte of the overlay region inside the frame.
        BYTE * pOverlayInFrame = pBufferData + nYOffset*abs(lFrameStride) + nXOffset*sizeof(RGBQUAD);

        // Copy the video frame to the bitmap (to support transparency)
        MFCopyImage(pBitmapData, nOverlayWidth*sizeof(RGBQUAD),
                    pOverlayInFrame, lFrameStride,
                    nOverlayWidth*sizeof(RGBQUAD), nOverlayHeight);

        // Draw on the bitmap
        hOldBitmap = (HBITMAP)SelectObject(hMemDC, hBitmap);
        //FillRect(hMemDC, &OverlayRect, WHITE_BRUSH);
        SetTextColor(hMemDC, RGB(255,0,0));
        SetBkMode(hMemDC, TRANSPARENT);
        DrawText(hMemDC, _T("Hello World!"), 12, &OverlayRect, DT_CENTER);
        SelectObject(hMemDC, hOldBitmap);

        // Copy the bitmap to the buffer
        MFCopyImage(pOverlayInFrame, lFrameStride,
                    pBitmapData, nOverlayWidth*sizeof(RGBQUAD),
                    nOverlayWidth*sizeof(RGBQUAD), nOverlayHeight);

        BREAK_ON_FAIL( pBuffer->Unlock() );
    } while(false);

    // Release whatever was acquired before the loop was left.
    if (hMemDC != NULL)
    {
        DeleteDC(hMemDC);
    }
    if (hBitmap != NULL)
    {
        DeleteObject(hBitmap);
    }
    SafeRelease(&pBuffer);

    return hr;
}
References:
http://support.microsoft.com/kb/318876