I am attempting to write some code that connects the Windows::Graphics::Capture API to IMFSinkWriter in order to capture the desktop to an MP4 file. I find that the IMFSinkWriter WriteSample function always returns 0x80070057 and I'm trying to understand why. I suspect there is a somewhat obvious mistake as I am not extremely familiar with COM, WinRT, DirectX, etc. Any ideas?
#include <iostream>
#include <Windows.h>
// XXX workaround bug in platform headers where this has a circular declaration
#include "winrt/base.h"
namespace winrt::impl
{
template <typename Async>
auto wait_for(Async const& async, Windows::Foundation::TimeSpan const& timeout);
}
// XXX
#include <dxgi.h>
#include <inspectable.h>
#include <dxgi1_2.h>
#include <d3d11.h>
#include <mfapi.h>
#include <mfidl.h>
#include <mfreadwrite.h>
#include <codecapi.h>
#include <strmif.h>
#include <winrt/Windows.Foundation.h>
#include <winrt/Windows.System.h>
#include <winrt/Windows.Graphics.Capture.h>
#include <windows.graphics.capture.interop.h>
#include <windows.graphics.directx.direct3d11.interop.h>
#pragma comment(lib, "Mfuuid.lib")
#pragma comment(lib, "Mfplat.lib")
#pragma comment(lib, "mfreadwrite.lib")
#pragma comment(lib, "Mf.lib")
winrt::com_ptr<IMFSinkWriter> sinkWriter;
std::chrono::steady_clock::time_point firstFrameTime;
std::chrono::steady_clock::time_point lastFrameTime;
bool recordedFirstFrame = false;
void OnFrameArrived(winrt::Windows::Graphics::Capture::Direct3D11CaptureFramePool const& sender, winrt::Windows::Foundation::IInspectable const &) {
winrt::Windows::Graphics::Capture::Direct3D11CaptureFrame frame = sender.TryGetNextFrame();
std::chrono::steady_clock::time_point frameTime = std::chrono::steady_clock::now();
LONGLONG duration = 0;
LONGLONG frametime100ns;
if (!recordedFirstFrame) {
recordedFirstFrame = true;
firstFrameTime = frameTime;
frametime100ns = 0;
}
else {
frametime100ns = std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::steady_clock::now() - firstFrameTime).count() / 100;
duration = std::chrono::duration_cast<std::chrono::milliseconds>(frameTime - lastFrameTime).count();
}
auto surface = frame.Surface();
auto access = surface.as<Windows::Graphics::DirectX::Direct3D11::IDirect3DDxgiInterfaceAccess>();
winrt::com_ptr<ID3D11Texture2D> texture;
winrt::check_hresult(access->GetInterface(winrt::guid_of<ID3D11Texture2D>(), texture.put_void()));
IMFMediaBuffer* buffer;
MFCreateDXGISurfaceBuffer(__uuidof(ID3D11Texture2D), texture.get(), 0, FALSE, &buffer);
IMFSample *sample;
winrt::check_hresult(MFCreateSample(&sample));
HRESULT hr = sample->AddBuffer(buffer);
printf("add buffer! %x\n", hr);
hr = sample->SetSampleTime(frametime100ns);
printf("set sample time (%lld) %d\n", frametime100ns, hr);
hr = sample->SetSampleDuration(duration);
printf("set sample duration (%lld) %d\n", duration, hr);
hr = sinkWriter->WriteSample(0 /* video stream index */, sample);
printf("wrote sample %x\n", hr);
lastFrameTime = frameTime;
}
int main()
{
winrt::init_apartment(winrt::apartment_type::multi_threaded);
winrt::check_hresult(MFStartup(MF_VERSION, MFSTARTUP_NOSOCKET));
// get a list of monitor handles
std::vector<HMONITOR> monitors;
EnumDisplayMonitors(
nullptr, nullptr,
[](HMONITOR hmon, HDC, LPRECT, LPARAM lparam) {
auto& monitors = *reinterpret_cast<std::vector<HMONITOR>*>(lparam);
monitors.push_back(hmon);
return TRUE;
},
reinterpret_cast<LPARAM>(&monitors)
);
//get GraphicsCaptureItem for first monitor
auto interop_factory = winrt::get_activation_factory<winrt::Windows::Graphics::Capture::GraphicsCaptureItem, IGraphicsCaptureItemInterop>();
winrt::Windows::Graphics::Capture::GraphicsCaptureItem item = { nullptr };
winrt::check_hresult(
interop_factory->CreateForMonitor(
monitors[0],
winrt::guid_of<ABI::Windows::Graphics::Capture::IGraphicsCaptureItem>(),
winrt::put_abi(item)
)
);
// Create Direct 3D Device
winrt::com_ptr<ID3D11Device> d3dDevice;
winrt::check_hresult(D3D11CreateDevice(nullptr, D3D_DRIVER_TYPE_HARDWARE, nullptr, D3D11_CREATE_DEVICE_BGRA_SUPPORT,
nullptr, 0, D3D11_SDK_VERSION, d3dDevice.put(), nullptr, nullptr));
winrt::Windows::Graphics::DirectX::Direct3D11::IDirect3DDevice device;
const auto dxgiDevice = d3dDevice.as<IDXGIDevice>();
{
winrt::com_ptr<::IInspectable> inspectable;
winrt::check_hresult(CreateDirect3D11DeviceFromDXGIDevice(dxgiDevice.get(), inspectable.put()));
device = inspectable.as<winrt::Windows::Graphics::DirectX::Direct3D11::IDirect3DDevice>();
}
auto idxgiDevice2 = dxgiDevice.as<IDXGIDevice2>();
winrt::com_ptr<IDXGIAdapter> adapter;
winrt::check_hresult(idxgiDevice2->GetParent(winrt::guid_of<IDXGIAdapter>(), adapter.put_void()));
winrt::com_ptr<IDXGIFactory2> factory;
winrt::check_hresult(adapter->GetParent(winrt::guid_of<IDXGIFactory2>(), factory.put_void()));
ID3D11DeviceContext* d3dContext = nullptr;
d3dDevice->GetImmediateContext(&d3dContext);
// setup swap chain
DXGI_SWAP_CHAIN_DESC1 desc = {};
desc.Width = static_cast<uint32_t>(item.Size().Width);
desc.Height = static_cast<uint32_t>(item.Size().Height);
desc.Format = static_cast<DXGI_FORMAT>(winrt::Windows::Graphics::DirectX::DirectXPixelFormat::R16G16B16A16Float);
desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
desc.SampleDesc.Count = 1;
desc.SampleDesc.Quality = 0;
desc.BufferCount = 2;
desc.Scaling = DXGI_SCALING_STRETCH;
desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL;
desc.AlphaMode = DXGI_ALPHA_MODE_PREMULTIPLIED;
winrt::com_ptr<IDXGISwapChain1> swapchain;
winrt::check_hresult(factory->CreateSwapChainForComposition(d3dDevice.get(), &desc, nullptr, swapchain.put()));
auto framepool = winrt::Windows::Graphics::Capture::Direct3D11CaptureFramePool::CreateFreeThreaded(device, winrt::Windows::Graphics::DirectX::DirectXPixelFormat::R16G16B16A16Float, 2, item.Size());
auto session = framepool.CreateCaptureSession(item);
framepool.FrameArrived(OnFrameArrived);
//Setup MF output stream
winrt::com_ptr<IMFDXGIDeviceManager> devManager;
UINT resetToken;
winrt::check_hresult(MFCreateDXGIDeviceManager(&resetToken, devManager.put()));
winrt::check_hresult(devManager->ResetDevice(d3dDevice.get(), resetToken));
winrt::com_ptr<IMFByteStream> outputStream;
winrt::check_hresult(MFCreateFile(MF_ACCESSMODE_READWRITE, MF_OPENMODE_DELETE_IF_EXIST, MF_FILEFLAGS_NONE, L"C:\\test.mp4", outputStream.put()));
//configure MF output media type
winrt::com_ptr<IMFMediaType> videoMediaType;
//winrt::com_ptr<IMFMediaType> audioMediaType;
//for video
winrt::check_hresult(MFCreateMediaType(videoMediaType.put()));
winrt::check_hresult(videoMediaType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video));
winrt::check_hresult(videoMediaType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_H264));
winrt::check_hresult(videoMediaType->SetUINT32(MF_MT_AVG_BITRATE, 2000000));
winrt::check_hresult(videoMediaType->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive));
winrt::check_hresult(videoMediaType->SetUINT32(MF_MT_MPEG2_PROFILE, eAVEncH264VProfile_Main));
winrt::check_hresult(videoMediaType->SetUINT32(MF_MT_YUV_MATRIX, MFVideoTransferMatrix_BT601));
winrt::check_hresult(MFSetAttributeSize(videoMediaType.get(), MF_MT_FRAME_SIZE, item.Size().Width, item.Size().Height));
winrt::check_hresult(MFSetAttributeRatio(videoMediaType.get(), MF_MT_FRAME_RATE, 30, 1));
winrt::check_hresult(MFSetAttributeRatio(videoMediaType.get(), MF_MT_PIXEL_ASPECT_RATIO, 1, 1));
//Creates a streaming writer
winrt::com_ptr<IMFMediaSink> mp4StreamSink;
winrt::check_hresult(MFCreateMPEG4MediaSink(outputStream.get(), videoMediaType.get(), NULL, mp4StreamSink.put()));
//setup MF Input stream
winrt::com_ptr<IMFMediaType> inputVideoMediaType;
HRESULT hr = S_OK;
GUID majortype = { 0 };
MFRatio par = { 0 };
hr = videoMediaType->GetMajorType(&majortype);
if (majortype != MFMediaType_Video)
{
throw new winrt::hresult_invalid_argument();
}
// Create a new media type and copy over all of the items.
// This ensures that extended color information is retained.
winrt::check_hresult(MFCreateMediaType(inputVideoMediaType.put()));
winrt::check_hresult(videoMediaType->CopyAllItems(inputVideoMediaType.get()));
// Set the subtype.
winrt::check_hresult(inputVideoMediaType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_ARGB32));
// Uncompressed means all samples are independent.
winrt::check_hresult(inputVideoMediaType->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, TRUE));
// Fix up PAR if not set on the original type.
hr = MFGetAttributeRatio(
inputVideoMediaType.get(),
MF_MT_PIXEL_ASPECT_RATIO,
(UINT32*)&par.Numerator,
(UINT32*)&par.Denominator
);
// Default to square pixels.
if (FAILED(hr))
{
winrt::check_hresult(MFSetAttributeRatio(
inputVideoMediaType.get(),
MF_MT_PIXEL_ASPECT_RATIO,
1, 1
));
}
winrt::check_hresult(MFSetAttributeSize(inputVideoMediaType.get(), MF_MT_FRAME_SIZE, item.Size().Width, item.Size().Height));
inputVideoMediaType->SetUINT32(MF_MT_VIDEO_ROTATION, MFVideoRotationFormat_0); //XXX where do we get the rotation from?
winrt::com_ptr<IMFAttributes> attributes;
winrt::check_hresult(MFCreateAttributes(attributes.put(), 6));
winrt::check_hresult(attributes->SetGUID(MF_TRANSCODE_CONTAINERTYPE, MFTranscodeContainerType_MPEG4));
winrt::check_hresult(attributes->SetUINT32(MF_READWRITE_ENABLE_HARDWARE_TRANSFORMS, 1));
winrt::check_hresult(attributes->SetUINT32(MF_MPEG4SINK_MOOV_BEFORE_MDAT, 1));
winrt::check_hresult(attributes->SetUINT32(MF_LOW_LATENCY, FALSE)); ///XXX should we?
winrt::check_hresult(attributes->SetUINT32(MF_SINK_WRITER_DISABLE_THROTTLING, FALSE)); //XX shuold we?
// Add device manager to attributes. This enables hardware encoding.
winrt::check_hresult(attributes->SetUnknown(MF_SINK_WRITER_D3D_MANAGER, devManager.get()));
//winrt::com_ptr<IMFSinkWriter> sinkWriter;
winrt::check_hresult(MFCreateSinkWriterFromMediaSink(mp4StreamSink.get(), attributes.get(), sinkWriter.put()));
sinkWriter->SetInputMediaType(0, inputVideoMediaType.get(), nullptr);
winrt::com_ptr<ICodecAPI> encoder;
sinkWriter->GetServiceForStream(0 /* video stream index */, GUID_NULL, IID_PPV_ARGS(encoder.put()));
VARIANT var;
VariantInit(&var);
var.vt = VT_UI4;
var.ulVal = eAVEncCommonRateControlMode_Quality;
winrt::check_hresult(encoder->SetValue(&CODECAPI_AVEncCommonRateControlMode, &var));
var.ulVal = 70;
winrt::check_hresult(encoder->SetValue(&CODECAPI_AVEncCommonQuality, &var));
winrt::check_hresult(sinkWriter->BeginWriting());
session.StartCapture();
std::cout << "Hello World!\n";
Sleep(1000);
session.Close();
sinkWriter->Flush(0);
sinkWriter->Finalize();
}
I was able to track down the problem. The above code had two issues:
Need to call SetCurrentLength() on the IMFMediaBuffer object. It seems silly since the way to get the length is to get the IMF2DBuffer interface from the IMFMediaBuffer object and call GetContiguousLength(), but it works.
Taking the texture straight from the OnFrameArrived() callback and passing it into the IMF sink is also wrong. This will exhaust the framepool (which is declared as having 2 frames) and hang the encoder. One possible solution is to copy the data out into a new texture before passing it to the encoder.
Related
In broad strokes, what I'm trying to accomplish is capture (part of) the screen and transform the capture into a digital image format. The following steps outline what I believe to be the solution:
Set up a Direct3D11CaptureFramePool and subscribe to its FrameArrived event
Gain access to the pixel data in the FrameArrived event delegate
Pass image data into the Windows Imaging Component to do the encoding
My issue is with step 2: While I can get the captured frame, gaining CPU read access to the surface fails. This my FrameArrived event delegate implementation (full repro below):
void on_frame_arrived(Direct3D11CaptureFramePool const& frame_pool, winrt::Windows::Foundation::IInspectable const&)
{
if (auto const frame = frame_pool.TryGetNextFrame())
{
if (auto const surface = frame.Surface())
{
if (auto const interop = surface.as<::Windows::Graphics::DirectX::Direct3D11::IDirect3DDxgiInterfaceAccess>())
{
com_ptr<IDXGISurface> dxgi_surface { nullptr };
check_hresult(interop->GetInterface(IID_PPV_ARGS(&dxgi_surface)));
DXGI_MAPPED_RECT info = {};
// Fails with `E_INVALIDARG`
check_hresult(dxgi_surface->Map(&info, DXGI_MAP_READ));
}
}
}
}
The Map() call is failing with E_INVALIDARG, and the debug layer offers additional, helpful error diagnostics:
DXGI ERROR: IDXGISurface::Map: This object was not created with CPUAccess flags that allow CPU access. [ MISCELLANEOUS ERROR #42: ]
So, now that I know what's wrong, how do I solve this? Specifically, how do I pull the pixel data out of a surface created with GPU access only?
Following is a full repro. It was originally created using the "Windows Console Application (C++/WinRT)" project template. The only change applied is "Precompiled Header: Use (/Yu)" → "Precompiled Header: Not Using Precompiled Headers", to keep this a single file.
It creates a command line application that expects a window handle as its only argument, in decimal, hex, or octal.
#include <winrt/Windows.Foundation.h>
#include <winrt/Windows.Graphics.Capture.h>
#include <winrt/Windows.Graphics.DirectX.Direct3D11.h>
#include <winrt/Windows.Graphics.DirectX.h>
#include <Windows.Graphics.Capture.Interop.h>
#include <windows.graphics.capture.h>
#include <windows.graphics.directx.direct3d11.interop.h>
#include <Windows.h>
#include <d3d11.h>
#include <dxgi.h>
#include <cstdint>
#include <stdio.h>
#include <string>
using namespace winrt;
using namespace winrt::Windows::Graphics::Capture;
using namespace winrt::Windows::Graphics::DirectX;
using namespace winrt::Windows::Graphics::DirectX::Direct3D11;
void on_frame_arrived(Direct3D11CaptureFramePool const& frame_pool, winrt::Windows::Foundation::IInspectable const&)
{
wprintf(L"Frame arrived.\n");
if (auto const frame = frame_pool.TryGetNextFrame())
{
if (auto const surface = frame.Surface())
{
if (auto const interop = surface.as<::Windows::Graphics::DirectX::Direct3D11::IDirect3DDxgiInterfaceAccess>())
{
com_ptr<IDXGISurface> dxgi_surface { nullptr };
check_hresult(interop->GetInterface(IID_PPV_ARGS(&dxgi_surface)));
DXGI_MAPPED_RECT info = {};
// This is failing with `E_INVALIDARG`
check_hresult(dxgi_surface->Map(&info, DXGI_MAP_READ));
}
}
}
}
int wmain(int argc, wchar_t const* argv[])
{
init_apartment(apartment_type::single_threaded);
// Validate input
if (argc != 2)
{
wprintf(L"Usage: %s <HWND>\n", argv[0]);
return 1;
}
auto const target = reinterpret_cast<HWND>(static_cast<intptr_t>(std::stoi(argv[1], nullptr, 0)));
// Get `GraphicsCaptureItem` for `HWND`
auto interop = get_activation_factory<GraphicsCaptureItem, IGraphicsCaptureItemInterop>();
::ABI::Windows::Graphics::Capture::IGraphicsCaptureItem* capture_item_abi { nullptr };
check_hresult(interop->CreateForWindow(target, IID_PPV_ARGS(&capture_item_abi)));
// Move raw pointer into smart pointer
GraphicsCaptureItem const capture_item { capture_item_abi, take_ownership_from_abi };
// Create D3D device and request the `IDXGIDevice` interface...
com_ptr<ID3D11Device> device = { nullptr };
check_hresult(::D3D11CreateDevice(nullptr, D3D_DRIVER_TYPE_HARDWARE, nullptr,
D3D11_CREATE_DEVICE_BGRA_SUPPORT | D3D11_CREATE_DEVICE_DEBUG, nullptr, 0,
D3D11_SDK_VERSION, device.put(), nullptr, nullptr));
auto dxgi_device = device.as<IDXGIDevice>();
// ... so that we can get an `IDirect3DDevice` (the capture frame pool
// speaks WinRT only)
com_ptr<IInspectable> d3d_device_interop { nullptr };
check_hresult(::CreateDirect3D11DeviceFromDXGIDevice(dxgi_device.get(), d3d_device_interop.put()));
auto d3d_device = d3d_device_interop.as<IDirect3DDevice>();
// Create a capture frame pool and capture session
auto const pool = Direct3D11CaptureFramePool::Create(d3d_device, DirectXPixelFormat::B8G8R8A8UIntNormalized, 1,
capture_item.Size());
auto const session = pool.CreateCaptureSession(capture_item);
[[maybe_unused]] auto const event_guard = pool.FrameArrived(auto_revoke, &on_frame_arrived);
// Start capturing
session.StartCapture();
// Have the system spin up a message loop for us
::MessageBoxW(nullptr, L"Stop capturing", L"Capturing...", MB_OK);
}
You must create a 2D texture that can be accessed by the CPU and copy the source frame into this 2D texture, which you can then Map. For example:
void on_frame_arrived(Direct3D11CaptureFramePool const& frame_pool, winrt::Windows::Foundation::IInspectable const&)
{
wprintf(L"Frame arrived.\n");
if (auto const frame = frame_pool.TryGetNextFrame())
{
if (auto const surface = frame.Surface())
{
if (auto const interop = surface.as<::Windows::Graphics::DirectX::Direct3D11::IDirect3DDxgiInterfaceAccess>())
{
com_ptr<IDXGISurface> surface;
check_hresult(interop->GetInterface(IID_PPV_ARGS(&surface)));
// get surface dimensions
DXGI_SURFACE_DESC desc;
check_hresult(surface->GetDesc(&desc));
// create a CPU-readable texture
// note: for max perf, the texture creation
// should be done once per surface size
// or allocate a big enough texture (like adapter-sized) and copy portions
D3D11_TEXTURE2D_DESC texDesc{};
texDesc.Width = desc.Width;
texDesc.Height = desc.Height;
texDesc.ArraySize = 1;
texDesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
texDesc.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
texDesc.MipLevels = 1;
texDesc.SampleDesc.Count = 1;
texDesc.Usage = D3D11_USAGE_STAGING;
com_ptr<ID3D11Device> device;
check_hresult(surface->GetDevice(IID_PPV_ARGS(&device))); // or get the one from D3D11CreateDevice
com_ptr<ID3D11Texture2D> tex;
check_hresult(device->CreateTexture2D(&texDesc, nullptr, tex.put()));
com_ptr<ID3D11Resource> input;
check_hresult(interop->GetInterface(IID_PPV_ARGS(&input)));
com_ptr<ID3D11DeviceContext> dc;
device->GetImmediateContext(dc.put()); // or get the one from D3D11CreateDevice
// copy frame into CPU-readable resource
// this and the Map call can be done at each frame
dc->CopyResource(tex.get(), input.get());
D3D11_MAPPED_SUBRESOURCE map;
check_hresult(dc->Map(tex.get(), 0, D3D11_MAP_READ, 0, &map));
// TODO do something with map
dc->Unmap(tex.get(), 0);
}
}
}
}
I'm trying to take a screenshot of a particular window (HWND) on Windows using C++. The following code works on Notepad but not on another specific process. Instead, the code returns a completely different screenshot for the other process:
#include <Windows.h>
HBITMAP dump_client_window(const HWND window_handle)
{
RECT window_handle_rectangle;
GetClientRect(window_handle, &window_handle_rectangle);
const HDC hdc_screen = GetDC(nullptr);
const HDC hdc = CreateCompatibleDC(hdc_screen);
const auto cx = window_handle_rectangle.right - window_handle_rectangle.left;
const auto cy = window_handle_rectangle.bottom - window_handle_rectangle.top;
const HBITMAP bitmap = CreateCompatibleBitmap(hdc_screen, cx, cy);
SelectObject(hdc, bitmap);
const auto old_bitmap = SelectObject(hdc, bitmap);
PrintWindow(window_handle, hdc, PW_CLIENTONLY);
// Cleanup
SelectObject(hdc, old_bitmap);
DeleteDC(hdc);
ReleaseDC(nullptr, hdc_screen);
return bitmap;
}
What could be the reason for it? If I use DirectX11 for taking the screenshot of the window, it works correctly for both processes:
#include <dxgi.h>
#include <inspectable.h>
#include <dxgi1_2.h>
#include <d3d11.h>
#include <winrt/Windows.System.h>
#include <winrt/Windows.Graphics.Capture.h>
#include <Windows.Graphics.Capture.Interop.h>
#include <windows.graphics.directx.direct3d11.interop.h>
#include <roerrorapi.h>
#include <ShlObj_core.h>
#include <dwmapi.h>
#include <filesystem>
#include "ImageFormatConversion.hpp"
#pragma comment(lib, "Dwmapi.lib")
#pragma comment(lib, "windowsapp.lib")
void capture_window(HWND window_handle, const std::wstring& output_file_path)
{
// Init COM
init_apartment(winrt::apartment_type::multi_threaded);
// Create Direct 3D Device
winrt::com_ptr<ID3D11Device> d3d_device;
winrt::check_hresult(D3D11CreateDevice(nullptr, D3D_DRIVER_TYPE_HARDWARE, nullptr, D3D11_CREATE_DEVICE_BGRA_SUPPORT,
nullptr, 0, D3D11_SDK_VERSION, d3d_device.put(), nullptr, nullptr));
winrt::Windows::Graphics::DirectX::Direct3D11::IDirect3DDevice device;
const auto dxgiDevice = d3d_device.as<IDXGIDevice>();
{
winrt::com_ptr<IInspectable> inspectable;
winrt::check_hresult(CreateDirect3D11DeviceFromDXGIDevice(dxgiDevice.get(), inspectable.put()));
device = inspectable.as<winrt::Windows::Graphics::DirectX::Direct3D11::IDirect3DDevice>();
}
auto idxgi_device2 = dxgiDevice.as<IDXGIDevice2>();
winrt::com_ptr<IDXGIAdapter> adapter;
winrt::check_hresult(idxgi_device2->GetParent(winrt::guid_of<IDXGIAdapter>(), adapter.put_void()));
winrt::com_ptr<IDXGIFactory2> factory;
winrt::check_hresult(adapter->GetParent(winrt::guid_of<IDXGIFactory2>(), factory.put_void()));
ID3D11DeviceContext* d3d_context = nullptr;
d3d_device->GetImmediateContext(&d3d_context);
RECT rect{};
DwmGetWindowAttribute(window_handle, DWMWA_EXTENDED_FRAME_BOUNDS, &rect, sizeof(RECT));
const auto size = winrt::Windows::Graphics::SizeInt32{ rect.right - rect.left, rect.bottom - rect.top };
winrt::Windows::Graphics::Capture::Direct3D11CaptureFramePool m_frame_pool =
winrt::Windows::Graphics::Capture::Direct3D11CaptureFramePool::Create(
device,
winrt::Windows::Graphics::DirectX::DirectXPixelFormat::B8G8R8A8UIntNormalized,
2,
size);
const auto activation_factory = winrt::get_activation_factory<
winrt::Windows::Graphics::Capture::GraphicsCaptureItem>();
auto interop_factory = activation_factory.as<IGraphicsCaptureItemInterop>();
winrt::Windows::Graphics::Capture::GraphicsCaptureItem capture_item = { nullptr };
interop_factory->CreateForWindow(window_handle, winrt::guid_of<ABI::Windows::Graphics::Capture::IGraphicsCaptureItem>(),
winrt::put_abi(capture_item));
auto is_frame_arrived = false;
winrt::com_ptr<ID3D11Texture2D> texture;
const auto session = m_frame_pool.CreateCaptureSession(capture_item);
m_frame_pool.FrameArrived([&](auto& frame_pool, auto&)
{
if (is_frame_arrived)
{
return;
}
auto frame = frame_pool.TryGetNextFrame();
struct __declspec(uuid("A9B3D012-3DF2-4EE3-B8D1-8695F457D3C1"))
IDirect3DDxgiInterfaceAccess : ::IUnknown
{
virtual HRESULT __stdcall GetInterface(GUID const& id, void** object) = 0;
};
auto access = frame.Surface().as<IDirect3DDxgiInterfaceAccess>();
access->GetInterface(winrt::guid_of<ID3D11Texture2D>(), texture.put_void());
is_frame_arrived = true;
return;
});
session.StartCapture();
// Message pump
MSG message;
while (!is_frame_arrived)
{
if (PeekMessage(&message, nullptr, 0, 0, PM_REMOVE) > 0)
{
DispatchMessage(&message);
}
}
session.Close();
D3D11_TEXTURE2D_DESC captured_texture_desc;
texture->GetDesc(&captured_texture_desc);
captured_texture_desc.Usage = D3D11_USAGE_STAGING;
captured_texture_desc.BindFlags = 0;
captured_texture_desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
captured_texture_desc.MiscFlags = 0;
winrt::com_ptr<ID3D11Texture2D> user_texture = nullptr;
winrt::check_hresult(d3d_device->CreateTexture2D(&captured_texture_desc, nullptr, user_texture.put()));
d3d_context->CopyResource(user_texture.get(), texture.get());
D3D11_MAPPED_SUBRESOURCE resource;
winrt::check_hresult(d3d_context->Map(user_texture.get(), NULL, D3D11_MAP_READ, 0, &resource));
BITMAPINFO l_bmp_info;
// BMP 32 bpp
ZeroMemory(&l_bmp_info, sizeof(BITMAPINFO));
l_bmp_info.bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
l_bmp_info.bmiHeader.biBitCount = 32;
l_bmp_info.bmiHeader.biCompression = BI_RGB;
l_bmp_info.bmiHeader.biWidth = captured_texture_desc.Width;
l_bmp_info.bmiHeader.biHeight = captured_texture_desc.Height;
l_bmp_info.bmiHeader.biPlanes = 1;
l_bmp_info.bmiHeader.biSizeImage = captured_texture_desc.Width * captured_texture_desc.Height * 4;
std::unique_ptr<BYTE> p_buf(new BYTE[l_bmp_info.bmiHeader.biSizeImage]);
UINT l_bmp_row_pitch = captured_texture_desc.Width * 4;
auto sptr = static_cast<BYTE*>(resource.pData);
auto dptr = p_buf.get() + l_bmp_info.bmiHeader.biSizeImage - l_bmp_row_pitch;
UINT l_row_pitch = std::min<UINT>(l_bmp_row_pitch, resource.RowPitch);
for (size_t h = 0; h < captured_texture_desc.Height; ++h)
{
memcpy_s(dptr, l_bmp_row_pitch, sptr, l_row_pitch);
sptr += resource.RowPitch;
dptr -= l_bmp_row_pitch;
}
// Save bitmap buffer into the file
WCHAR l_my_doc_path[MAX_PATH];
winrt::check_hresult(SHGetFolderPathW(nullptr, CSIDL_PERSONAL, nullptr, SHGFP_TYPE_CURRENT, l_my_doc_path));
FILE* lfile = nullptr;
if (auto lerr = _wfopen_s(&lfile, output_file_path.c_str(), L"wb"); lerr != 0)
{
return;
}
if (lfile != nullptr)
{
BITMAPFILEHEADER bmp_file_header;
bmp_file_header.bfReserved1 = 0;
bmp_file_header.bfReserved2 = 0;
bmp_file_header.bfSize = sizeof(BITMAPFILEHEADER) + sizeof(BITMAPINFOHEADER) + l_bmp_info.bmiHeader.biSizeImage;
bmp_file_header.bfType = 'MB';
bmp_file_header.bfOffBits = sizeof(BITMAPFILEHEADER) + sizeof(BITMAPINFOHEADER);
fwrite(&bmp_file_header, sizeof(BITMAPFILEHEADER), 1, lfile);
fwrite(&l_bmp_info.bmiHeader, sizeof(BITMAPINFOHEADER), 1, lfile);
fwrite(p_buf.get(), l_bmp_info.bmiHeader.biSizeImage, 1, lfile);
fclose(lfile);
convert_image_encoding(output_file_path, L"png");
}
}
Why is the DirectX11 code so complex/long and slow (about 800ms - 1s per call including cold start initialization)? Also, the latter version causes blinking borders around the captured window which I might want to get rid of. I also seem to have to take the more inefficient route of storing the BMP image to the disk and then loading it back in order to convert it to PNG and then storing it again to produce the final result on the disk which I like to have.
Any suggestions or help with any of these things are welcome, especially why the first screenshot capture code can yield unexpected images depending on the window being captured. Other than that, I like the first version for its speed, brevity and simplicity.
I'm writing a thumbnail handler for a custom file type. When I register it, it returns with a success message. The "thumbnail" is shown as completely blank, but it does not say "no thumbnail available". The problem is, the actual thumbnail handler never gets called. I know this, because I put a series of statements in my handler under handler::initialize, handler::queryinterface and handler::getthumbnail. I have gone through most of this documentation., but all I can find is that I need to use initialize and getthumbnail. Here is my code:
Header:
#pragma once
#include <windows.h>
#include <thumbcache.h> // For IThumbnailProvider
#include <wincodec.h> // Windows Imaging Codecs
#include <fstream>
#include <iostream>
#pragma comment(lib, "windowscodecs.lib")
class ThumbnailProvider :
public IInitializeWithStream,
public IThumbnailProvider
{
public:
// IUnknown
IFACEMETHODIMP QueryInterface(REFIID riid, void **ppv);
IFACEMETHODIMP_(ULONG) AddRef();
IFACEMETHODIMP_(ULONG) Release();
// IInitializeWithStream
IFACEMETHODIMP Initialize(IStream *pStream, DWORD grfMode);
// IThumbnailProvider
IFACEMETHODIMP GetThumbnail(UINT cx, HBITMAP *phbmp, WTS_ALPHATYPE *pdwAlpha);
ThumbnailProvider();
protected:
~ ThumbnailProvider();
private:
// Reference count of component.
long m_cRef;
// Provided during initialization.
IStream *m_pStream;
std::ofstream output;
void stripImageFrom (IStream *stream, HBITMAP *phbmp);
};
Body:
#include " ThumbnailProvider.h"
#include <Shlwapi.h>
#include <Wincrypt.h> // For CryptStringToBinary.
#include <msxml6.h>
#include <atlimage.h>
#include <fstream>
#pragma comment(lib, "Shlwapi.lib")
#pragma comment(lib, "Crypt32.lib")
#pragma comment(lib, "msxml6.lib")
extern HINSTANCE g_hInst;
extern long g_cDllRef;
ThumbnailProvider:: ThumbnailProvider() : m_cRef(1), m_pStream(NULL)
{
std::ofstream st;
st.open("C:\\Users\\labs\\Desktop\\Output\\out.txt", std::ios_base::app);
st << "Made provider";
st.close();
InterlockedIncrement(&g_cDllRef);
}
ThumbnailProvider::~ ThumbnailProvider()
{
InterlockedDecrement(&g_cDllRef);
}
#pragma region IUnknown
// Query to the interface the component supported.
IFACEMETHODIMP ThumbnailProvider::QueryInterface(REFIID riid, void **ppv)
{
std::ofstream st;
st.open("C:\\Users\\labs\\Desktop\\Output\\out.txt", std::ios_base::app);
st << "Querying interface";
st.close();
static const QITAB qit[] =
{
QITABENT( ThumbnailProvider, IThumbnailProvider),
QITABENT( ThumbnailProvider, IInitializeWithStream),
{ 0 },
};
return QISearch(this, qit, riid, ppv);
}
// Increase the reference count for an interface on an object.
IFACEMETHODIMP_(ULONG) ThumbnailProvider::AddRef()
{
return InterlockedIncrement(&m_cRef);
}
// Decrease the reference count for an interface on an object.
IFACEMETHODIMP_(ULONG) ThumbnailProvider::Release()
{
ULONG cRef = InterlockedDecrement(&m_cRef);
if (0 == cRef)
{
delete this;
}
return cRef;
}
#pragma endregion
#pragma region IInitializeWithStream
// Initializes the thumbnail handler with a stream.
IFACEMETHODIMP ThumbnailProvider::Initialize(IStream *pStream, DWORD grfMode)
{
std::ofstream st;
st.open("C:\\Users\\labs\\Desktop\\Output\\out.txt", std::ios_base::app);
st << "Got to initialization";
st.close();
// A handler instance should be initialized only once in its lifetime.
HRESULT hr = HRESULT_FROM_WIN32(ERROR_ALREADY_INITIALIZED);
if (m_pStream == NULL)
{
// Take a reference to the stream if it has not been initialized yet.
hr = pStream->QueryInterface(&m_pStream);
}
return hr;
}
#pragma endregion
#pragma region IThumbnailProvider
// Gets a thumbnail image and alpha type. The GetThumbnail is called with the
// largest desired size of the image, in pixels. Although the parameter is
// called cx, this is used as the maximum size of both the x and y dimensions.
// If the retrieved thumbnail is not square, then the longer axis is limited
// by cx and the aspect ratio of the original image respected. On exit,
// GetThumbnail provides a handle to the retrieved image. It also provides a
// value that indicates the color at of the image and whether it has
// valid alpha in ation.
IFACEMETHODIMP ThumbnailProvider::GetThumbnail(UINT cx, HBITMAP *phbmp,
WTS_ALPHATYPE *pdwAlpha) {
std::ofstream st;
st.open("C:\\Users\\labs\\Desktop\\Output\\out.txt", std::ios_base::app);
st << "Getting thumbnail";
st.close();
ThumbnailProvider::stripImageFrom (m_pStream, phbmp);
cx = 1024 * 1024;
*pdwAlpha = WTSAT_UNKNOWN;
return S_OK;
}
#pragma endregion
#pragma region Helper Functions
// The PNG signature is 137 80 78 71 13 10 26 10. This does not make sense to do backward. I will do it forward.
void ThumbnailProvider::stripImageFrom (IStream *stream, HBITMAP *phbmp) {
unsigned long numBytes = 0;
unsigned long *numBytesPtr = &numBytes;
char *chptr = nullptr;
byte vals[8] = { 0 };
STATSTG *stat = nullptr;
DWORD temp = NULL;
stream->Stat(stat, temp);
unsigned long long length = stat->cbSize.QuadPart;
unsigned long long i;
for (i = 0; i<length; i++) {
stream->Read(chptr, 1, numBytesPtr);
if (*chptr == 137) {
vals[0] = 1;
}
else if (*chptr == 80 && vals[0]) {
vals[1] = 1;
}
else if (*chptr == 78 && vals[1]) {
vals[2] = 1;
}
else if (*chptr == 71 && vals[2]) {
vals[3] = 1;
}
else if (*chptr == 13 && vals[3]) {
vals[4] = 1;
}
else if (*chptr == 10 && vals[4] && !vals[5]) {
vals[5] = 1;
}
else if (*chptr == 26 && vals[5]) {
vals[6] = 1;
}
else if (*chptr == 10 && vals[6]) {
vals[7] = 1;
i -= 7;
break;
}
else {
memset(vals, 0, 8 * sizeof(vals[0]));
}
}
if (vals[7]) {
IStream *imgstream = nullptr;
stream->Read(imgstream, length-i, numBytesPtr);
CImage *img = nullptr;
img->Load(imgstream);
*phbmp = *img;
}
}
#pragma endregion
Everything else is just edited from one of Microsoft's examples, so I am pretty sure that is ok. The example is here.
If you're having this problem, make sure you're building for the right architecture. That solved it for me.
I have been trying to make a program that takses the active window, and displays it in its window.
I have successfully exceeded my goal. But the problem is, it uses a lot of ram, and it keeps using more every frame update(20fps).
Here is the source code:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <string.h>
#include <gdk/gdkx.h>
#include <gtk/gtk.h>
int funcfinished = 1;
GtkWidget *window;
GdkPixbuf *fupdate_pixbuf;
GtkStyle *fupdate_style;
GdkPixmap *fupdate_background;
gint fupdate_xorig;
gint fupdate_yorig;
gint fupdate_width;
gint fupdate_height;
GdkPixbuf *fupdate_screenshot;
GdkWindow *fupdate_window;
gboolean frameupdate()
{
if(funcfinished == 1)
{
/*********[FuncFinish]*********/
funcfinished = 0;
fupdate_pixbuf = NULL;
fupdate_style = NULL;
fupdate_background = NULL;
fupdate_screenshot = NULL;
fupdate_window = NULL;
fupdate_xorig = 0;
fupdate_yorig = 0;
fupdate_width = 0;
fupdate_height = 0;
/*********[Func]*********/
fupdate_window = gdk_screen_get_active_window(gdk_screen_get_default());
gdk_drawable_get_size(fupdate_window, &fupdate_width, &fupdate_height);
fupdate_pixbuf = gdk_pixbuf_get_from_drawable(NULL, fupdate_window, NULL, 0, 0, 0, 0, fupdate_width, fupdate_height);
gdk_pixbuf_render_pixmap_and_mask(fupdate_pixbuf, &fupdate_background, NULL, 0);
fupdate_style = gtk_style_new();
fupdate_style->bg_pixmap[0] = fupdate_background;
gtk_widget_set_style(GTK_WIDGET(window), GTK_STYLE(fupdate_style));
/*********[FuncFinish]*********/
fupdate_pixbuf = NULL;
fupdate_style = NULL;
fupdate_background = NULL;
fupdate_screenshot = NULL;
fupdate_window = NULL;
fupdate_xorig = 0;
fupdate_yorig = 0;
fupdate_width = 0;
fupdate_height = 0;
funcfinished = 1;
}
else
{
printf("Skipped 1 frame update");
}
return TRUE;
}
int main(int argc, char *argv[])
{
gtk_init(&argc, &argv);
window = gtk_window_new(GTK_WINDOW_TOPLEVEL);
gtk_window_set_title(GTK_WINDOW(window), "Hay Day Autobot");
gtk_window_set_default_size(GTK_WINDOW(window), 400, 300);
g_signal_connect(window, "destroy", G_CALLBACK (gtk_main_quit), NULL);
g_timeout_add(50, frameupdate, 0);
gtk_widget_show(window);
gtk_main();
return 0;
}
I also made a video of it in action, showing off the problem:
https://www.youtube.com/watch?v=GNCwNetLLBM
You are not releasing the memory that you create during the frame update function. For each function that you use there, you should look it up in the documentation and see what it says under "return value".
For example, gdk_screen_get_active_window() lists its return value as "transfer full". That means that "full" ownership of the return value is "transferred" to you when you call that function; ownership means that you are responsible for freeing the memory. Usually the documentation will also say how to do that. In this case you can read
The returned window should be unrefed using g_object_unref() when no longer needed.
On the other hand, gdk_screen_get_default() is "transfer none", so you don't need to do anything there.
I am new to SAPI, and I would really appreciate if any of you can provide me a speech to text Hello World example in SAPI. I know MS got some examples like "Dictation" etc, but I would like to start with a very small one. Glad if you can help.
I played a bit with Windows Voice Recognition using SAPI, it really isn't user friendly. Here is an example of code I wrote (in C++) :
#include <sphelper.h>
#include <sapi.h>
#include <iostream>
#include <string>
const ULONGLONG grammarId = 0;
const wchar_t* ruleName1 = L"ruleName1";
int start_listening(const std::string& word);
ISpRecoGrammar* init_grammar(ISpRecoContext* recoContext, const std::string& command);
void get_text(ISpRecoContext* reco_context);
void check_result(const HRESULT& result);
int main(int argc, char** argv)
{
start_listening("Hello");
return EXIT_SUCCESS;
}
// This function exits when the word passed as parameter is said by the user
int start_listening(const std::string& word)
{
// Initialize COM library
if (FAILED(::CoInitialize(nullptr))) {
return EXIT_FAILURE;
}
std::cout << "You should start Windows Recognition" << std::endl;
std::cout << "Just say \""<< word << "\"" << std::endl;
HRESULT hr;
ISpRecognizer* recognizer;
hr = CoCreateInstance(CLSID_SpSharedRecognizer,
nullptr, CLSCTX_ALL, IID_ISpRecognizer,
reinterpret_cast<void**>(&recognizer));
check_result(hr);
ISpRecoContext* recoContext;
hr = recognizer->CreateRecoContext(&recoContext);
check_result(hr);
// Disable context
hr = recoContext->Pause(0);
check_result(hr);
ISpRecoGrammar* recoGrammar = init_grammar(recoContext, word);
hr = recoContext->SetNotifyWin32Event();
check_result(hr);
HANDLE handleEvent;
handleEvent = recoContext->GetNotifyEventHandle();
if(handleEvent == INVALID_HANDLE_VALUE) {
check_result(E_FAIL);
}
ULONGLONG interest;
interest = SPFEI(SPEI_RECOGNITION);
hr = recoContext->SetInterest(interest, interest);
check_result(hr);
// Activate Grammar
hr = recoGrammar->SetRuleState(ruleName1, 0, SPRS_ACTIVE);
check_result(hr);
// Enable context
hr = recoContext->Resume(0);
check_result(hr);
// Wait for reco
HANDLE handles[1];
handles[0] = handleEvent;
WaitForMultipleObjects(1, handles, FALSE, INFINITE);
get_text(recoContext);
std::cout << "Hello user" << std::endl;
recoGrammar->Release();
::CoUninitialize();
system("PAUSE");
return EXIT_SUCCESS;
}
/**
* Create and initialize the Grammar.
* Create a rule for the grammar.
* Add word to the grammar.
*/
ISpRecoGrammar* init_grammar(ISpRecoContext* recoContext, const std::string& command)
{
HRESULT hr;
SPSTATEHANDLE sate;
ISpRecoGrammar* recoGrammar;
hr = recoContext->CreateGrammar(grammarId, &recoGrammar);
check_result(hr);
WORD langId = MAKELANGID(LANG_FRENCH, SUBLANG_FRENCH);
hr = recoGrammar->ResetGrammar(langId);
check_result(hr);
// TODO: Catch error and use default langId => GetUserDefaultUILanguage()
// Create rules
hr = recoGrammar->GetRule(ruleName1, 0, SPRAF_TopLevel | SPRAF_Active, true, &sate);
check_result(hr);
// Add a word
const std::wstring commandWstr = std::wstring(command.begin(), command.end());
hr = recoGrammar->AddWordTransition(sate, NULL, commandWstr.c_str(), L" ", SPWT_LEXICAL, 1, nullptr);
check_result(hr);
// Commit changes
hr = recoGrammar->Commit(0);
check_result(hr);
return recoGrammar;
}
void get_text(ISpRecoContext* reco_context)
{
const ULONG maxEvents = 10;
SPEVENT events[maxEvents];
ULONG eventCount;
HRESULT hr;
hr = reco_context->GetEvents(maxEvents, events, &eventCount);
// Warning hr equal S_FALSE if everything is OK
// but eventCount < requestedEventCount
if(!(hr == S_OK || hr == S_FALSE)) {
check_result(hr);
}
ISpRecoResult* recoResult;
recoResult = reinterpret_cast<ISpRecoResult*>(events[0].lParam);
wchar_t* text;
hr = recoResult->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, FALSE, &text, NULL);
check_result(hr);
CoTaskMemFree(text);
}
void check_result(const HRESULT& result)
{
if (result == S_OK) {
return;
}
std::string message;
switch(result) {
case E_INVALIDARG:
message = "One or more arguments are invalids.";
case E_ACCESSDENIED:
message = "Acces Denied.";
case E_NOINTERFACE:
message = "Interface does not exist.";
case E_NOTIMPL:
message = "Not implemented method.";
case E_OUTOFMEMORY:
message = "Out of memory.";
case E_POINTER:
message = "Invalid pointer.";
case E_UNEXPECTED:
message = "Unexpecter error.";
case E_FAIL:
message = "Failure";
default:
message = "Unknown : " + std::to_string(result);
}
throw std::exception(message.c_str());
}
As I said, it's a bit complicated. I think you should wrap all that code into a library to make it easier to use.