How to use SCardGetStatusChange correctly on Windows 8? - windows

The smart card service behaves differently on Windows 8 and MSDN hasn't updated their documentation. Can anyone give a code snippet on how to call SCardGetStatusChange correctly to monitor smart card actions on Windows 8? Thanks in advance!

Here is a C++ template function that I wrote for a personal blog project. It uses a library I am developing that is up on github, but you can also just rework the logic into your own context.
template<typename SetContext, typename ClearContext, typename Wait, typename Report>
unique_winerror monitor_smartcard_readers(
SetContext&& setContext,
ClearContext&& clearContext,
Wait&& wait,
Report&& report
)
{
unique_winerror winerror;
std::vector<wchar_t> readernames;
std::vector<SCARD_READERSTATE> readers;
while (winerror)
{
//
// make sure that the scard service has started
// and that the loop has not been cancelled
//
if (!std::forward<Wait>(wait)())
{
return winerror_cast(SCARD_E_CANCELLED);
}
monitor_error_contract(
[&] ()
{
unique_close_scardcontext context;
ON_UNWIND_AUTO(
[&]
{
std::forward<ClearContext>(clearContext)();
}
);
//
// need a fresh context whenever we start over.
// lots of sytem changes could have caused this
// restart including the scard service stopping
// and then restarting.
//
winerror.reset(
SCardEstablishContext(
SCARD_SCOPE_USER,
NULL,
NULL,
context.replace()
)
);
if (!winerror || !context)
{
return;
}
std::forward<SetContext>(setContext)(context.get());
//
// make sure that loop has not been cancelled.
// without this there is a race where the new
// context is not cancelled because the caller
// cancelled at a time when there was no
// context yet.
//
if (!std::forward<Wait>(wait)())
{
winerror = winerror_cast(SCARD_E_CANCELLED);
return;
}
if (readers.empty())
{
//
// add PnP state query
// setting the state to unaware causes SCardGetStatusChange
// to return immediately with the actual pnp state.
//
readers.push_back(make(L"\\\\?PnP?\\Notification"));
}
for(;;)
{
auto readersstaterange = lib::rng::make_range_raw(readers);
winerror.reset(
SCardGetStatusChange(
context.get(),
INFINITE,
readersstaterange.begin(),
lib::rng::size_cast<DWORD>(readersstaterange.size())
)
);
if (!winerror)
{
// exit
return;
}
//
// report changes
//
auto readersrange = lib::rng::make_range_raw(readers, 0, -1);
if (!readersrange.empty())
{
std::forward<Report>(report)(readersrange);
}
//
// record the changes we have reported
//
for (auto& state : readers)
{
state.dwCurrentState = state.dwEventState;
}
if ((readers.back().dwEventState & SCARD_STATE_CHANGED) == SCARD_STATE_CHANGED)
{
// Pnp event - list readers.
break;
}
}
// keep the old allocations for use to build the new list.
std::vector<wchar_t> oldreadernames(std::move(readernames));
std::vector<SCARD_READERSTATE> oldreaders(std::move(readers));
// exclude the pnp reader
auto oldreaderssortedrange = lib::rng::make_range(oldreaders, 0, -1);
LPWSTR concatreaderstrings = nullptr;
ON_UNWIND_AUTO(
[&] { if (concatreaderstrings) {SCardFreeMemory(context.get(), concatreaderstrings);};}
);
DWORD totallength = SCARD_AUTOALLOCATE;
winerror.reset(
SCardListReaders(
context.get(),
nullptr,
reinterpret_cast<LPWSTR>(&concatreaderstrings),
&totallength
)
);
if (winerror == winerror_cast(SCARD_E_NO_READERS_AVAILABLE))
{
// no readers is not an error, loop around to wait
// for a reader to be connected
winerror.suppress().release();
return;
}
else if (!winerror)
{
return;
}
// keep the names around because the state array will have pointers into this
readernames.assign(concatreaderstrings, concatreaderstrings + totallength);
auto readerstateless = [](const SCARD_READERSTATE& lhs, const SCARD_READERSTATE& rhs) -> bool
{
return _wcsicmp(lhs.szReader, rhs.szReader) < 0;
};
//
// all the reader names are concatenated in this array with
// embedded nulls for each and two nulls to mark the end
//
auto cursorreadernames = lib::rng::make_range_raw(readernames);
while(!cursorreadernames.empty() && cursorreadernames.front() != L'\0')
{
// access the current name
auto namerange = lib::rng::make_range(
cursorreadernames,
0,
wcslen(cursorreadernames.begin()) - cursorreadernames.size()
);
// skip to the next name
cursorreadernames = lib::rng::make_range(namerange, namerange.size() + 1, 0);
auto oldreader = std::equal_range(
oldreaderssortedrange.begin(),
oldreaderssortedrange.end(),
make(namerange.begin()),
readerstateless
);
if (oldreader.first != oldreader.second)
{
// keep the old state for this reader
readers.push_back(*oldreader.first);
// must use the new string allocation,
// the old one will be gone soon
readers.back().szReader = namerange.begin();
}
else
{
readers.push_back(make(namerange.begin()));
}
}
// keeping them sorted makes the updates more stable and allows the
// equal_range above instead of a linear find.
std::sort(readers.begin(), readers.end(), readerstateless);
//
// add PnP state query
// keep the existing state, and keep it at the
// end, out of the sorted area.
//
readers.push_back(oldreaders.back());
}
);
}
return winerror;
}
usage looks like this:
#define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers
#define NOMINMAX
// Windows Header Files:
#include <windows.h>
#include <Unknwn.h>
#include <winscard.h>
#include <ncrypt.h>
#include <Wincrypt.h>
#include <credentialprovider.h>
// TODO: reference additional headers your program requires here
#include <type_traits>
#include <algorithm>
#include <new>
#include <memory>
#include <utility>
#include <limits>
#include <iterator>
#include <thread>
#include <future>
#include <mutex>
#include <vector>
#include <iostream>
#include <iomanip>
int wmain(int argc, WCHAR* argv[])
{
unique_winerror winerror;
for (;;)
{
SCARDCONTEXT context = NULL;
// if you monitor in a separate thread, then add a cancel or shutdown event
// into the waitfor array and handle it in the Wait lambda
HANDLE waitfor[] = {SCardAccessStartedEvent()};
ON_UNWIND_AUTO([] {SCardReleaseStartedEvent();});
winerror = smart_card::monitor_smartcard_readers(
[&](SCARDCONTEXT context)
{
context = context;
},
[&]()
{
context = NULL;
},
[&]() -> bool
{
if (WAIT_OBJECT_0 != WaitForMultipleObjects(lib::rng::size(waitfor), waitfor, FALSE, INFINITE))
{
// monitor_smardcard_readers will return SCARD_E_CANCELLED
return false;
}
return true;
},
[&](lib::rng::range<SCARD_READERSTATE*> readersrange)
{
for (auto& state : readersrange)
{
auto stateChanges = (state.dwCurrentState ^ state.dwEventState) & std::numeric_limits<unsigned short>::max();
std::wcout
<< L"nothread - "
<< state.szReader
<< L" changes: " << std::hex << std::showbase << stateChanges
<< L"["
;
printSCardState(std::wcout, stateChanges)
<< L"] state: " << std::hex << std::showbase << state.dwEventState
<< L"["
;
printSCardState(std::wcout, state.dwEventState)
<< L"]"
<< std::endl
;
if (state.dwCurrentState != SCARD_STATE_UNAWARE &&
((state.dwEventState & SCARD_STATE_PRESENT) != SCARD_STATE_PRESENT ||
stateChanges == SCARD_STATE_INUSE ||
stateChanges == SCARD_STATE_UNPOWERED ||
(state.dwEventState & (SCARD_STATE_UNPOWERED | SCARD_STATE_EMPTY | SCARD_STATE_IGNORE | SCARD_STATE_UNKNOWN | SCARD_STATE_UNAVAILABLE | SCARD_STATE_MUTE)) ||
state.cbAtr == 0))
{
// we have seen this reader before and one of:
// no card
// only flipped INUSE
// only flipped UNPOWERED
// UNPOWERED EMPTY UNKNOWN UNAVAILABLE MUTE
// no atr
//
// don't try to read the card
continue;
}
// read the card in the reader and list the certs on the card
}
}
);
winerror.suppress();
}
return 0;
}

I know I'm over 2 years late, but maybe my reply can help someone nonetheless.
I have some simple code as a starting base for further development. I first created it on Windows 7; AFAICS it works fine on Windows 8 too. This uses only a single reader, but a previous iteration used a list of readers and it worked just as well. The relevant parts are as follows.
Initialization of the reader state structure:
memset(&m_State, 0, sizeof(m_State));
m_State.szReader = _wcsdup(m_ReaderName.c_str());
m_State.dwCurrentState = SCARD_STATE_UNAWARE;
Waiting for events:
bool TSmartCardReader::WaitForEvent(DWORD Timeout, TCardEvent &CardEvent)
{
CardEvent = None;
// Reset reader structure, except the specific fields we need
// (because that's what the docs say: "Important: Each member of each structure
// in this array must be initialized to zero and then set to specific values as
// necessary. If this is not done, the function will fail in situations that
// involve remote card readers.")
const wchar_t *szReader = m_State.szReader;
DWORD dwCurrentState = m_State.dwCurrentState;
memset(&m_State, 0, sizeof(m_State));
m_State.szReader = szReader;
m_State.dwCurrentState = dwCurrentState;
LONG rv = SCardGetStatusChangeW(m_hContext, Timeout, &m_State, 1);
if (rv == SCARD_S_SUCCESS)
{
HandleStatusChange(CardEvent);
// I'm not sure we really need to reset the SCARD_STATE_CHANGED bit
m_State.dwCurrentState = m_State.dwEventState & ~SCARD_STATE_CHANGED;
}
else if (rv == SCARD_E_TIMEOUT)
return false; // No status changes
else if (rv == SCARD_E_NO_READERS_AVAILABLE)
throw ESCNoReaders("No readers available");
else
throw ESCWaitForEvent(GetErrorText(rv));
return CardEvent != None;
}
As far as I understand the documentation, the key thing is that you set set dwCurrentState to what you believe is the current state of the reader. SCardGetStatusChange() takes that current state into account to decide what constitutes a state change.

Related

How to use IcmpSendEcho2() with APC callbacks in main thread?

First, the documentation for IcmpSendEcho2() contradicts itself:
It says:
The IcmpSendEcho2 function is called synchronously if the ApcRoutine or Event parameters are NULL
Then it says:
The IcmpSendEcho2 function is called asynchronously when either the ApcRoutine or Event parameters are specified
I presume the first one should be "if the ApcRoutine AND Event paramters are NULL"?
Also, it says under the return value:
When called asynchronously, the IcmpSendEcho2 function returns ERROR_IO_PENDING to indicate the operation is in progress
But I don't see that, I see it return 0 and GetLastError() returns ERROR_IO_PENDING. So, can both cases exist, or is the documentation completely wrong?
Now on to the next issue. I wanted to use IcmpSendEcho2() asynchronously using the ACP callback without events. This way, I didn't have to worry about resources should the number of hosts to process be extremely large. However, it doesn't work because no callback occurs. I found this in the documentation under the AcpRoutine parameter:
The routine that is called when the calling thread is in an alertable thread and an ICMPv4 reply arrives.
So I believe my problem is the main thread is not in an alterable state. Since I don't have an event to wait on, and I don't want to wait beyond the time it takes to complete everything, how do I put the main thread in an alterable state without having to guess using something like SleepEx()? Also, if I did use something like SleepEx(10,TRUE), would all the callbacks occur, or do you have to sit in a loop?
My callback context structure includes a shared global OutstandingCount type variable so I'd know when all requests were completed.
Also the ReplyBuffer is in the context structure. Another little nugget hidden in the documentation regarding the ReplyBuffer when using it asynchronously is:
The application must parse the data pointed to by ReplyBuffer parameter using the IcmpParseReplies function
So, the main question here: How are you supposed to properly use the IcmpSendEcho2() function with a AcpRoutine and no Event in a main thread?
-- Update --
Not sure if I should ask an entirely new question but now a problem where it doesn't call the ApcRoutine for every IcmpSendEcho2Ex() sent. The following code works for my normal network adapters (which are 255.255.255.0) but hangs for a 255.255.0.0 network because the outstandingcount never gets to zero.
The adapter it hangs on is:
VirtualBox Host-Only Ethernet Adapter
DHCP Enable: Yes
Autoconfiguration Enabled: Yes
Autoconfiguration IPv4Address: 169.254.21.120
Subnet Mask: 255.255.0.0
Also wonder how long it would take on networks like 10. with a subnet of 255.0.0.0.
Here's the code that starts with the IPV4Scan() built as x64 on Win10 x64:
#define PIO_APC_ROUTINE_DEFINED
#include <winternl.h>
#include <iphlpapi.h>
#include <IcmpAPI.h>
//--------------
// types
//--------------
typedef DWORD (WINAPI *LPFN_IcmpSendEcho2)(HANDLE, HANDLE , PIO_APC_ROUTINE, PVOID, IPAddr, LPVOID, WORD, PIP_OPTION_INFORMATION, LPVOID, DWORD, DWORD);
typedef DWORD (WINAPI *LPFN_IcmpSendEcho2Ex)(HANDLE, HANDLE , PIO_APC_ROUTINE, PVOID, IPAddr, IPAddr, LPVOID, WORD, PIP_OPTION_INFORMATION, LPVOID, DWORD, DWORD);
typedef HANDLE (WINAPI *LPFN_IcmpCreateFile)();
typedef BOOL (WINAPI *LPFN_IcmpCloseHandle)(HANDLE);
typedef DWORD (WINAPI *LPFN_IcmpParseReplies)(LPVOID, DWORD);
BYTE PingSignature[]={ 'X', 'Y', 'Z', '1', '2', '3', '4', '5', '6', '7', '8' };
typedef struct _sPingContext
{
ULONG *OutstandingCount; // shared number of pings outstanding
CMutex *Mutex; // mutex for ipsfound
CNumericBuffer<uint32_t> *IPsFound; // list of ips found (MSBF format)
LPFN_IcmpParseReplies fnIcmpParseReplies; // function pointer
BYTE ReplyBuffer[sizeof(ICMP_ECHO_REPLY) + sizeof(PingSignature) + sizeof(IO_STATUS_BLOCK) + 8]; // reply buffer (see API docs)
_sPingContext(ULONG *outstandingcount, CMutex *mutex, CNumericBuffer<uint32_t> *ipsfound, LPFN_IcmpParseReplies fnicmpparsereplies)
{
OutstandingCount=outstandingcount;
Mutex=mutex;
IPsFound=ipsfound;
fnIcmpParseReplies=fnicmpparsereplies;
memset(ReplyBuffer, 0, sizeof(ReplyBuffer));
};
} sPingContext, *psPingContext;
//-------------------------------------------------------------------------
// Purpose: Callback for async ping
//
// Input: ioresult - [i] io result of async operation
// pingccontext - [i] context passed on ping
// replysize - [i] reply size of ReplyBuffer
//
// Output: na
//
// Notes:
//
VOID PingCallbackCommon(DWORD ioresult, sPingContext* pingcontext, DWORD replysize)
{
// parse response buffer
if (pingcontext) {
if (ioresult==IP_SUCCESS) {
if (pingcontext->fnIcmpParseReplies(pingcontext->ReplyBuffer, replysize)) {
// point to reply buffer
PICMP_ECHO_REPLY pechoreply=reinterpret_cast<PICMP_ECHO_REPLY>(pingcontext->ReplyBuffer);
if (pechoreply->Status==IP_SUCCESS) {
// check response
if (pechoreply->DataSize==sizeof(PingSignature)) {
if (memcmp(pechoreply->Data, PingSignature, pechoreply->DataSize)==0) {
// successful ping
pingcontext->Mutex->Lock();
pingcontext->IPsFound->AddItem(pechoreply->Address);
pingcontext->Mutex->Unlock();
}
}
}
}
}
// reduce count
InterlockedDecrement(pingcontext->OutstandingCount);
// clean up
delete pingcontext;
}
}
//-------------------------------------------------------------------------
// Purpose: Callback for async ping
//
// Input: apccontext - [i] context passed on ping
//
// Output: na
//
// Notes:
//
VOID PingCallbackOld(PVOID apcontext)
{
sPingContext *pingcontext=reinterpret_cast<sPingContext*>(apcontext);
PingCallbackCommon(IP_SUCCESS, pingcontext, sizeof(pingcontext->ReplyBuffer));
}
//-------------------------------------------------------------------------
// Purpose: Callback for async ping
//
// Input: apccontext - [i] context passed on ping
// iostatusblock - [i] status of request
//
// Output: na
//
// Notes:
//
VOID PingCallback(PVOID apcontext, PIO_STATUS_BLOCK iostatusblock, ULONG reserved)
{
PingCallbackCommon(iostatusblock->Status, reinterpret_cast<sPingContext*>(apcontext), iostatusblock->Information);
}
//-------------------------------------------------------------------------
// Purpose: build list of network hosts using IPv4 Ping
//
// Input: subnet - [i] subnet being scanned (LSB format)
// hoststart - [i] host starting number for scan
// hostend - [i] host ending number for scan
// ips - [io] numeric buffer to update with found addresses
//
// Output: na
//
// Notes:
//
void IPV4Ping(IPAddr sourceip, uint32_t subnet, uint32_t hoststart, uint32_t hostend, CNumericBuffer<uint32_t> &ips)
{
// skip 127. network
if ((sourceip & 0xFF)==127)
return;
bool oldlib=false;
LPFN_IcmpSendEcho2Ex fnIcmpSendEcho2Ex=NULL;
LPFN_IcmpCreateFile fnIcmpCreateFile=NULL;
LPFN_IcmpCloseHandle fnIcmpCloseHandle=NULL;
LPFN_IcmpParseReplies fnIcmpParseReplies=NULL;
// first thing is first - check which set of functions to use
HMODULE hlib=LoadLibrary(_T("iphlpapi.dll"));
if (hlib) {
// load functions
fnIcmpCreateFile=(LPFN_IcmpCreateFile) GetProcAddress(hlib, "IcmpCreateFile");
fnIcmpSendEcho2Ex=(LPFN_IcmpSendEcho2Ex) GetProcAddress(hlib, "IcmpSendEcho2Ex");
fnIcmpCloseHandle=(LPFN_IcmpCloseHandle) GetProcAddress(hlib, "IcmpCloseHandle");
fnIcmpParseReplies=(LPFN_IcmpParseReplies) GetProcAddress(hlib, "IcmpParseReplies");
}
// check if have everything
if (!hlib || fnIcmpCreateFile==NULL || fnIcmpSendEcho2Ex==NULL || fnIcmpCloseHandle==NULL || fnIcmpParseReplies==NULL) {
// no, try old version
oldlib=true;
// clean up
if (hlib) {
FreeLibrary(hlib);
}
// load old lib
hlib=LoadLibrary(_T("icmp.dll"));
// check if loaded
if (hlib) {
// load functions
fnIcmpCreateFile=(LPFN_IcmpCreateFile) GetProcAddress(hlib, "IcmpCreateFile");
fnIcmpSendEcho2Ex=(LPFN_IcmpSendEcho2Ex) GetProcAddress(hlib, "IcmpSendEcho2Ex");
fnIcmpCloseHandle=(LPFN_IcmpCloseHandle) GetProcAddress(hlib, "IcmpCloseHandle");
fnIcmpParseReplies=(LPFN_IcmpParseReplies) GetProcAddress(hlib, "IcmpParseReplies");
}
}
// check if have everything
if (hlib) {
if (fnIcmpCreateFile!=NULL && fnIcmpSendEcho2Ex!=NULL && fnIcmpCloseHandle!=NULL && fnIcmpParseReplies!=NULL) {
// open icmp
HANDLE hicmp=fnIcmpCreateFile();
if (hicmp!=INVALID_HANDLE_VALUE) {
// variables for callback handling
ULONG outstandingcount=0;
CMutex mutex;
// process pings
for (uint32_t host=hoststart; host<=hostend; host++) {
// build full ip
IPAddr ip=subnet | host;
ip=GETMSBFDWORD(&ip);
// create context
sPingContext *pcontext;
if ((pcontext=new sPingContext(&outstandingcount, &mutex, &ips, fnIcmpParseReplies))!=NULL) {
// count request
InterlockedIncrement(&outstandingcount);
// now issue ping
DWORD result=fnIcmpSendEcho2Ex(hicmp,
NULL,
oldlib ? (PIO_APC_ROUTINE) PingCallbackOld : PingCallback,
pcontext,
sourceip,
ip,
PingSignature,
sizeof(PingSignature),
NULL,
pcontext->ReplyBuffer,
sizeof(pcontext->ReplyBuffer),
50);
// check if failed
if (result==0) {
// check if because pending
if (GetLastError()!=ERROR_IO_PENDING) {
// no - use callback to clean up
CDebugPrint::DebugPrint(_T("IcmpSendEcho Error %u\n"), GetLastError());
PingCallbackOld(pcontext);
}
else {
// fire off pending APC callbacks ready
SleepEx(0, TRUE);
}
}
else {
// completed sync - use callback to clean up
PingCallbackOld(pcontext);
}
}
}
// wait for completion
while (outstandingcount) {
// handle callbacks
SleepEx(10, TRUE);
}
// clean up
fnIcmpCloseHandle(hicmp);
}
}
// clean up
FreeLibrary(hlib);
}
}
//-------------------------------------------------------------------------
// Purpose: build list of network hosts by way of IP scan for V4
//
// Input: ipadapteraddress - [i] adapter ip address to build for
//
// Output: na
//
// Notes: ip addresses are MSBF
//
void IPV4Scan(IP_ADAPTER_UNICAST_ADDRESS *ipadapteraddress)
{
// build the subnet mask to use
if (ipadapteraddress->OnLinkPrefixLength<=32 && ipadapteraddress->OnLinkPrefixLength!=0) {
in_addr ia=reinterpret_cast<sockaddr_in*>(ipadapteraddress->Address.lpSockaddr)->sin_addr;
// valid mask length - build mask
uint32_t rangemask=((1U<<(32-ipadapteraddress->OnLinkPrefixLength))-1);
uint32_t mask=~rangemask;
uint32_t subnet=GETMSBFDWORD(&ia.s_addr) & mask;
CDebugPrint::DebugPrint(_T("Subnet %u.%u.%u.%u/%u\n"), (subnet>>24) & 0xFF, (subnet>>16) & 0xFF, (subnet>>8) & 0xFF, (subnet>>0) & 0xFF, ipadapteraddress->OnLinkPrefixLength);
CDebugPrint::DebugPrint(_T("Scanning %u hosts\n"), (UINT32_MAX & rangemask)-1);
CNumericBuffer<uint32_t> ipsfound;
IPV4Ping(ia.s_addr, subnet, 1, (UINT32_MAX & rangemask)-1, ipsfound);
for (UINT i=0; i<(UINT)ipsfound.GetCount(); i++) {
uint32_t ip=ipsfound[i];
CDebugPrint::DebugPrint(_T("Ping found %u.%u.%u.%u\n"), ip & 0xFF, (ip>>8) & 0xFF, (ip>>16) & 0xFF, (ip>>24) & 0xFF);
}
}
else CDebugPrint::DebugPrint(_T("Invalid subnet length %u\n"), ipadapteraddress->OnLinkPrefixLength);
}
I presume the first one should be "if the ApcRoutine AND Event
paramters are NULL"?
yes, you correct.
But I don't see that, I see it return 0 and GetLastError() returns
ERROR_IO_PENDING. So, can both cases exist, or is the documentation
completely wrong?
documentation completely wrong. by fact IcmpSendEcho2[Ex] return BOOL and error code via SetLastError ( more exactly by RtlNtStatusToDosError)
so on asynchronous call it return FALSE (0) and GetLastError() will be ERROR_IO_PENDING if all ok - this mean apc callback will be called, or another error if fail - apc callback will be not called (better call it by self in this case, for common error handling)
how do I put the main thread in an alterable state
this already depend from what your thread doing. in some case possible write event loop with MsgWaitForMultipleObjectsEx function - at once wait on windows events and be alertable. also possible wait on some objects too. if you can not rewrite self message loop with MsgWaitForMultipleObjectsEx - you can do call from worked thread, or periodically call SleepEx(0, TRUE) or undocumented NtTestAlert. without know what your main thread doing - hard say exactly what is better.
demo code can look like:
#include <iphlpapi.h>
#include <IPExport.h>
#include <icmpapi.h>
class EchoRequestContext
{
HANDLE _hFile = 0;
PVOID _ReplyBuffer = 0;
LONG _dwRefCount = 1;
ULONG _dwThreadId = GetCurrentThreadId();
static void WINAPI sOnApc(PVOID This, PIO_STATUS_BLOCK piosb, ULONG )
{
reinterpret_cast<EchoRequestContext*>(This)->OnApc(
RtlNtStatusToDosError(piosb->Status),
(ULONG)piosb->Information);
}
void OnApc(ULONG dwError, ULONG ReplySize)
{
OnReply(dwError, (PICMP_ECHO_REPLY)_ReplyBuffer, ReplySize);
if (_ReplyBuffer) delete [] _ReplyBuffer;
Release();
}
void OnReply(ULONG dwError, PICMP_ECHO_REPLY ReplyBuffer, ULONG ReplySize)
{
if (dwError)
{
DbgPrint("dwError=%u\n", dwError);
return ;
}
if (IcmpParseReplies(ReplyBuffer, ReplySize))
{
__nop();
}
}
~EchoRequestContext()
{
if (_hFile) IcmpCloseHandle(_hFile);
PostThreadMessageW(_dwThreadId, WM_QUIT, 0, 0);
}
public:
void AddRef()
{
InterlockedIncrementNoFence(&_dwRefCount);
}
void Release()
{
if (!InterlockedDecrement(&_dwRefCount))
{
delete this;
}
}
ULONG Create()
{
HANDLE hFile = IcmpCreateFile();
if (hFile == INVALID_HANDLE_VALUE)
{
return GetLastError();
}
_hFile = hFile;
return NOERROR;
}
void SendEcho(
IPAddr DestinationAddress,
const void* RequestData,
WORD RequestSize,
ULONG ReplySize,
ULONG Timeout,
UCHAR Flags,
UCHAR Ttl)
{
if (PVOID ReplyBuffer = new UCHAR[ReplySize])
{
_ReplyBuffer = ReplyBuffer;
IP_OPTION_INFORMATION opt = { Ttl, 0, Flags };
AddRef();
ULONG dwError = IcmpSendEcho2Ex(_hFile, 0, sOnApc, this,
0, DestinationAddress,
const_cast<void*>(RequestData), RequestSize,
&opt, ReplyBuffer, ReplySize, Timeout) ? NOERROR : GetLastError();
switch (dwError)
{
case NOERROR:
case ERROR_IO_PENDING:
break;
default:
OnApc(dwError, 0 );
}
return ;
}
OnApc(ERROR_OUTOFMEMORY, 0);
}
};
#define IP(a, b, c, d) ((ULONG)(a + (b << 8) + (c << 16) + (d << 24)))
void EchoTest()
{
WSADATA wd;
if (NOERROR == WSAStartup(WINSOCK_VERSION, &wd))
{
if (EchoRequestContext* p = new EchoRequestContext)
{
if (p->Create() == NOERROR)
{
p->SendEcho(IP(8,8,8,8), "1234567890ABCDEF", 16, 0x100, 4000, IP_FLAG_DF, 255);
}
p->Release();
}
MSG msg;
__loop:
switch (MsgWaitForMultipleObjectsEx(0, 0, INFINITE,
QS_ALLINPUT, MWMO_ALERTABLE|MWMO_WAITALL))
{
default:
__debugbreak();
break;
case WAIT_FAILED:
break;
case WAIT_OBJECT_0:
while (PeekMessageW(&msg, 0, 0, 0, PM_REMOVE))
{
if (msg.message == WM_QUIT)
{
goto __exit;
}
}
case STATUS_USER_APC: // == WAIT_IO_COMPLETION
goto __loop;
}
__exit:
WSACleanup();
}
}

How to properly compare Q_ENUM and QStringList

I have a small example .ui where the user can drag and drop from a QListWidget to a QGraphicsView using QGraphicsProxyWidget a specific widget (in my case is a QTableWidget) as shown below.
Basically the behavior I have been looking for is:
If I drag and drop "Imgaes" than the QTableWidget on the QGraphicsView will have 2 columns and 2 rows; (which is correct)
If I drag and drop "Path" than the QTableWidget on the QGraphicsView will have 3 columns and 3 rows; (which is wrong)
[1] : https://imgur.com/NQ1DZpx
Below the selection of "Path", which still give 2 rows and 2 columns instead of 3 rows and 3 columns
[2] : https://imgur.com/d4YcOiL
Below the code:
scene.h
class Scene : public QGraphicsScene {
enum LaserTableWidget {
Images,
Path
};
Q_ENUM(LaserTableWidget)
public:
Scene(QObject *parent = nullptr);
void compare(const QString& comp);
template<typename QEnum>
std::string QtEnumToString (const QEnum value)
{
return std::string(QMetaEnum::fromType<QEnum>().valueToKey(value));
}
protected:
void dropEvent(QGraphicsSceneDragDropEvent *event);
scene.cpp
Scene::Scene(QObject *parent) : QGraphicsScene(parent) {
setBackgroundBrush(Qt::lightGray);
}
void Scene::compare(const QString &comp)
{
// get information about the enum named "LaserTableWidget"
QMetaObject MetaObject = this->staticMetaObject;
QMetaEnum MetaEnum = MetaObject.enumerator(MetaObject.indexOfEnumerator("LaserTableWidget"));
QStringList tabs;
switch (MetaEnum.keyToValue(comp.toUpper().toLatin1()))
// or simply switch (MetaEnum.keyToValue(word)) if no string modification is needed
{
case Images:
for (const QString &color : tabs) {
QPoint initPos(0,0);
QTableWidget *wgt = new QTableWidget;
QGraphicsRectItem *proxyControl = addRect(initPos.x(), initPos.y(), wgt->width(), 20, QPen(Qt::black), QBrush(Qt::darkGreen)); // widget->width() works properly here because of the resize(layout->sizeHint()) that we have used inside it
proxyControl->setFlag(QGraphicsItem::ItemIsMovable, true);
proxyControl->setFlag(QGraphicsItem::ItemIsSelectable, true);
wgt->setColumnCount(2);
wgt->setRowCount(2);
for (int ridx = 0 ; ridx < wgt->rowCount() ; ridx++ )
{
for (int cidx = 0 ; cidx < wgt->columnCount() ; cidx++)
{
QTableWidgetItem* item = new QTableWidgetItem();
item->setText(QString("%1").arg(ridx));
wgt->setItem(ridx,cidx,item);
}
}
QGraphicsProxyWidget * const proxy = addWidget(wgt);
// In my case the rectangular graphics item is supposed to be above my widget so the position of the widget is shifted along the Y axis based on the height of the rectangle of that graphics item
proxy->setPos(initPos.x(), initPos.y()+proxyControl->rect().height());
proxy->setParentItem(proxyControl);
}
break;
case Path:
for (const QString &color : tabs) {
QPoint initPos(0,0);
QTableWidget *wgt = new QTableWidget;
QGraphicsRectItem *proxyControl = addRect(initPos.x(), initPos.y(), wgt->width(), 20, QPen(Qt::black), QBrush(Qt::darkGreen)); // widget->width() works properly here because of the resize(layout->sizeHint()) that we have used inside it
proxyControl->setFlag(QGraphicsItem::ItemIsMovable, true);
proxyControl->setFlag(QGraphicsItem::ItemIsSelectable, true);
wgt->setColumnCount(3);
wgt->setRowCount(3);
for (int ridx = 0 ; ridx < wgt->rowCount() ; ridx++ )
{
for (int cidx = 0 ; cidx < wgt->columnCount() ; cidx++)
{
QTableWidgetItem* item = new QTableWidgetItem();
item->setText(QString("%1").arg(ridx));
wgt->setItem(ridx,cidx,item);
}
}
QGraphicsProxyWidget * const proxy = addWidget(wgt);
// In my case the rectangular graphics item is supposed to be above my widget so the position of the widget is shifted along the Y axis based on the height of the rectangle of that graphics item
proxy->setPos(initPos.x(), initPos.y()+proxyControl->rect().height());
proxy->setParentItem(proxyControl);
}
break;
default:
break;
}
}
void Scene::dropEvent(QGraphicsSceneDragDropEvent *event) {
QByteArray encoded =
event->mimeData()->data("application/x-qabstractitemmodeldatalist");
QDataStream stream(&encoded, QIODevice::ReadOnly);
QStringList rosTables;
while (!stream.atEnd()) {
int row, col;
QMap<int, QVariant> roleDataMap;
stream >> row >> col >> roleDataMap;
rosTables << roleDataMap[Qt::DisplayRole].toString();
}
compare(const QString &comp)
}
I tried to use the template function declared on the header file that for completeness I am also attaching below:
template<typename QEnum>
std::string QtEnumToString (const QEnum value)
{
return std::string(QMetaEnum::fromType<QEnum>().valueToKey(value));
}
Maybe I the template function is the wrong choice? I was trying to find a way to use it if possible.
That is the reason why I switched to a void compare(const QString& comp); function and tried to delegate the job to a switch - case statement but that also is not working as I expect and I still see the same exact QtableWidget dropped onto the QGraphicsView.
Of course I did more research and came across this source and above all this post which was useful to understand the basic comparison and following this post I decided to go ahead and try to apply the Q_ENUM - QString or even the QStringList as a valuable tool. But I could not figure out what I was doing wrong.
Can anyone please shed light on which approach could be better? (or maybe they are both correct) and try to explain what I am missing to solve this problem.
There are two problems I see in your code:
Wrong parameter passed to keyToValue(). Since you have Image and Path in your enum, the valid values to pass to keyToValue() are "Image" (returns 0), and "Path" (return 1), other values will returns -1.
In the function Scene::compare(), tabs is just created as an empty QStringList, so the code inside the loops for (const QString &color : tabs) are never executed
Below is a test program to show you what I mean:
mainwindow.h
#ifndef MAINWINDOW_H
#define MAINWINDOW_H
#include <QMainWindow>
#include <QMetaEnum>
class MainWindow : public QMainWindow
{
Q_OBJECT
public:
MainWindow(QWidget *parent = nullptr);
~MainWindow();
enum LaserTableWidget
{
Images,
Path
};
Q_ENUM(LaserTableWidget)
template<typename enum_type>
QString QtEnumToString (const enum_type value)
{
return QMetaEnum::fromType<enum_type>().valueToKey(value);
}
};
#endif // MAINWINDOW_H
mainwindow.cpp
#include "mainwindow.h"
#include <QDebug>
#include <QStringList>
MainWindow::MainWindow(QWidget *parent)
: QMainWindow(parent)
{
// First problem: wrong parameter passed to keyToValue()
qDebug() << 1 << QtEnumToString<LaserTableWidget>(Images);
qDebug() << 2 << QtEnumToString<LaserTableWidget>(Path);
QMetaObject MetaObject = this->staticMetaObject;
QMetaEnum MetaEnum = MetaObject.enumerator(MetaObject.indexOfEnumerator("LaserTableWidget"));
qDebug() << 3 << MetaEnum.keyToValue(QtEnumToString<LaserTableWidget>(Path).toUpper().toLatin1());
qDebug() << 4 << MetaEnum.keyToValue(QtEnumToString<LaserTableWidget>(Path).toStdString().c_str());
switch (MetaEnum.keyToValue(QtEnumToString<LaserTableWidget>(Path).toUpper().toLatin1()))
{
case Images:
qDebug() << "switch Images";
break;
case Path:
qDebug() << "switch Path";
break;
default:
qDebug() << "switch default";
break;
}
// Second problem: tabs is empty
QStringList tabs;
for (const QString &color : tabs)
qDebug() << color; // this line is never executed
}
MainWindow::~MainWindow()
{
}
Output:
1 "Images"
2 "Path"
3 -1
4 1
switch default

USN NFTS change notification event interrupt

I'm trying to find a way to let the system tell me whenever there's a new entry in the USN Change Journal to track modifications made to files and directories on an NTFS volume (Server 2008/2012).
This way I don't have to constantly poll the journal and can just let my thread sleep until I get notified when there's a new change-event.
However, is there even such an interrupt?
The FSCTL_QUERY_USN_JOURNAL function doesn't specifically mention interrupts (events, notifications), nor have I been able to find another way to achieve this with less intensive poll-and-compare techniques.
I'm not a hard-core programmer so there may be simpler ways to tie these functions to interrupts that I'm not aware of.
Could I perhaps find out where the USN Change Journal is stored and watch that file with another process that can generate and interrupt on change?
https://msdn.microsoft.com/en-us/library/aa365729(v=vs.85).aspx
The code posted here blocks the executing thread till the new USN record is created in the Journal. When new records arrive, the thread awakens and you can process changes and/or notify listeners via a callback that filesystem has changed (in the example it just prints message to the console). Then the thread blocks again. This example uses one thread per volume (so for each volume, separate NTFSChangesWatcher class instance needed).
It is not specified which tools or language you use, so I will write as I did it. To run this code, create a Visual Studio C++ Win32 Console Application.
Create NTFSChangesWatcher class. Paste this code in NTFSChangesWatcher.h file (replacing auto-generated one):
#pragma once
#include <windows.h>
#include <memory>
class NTFSChangesWatcher
{
public:
NTFSChangesWatcher(char drive_letter);
~NTFSChangesWatcher() = default;
// Method which runs an infinite loop and waits for new update sequence number in a journal.
// The thread is blocked till the new USN record created in the journal.
void WatchChanges();
private:
HANDLE OpenVolume(char drive_letter);
bool CreateJournal(HANDLE volume);
bool LoadJournal(HANDLE volume, USN_JOURNAL_DATA* journal_data);
bool NTFSChangesWatcher::WaitForNextUsn(PREAD_USN_JOURNAL_DATA read_journal_data) const;
std::unique_ptr<READ_USN_JOURNAL_DATA> GetWaitForNextUsnQuery(USN start_usn);
bool NTFSChangesWatcher::ReadJournalRecords(PREAD_USN_JOURNAL_DATA journal_query, LPVOID buffer,
DWORD& byte_count) const;
std::unique_ptr<READ_USN_JOURNAL_DATA> NTFSChangesWatcher::GetReadJournalQuery(USN low_usn);
char drive_letter_;
HANDLE volume_;
std::unique_ptr<USN_JOURNAL_DATA> journal_;
DWORDLONG journal_id_;
USN last_usn_;
// Flags, which indicate which types of changes you want to listen.
static const int FILE_CHANGE_BITMASK;
static const int kBufferSize;
};
and this code in NTFSChangesWatcher.cpp file:
#include "NTFSChangesWatcher.h"
#include <iostream>
using namespace std;
const int NTFSChangesWatcher::kBufferSize = 1024 * 1024 / 2;
const int NTFSChangesWatcher::FILE_CHANGE_BITMASK =
USN_REASON_RENAME_NEW_NAME | USN_REASON_SECURITY_CHANGE | USN_REASON_BASIC_INFO_CHANGE | USN_REASON_DATA_OVERWRITE |
USN_REASON_DATA_TRUNCATION | USN_REASON_DATA_EXTEND | USN_REASON_CLOSE;
NTFSChangesWatcher::NTFSChangesWatcher(char drive_letter) :
drive_letter_(drive_letter)
{
volume_ = OpenVolume(drive_letter_);
journal_ = make_unique<USN_JOURNAL_DATA>();
bool res = LoadJournal(volume_, journal_.get());
if (!res) {
cout << "Failed to load journal" << endl;
return;
}
journal_id_ = journal_->UsnJournalID;
last_usn_ = journal_->NextUsn;
}
HANDLE NTFSChangesWatcher::OpenVolume(char drive_letter) {
wchar_t pattern[10] = L"\\\\?\\a:";
pattern[4] = static_cast<wchar_t>(drive_letter);
HANDLE volume = nullptr;
volume = CreateFile(
pattern, // lpFileName
// also could be | FILE_READ_DATA | FILE_READ_ATTRIBUTES | SYNCHRONIZE
GENERIC_READ | GENERIC_WRITE | SYNCHRONIZE, // dwDesiredAccess
FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, // share mode
NULL, // default security attributes
OPEN_EXISTING, // disposition
// It is always set, no matter whether you explicitly specify it or not. This means, that access
// must be aligned with sector size so we can only read a number of bytes that is a multiple of the sector size.
FILE_FLAG_NO_BUFFERING, // file attributes
NULL // do not copy file attributes
);
if (volume == INVALID_HANDLE_VALUE) {
// An error occurred!
cout << "Failed to open volume" << endl;
return nullptr;
}
return volume;
}
bool NTFSChangesWatcher::CreateJournal(HANDLE volume) {
DWORD byte_count;
CREATE_USN_JOURNAL_DATA create_journal_data;
bool ok = DeviceIoControl(volume, // handle to volume
FSCTL_CREATE_USN_JOURNAL, // dwIoControlCode
&create_journal_data, // input buffer
sizeof(create_journal_data), // size of input buffer
NULL, // lpOutBuffer
0, // nOutBufferSize
&byte_count, // number of bytes returned
NULL) != 0; // OVERLAPPED structure
if (!ok) {
// An error occurred!
}
return ok;
}
bool NTFSChangesWatcher::LoadJournal(HANDLE volume, USN_JOURNAL_DATA* journal_data) {
DWORD byte_count;
// Try to open journal.
if (!DeviceIoControl(volume, FSCTL_QUERY_USN_JOURNAL, NULL, 0, journal_data, sizeof(*journal_data), &byte_count,
NULL)) {
// If failed (for example, in case journaling is disabled), create journal and retry.
if (CreateJournal(volume)) {
return LoadJournal(volume, journal_data);
}
return false;
}
return true;
}
void NTFSChangesWatcher::WatchChanges() {
auto u_buffer = make_unique<char[]>(kBufferSize);
auto read_journal_query = GetWaitForNextUsnQuery(last_usn_);
while (true) {
// This function does not return until new USN record created.
WaitForNextUsn(read_journal_query.get());
cout << "New entry created in the journal!" << endl;
auto journal_query = GetReadJournalQuery(read_journal_query->StartUsn);
DWORD byte_count;
if (!ReadJournalRecords(journal_query.get(), u_buffer.get(), byte_count)) {
// An error occurred.
cout << "Failed to read journal records" << endl;
}
last_usn_ = *(USN*)u_buffer.get();
read_journal_query->StartUsn = last_usn_;
// If you need here you can:
// Read and parse Journal records from the buffer.
// Notify an NTFSChangeObservers about journal changes.
}
}
bool NTFSChangesWatcher::WaitForNextUsn(PREAD_USN_JOURNAL_DATA read_journal_data) const {
DWORD bytes_read;
bool ok = true;
// This function does not return until new USN record created.
ok = DeviceIoControl(volume_, FSCTL_READ_USN_JOURNAL, read_journal_data, sizeof(*read_journal_data),
&read_journal_data->StartUsn, sizeof(read_journal_data->StartUsn), &bytes_read,
nullptr) != 0;
return ok;
}
unique_ptr<READ_USN_JOURNAL_DATA> NTFSChangesWatcher::GetWaitForNextUsnQuery(USN start_usn) {
auto query = make_unique<READ_USN_JOURNAL_DATA>();
query->StartUsn = start_usn;
query->ReasonMask = 0xFFFFFFFF; // All bits.
query->ReturnOnlyOnClose = FALSE; // All entries.
query->Timeout = 0; // No timeout.
query->BytesToWaitFor = 1; // Wait for this.
query->UsnJournalID = journal_id_; // The journal.
query->MinMajorVersion = 2;
query->MaxMajorVersion = 2;
return query;
}
bool NTFSChangesWatcher::ReadJournalRecords(PREAD_USN_JOURNAL_DATA journal_query, LPVOID buffer,
DWORD& byte_count) const {
return DeviceIoControl(volume_, FSCTL_READ_USN_JOURNAL, journal_query, sizeof(*journal_query), buffer, kBufferSize,
&byte_count, nullptr) != 0;
}
unique_ptr<READ_USN_JOURNAL_DATA> NTFSChangesWatcher::GetReadJournalQuery(USN low_usn) {
auto query = make_unique<READ_USN_JOURNAL_DATA>();
query->StartUsn = low_usn;
query->ReasonMask = 0xFFFFFFFF; // All bits.
query->ReturnOnlyOnClose = FALSE;
query->Timeout = 0; // No timeout.
query->BytesToWaitFor = 0;
query->UsnJournalID = journal_id_;
query->MinMajorVersion = 2;
query->MaxMajorVersion = 2;
return query;
}
Now you can use it (for example in the main function for testing):
#include "NTFSChangesWatcher.h"
int _tmain(int argc, _TCHAR* argv[])
{
auto watcher = new NTFSChangesWatcher('z');
watcher->WatchChanges();
return 0;
}
And console output should be like this on every change in the filesystem:
This code was slightly reworked to remove unrelated details and is a part of the Indexer++ project. So for more details, you can refer to the original code.
You can use Journal, but in this case I'd use easier method via registering a directory notification by calling the FindFirstChangeNotification or ReadDirectoryChangesW functions, see https://msdn.microsoft.com/en-us/library/aa364417.aspx
If you'd prefer to use Journal, this is - I think - the best introductory article with many examples. It is written for W2K, but those concepts are still valid: https://www.microsoft.com/msj/0999/journal/journal.aspx

Filter items from boost::lockfree::spsc_queue

I use the boost::lockfree::spsc_queue to send stream data from a thread to a working thread.
These is the struct of the items:
struct spsc_queue_item
{
uint8_t ID;
void *data;
};
The data get insert by spsc_queue.push and read by the other thread by spsc_queue.pop.
But I also have some "commands" for the worker thread.
Like ID 0 is "start filter",
ID 1 is "stop filter",
ID 2 id "data"...
So if a lot of "data" get pushed to the queue the command like "stop filter" will be delayed because first the "data" items are handled.
But if the command "stop filter" is coming in the "data" items are useless and can be discarded.
Now I know there is also the member function "consume_one" and "consume_all".
But I don't find a example how to use the functor with these functions.
My idea is to use as example consume_one to check first if a item with ID==0 or ID==1 is on the queue before continue working with the data items with ID==2.
Somebody have a small example how to use the functor to filter out items releated to the requested ID?
Or is there any other fast way to get items from the queue by a "priority" flag?
UPDATE In response to sehe's answer, some more information:
Thank you for this information.
Any idea how to make it better?
I will need to signal the worker thread like for "start filter", "stop filter",...
I was thinking of using events:
SetEvent(hStartFilter);
But here I have to use, create and close for each command a single event.
The "data" can also have different IDs.
Like the worker thread receives:
"start filter" with ID=0
"start filter" with ID=1
Then "data" with ID0 and ID1 is coming into the queue.
Now the thread receives "stop filter" with ID0.
So all items on the data-queue with ID0 are obsolete and can be deleted.
My first test was to grap all items from the queue. Check each of them of the matching ID and remove the item. The other items what are left are getting pushed back to the queue after.
But this is a lot of CPU usage and time consuming action if there are a lot of data with different IDs (up to 32). The maximum size of the queue is 2048 items.
Is there also a better way?
Queues have FIFO semantics. There's no way to filter, unless you want to just discard elements that don't match your criteria. (Just wrap the pop function)
The typical solution to this is to use
a priority queue
two separate queues, one for data and one for commands.
If you can never have more than x elements in the data queue, consider using a circular buffer. spsc_queue uses a ring buffer as the underlying storage.
UPDATE In response to the question edit, I decided to create a demo using out-of-band signaling of filtering-state per ID.
Let me walk through, starting with the usual definitions:
static constexpr uint8_t NUM_SOURCES = 32;
Now, the common definitions shared by both the constumer and the producer:
namespace queueing {
using data_t = std::vector<char>; // just for demo
struct spsc_queue_item {
uint8_t ID;
data_t data;
};
// process control
boost::atomic_bool shutdown_flag { false };
namespace statistics {
namespace events {
boost::atomic_size_t occurred { 0 };
}
namespace packets {
boost::atomic_size_t queued { 0 };
boost::atomic_size_t dropped { 0 };
boost::atomic_size_t processed { 0 };
boost::atomic_size_t skipped[NUM_SOURCES] = {};
}
boost::atomic_size_t idle_cycles { 0 };
void report();
}
// business logic
boost::atomic_bool source_enabled [NUM_SOURCES] = {}; // true:started (process) / false:stopped (skip)
boost::lockfree::spsc_queue<spsc_queue_item, boost::lockfree::capacity<2048> > shared_queue;
}
As you can see I changed data (because it's easier to demo without the void*). Also, I added a number of useful statistics that can be report()-ed at the end of your rest runs.
void producer_thread() {
using namespace boost;
namespace stats = queueing::statistics;
// helpers to generate random data packets or start/stop filter events
enum kind_t { knd_data, knd_start, knd_stop };
queueing::data_t const empty {};
struct event_t { kind_t kind; spsc_queue_item item; };
// ...
// now generate queue items in a loop
while (!queueing::shutdown_flag) {
auto evt = gen_event();
std::this_thread::sleep_for(std::chrono::nanoseconds(engine()%102400));
switch(evt.kind) {
case knd_data:
stats::events::occurred++;
if (queueing::shared_queue.push(evt.item)) {
stats::packets::queued++;
} else {
stats::packets::dropped++;
}
break;
case knd_start: {
bool expected = false;
if (queueing::source_enabled[evt.item.ID].compare_exchange_weak(expected, true))
std::cout << "+";// << static_cast<int>(evt.item.ID);
}
break;
case knd_stop: {
bool expected = true;
if (queueing::source_enabled[evt.item.ID].compare_exchange_weak(expected, false))
std::cout << "-";// << static_cast<int>(evt.item.ID);
}
break;
}
}
}
The body of the thread function is quite straight-forward, but a notable thing is that start and stop events are not communicated over the queue.
The producer is even simpler. All it does is exhaust the queue, updating some statistics counters.
Before processing an item, the corresponding filtering state (source_enabled) is checked:
void consumer_thread() {
namespace stats = queueing::statistics;
queueing::spsc_queue_item item;
auto consume_pending = [&] {
while (queueing::shared_queue.pop(item)) {
if (queueing::source_enabled[item.ID])
fake_process(item); // if filtering started, process
else
stats::packets::skipped[item.ID]++; // if filtering stopped, skip
}
};
while (!queueing::shutdown_flag) {
consume_pending();
stats::idle_cycles++;
}
consume_pending(); // drain any remaining queued items, to avoid race with shutdown_flag
}
Now, everything should be self-explanatory so, piece together a main() function:
int main() {
using namespace std;
// check no source_enabled flags are set at start
assert(0 == count(begin(queueing::source_enabled), end(queueing::source_enabled), true));
auto producer = thread(producer_thread);
auto consumer = thread(consumer_thread);
this_thread::sleep_for(chrono::seconds(1));
queueing::shutdown_flag = true;
if (producer.joinable()) producer.join();
if (consumer.joinable()) consumer.join();
queueing::statistics::report();
}
Our program runs both threads for approximately 1 second and waits for them join.
Then it reports statistics which, on my system, look like:
++-+++++++--+++-++++-++-+++---+-+-+-+++++-+--+---+++-++---+-++-++-+-+++---++--+++-++---+----+-+-+-+--+++-++--+--+--++--+-+-+-+--+--+++--++-+-++-++-+--+--+++-++-+---+----++-+++-+-++-+----+--+-+-+--+++--+++++-+-+--++-+--++++-+-+---++-+---+-+--++---++++----+-+---+-+-+-+--+-++--+-+++--+++-+----+-+-+-+++-+++--+-++-++++++---++--+-++-++---+-+-++--+-+-----++---+-+-+--+++--++---++--+-+++-++++-+++-+-+--+++-+-+----+-++++-+--+++----+++-------+-++-+-+-++++-++++---++-+---+-++-----+-++++----+++-++++--+--+-----+-++++----++++-+++-+---+---+-+-++++-++---+-++-+-+-+++-+-+--+-----++-+++---+-++---+++-++-+--+++++------++---+-++++-+-+-+--++++-++++-+--+++-++---+-----++-+-++-+-+++--++-+-+-++-++-----+-++--+--+--+-------++--+-++-+--++-++-++--+-+-++-+-+++-++++-+---+--+++--++--+-+++++-+-----++--++--+++--++-+---++----+--+-+--++-++---+++++++-+--+-++---+----+-+-+--+-+-+--++++-++--+--+-+---+++-+++++++-++-+-----+--++------+-++++++--++-++-+---+-++---++-++------+-++--+-++-+++--+++-+++-+-+--+-+--+--+---+-+-+-+--+-++-+-++---+++-+-+-++--+-++-+---++--+-+--++-+++-+--+++---+----+--++-++++++-++-+----+++-+-+--+++-----+---+--++-+--+-++++++-+-+++--+++---+-+-++++-++-+-+----++++----+++-++----+---++-+---++-+-+-++--+++---+--+++----++-++-+++--+--+---+++--+--+--+--+--++++-++++---+-+-+--+-+-+--++++--+-+--++--++++----++-++++++-+--+-+------+-----+++----++-+++++-+--+--+---++-+-++-+--++++-+++---+++-+----+--+++++-+-+--+++--++-+++-+-++---++-++-+-+-+--+-++--+---+-+++--+++++-----+-++-+-+++-+-+-------++++---+-+-++-+--+++++---+--++-+-++-+++----+++-++++---++------+-+---++++--+-+---+++------++++++---++-+----+-+++-+--++-+-+-+-----+-++-++-++--++-+-+-++++++--++---+-+-+-+-+-+-++-++-++----++--++-+++-++---+++--+++---+++--+-+++----++--+-+-+++---++---++-+--+++++-+---++----++--+++-+--+-+++++++-+--+---+--+---+----+-++-++-+--++--+--++-++---+++++--+-+---+-++-+-+----+++-++-+-+--+---+-++-+-----++---++++--+++++-+---+-++--+-+-+----+--++++-+-----++++--++-+-+++++----+++---+++++++--+---+--+--++++--+++-----+-++--+-+-----+++++----+-++++---+-++--+-++-+++--+++-+-+++++--+----++--+--+-+-++-----++-+--++--++++++-+-+++----++++---++-+--+-+------+-+--+++++--+++--++-----+--++-++-+++++-++-------+----++-++--+--++--++++-++---+-+++++----+-++-++---+++---+-++-++----++--++--+++++-+--+-----+-+-+-+-+++-+--++-+-+++--+-+-+++-+-++--+-+-+-+--+-+-+++++---+---+-++-+---++-+-++-+-+++-++-++-+-++-------++---+-++-++++-++--++--+-++-+++---++++--+----+---+-++-+++--+-+++---+-++-++----+--+--+-++--+-++-++++++--+-++-+--+---+-+--+-+--++---+--+-++--++--+--++-++++----+--+--+++-+++-+-+-++--++-+-+---+-+-------+--++++++-++++++-++-+-++-+---+--+-+-++--+++---+----+--+--+-++----+-+-++-++-++-+++--++---++-------+++++--+-+++++++--+--+-+--++--++--++-+--+--+++----+++++-++-------++---+-+--++-++--+++-+-+-+-+------+-+--+++++-+-+--++-++-++--+++++++---+-++--+++-+++--++++-++--+-+---+----+----+---+--+-+++-+-+++++---+--++--+-+++-+++++--+---+-+++++-+---++++--+-++----+---++----+++---+++++-+-++--+--+-++-++----+---++-++-+-+-+---+++-++-+++-+---+++--+-+-----++-+---++-+---++---+-++--++++-+--++-+-++----+-+-+--++--++++--+--++--+--+-+-+++++++--++-+-+-+++--+---+++--++++++--+-+-----+---++-+++--+++--++---+++--+--+-++++-----+++-----++++--++--+-+--
Events occurred: 3061
Queued packets: 3061
Dropped packets: 0
Processed packets: 1464
Filtered (per source) 58 48 53 51 47 39 45 42 53 52 57 50 63 43 49 57 45 58 40 42 56 54 58 52 44 53 61 41 50 33 51 52
Total filtered: 1597
Idle cycles: 26408166
The first line (++-+++++++--+++-++++-++-+++---+...) is a shorthand notation showing the number of effective changes in source_enabled[] flags.
You can see that at this rate, the queue is not saturated and the consumer thread has quite a number of idle cycles.
Demo Live On Coliru
Full listing for reference:
#include <boost/lockfree/spsc_queue.hpp>
#include <boost/atomic.hpp>
#include <boost/random.hpp>
#include <boost/bind.hpp>
#include <thread>
static constexpr uint8_t NUM_SOURCES = 32;
namespace queueing {
using data_t = std::vector<char>; // just for demo
struct spsc_queue_item {
uint8_t ID;
data_t data;
};
// process control
boost::atomic_bool shutdown_flag { false };
namespace statistics {
namespace events {
boost::atomic_size_t occurred { 0 };
}
namespace packets {
boost::atomic_size_t queued { 0 };
boost::atomic_size_t dropped { 0 };
boost::atomic_size_t processed { 0 };
boost::atomic_size_t skipped[NUM_SOURCES] = {};
}
boost::atomic_size_t idle_cycles { 0 };
void report() {
namespace stats = queueing::statistics;
std::cout << "\n";
std::cout << "Events occurred: " << stats::events::occurred << "\n";
std::cout << "Queued packets: " << stats::packets::queued << "\n";
std::cout << "Dropped packets: " << stats::packets::dropped << "\n";
std::cout << "Processed packets: " << stats::packets::processed << "\n";
std::cout << "Filtered (per source) ";
std::copy(std::begin(stats::packets::skipped), std::end(stats::packets::skipped),
std::ostream_iterator<size_t>(std::cout, " "));
std::cout << "\n";
auto total_filtered = std::accumulate(std::begin(stats::packets::skipped), std::end(stats::packets::skipped), 0ul);
std::cout << "Total filtered: " << total_filtered << "\n";
std::cout << "Idle cycles: " << stats::idle_cycles << "\n";
}
}
// business logic
boost::atomic_bool source_enabled [NUM_SOURCES] = {}; // true:started (process) / false:stopped (skip)
boost::lockfree::spsc_queue<spsc_queue_item, boost::lockfree::capacity<2048> > shared_queue;
}
void producer_thread() {
using namespace boost;
namespace stats = queueing::statistics;
// generate random data packets or start/stop filter events
using queueing::spsc_queue_item;
mt19937 engine;
auto gen_srce = bind(uniform_int<uint8_t>(0, NUM_SOURCES-1), ref(engine));
auto gen_data = [&] {
std::vector<char> v;
std::generate_n(back_inserter(v), engine()%1024, bind(uniform_int<uint8_t>{}, ref(engine)));
return v;
};
enum kind_t { knd_data, knd_start, knd_stop };
auto gen_kind = bind(uniform_int<uint8_t>(knd_data, knd_stop), ref(engine));
queueing::data_t const empty {};
//
struct event_t { kind_t kind; spsc_queue_item item; };
auto gen_event = [&] {
auto kind = static_cast<kind_t>(gen_kind());
return event_t {
kind,
spsc_queue_item {
gen_srce(),
kind == knd_data? gen_data() : empty
}
};
};
// now that we can easily generate queue items, let's do so in a loop
while (!queueing::shutdown_flag) {
auto evt = gen_event();
std::this_thread::sleep_for(std::chrono::nanoseconds(engine()%102400));
switch(evt.kind) {
case knd_data:
stats::events::occurred++;
if (queueing::shared_queue.push(evt.item)) {
stats::packets::queued++;
} else {
stats::packets::dropped++;
}
break;
case knd_start:
{
bool expected = false;
if (queueing::source_enabled[evt.item.ID].compare_exchange_weak(expected, true))
std::cout << "+";// << static_cast<int>(evt.item.ID);
}
break;
case knd_stop:
{
bool expected = true;
if (queueing::source_enabled[evt.item.ID].compare_exchange_weak(expected, false))
std::cout << "-";// << static_cast<int>(evt.item.ID);
}
break;
}
}
}
void fake_process(queueing::spsc_queue_item const& item) {
// pretend it takes time proportional to the amount of data
std::this_thread::sleep_for(std::chrono::microseconds(item.data.size()));
queueing::statistics::packets::processed++;
}
void consumer_thread() {
namespace stats = queueing::statistics;
queueing::spsc_queue_item item;
auto consume_pending = [&] {
while (queueing::shared_queue.pop(item)) {
if (queueing::source_enabled[item.ID])
fake_process(item); // if filtering started, process
else
stats::packets::skipped[item.ID]++; // if filtering stopped, skip
}
};
while (!queueing::shutdown_flag) {
consume_pending();
stats::idle_cycles++;
}
consume_pending(); // drain any remaining queued items, to avoid race with shutdown_flag
}
#include <cassert>
int main() {
using namespace std;
// check no source_enabled flags are set at start
assert(0 == count(begin(queueing::source_enabled), end(queueing::source_enabled), true));
auto producer = thread(producer_thread);
auto consumer = thread(consumer_thread);
this_thread::sleep_for(chrono::seconds(1));
queueing::shutdown_flag = true;
if (producer.joinable()) producer.join();
if (consumer.joinable()) consumer.join();
queueing::statistics::report();
}

C++11: How to implement fast, lightweight, and fair synchronized resource access

Question
What can I do to get a locking mechanism that provides minimal and stable latency while guaranteeing that a thread cannot reacquire a resource before another thread has acquired and released it?
The desirability of answers to this question are ranked as follows:
Some combination of built-in C++11 features that work in MinGW on Windows 7 (note that the <thread> and <mutex> libraries do not work on a Windows platform)
Some combination of Windows API features
A modification to the FairLock listed below, my own attempt at implementing such a mechanism
Some features provided by a free, open-source library that does not require a .configure/make/make install process, (getting that to work in MSYS is more of an adventure than I care for)
Background
I am writing an application which is effectively a multi-stage producer/consumer. One thread generates input consumed by another thread, which produces output consumed by yet another thread. The application uses pairs of buffers so that, after an initial delay, all threads can work nearly simultaneously.
Since I am writing a Windows 7 application, I had been using CriticalSections to guard the buffers. The problem with using CriticalSections (or, so far as I can tell, any other Windows or C++11-built-in synchronization object) is that it does not allow for any provision that a thread that just released a lock cannot reacquire it until another thread has done so first. Because of this, many of my test drivers for the middle thread (the Encoder) never gave the Encoder a chance to acquire the test input buffers and completed without having tested them. The end result was a ridiculous process of trying to determine an artificial wait time that stochastically worked for my machine.
Since the structure of my application requires that each stage waits for the other stage to have acquired, finished using, and released the necessary buffers for getting to use the buffer again, I need, for lack of a better term, a fair locking mechanism. I took a crack at writing one (the source code is provided below). In testing, this FairLock allows my test driver to run my Encoder at the same speeds that I was able to achieve using the CriticalSection maybe 60% of the runs. The other 40% of the runs take anywhere between 10 to 100 ms longer, which is not acceptable for my application.
FairLock
// FairLock.hpp
#ifndef FAIRLOCK_HPP
#define FAIRLOCK_HPP
#include <atomic>
using namespace std;
class FairLock {
private:
atomic_bool owned {false};
atomic<DWORD> lastOwner {0};
public:
FairLock(bool owned);
bool inline hasLock() const;
bool tryLock();
void seizeLock();
void tryRelease();
void waitForLock();
};
#endif
// FairLock.cpp
#include <windows.h>
#include "FairLock.hpp"
#define ID GetCurrentThreadId()
FairLock::FairLock(bool owned) {
if (owned) {
this->owned = true;
this->lastOwner = ID;
} else {
this->owned = false;
this->lastOwner = 0;
}
}
bool inline FairLock::hasLock() const {
return owned && lastOwner == ID;
}
bool FairLock::tryLock() {
bool success = false;
DWORD id = ID;
if (owned) {
success = lastOwner == id;
} else if (
lastOwner != id &&
owned.compare_exchange_strong(success, true)
) {
lastOwner = id;
success = true;
} else {
success = false;
}
return success;
}
void FairLock::seizeLock() {
bool success = false;
DWORD id = ID;
if (!(owned && lastOwner == id)) {
while (!owned.compare_exchange_strong(success, true)) {
success = false;
}
lastOwner = id;
}
}
void FairLock::tryRelease() {
if (hasLock()) {
owned = false;
}
}
void FairLock::waitForLock() {
bool success = false;
DWORD id = ID;
if (!(owned && lastOwner == id)) {
while (lastOwner == id); // spin
while (!owned.compare_exchange_strong(success, true)) {
success = false;
}
lastOwner = id;
}
}
EDIT
DO NOT USE THIS FairLock CLASS; IT DOES NOT GUARANTEE MUTUAL EXCLUSION!
I reviewed the above code to compare it against The C++ Programming Language: 4th Edition text I had not read carefully and what CouchDeveloper's recommended Synchronous Queue. I realized that there are several sequences in which the thread that just released the FairLock can be tricked into thinking it still owns it. All it takes is interleaving instructions as follows:
New owner: set owned to true
Old owner: is owned true? yes
Old owner: am I the last owner? yes
New owner: set me as the last owner
At this point, the old and new owners both enter their critical sections.
I am considering whether this problem has a solution and whether it is worth attempting to solve this at all. In the meantime, don't use this unless you see a fix.
I would implement this in C++11 using a condition_variable-per-thread setup so that I could choose exactly which thread to wake up when (Live demo at Coliru):
class FairMutex {
private:
class waitnode {
std::condition_variable cv_;
waitnode* next_ = nullptr;
FairMutex& fmtx_;
public:
waitnode(FairMutex& fmtx) : fmtx_(fmtx) {
*fmtx.tail_ = this;
fmtx.tail_ = &next_;
}
~waitnode() {
for (waitnode** p = &fmtx_.waiters_; *p; p = &(*p)->next_) {
if (*p == this) {
*p = next_;
if (!next_) {
fmtx_.tail_ = &fmtx_.waiters_;
}
break;
}
}
}
void wait(std::unique_lock<std::mutex>& lk) {
while (fmtx_.held_ || fmtx_.waiters_ != this) {
cv_.wait(lk);
}
}
void notify() {
cv_.notify_one();
}
};
waitnode* waiters_ = nullptr;
waitnode** tail_ = &waiters_;
std::mutex mtx_;
bool held_ = false;
public:
void lock() {
auto lk = std::unique_lock<std::mutex>{mtx_};
if (held_ || waiters_) {
waitnode{*this}.wait(lk);
}
held_ = true;
}
bool try_lock() {
if (mtx_.try_lock()) {
std::lock_guard<std::mutex> lk(mtx_, std::adopt_lock);
if (!held_ && !waiters_) {
held_ = true;
return true;
}
}
return false;
}
void unlock() {
std::lock_guard<std::mutex> lk(mtx_);
held_ = false;
if (waiters_ != nullptr) {
waiters_->notify();
}
}
};
FairMutex models the Lockable concept so it can be used like any other standard library mutex type. Put simply, it achieves fairness by inserting waiters into a list in arrival order, and passing the mutex to the first waiter in the list when unlocking.
If it's useful:
This demonstrates *) an implementation of a "synchronous queue" using semaphores as synchronization primitives.
Note: the actually implementation uses semaphores implemented with GCD (Grand Central Dispatch):
using gcd::mutex;
using gcd::semaphore;
// A blocking queue in which each put must wait for a get, and vice
// versa. A synchronous queue does not have any internal capacity,
// not even a capacity of one.
template <typename T>
class simple_synchronous_queue {
public:
typedef T value_type;
enum result_type {
OK = 0,
TIMEOUT_NOT_DELIVERED = -1,
TIMEOUT_NOT_PICKED = -2,
TIMEOUT_NOTHING_OFFERED = -3
};
simple_synchronous_queue()
: sync_(0), send_(1), recv_(0)
{
}
void put(const T& v) {
send_.wait();
new (address()) T(v);
recv_.signal();
sync_.wait();
}
result_type put(const T& v, double timeout) {
if (send_.wait(timeout)) {
new (storage_) T(v);
recv_.signal();
if (sync_.wait(timeout)) {
return OK;
}
else {
return TIMEOUT_NOT_PICKED;
}
}
else {
return TIMEOUT_NOT_DELIVERED;
}
}
T get() {
recv_.wait();
T result = *address();
address()->~T();
sync_.signal();
send_.signal();
return result;
}
std::pair<result_type, T> get(double timeout) {
if (recv_.wait(timeout)) {
std::pair<result_type, T> result =
std::pair<result_type, T>(OK, *address());
address()->~T();
sync_.signal();
send_.signal();
return result;
}
else {
return std::pair<result_type, T>(TIMEOUT_NOTHING_OFFERED, T());
}
}
private:
using storage_t = typename std::aligned_storage<sizeof(T), std::alignment_of<T>::value>::type;
T* address() { 
return static_cast<T*>(static_cast<void*>(&storage_));
}
storage_t storage_;
semaphore sync_;
semaphore send_;
semaphore recv_;
};
*) demonstrates: be carefully about potential issues, could be improved, etc. ... ;)
I accepted CouchDeveloper's answer since it pointed me down the right path. I wrote a Windows-specific C++11 implementation of a synchronous queue, and added this answer so that others could consider/use it if they so choose.
// SynchronousQueue.hpp
#ifndef SYNCHRONOUSQUEUE_HPP
#define SYNCHRONOUSQUEUE_HPP
#include <atomic>
#include <exception>
#include <windows>
using namespace std;
class CouldNotEnterException: public exception {};
class NoPairedCallException: public exception {};
template typename<T>
class SynchronousQueue {
private:
atomic_bool valueReady {false};
CRITICAL_SECTION getCriticalSection;
CRITICAL_SECTION putCriticalSection;
DWORD wait {0};
HANDLE getSemaphore;
HANDLE putSemaphore;
const T* address {nullptr};
public:
SynchronousQueue(DWORD waitMS): wait {waitMS}, address {nullptr} {
initializeCriticalSection(&getCriticalSection);
initializeCriticalSection(&putCriticalSection);
getSemaphore = CreateSemaphore(nullptr, 0, 1, nullptr);
putSemaphore = CreateSemaphore(nullptr, 0, 1, nullptr);
}
~SynchronousQueue() {
EnterCriticalSection(&getCriticalSection);
EnterCriticalSection(&putCriticalSection);
CloseHandle(getSemaphore);
CloseHandle(putSemaphore);
DeleteCriticalSection(&putCriticalSection);
DeleteCriticalSection(&getCriticalSection);
}
void put(const T& value) {
if (!TryEnterCriticalSection(&putCriticalSection)) {
throw CouldNotEnterException();
}
ReleaseSemaphore(putSemaphore, (LONG) 1, nullptr);
if (WaitForSingleObject(getSemaphore, wait) != WAIT_OBJECT_0) {
if (WaitForSingleObject(putSemaphore, 0) == WAIT_OBJECT_0) {
LeaveCriticalSection(&putCriticalSection);
throw NoPairedCallException();
} else {
WaitForSingleObject(getSemaphore, 0);
}
}
address = &value;
valueReady = true;
while (valueReady);
LeaveCriticalSection(&putCriticalSection);
}
T get() {
if (!TryEnterCriticalSection(&getCriticalSection)) {
throw CouldNotEnterException();
}
ReleaseSemaphore(getSemaphore, (LONG) 1, nullptr);
if (WaitForSingleObject(putSemaphore, wait) != WAIT_OBJECT_0) {
if (WaitForSingleObject(getSemaphore, 0) == WAIT_OBJECT_0) {
LeaveCriticalSection(&getCriticalSection);
throw NoPairedCallException();
} else {
WaitForSingleObject(putSemaphore, 0);
}
}
while (!valueReady);
T toReturn = *address;
valueReady = false;
LeaveCriticalSection(&getCriticalSection);
return toReturn;
}
};
#endif

Resources