Simple multithreading mutex example is incorrect - windows

I expect to get the numbers 0 to 4 in random order, but instead I get unsynchronized output.
What am I doing wrong?
#include <iostream>
#include <windows.h>
#include <process.h>
using namespace std;
void addQuery(void *v );
HANDLE ghMutex;
// Question code (kept as posted): starts five addQuery threads, then waits for
// all of them and prints the wait result.
int main()
{
HANDLE hs[5];
ghMutex = CreateMutex( NULL, FALSE, NULL);
for(int i=0; i<5; ++i)
{
// Every thread is handed the address of the same loop variable i, which this
// loop keeps incrementing while the threads are starting up.
hs[i] = (HANDLE)_beginthread(addQuery, 0, (void *)&i);
if (hs[i] == NULL)
{
printf("error\n"); return -1;
}
}
// NOTE(review): the order in which the two printf arguments below are
// evaluated is unspecified, so GetLastError() may run before the wait — confirm.
printf("WaitForMultipleObjects return: %d error: %d\n",
(DWORD)WaitForMultipleObjects(5, hs, TRUE, INFINITE), GetLastError());
return 0;
}
// Question code (kept as posted): thread entry point that prints the int that
// v points to while holding ghMutex.
void addQuery(void *v )
{
// The pointed-to value is read before the mutex is acquired, so the mutex
// only serializes the output below, not this read.
int t = *((int*)v);
WaitForSingleObject(ghMutex, INFINITE);
cout << t << endl;
ReleaseMutex(ghMutex);
_endthread();
}

You have to read and write the shared variable inside the lock. You are reading it outside of the lock and thus rendering the lock irrelevant.
But even that's not enough since your shared variable is a loop variable that you are writing to without protection of the lock. A much better example would run like this:
#include <iostream>
#include <windows.h>
#include <process.h>
using namespace std;
void addQuery(void *v );
HANDLE ghMutex;
int counter = 0;
int main()
{
HANDLE hs[5];
ghMutex = CreateMutex( NULL, FALSE, NULL);
for(int i=0; i<5; ++i)
{
hs[i] = (HANDLE)_beginthread(addQuery, 0, NULL);
if (hs[i] == NULL)
{
printf("error\n"); return -1;
}
}
printf("WaitForMultipleObjects return: %d error: %d\n",
(DWORD)WaitForMultipleObjects(5, hs, TRUE, INFINITE), GetLastError());
return 0;
}
// Thread entry point: while holding ghMutex, prints the current value of the
// shared counter and advances it by one. The argument v is not used.
void addQuery(void *v)
{
    WaitForSingleObject(ghMutex, INFINITE);
    // Take the ticket and bump the counter in one step, still under the mutex.
    const int ticket = counter++;
    cout << ticket << endl;
    ReleaseMutex(ghMutex);
    _endthread();
}
If you can, use a critical section rather than a mutex because they are simpler to use and more efficient. But they have the same semantics in that they only protect code inside the locking block.
Note: Jerry has pointed out some other problems, but I've concentrated on the high-level threading and serialization concerns.

Your synchronization has some issues as you want to get numbers from 0 to 4 in random order.
The problem is that the variable i is written outside the lock, so every time addQuery runs on a thread it reads whatever value i has at that moment. That is why you may see 5 printed by all of the threads.
So, here is my fix for this scenario. Instead of passing the address of the variable i as the argument of addQuery, you should pass its value. Hope it helps:
#include <iostream>
#include <windows.h>
#include <process.h>
using namespace std;
void addQuery(void *v);
HANDLE ghMutex;
int main()
{
HANDLE hs[5];
ghMutex = CreateMutex(NULL, FALSE, NULL);
for (int i = 0; i<5; ++i)
{
hs[i] = (HANDLE)_beginthread(addQuery, 0, (void *)i);
if (hs[i] == NULL)
{
printf("error\n"); return -1;
}
}
printf("WaitForMultipleObjects return: %d error: %d\n",
(DWORD)WaitForMultipleObjects(5, hs, TRUE, INFINITE), GetLastError());
return 0;
}
// Thread entry point: prints the integer that the creator packed into the
// pointer argument, holding ghMutex while writing to cout.
//   v - not an address; it carries an int value converted to void*.
void addQuery(void *v)
{
    // Convert through a pointer-sized integer: casting a pointer straight to
    // int is rejected or truncates where pointers are wider than int.
    const int t = static_cast<int>(reinterpret_cast<intptr_t>(v));
    WaitForSingleObject(ghMutex, INFINITE);
    cout << t << endl;
    ReleaseMutex(ghMutex);
    _endthread();
}

Related

C++11 std::threads not exiting

Could you please check the following code which is not exiting even after condition becomes false?
I'm trying to print the numbers 1 to 10 from the first thread, 11 to 20 from the second thread, and so on, using 10 threads. When count reaches 100, my program should terminate cleanly by ending all threads. But that is not happening: after printing, it gets stuck, and I don't understand why.
Is there any data race? Please guide.
#include<iostream>
#include<vector>
#include<thread>
#include<mutex>
#include<condition_variable>
std::mutex mu;
int count=1;
bool isDone = true;
std::condition_variable cv;
// Question code (kept as posted): thread `tid` prints ten consecutive values
// of the shared count once count/10 equals its id, then notifies the others.
void Print10(int tid)
{
std::unique_lock<std::mutex> lock(mu);
while(isDone){
// Blocks until count/10 == tid. count only ever increases below, so after
// this thread has printed its ten values the predicate cannot hold for it again.
cv.wait(lock,[tid](){ return ((count/10)==tid);});
for(int i=0;i<10;i++)
std::cout<<"tid="<<tid<<" count="<<count++<<"\n";
// isDone stays true until count reaches 100, so a thread that finishes
// earlier loops back into the wait above.
isDone = count<100;//!(count == (((tid+1)*10)+1));
std::cout<<"tid="<<tid<<" isDone="<<isDone<<"\n";
cv.notify_all();
}
}
// Launches ten Print10 workers with ids 0..9 and joins each of them.
int main()
{
    std::vector<std::thread> workers;
    for (int id = 0; id < 10; ++id)
        workers.emplace_back(Print10, id);

    for (std::thread &worker : workers)
    {
        if (!worker.joinable())
            continue;
        worker.join();
    }
}
I believe the following code should work for you
#include<iostream>
#include<vector>
#include<thread>
#include<mutex>
#include<condition_variable>
using namespace std;
std::mutex mu;               // guards count and serializes the output
int count=1;                 // next number to be printed
bool isDone = true;          // not read by Print10; kept as an existing file-level name
std::condition_variable cv;  // signalled each time a block of ten has been printed

// Prints ten consecutive values of the shared counter on behalf of thread
// `tid`, but only once it is that thread's turn (count / 10 == tid).
//   tid - zero-based worker id; worker 0 prints 1..10, worker 1 prints 11..20, ...
void Print10(int tid)
{
    std::unique_lock<std::mutex> lock(mu);

    // `::count` is spelled out because the using-directive above can make the
    // unqualified name ambiguous with std::count.
    cv.wait(lock, [tid] { return ::count / 10 == tid; });

    for (int i = 0; i < 10; i++)
        std::cout << "tid=" << tid << " count=" << ::count++ << "\n";

    // All waiters share one condition variable but wait for different turns.
    // notify_one could wake a thread whose turn has not come, which would go
    // back to sleep while the thread that should run is never woken.
    cv.notify_all();
}
// Starts ten Print10 workers (ids 0..9), waits for every one of them and
// returns 0.
int main()
{
    std::vector<std::thread> pool;
    int nextId = 0;
    while (nextId < 10)
    {
        pool.emplace_back(Print10, nextId);
        ++nextId;
    }

    for (auto it = pool.begin(); it != pool.end(); ++it)
    {
        if (it->joinable())
            it->join();
    }
    return 0;
}

no data while cpu profiling - visual studio

i tried to profile performance of my code, and thats what i get:
i took a code from microsoft docs from topic about profiling:
#include <functional>
#include <iostream>
#include <limits>
#include <mutex>
#include <random>
#include <thread>
#include <vector>
//.cpp file code:
// Bounds for the number of busy-loop iterations performed by getNumber().
static constexpr int MIN_ITERATIONS = std::numeric_limits<int>::max() / 1000;
static constexpr int MAX_ITERATIONS = MIN_ITERATIONS + 10000;
// Sum of the iteration counts drawn by all getNumber() calls so far.
long long m_totalIterations = 0;
// Protects m_totalIterations.
std::mutex m_totalItersLock;

// Draws an iteration count in [MIN_ITERATIONS, MAX_ITERATIONS], adds it to
// m_totalIterations under m_totalItersLock, then keeps drawing that many more
// numbers to burn CPU time. Returns the last number drawn.
int getNumber()
{
    // Default-constructed engine, so each call starts from the same state.
    std::mt19937 engine;
    std::uniform_int_distribution<int> distribution(MIN_ITERATIONS, MAX_ITERATIONS);
    auto draw = [&distribution, &engine] { return distribution(engine); };

    const int iterations = draw();
    {
        std::lock_guard<std::mutex> guard(m_totalItersLock);
        m_totalIterations += iterations;
    }

    // Pure busy work: only the final draw is kept.
    int last = 0;
    for (int remaining = iterations; remaining > 0; --remaining)
        last = draw();
    return last;
}
// Worker body: announces itself on std::wcout and then runs getNumber(),
// whose result is not used.
void doWork()
{
    std::wcout << L"The doWork function is running on another thread." << std::endl;
    const int lastValue = getNumber();
    static_cast<void>(lastValue);
}
// Starts ten threads running doWork, printing a line after each start, then
// joins all of them. Returns 0.
// Requires <thread> and <vector>, which this function uses directly.
int main()
{
    const int kThreadCount = 10;
    std::vector<std::thread> threads;
    threads.reserve(kThreadCount);  // one allocation instead of repeated growth
    for (int i = 0; i < kThreadCount; ++i) {
        threads.emplace_back(doWork);  // construct the thread in place
        std::cout << "The Main() thread calls this after starting the new thread" << std::endl;
    }
    for (auto& thread : threads) {
        thread.join();
    }
    return 0;
}
, and still I'm getting different output (or no output, actually). Can someone help me, please? I'm trying to do this in Visual Studio Community 2019.

std::atomic on struct bit-fields

I'm modifying some existing open source library and there is a struct (say named as Node) containing bit-fields, e.g.
// Question code (kept as posted). It does not compile: the question quotes
// the error "bit-field 'size' has non-integral type 'std::atomic<uint32_t>'".
struct Node {
std::atomic<uint32_t> size:30;
std::atomic<uint32_t> isnull:1;
};
To fit my needs, these fields need to be atomic so I was expecting to use std::atomic for this and faced compile time error:
bit-field 'size' has non-integral type 'std::atomic<uint32_t>'
According to documentation, there is a restricted set of types which can be used for std::atomic
Can anyone advise/have idea on how to get functionality of atomic fields with the minimum impact to the existing source code?
Thanks in advance!
I used an unsigned short as an example below.
This is less ideal, but you could sacrifice 8 bits and insert a std::atomic_flag in the bit field with a union. Unfortunately, std::atomic_flag type is a std::atomic_bool type.
This structure can be spin locked manually every time you access it. However, the code should have minimal performance degradation (unlike creating, locking, unlocking, destroying with a std::mutex and std::unique_lock).
This code may waste about 10-30 clock cycles to enable low cost multi-threading.
PS. Make sure the reserved 8 bits below are not messed up by the endian structure of the processor. You may have to define at the end for big-endian processors. I only tested this code on an Intel CPU (always little-endian).
#include <iostream>
#include <atomic>
#include <thread>
// Overlays a lock flag and the bit-field payload in one union, so both members
// share the same storage.
// NOTE(review): `access` and `bits` alias each other; whether `reserved` really
// covers the flag depends on the flag's size and the target's byte order, as
// the accompanying answer itself warns — confirm on every target before reuse.
union Data
{
std::atomic_flag access = ATOMIC_FLAG_INIT; // one byte according to the answer — TODO confirm per platform
struct
{
typedef unsigned short ushort;
ushort reserved : 8; // presumably the bits that overlay `access` — TODO confirm
ushort count : 4;
ushort ready : 1;
ushort unused : 3;
} bits;
};
// Busy-waiting lock guard over a caller-owned std::atomic_flag.
//
// The guard remembers whether it currently holds the flag, and only a guard
// that holds it will clear it. Without that bookkeeping, destroying or
// unlocking a guard that never acquired the flag (or had already released it)
// would clear a flag held by a different guard and break mutual exclusion.
//
// Guards are not copyable: a copy would believe it owns the same acquisition.
class SpinLock
{
public:
    // access - flag to spin on; must outlive this guard.
    // locked - when true (default) the constructor blocks until the flag is acquired.
    inline SpinLock(std::atomic_flag &access, bool locked=true)
    : mAccess(access), mOwned(false)
    {
        if(locked) lock();
    }

    SpinLock(const SpinLock &) = delete;
    SpinLock &operator=(const SpinLock &) = delete;

    // Releases the flag if, and only if, this guard still holds it.
    inline ~SpinLock()
    {
        unlock();
    }

    // Spins until the flag is acquired. Must not be called while this guard
    // already holds the flag (it would spin forever).
    inline void lock()
    {
        while (mAccess.test_and_set(std::memory_order_acquire))
        {
        }
        mOwned = true;
    }

    // Tries to acquire the flag, retrying up to `attempts` extra times.
    // Returns true when the flag was acquired, false otherwise.
    inline bool try_lock(unsigned int attempts=0)
    {
        while(mAccess.test_and_set(std::memory_order_acquire))
        {
            if (! attempts) return false;
            -- attempts;
        }
        mOwned = true;
        return true;
    }

    // Releases the flag if this guard holds it; otherwise does nothing.
    inline void unlock()
    {
        if (!mOwned) return;
        mOwned = false;
        mAccess.clear(std::memory_order_release);
    }

private:
    std::atomic_flag &mAccess;
    bool mOwned;  // true while this guard holds mAccess
};
// Worker body: under the spin lock stored in d.access, increments d.bits.count
// when d.bits.ready is set and then toggles d.bits.ready.
//   i - not used.
//   d - shared data; its `access` flag serializes the update.
void aFn(int &i, Data &d)
{
    (void)i;

    // Scoped guard: the constructor acquires the flag and the destructor
    // releases it exactly once. A manual unlock() followed by the destructor
    // would clear the flag a second time, possibly while another thread holds it.
    SpinLock lock(d.access);
    if (d.bits.ready)
    {
        ++d.bits.count;
    }
    d.bits.ready ^= true; // alternate each time
}
// Runs aFn on eight threads against one shared Data object, joins them and
// prints the object's size and final count.
int main(void)
{
    Data f;
    f.bits.count = 0;
    f.bits.ready = true;

    // Thread objects live in a plain array: no new/delete, and nothing leaks
    // if thread creation throws part-way through.
    const int kThreadCount = 8;
    std::thread workers[kThreadCount];
    for (int i = 0; i < kThreadCount; ++i)
    {
        workers[i] = std::thread([&f] (int i) { aFn(i, f); }, i);
    }
    for (std::thread &worker : workers)
    {
        worker.join();
    }

    std::cout << "size: " << sizeof(f) << std::endl;
    std::cout << "count: " << f.bits.count << std::endl;
}
The result is as expected...
size: 2
count: 4

ReadFile/WriteFile crashes

Something is wrong with the following ReadFile/WriteFile code.
I need to copy a file using these functions (yes, it would be better to use CopyFile, but for now I need them), but it crashes in the read/write loop.
What can be wrong?
PS C:\Users\user\Documents\SysLab1\dist\Debug\MinGW-Windows> g++ --version
g++.exe (x86_64-posix-sjlj-rev0, Built by MinGW-W64 project) 4.8.3
I used next code :
#include <windows.h>
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#define BLOCK_SIZE 1024
uint32_t copy_c(char* source, char* destination) {...}
// Question code (kept as posted): copies `source` to `destination` in
// BLOCK_SIZE chunks with ReadFile/WriteFile.
// Returns 0 when `result` ends up true, otherwise the value of GetLastError().
uint32_t copy_api_readwrite(char* source, char* destination) {
bool result;
HANDLE input = CreateFile(source, GENERIC_READ, 0, NULL,
OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
if (input!=INVALID_HANDLE_VALUE) {
HANDLE output = CreateFile(destination, GENERIC_WRITE, 0, NULL,
CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
if(output!=INVALID_HANDLE_VALUE) {
DWORD readed;
char block[BLOCK_SIZE];
// NOTE(review): the loop only ends when ReadFile reports failure; verify how
// ReadFile signals end of file for a synchronous handle (return value vs. a
// zero byte count) — `readed` is never tested here.
while(ReadFile(input, block, BLOCK_SIZE * sizeof(char), &readed, NULL)>0) {
// Both the bytes-written pointer and the OVERLAPPED pointer are NULL here,
// and the return value of WriteFile is ignored.
WriteFile(output, block, readed, NULL, NULL);
}
if(GetLastError()==ERROR_HANDLE_EOF) {
result = true;
}
else {
result = false;
}
CloseHandle(output);
}
else {
result = false;
}
CloseHandle(input);
}
else {
// Opening the source failed, yet result is set to true, so the function
// returns 0 on this path.
result = true;
}
if(result) {
return 0;
}
else {
return GetLastError();
}
// Never reached: both branches above return.
return result;
}
uint32_t copy_api(char* source, char* destination) {...}
#define COPY_READWRITE
#ifdef COPY_C
#define COPY copy_c
#else
#ifdef COPY_READWRITE
#define COPY copy_api_readwrite
#else
#ifdef COPY_API
#define COPY copy_api
#endif
#endif
#endif
// Command-line entry point: copies argv[1] to argv[2] with the routine that
// the COPY macro selects.
// Exit codes: 0 on success, 1 when fewer than two paths were given, 2 when
// the copy routine returned a non-zero status.
int main(int argc, char** argv) {
    const bool enoughArguments = !(argc < 3);
    if (!enoughArguments) {
        std::cout << "Bad command line arguments\n";
        return 1;
    }

    const uint32_t status = COPY(argv[1], argv[2]);
    if (status != 0) {
        std::cout << "Error : " << status << "\n";
        return 2;
    }

    std::cout << "Success\n";
    return 0;
}
From the documentation of WriteFile:
lpNumberOfBytesWritten
This parameter can be NULL only when the lpOverlapped parameter is not NULL.
You are not meeting that requirement. You will have to pass the address of a DWORD variable into which the number of bytes written will be stored.
Another mistake is in the test of the return value of ReadFile. Instead of testing ReadFile(...) > 0 you must test ReadFile(...) != 0, again as described in the documentation.
You don't check the return value of WriteFile which I also would regard as a mistake.
By definition, sizeof(char) == 1. It is idiomatic to make use of that.
When dealing with binary data, as you are, again it is idiomatic to use unsigned char.
More idiom. Write the assignment of result like this:
result = (GetLastError() == ERROR_HANDLE_EOF);

After insmod I am not able to see the device entry in /proc/devices

After performing the command "insmod demo_device" the modules listed in /proc/modules
**demo_device 2528 0 - Live 0xe02da000**
fp_indicators 5072 1 - Live 0xe02d2000 (P)
screader 22672 1 - Live 0xe02c5000 (P)
icamdescrambler 12912 0 - Live 0xe02b2000 (P)
icamemmfilter 16208 0 - Live 0xe02a4000 (P)
icamecmfilter 14992 0 - Live 0xe0294000 (P)
but "(P)" is not avail after that.
After firing the command cat /proc/devices the device "demo_device" is not listed there.
So my question is that: what (P) stands in (cat /proc/modules) and what could be the reason that the device is not listed in (cat /proc/devices).
Thanks in Advance !!
The source code is as:
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/version.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/errno.h>
#include <asm/uaccess.h>
#include "query_ioctl.h"
#define FIRST_MINOR 0
#define MINOR_CNT 1
static dev_t dev;
static struct cdev c_dev;
static struct class *cl;
static int status = 1, dignity = 3, ego = 5;
/* open handler referenced by query_fops: does nothing and returns 0. */
static int my_open(struct inode *i, struct file *f)
{
return 0;
}
/* release handler referenced by query_fops: does nothing and returns 0. */
static int my_close(struct inode *i, struct file *f)
{
return 0;
}
/*
 * ioctl handler for the three QUERY_* commands operating on the module's
 * status/dignity/ego variables.
 *
 *   QUERY_GET_VARIABLES - copies the current values to the user buffer at arg
 *   QUERY_CLR_VARIABLES - resets all three values to 0
 *   QUERY_SET_VARIABLES - loads all three values from the user buffer at arg
 *
 * Returns 0 on success, -EACCES when a user-space copy fails and -EINVAL for
 * any other command. The signature depends on the kernel version.
 */
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35))
static int my_ioctl(struct inode *i, struct file *f, unsigned int cmd, unsigned long arg)
#else
static long my_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
#endif
{
    query_arg_t q;
    query_arg_t *user_arg = (query_arg_t *)arg;

    if (cmd == QUERY_GET_VARIABLES)
    {
        q.status = status;
        q.dignity = dignity;
        q.ego = ego;
        if (copy_to_user(user_arg, &q, sizeof(query_arg_t)))
            return -EACCES;
        return 0;
    }

    if (cmd == QUERY_CLR_VARIABLES)
    {
        status = 0;
        dignity = 0;
        ego = 0;
        return 0;
    }

    if (cmd == QUERY_SET_VARIABLES)
    {
        if (copy_from_user(&q, user_arg, sizeof(query_arg_t)))
            return -EACCES;
        status = q.status;
        dignity = q.dignity;
        ego = q.ego;
        return 0;
    }

    return -EINVAL;
}
/*
 * File operations of the character device: open/release are the stubs above
 * and the ioctl entry points to my_ioctl. The ioctl member that is filled in
 * depends on the kernel version, matching my_ioctl's signature.
 */
static struct file_operations query_fops =
{
.owner = THIS_MODULE,
.open = my_open,
.release = my_close,
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35))
.ioctl = my_ioctl
#else
.unlocked_ioctl = my_ioctl
#endif
};
/*
 * Module init: reserves a character device region, registers the cdev with
 * query_fops, creates the "char" class and the "demo" device node.
 *
 * Only one region is reserved, through alloc_chrdev_region(). Registering a
 * fixed region first and then allocating a second one overwrote `dev`, so the
 * first region could never be released by query_ioctl_exit().
 *
 * Every failure path undoes the steps that already succeeded, in reverse
 * order. Returns 0 on success or a negative errno.
 */
static int __init query_ioctl_init(void)
{
    int ret;
    struct device *dev_ret;

    printk("Before calling alloc\n");
    ret = alloc_chrdev_region(&dev, FIRST_MINOR, MINOR_CNT, "demo_device");
    if (ret)
        return ret;
    printk("After alloc %d %d\n", ret, dev);

    cdev_init(&c_dev, &query_fops);
    ret = cdev_add(&c_dev, dev, MINOR_CNT);
    if (ret < 0)
        goto err_region;  /* the region is already reserved at this point */
    printk("After cdev_add\n");

    cl = class_create(THIS_MODULE, "char");
    if (IS_ERR(cl))
    {
        ret = PTR_ERR(cl);
        goto err_cdev;
    }
    printk("After class_create\n");

    dev_ret = device_create(cl, NULL, dev, NULL, "demo");
    if (IS_ERR(dev_ret))
    {
        ret = PTR_ERR(dev_ret);
        goto err_class;
    }
    printk("After device_create\n");
    return 0;

err_class:
    class_destroy(cl);
err_cdev:
    cdev_del(&c_dev);
err_region:
    unregister_chrdev_region(dev, MINOR_CNT);
    return ret;
}
/*
 * Module exit: tears down what query_ioctl_init() set up, in reverse order —
 * device node, class, cdev, then the device number region held in `dev`.
 */
static void __exit query_ioctl_exit(void)
{
device_destroy(cl, dev);
class_destroy(cl);
cdev_del(&c_dev);
unregister_chrdev_region(dev, MINOR_CNT);
}
module_init(query_ioctl_init);
module_exit(query_ioctl_exit);
MODULE_LICENSE("GPL");
And after inserting the module I am able to see these messages:
$insmod demo_device.ko
Before calling alloc
After alloc 0 217055232
After cdev_add
After class_create
After device_create
$
Make sure that Major Number of the device is not preoccupied by some other device file. use the following command to check the occupied Major Numbers
cat /proc/devices
Use the following code to capture initialization error in init function
/* Fragment for an init function: majorNumber and fops must be defined by the
 * surrounding module. A negative result from register_chrdev is treated as
 * failure and logged at KERN_ALERT level. */
int t=register_chrdev(majorNumber,"mydev",&fops);
if(t<0)
printk(KERN_ALERT "device registration failed.");
Use dmesg to look into kernel logs
Look at module_flags_taint() in kernel/module.c.
The 'P' flag merely indicated the other modules are proprietary. The reason your device doesn't show up in /proc/devices is probably because something is wrong with the initialisation, but we can't help you with that unless you show us code.
After performing make clean on the Linux/application source code and rebuilding it, it works. Now, after inserting the module, the corresponding entry is visible in the /proc/devices file :)

Resources