I have enabled CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC in the kernel configuration, and now I want to verify that the kernel really panics when a soft lockup occurs.
To try this, I have created the following module:
/*
 * Kernel thread body: logs once on entry, then loops forever, printing a
 * message and cubing a local value on every pass.  The loop never checks
 * kthread_should_stop(), so the trailing return is never reached.
 */
int thread_function(void *data)
{
	int product = 10;

	printk(KERN_INFO "Inside %s %s\n", STR_MOD, __func__);

	for (;;) {
		printk(KERN_INFO "Mutiplying...\n");
		product = product * product * product;
	}

	return product;
}
static int kernel_init(void)
{
data = 20;
printk(KERN_INFO"insmod %s\n",STR_MOD);
/*task = kthread_create(&thread_function,(void *)data,"SD");*/
task = kthread_run(&thread_function,(void *)data,"SD");
set_cpus_allowed(task, *cpumask_of(0));
printk(KERN_INFO"Kernel Thread : %s\n",task->comm);
return 0;
}
With this, CPU 0 is kept busy running the task and top reports ~99.9% usage,
but no soft lockup is reported.
So my question is: how can I trigger a soft lockup?
Related
I'm trying to format data sent over a USB UART with printf and it's giving me garbage. I can send a simple string and that works but anything I try to format gives junk. Looking through the code I think it has to do with my string not being in program space but I'm not sure.
Here is my main:
void main(void) {
CPU_PRESCALE(CPU_16MHz);
init_uart();
int degree = 0;
char buffer[50];
while(1) {
degree = (degree + 1) % 360;
send_str(PSTR("\n\nHello!!!\n\n"));
memset(buffer, 0, 50);
sprintf_P(buffer, PSTR("%d degrees\n"), degree);
send_str(buffer);
_delay_ms(20);
}
}
The output looks like this:
Hello!!!
����/�������(/����#Q��������
Hello!!!
����/�������(/����#Q��������
The USB UART code I found in a tutorial. The relevant parts look like this:
/*
 * Sends the NUL-terminated string at s over the USB serial link.  Every
 * byte is fetched with pgm_read_byte(), and transmission stops at the
 * first zero byte (which is not sent).
 */
void send_str(const char *s)
{
	char ch;

	for (ch = pgm_read_byte(s++); ch; ch = pgm_read_byte(s++))
		usb_serial_putchar(ch);
}
/*
 * Writes one byte, c, to the CDC transmit endpoint.
 *
 * Returns 0 once the byte has been written to UEDATX.  Returns -1 when
 * usb_configuration is zero, when the RWAL bit of UEINTX does not become
 * set before the timeout value is reached, or when an earlier call timed
 * out (transmit_previous_timeout) and RWAL is still clear.
 *
 * SREG is saved before each cli() and written back on every exit path and
 * between polls of RWAL, so the caller's interrupt state is preserved.
 */
int8_t usb_serial_putchar(uint8_t c)
{
uint8_t timeout, intr_state;
// if we're not online (enumerated and configured), error
if (!usb_configuration) return -1;
// interrupts are disabled so these functions can be
// used from the main program or interrupt context,
// even both in the same program!
intr_state = SREG;
cli();
// select the transmit endpoint before touching UEINTX / UEDATX
UENUM = CDC_TX_ENDPOINT;
// if we gave up due to timeout before, don't wait again
if (transmit_previous_timeout) {
if (!(UEINTX & (1<<RWAL))) {
// still not writable: restore interrupt state and fail immediately
SREG = intr_state;
return -1;
}
transmit_previous_timeout = 0;
}
// wait for the FIFO to be ready to accept data
// timeout is a uint8_t compared for equality against UDFNUML below
// (presumably the low byte of the USB frame number -- confirm in the datasheet)
timeout = UDFNUML + TRANSMIT_TIMEOUT;
while (1) {
// are we ready to transmit?
if (UEINTX & (1<<RWAL)) break;
// restore the saved interrupt state while we wait
SREG = intr_state;
// have we waited too long? This happens if the user
// is not running an application that is listening
if (UDFNUML == timeout) {
transmit_previous_timeout = 1;
return -1;
}
// has the USB gone offline?
if (!usb_configuration) return -1;
// get ready to try checking again
intr_state = SREG;
cli();
UENUM = CDC_TX_ENDPOINT;
}
// actually write the byte into the FIFO
UEDATX = c;
// if this completed a packet, transmit it now!
if (!(UEINTX & (1<<RWAL))) UEINTX = 0x3A;
// reload the flush timer after every byte written
transmit_flush_timer = TRANSMIT_FLUSH_TIMEOUT;
SREG = intr_state;
return 0;
}
I have implemented a custom storage interface in libtorrent as described in the help section here.
The storage_interface is working fine, but I can't figure out why readv is only called occasionally while downloading a torrent. As I understand it, the overridden virtual function readv should be called every time I call handle->read_piece in response to a piece_finished_alert, since it has to read the piece in order to produce the read_piece_alert.
Instead, the buffer is delivered in read_piece_alert without readv ever being called.
So the question is: why is readv called only occasionally, and why is it not called on every read_piece() call? Is my storage_interface perhaps wrong?
The code looks like this:
struct temp_storage : storage_interface
{
// Reads the data stored for `piece`, starting at byte `offset`, into the
// scatter list `bufs[0 .. num_bufs)`.
//
// `num_bufs` is the NUMBER of iovec entries, not a byte count: each entry
// describes its own destination (iov_base) and capacity (iov_len).  The
// buffers are filled in order until either all of them are full or the
// stored piece data runs out.
//
// Returns the number of bytes copied; 0 if nothing is stored for `piece`
// or `offset` lies at/after the end of the stored data.
virtual int readv(file::iovec_t const* bufs, int num_bufs
, int piece, int offset, int flags, storage_error& ec)
{
    std::map<int, std::vector<char> >::const_iterator i = m_file_data.find(piece);
    if (i == m_file_data.end()) return 0;

    int const available = int(i->second.size()) - offset;
    if (available <= 0) return 0;

    int copied = 0;
    for (int b = 0; b < num_bufs && copied < available; ++b)
    {
        int const capacity = int(bufs[b].iov_len);
        int const remaining = available - copied;
        int const chunk = capacity < remaining ? capacity : remaining;
        if (chunk <= 0) continue;

        // Copy into the memory the iovec points at (the original copied
        // over the address of the `bufs` parameter itself).
        memcpy(bufs[b].iov_base, &i->second[offset + copied], chunk);
        copied += chunk;
    }
    return copied;
}
// Stores the bytes described by the gather list `bufs[0 .. num_bufs)` for
// `piece`, starting at byte `offset`, growing the stored data if needed.
//
// `num_bufs` is the NUMBER of iovec entries, not a byte count: the payload
// is the concatenation of each entry's iov_len bytes at iov_base.
//
// Returns the total number of bytes written.
virtual int writev(file::iovec_t const* bufs, int num_bufs
, int piece, int offset, int flags, storage_error& ec)
{
    std::vector<char>& data = m_file_data[piece];

    // Total payload size across all buffers.
    int total = 0;
    for (int b = 0; b < num_bufs; ++b) total += int(bufs[b].iov_len);

    if (int(data.size()) < offset + total) data.resize(offset + total);

    int written = 0;
    for (int b = 0; b < num_bufs; ++b)
    {
        int const len = int(bufs[b].iov_len);
        if (len <= 0) continue;

        // Copy the payload the iovec points at (the original copied the
        // iovec descriptors themselves).
        std::memcpy(&data[offset + written], bufs[b].iov_base, len);
        written += len;
    }
    return written;
}
// Always answers false.
virtual bool has_any_file(storage_error& ec)
{
    return false;
}
virtual ...
virtual ...
}
Initialized with
// Storage factory: prints a trace line and returns a newly allocated
// temp_storage built from the file list in `params`.  The caller receives
// the raw pointer.
storage_interface* temp_storage_constructor(storage_params const& params)
{
    printf("NEW INTERFACE\n");
    storage_interface* const storage = new temp_storage(*params.files);
    return storage;
}
// Install the custom storage factory on p (presumably the add_torrent_params
// used to add the torrent -- confirm against the surrounding code).
p.storage = &temp_storage_constructor;
The function below sets up alerts and invokes read_piece on each completed piece.
while(true) {
std::vector<alert*> alerts;
s.pop_alerts(&alerts);
for (alert* i : alerts)
{
switch (i->type()) {
case read_piece_alert::alert_type:
{
read_piece_alert* p = (read_piece_alert*)i;
if (p->ec) {
// read_piece failed
break;
}
// piece buffer, size is provided without readv
// notification after invoking read_piece in piece_finished_alert
break;
}
case piece_finished_alert::alert_type: {
piece_finished_alert* p = (piece_finished_alert*)i;
p->handle.read_piece(p->piece_index);
// Once the piece is finished, we read it to obtain the buffer in read_piece_alert.
break;
}
default:
break;
}
}
Sleep(100);
}
I will answer my own question. As Arvid said in the comments: readv was not invoked because of caching. Setting settings_pack::use_read_cache to false will invoke readv always.
In an attempt to make a more usable version of the code I wrote for an answer to another question, I used a lambda function to process an individual unit. This is a work in progress. I've got the "client" syntax looking pretty nice:
// Run a for loop over the index range starting at 0 and ending at 100000000,
// split across 4 threads, calling doThing(i) for each index i.
parloop(4, 0, 100000000, [](int i) { doThing(i); });
However, I have an issue. Whenever I call the saved lambda, it takes up a ton of CPU time. doThing itself is an empty stub. If I just comment out the internal call to the lambda, then the speed returns to normal (4 times speedup for 4 threads). I'm using std::function to save the reference to the lambda.
My question is - Is there some better way that the stl library internally manages lambdas for large sets of data, that I haven't come across?
/// Parallel for-loop: calls `lambda(i)` exactly once for every index `i` in
/// the half-open range [rangeStart, rangeEnd), with the range divided into
/// contiguous chunks that are processed by up to `numThreads` threads.
///
/// The constructor does all the work: it starts the threads and joins them
/// before returning, so `parloop(4, 0, n, f);` is a complete, blocking loop.
///
/// Differences from a naive split that this implementation takes care of:
///  - chunks are offset by rangeStart (a range need not begin at 0);
///  - rangeEnd is always exclusive, whatever the thread count;
///  - chunk size uses integer ceiling division, so no index is lost to
///    float rounding on large ranges;
///  - a thread count below 1 is treated as 1;
///  - empty chunks (more threads than indices, or an empty range) start no
///    thread.
struct parloop
{
public:
    std::vector<std::thread> myThreads;
    int numThreads, rangeStart, rangeEnd;
    std::function<void (int)> lambda;

    /// @param _numThreads  number of worker threads (values < 1 become 1)
    /// @param _rangeStart  first index to process
    /// @param _rangeEnd    one past the last index to process
    /// @param _lambda      callable invoked once per index
    parloop(int _numThreads, int _rangeStart, int _rangeEnd, std::function<void(int)> _lambda)
        : numThreads(_numThreads < 1 ? 1 : _numThreads)
        , rangeStart(_rangeStart)
        , rangeEnd(_rangeEnd)
        , lambda(std::move(_lambda))
    {
        init();
        exit();
    }

    /// Starts one thread per non-empty chunk.
    void init()
    {
        // Nothing to do for an empty range or an empty callable (calling an
        // empty std::function would throw inside the worker thread).
        if (rangeEnd <= rangeStart || !lambda) return;

        myThreads.reserve(myThreads.size() + numThreads);
        for (int i = 0; i < numThreads; ++i)
        {
            const int first = chunkStart(i);
            const int last = chunkEnd(i);
            if (first > last) continue; // more threads than indices
            myThreads.emplace_back(myThreadFunction, this, first, last);
        }
    }

    /// Waits for every started thread to finish.
    void exit()
    {
        for (std::thread& worker : myThreads)
        {
            if (worker.joinable()) worker.join();
        }
    }

    /// Chunk size: ceil((rangeEnd - rangeStart) / numThreads), computed in
    /// integers; 0 for an empty range.
    int rangeJump()
    {
        const long long span = static_cast<long long>(rangeEnd) - rangeStart;
        if (span <= 0) return 0;
        const long long threads = numThreads < 1 ? 1 : numThreads;
        return static_cast<int>((span + threads - 1) / threads);
    }

    /// First index of chunk `i`, clamped to rangeEnd.
    int chunkStart(int i)
    {
        const long long first = rangeStart + static_cast<long long>(rangeJump()) * i;
        return static_cast<int>(std::min(first, static_cast<long long>(rangeEnd)));
    }

    /// Last index (inclusive) of chunk `i`; smaller than chunkStart(i) when
    /// the chunk is empty.  Only meaningful for a non-empty range.
    int chunkEnd(int i)
    {
        const long long pastEnd = rangeStart + static_cast<long long>(rangeJump()) * (i + 1);
        return static_cast<int>(std::min(pastEnd, static_cast<long long>(rangeEnd)) - 1);
    }

    /// Worker body: calls self->lambda for every index in [start, end]
    /// (both inclusive).
    static void myThreadFunction(parloop *self, int start, int end)
    {
        const std::function<void(int)>& work = self->lambda;
        for (int i = start; i <= end; ++i)
        {
            work(i);
        }
    }
};
// "Payload" of the lambda function: an intentionally empty stub, so the
// benchmark measures only the cost of the call itself.
void doThing(int /*i*/) {}
// Benchmark driver: for each of 4 trials, times a 100000000-iteration
// parloop with 1 through 8 threads and prints the elapsed time in
// milliseconds, then waits for input before exiting.
int main()
{
    auto begin = timer.now();
    auto finish = timer.now();

    // run 4 trials of each number of threads
    for (int trial = 1; trial <= 4; ++trial)
    {
        // test between 1-8 threads
        for (int threadCount = 1; threadCount <= 8; ++threadCount)
        {
            begin = timer.now();
            // this is the line of code which calls doThing in the loop
            parloop(threadCount, 0, 100000000, [](int i) { doThing(i); });
            finish = timer.now();

            const auto elapsedNs =
                std::chrono::duration_cast<std::chrono::nanoseconds>(finish - begin).count();
            cout << threadCount << " Time = " << elapsedNs / 1000000.0f << " ms\n";
        }
    }

    // keep the console open until the user presses a key
    cin.ignore();
    cin.get();
    return 0;
}
I'm using std::function to save the reference to the lambda.
That's one possible problem, as std::function is not a zero-runtime-cost abstraction. It is a type-erased wrapper that has a virtual-call like cost when invoking operator() and could also potentially heap-allocate (which could mean a cache-miss per call).
If you want to store your lambda in such a way that does not introduce additional overhead and that allows the compiler to inline it, you should use a template parameter. This is not always possible, but might fit your use case. Example:
template <typename TFunction>
struct parloop
{
public:
std::thread **myThreads;
int numThreads, rangeStart, rangeEnd;
TFunction lambda;
parloop(TFunction&& _lambda,
int _numThreads, int _rangeStart, int _rangeEnd)
: lambda(std::move(_lambda)),
numThreads(_numThreads), rangeStart(_rangeStart),
rangeEnd(_rangeEnd)
{
init();
exit();
}
// ...
To deduce the type of the lambda, you can use a helper function:
template <typename TF, typename... TArgs>
auto make_parloop(TF&& lambda, TArgs&&... xs)
{
return parloop<std::decay_t<TF>>(
std::forward<TF>(lambda), std::forward<TArgs>(xs)...);
}
Usage:
auto p = make_parloop([](int i) { doThing(i); },
numThreads, 0, 100000000);
I wrote an article that's related to the subject:
"Passing functions to functions"
It contains some benchmarks that show how much assembly is generated for std::function compared to a template parameter and other solutions.
I've been finally able to add the portion of code to explain my question ...
I've recently upgraded my development PC and with it gcc from 4.1 to 4.8.
Compiling the same source I'm experiencing now a var overlap issue.
I can't post the entire source since it is huge, but to give an idea: I have some variables defined before main() that are declared extern in other classes' source files so that they can be accessed from there.
Some of these global variables change their value unexpectedly, and when debugging this with gdb I can see that such a variable shares the same memory address as a member of a class instance.
This is the definition of the variables in main.cpp. The variable loopcounter is the only one I really need; I've surrounded it with unused variables in order to better understand this overlap.
uint64_t loopcounterx = 0;
uint64_t loopcounterxx = 0;
uint64_t loopcounterxxx = 0;
uint64_t loopcounterxxxx = 0;
uint64_t loopcounterxxxxx = 0;
uint64_t loopcounterxxxxxx = 0;
uint64_t loopcounter = 0;
uint64_t loopcountery = 0;
uint64_t loopcounteryy = 0;
uint64_t loopcounteryyy = 0;
uint64_t loopcounteryyyy = 0;
uint64_t loopcounteryyyyy = 0;
uint64_t loopcounteryyyyyy = 0;
uint64_t loopcounteryyyyyyy = 0;
.
.
.
int main(int argc, char *argv[]) {
I've set a watchpoint on write access to this variable with gdb
watch loopcounter
when the program execution stops I can see that we are inside an instance of the class Timer
Hardware watchpoint 1: loopcounter
Old value = 541
New value = 66077
Timer::signal (this=0xc235b0 (loopcounteryyy), s=true) at chrono.cpp:229
this is the portion of chrono.cpp
// Feeds a new input level `s` to the timer.
//
// The level is latched only when it differs from the previously latched
// one (in_signal_old) and r_pulse is not set.  On latching, in_signal and
// in_signal_old take the new level, and in_signal_fp (for s == true) or
// in_signal_rp (for s == false) is raised.
//
// Returns true when the level was latched, false when the call was
// ignored.  The original body had no return statement at all, which is
// undefined behaviour for a function declared to return bool.
// NOTE(review): the intended return value is not visible in this file;
// "was the change accepted" is the assumption made here -- confirm with callers.
bool Timer::signal (bool s) {
    const bool latched = (s != in_signal_old) && !r_pulse;
    if (latched) {
        in_signal = s;
        if (s) in_signal_fp = true;
        else   in_signal_rp = true;
        in_signal_old = s;
    }
    return latched;
}
the watchpoint stops the execution at line 229 of chrono.cpp, which contains
if (s) in_signal_fp=true;
By the time execution stops, the memory corruption has already happened, so I think the line to look at is the one before, that is
in_signal = s;
where the code is accessing a member of its own class, and it really seems that writing to this member corrupts the content of the loopcounter variable.
In fact, looking to pointers I get this
(gdb) p &loopcounter
$1 = (uint64_t *) 0xc235c8 (loopcounter)
(gdb) p &in_signal
$2 = (uint64_t *) 0xc235ca (loopcounter + 2)
(gdb) p &in_signal_fp
$5 = (bool *) 0xc235cc (loopcounter+4)
(gdb) p &active_f
$7 = (bool *) 0xc235c8 (loopcounter)
active_f is another member of the Timer class, and it overlaps the loopcounter variable exactly.
This is the Timer class, declared in the chrono.h header:
// Timer: holds a set of boolean state flags, three 64-bit time values and a
// timer_type, plus prev/next pointers to other Timer objects.  Only the
// trivial accessors are defined inline here; init(), out(), set_timer(),
// signal(), append() and the constructor are defined elsewhere.
//
// NOTE: the order of the data members determines the object layout; do not
// reorder them casually.
class Timer {
private:
// Pointers to Timer objects.  prev/next are written by set_prev()/set_next()
// and read by get_prev()/get_next() (presumably doubly-linked list links --
// confirm in append()).  `me` is not used by any code visible in this header.
Timer *me;
Timer *prev;
Timer *next;
// State flags exposed through active()/set_active() and running()/set_running().
bool active_f;
bool running_f;
// Input-side flags; written by signal().
bool in_signal;
bool in_signal_old;
bool in_signal_fp;
bool in_signal_rp;
// Output-side flags; not touched by any inline code in this header.
bool out_signal;
bool out_signal_old;
bool out_signal_fp;
bool out_signal_rp;
// Pulse flags exposed through fp()/set_fp() and rp()/set_rp().
bool f_pulse;
bool r_pulse;
// Time values; the setters/getters below take and return RTIME while the
// storage is uint64_t.
uint64_t start;
uint64_t end;
uint64_t lenght;
timer_type type;
public:
int init (int lenght);
bool out (void);
// --- flag getters ---
bool active () { return active_f; }
bool fp () { return f_pulse; }
bool rp () { return r_pulse; }
bool running () { return running_f; }
// --- setters ---
void set_next (Timer *n) { next = n; }
void set_prev (Timer *p) { prev = p; }
void set_end (RTIME x) { end = x; }
void set_active (bool x) { active_f = x; }
void set_running (bool x) { running_f = x; }
void set_lenght (RTIME x) { lenght = x; }
void set_start (RTIME x) { start = x; }
void set_type (timer_type x) { type = x; }
void set_timer (RTIME x, timer_type y);// { lenght = x; type = y; }
// Feeds a new input level; defined out of line.
bool signal (bool);
void set_fp (bool x) { f_pulse = x; }
void set_rp (bool x) { r_pulse = x; }
// --- getters ---
Timer * get_next () { return next; }
Timer * get_prev () { return prev; }
RTIME get_end () { return end; }
RTIME get_start () { return start; }
RTIME get_lenght (void) { return lenght; }
void append (Timer *newt);
Timer();
};
Why is the compiler, the linker, or whatever else is responsible, placing two different objects in the same memory area?
I've also used valgrind and the -fsanitize=address option introduced in gcc 4.8, but I can't get any more useful information or anything further to work on.
How else can I investigate this behaviour?
I have read many posts about this same topic, but I am unable to find out what exactly is wrong with the sysfs implementation in my kernel module. I am trying to make a userspace program block in poll() until the value in a sysfs file changes. Most people seem to have trouble getting poll() to block; I seem to be unable to get out of the blocking call. Here is the relevant code:
kernel module:
static int sysfs_test = 88;
/*
 * sysfs "show" callback for the interrupts attribute: formats the current
 * value of sysfs_test as a decimal number followed by a newline.
 *
 * buf is the output buffer this function writes into (at most PAGE_SIZE
 * bytes), so it must be a plain `char *` -- which is also the type the
 * show member of struct device_attribute requires.
 *
 * Returns the number of bytes written to buf.
 */
static ssize_t test_interrupts_show(struct device* dev, struct device_attribute* attr, char* buf)
{
	return scnprintf(buf, PAGE_SIZE, "%d\n", sysfs_test);
}
/*
 * sysfs "store" callback for the interrupts attribute: parses buf as a
 * decimal integer, stores it in sysfs_test and notifies anyone blocked in
 * poll()/select() on the attribute.
 *
 * Returns count on success, or the negative errno from kstrtoint() when
 * buf does not hold a valid int (sysfs_test is left unchanged then).
 */
static ssize_t test_interrupts_store(struct device* dev, struct device_attribute* attr, const char* buf, size_t count)
{
	int value;
	int err;

	/* sysfs_test is an int, so parse with the int-sized helper. */
	err = kstrtoint(buf, 10, &value);
	if (err)
		return err;

	sysfs_test = value;

	/*
	 * A poll() on a sysfs attribute only returns after sysfs_notify()
	 * has been called for it; changing the value alone wakes nobody.
	 */
	sysfs_notify(&dev->kobj, NULL, attr->attr.name);

	return count;
}
static DEVICE_ATTR(interrupts, S_IWUSR | S_IRUGO, test_interrupts_show, test_interrupts_store);
/*
 * Module init: reserves the character-device number range, registers the
 * cdev, creates the device class and device node, and adds the
 * "interrupts" sysfs attribute to the device.
 *
 * Returns 0 on success.  On failure everything set up so far is torn down
 * in reverse order and a negative errno is returned.
 */
static int __init test_init(void)
{
	int result;

	if (dev_major) {
		dev = MKDEV(dev_major, dev_minor);
		result = register_chrdev_region(dev, NUM_DEVICES, name);
	} else {
		result = alloc_chrdev_region(&dev, dev_minor, NUM_DEVICES, name);
		if (result >= 0) {
			dev_major = MAJOR(dev);
			dev_minor = MINOR(dev);
		}
	}
	if (result < 0) {
		printk(KERN_WARNING "%s: can't get major %d\n", name, dev_major);
		return result;
	}
	printk(KERN_NOTICE "%s: Major = %d, Minor = %d\n", name, dev_major, dev_minor);

	/* Register as character device */
	test_cdev = cdev_alloc();
	if (!test_cdev) {
		printk(KERN_NOTICE "Error allocating cdev\n");
		result = -ENOMEM;
		goto OUT1;
	}
	/*
	 * A cdev obtained from cdev_alloc() is already initialised; only the
	 * ops and owner need to be filled in (cdev_init() is for embedded,
	 * statically allocated cdev structures).
	 */
	test_cdev->ops = &test_fops;
	test_cdev->owner = THIS_MODULE;
	result = cdev_add(test_cdev, dev, 1);	/* Tell kernel about our device */
	if (result) {
		printk(KERN_NOTICE "Error %d adding cdev\n", result);
		goto OUT2;
	}

	/* This stuff relates to sysfs: */
	test_class = class_create(THIS_MODULE, NAME);
	if (IS_ERR(test_class)) {
		printk(KERN_ALERT "Failed to register device class\n");
		result = PTR_ERR(test_class);
		goto OUT2;
	}

	test_device = device_create(test_class, NULL, dev, NULL, NAME);
	if (IS_ERR(test_device)) {
		printk(KERN_ALERT "Failed to create device\n");
		result = PTR_ERR(test_device);
		goto OUT3;
	}

	result = device_create_file(test_device, &dev_attr_interrupts);
	if (result < 0) {
		printk(KERN_ALERT "failed\n");
		goto OUT4;
	}

	/*
	 * Success: return here.  Without this the function fell through into
	 * the cleanup labels below, destroyed everything it had just created
	 * and reported failure.
	 */
	return 0;

OUT4:
	device_destroy(test_class, dev);
OUT3:
	/* class_destroy() already unregisters the class; do not do both. */
	class_destroy(test_class);
OUT2:
	cdev_del(test_cdev);
OUT1:
	unregister_chrdev_region(dev, NUM_DEVICES);
	return result;
}
Relevant userspace code:
char interrupts_path[] = "/sys/class/test_module/test_module/interrupts";
/*
 * Opens the sysfs attribute named by interrupts_path, reads it once, then
 * blocks in poll() until the kernel signals a change on it.
 *
 * Returns 0 after poll() reports an event, 1 if open/read/poll fails.
 */
int main()
{
	struct pollfd fds;
	char dummy_buff[16];

	/* Read access is all poll() needs; O_RDWR would fail for non-owners. */
	fds.fd = open(interrupts_path, O_RDONLY);
	if (fds.fd < 0) {
		perror("open");
		return 1;
	}

	/* The attribute has to be read once before poll() will block on it. */
	if (read(fds.fd, dummy_buff, sizeof(dummy_buff)) < 0) {
		perror("read");
		close(fds.fd);
		return 1;
	}
	lseek(fds.fd, 0, SEEK_SET);

	/* sysfs reports a change as POLLERR | POLLPRI. */
	fds.events = POLLPRI | POLLERR;
	fds.revents = 0;

	printf("Polling for interrupt\n");
	if (poll(&fds, 1, -1) < 0) {
		perror("poll");
		close(fds.fd);
		return 1;
	}
	printf("Interrupt occured\n");

	close(fds.fd);
	return 0;
}
I run my userspace code in the background (./test &) and then echo a new value into the sysfs file for interrupts. I am hoping for my userspace program to unblock and return when the value changes. What am I doing wrong here?
edit:
/*
 * File operations table for the character device; test_init() hands its
 * address to the cdev it registers.  Each entry points at a handler
 * defined elsewhere in the module.
 */
struct file_operations test_fops = {
.owner = THIS_MODULE,
.llseek = test_llseek,
.read = test_read,
.write = test_write,
.unlocked_ioctl = test_ioctl,
.open = test_open,
.release = test_release
};