As one MPI process executes MPI_Barrier(), other processes hang - parallel-processing

I have an MPI program for having multiple processes read from a file that contains list of file names and based on the file names read - it reads the corresponding file and counts the frequency of words.
If one of the processes finishes this work and returns to block in MPI_Barrier(), the other processes hang as well. While debugging, I could see that the processes still inside process_files() never enter the readFile() function. I am unable to figure out why this happens. Please find the code below:
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <ctype.h>
#include <string.h>
#include "hash.h"
void process_files(char*, int* , int, hashtable_t* );
/* Clear the first `size` bytes of the buffer `c` (no-op when size <= 0). */
void initialize_word(char *c,int size)
{
    int idx = 0;

    while (idx < size) {
        c[idx] = 0;
        idx++;
    }
}
/*
 * Read this rank's share of the newline-separated list stored in `file`.
 *
 * The file is cut into nprocs byte ranges of size/nprocs bytes (the last
 * rank also takes the remainder).  Every rank except the last reads
 * `overlap` extra bytes so that a line straddling the boundary can be
 * completed; every rank except the first skips the partial line at the
 * start of its range.
 *
 * fh      - passed by value; only used as local storage for the handle.
 * block   - ignored on input (kept for interface compatibility).
 * length  - out: strlen() of the returned string.
 *
 * Returns a freshly malloc'ed, NUL-terminated string owned by the caller.
 * The open and the read are collective over MPI_COMM_WORLD, so every rank
 * must call this function.
 */
char* readFilesList(MPI_File fh, char* file,int rank, int nprocs, char* block, const int overlap, int* length)
{
    MPI_Offset size = 0;
    MPI_Offset chunk;
    MPI_Offset begin;
    MPI_Offset want;
    MPI_Status status;
    char *text;
    char *out;
    int nread = 0;
    int blockstart;
    int blockend;
    int outlen;

    (void)block;
    *length = 0;

    if (MPI_File_open(MPI_COMM_WORLD, file, MPI_MODE_RDONLY, MPI_INFO_NULL, &fh) != MPI_SUCCESS) {
        /* Nothing to read: hand back an empty list. */
        return calloc(1, sizeof(char));
    }
    MPI_File_get_size(fh, &size);

    /* Byte range this rank asks for. */
    chunk = size / nprocs;
    begin = rank * chunk;
    if (rank == nprocs - 1)
        want = size - begin;        /* remainder goes to the last rank */
    else
        want = chunk + overlap;     /* look ahead to finish the last line */

    text = malloc((size_t)want + 1);
    if (text == NULL)
        MPI_Abort(MPI_COMM_WORLD, 1);   /* cannot skip the collective read below */

    MPI_File_read_at_all(fh, begin, text, (int)want, MPI_CHAR, &status);

    /* The read may be short (range extends past EOF); only trust what arrived. */
    MPI_Get_count(&status, MPI_CHAR, &nread);
    if (nread < 0)
        nread = 0;
    text[nread] = 0;

    blockstart = 0;
    blockend = nread;

    if (rank != 0) {
        /* Drop the partial first line; the previous rank completes it. */
        while (blockstart != blockend && text[blockstart] != '\n')
            blockstart++;
        if (blockstart != blockend)
            blockstart++;           /* step over the newline itself */
    }
    if (rank != nprocs - 1) {
        /* Own range ends at `chunk`; extend to the end of the current line. */
        blockend = (chunk < (MPI_Offset)nread) ? (int)chunk : nread;
        while (blockend != nread && text[blockend] != '\n')
            blockend++;
    }
    if (blockstart > blockend)
        blockstart = blockend;      /* whole range was one partial line */

    outlen = blockend - blockstart;
    out = malloc((size_t)outlen + 1);
    if (out == NULL)
        MPI_Abort(MPI_COMM_WORLD, 1);
    memcpy(out, text + blockstart, (size_t)outlen);
    out[outlen] = 0;
    free(text);

    *length = (int)strlen(out);
    MPI_File_close(&fh);
    return out;
}
/*
 * Split `text` into words (maximal runs of ASCII letters) on behalf of the
 * file named `file`.
 *
 * The hashtable insertion is still commented out, so at present the function
 * only tokenizes and prints a start banner.  Words longer than the local
 * buffer are truncated instead of overflowing it.
 *
 * file      - name of the file the text came from (printed in the banner)
 * text      - NUL-terminated file contents
 * hashtable - destination table (currently unused)
 * rank      - MPI rank, used for logging only
 */
void calculate_term_frequencies(char* file, char* text, hashtable_t *hashtable,int rank)
{
    enum { WORD_CAP = 100 };
    char w[WORD_CAP];
    size_t j = 0;
    size_t i;

    (void)hashtable;

    printf("Start File %s, rank %d \n\n ",file,rank);
    fflush(stdout);

    for (i = 0; text[i] != 0; i++) {
        const char ch = text[i];

        if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) {
            /* Keep one slot for the terminator; drop letters beyond that. */
            if (j < WORD_CAP - 1)
                w[j++] = ch;
        } else {
            w[j] = 0;
            if (j != 0) {
                /* NOTE(review): re-enabling this as written would append to w
                 * in place and could overflow it; build the key in a
                 * separate, sufficiently large buffer. */
                //ht_set( hashtable, strcat(strcat(w,"#"),file),1);
            }
            j = 0;
        }
    }
    return;
}
/*
 * Read the whole of `filename` into memory and pass it to
 * calculate_term_frequencies().
 *
 * filename  - path of the file to process; an empty name is skipped
 * hashtable - forwarded to calculate_term_frequencies()
 * rank      - MPI rank, used for logging only
 *
 * NOTE(review): the file is opened on MPI_COMM_WORLD and read with the
 * collective MPI_File_read_at_all().  Both calls require every rank of the
 * communicator to take part; if ranks process different file names, or a
 * different number of files, they will wait for each other forever.
 */
void readFile(char* filename, hashtable_t *hashtable,int rank)
{
    MPI_Status stat;
    MPI_Offset size;
    MPI_File fx;
    char *textFromFile = NULL;
    const size_t namelen = strlen(filename);

    /* strlen() yields size_t, which must be printed with %zu. */
    printf("Start File %zu, rank %d \n\n ", namelen, rank);
    fflush(stdout);

    if (namelen != 0) {
        MPI_File_open(MPI_COMM_WORLD,filename,MPI_MODE_RDONLY,MPI_INFO_NULL,&fx);
        MPI_File_get_size(fx,&size);
        printf("Start File %s, rank %d \n\n ",filename,rank);
        fflush(stdout);

        textFromFile = malloc((size_t)size + 1);
        if (textFromFile == NULL)
            MPI_Abort(MPI_COMM_WORLD, 1);   /* cannot skip the collective read */

        MPI_File_read_at_all(fx,0,textFromFile,size,MPI_CHAR, &stat);
        textFromFile[size] = 0;
        calculate_term_frequencies(filename, textFromFile, hashtable,rank);
        MPI_File_close(&fx);

        /* The text is not referenced after tokenizing; release it. */
        free(textFromFile);
    }

    printf("Done File %s, rank %d \n\n ",filename,rank);
    fflush(stdout);
    return;
}
/*
 * Walk the newline-separated names in `block` and hand each one to
 * readFile().  `block` is modified in place by strtok().  Nothing is done
 * when *length is zero.
 */
void process_files(char* block, int* length, int rank,hashtable_t *hashtable)
{
    const char newline[] = "\n";
    char *name;

    if (*length == 0)
        return;

    for (name = strtok(block, newline); name != NULL; name = strtok(NULL, newline))
        readFile(name, hashtable, rank);
}
/*
 * Per-rank driver: fetch this rank's slice of the file list, then process
 * every file named in it.
 */
void execute_process(MPI_File fh, char* file, int rank, int nprocs, char* block, const int overlap, int * length, hashtable_t *hashtable)
{
    char *names = readFilesList(fh, file, rank, nprocs, block, overlap, length);

    process_files(names, length, rank, hashtable);
}
/*
 * Entry point: initialise MPI, report where each rank runs, process this
 * rank's share of "filepaths.txt", then synchronise and shut down.
 */
int main(int argc, char *argv[]){
    /* Initialization */
    MPI_Init(&argc, &argv);

    MPI_File fh = 0;
    int rank;
    int nprocs;
    int namelen;
    char *block = 0;
    const int overlap = 70;     /* extra bytes read past each rank's range */
    char *file = "filepaths.txt";
    int *length = (int *)malloc(sizeof(int));
    hashtable_t *hashtable = ht_create( 65536 );

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    char processor_name[MPI_MAX_PROCESSOR_NAME];
    MPI_Get_processor_name(processor_name, &namelen);
    printf("Rank %d is on processor %s\n",rank,processor_name);
    fflush(stdout);

    execute_process(fh, file, rank, nprocs, block, overlap, length, hashtable);
    printf("Rank %d returned after processing\n",rank);

    /* Wait for every rank before tearing MPI down. */
    MPI_Barrier(MPI_COMM_WORLD);
    MPI_Finalize();
    return 0;
}
The filepaths.txt is a file that contain the absolute file names of normal text files:
eg:
/home/mpiuser/mpi/MPI_Codes/code/test1.txt
/home/mpiuser/mpi/MPI_Codes/code/test2.txt
/home/mpiuser/mpi/MPI_Codes/code/test3.txt

Your readFilesList function is pretty confusing, and I believe it doesn't do what you want it to do, but maybe I just do not understand it correctly. I believe it is supposed to collect a bunch of filenames out of the list file for each process. A different set for each process. It does not do that, but this is not the problem, even if this would do what you want it to, the subsequent MPI IO would not work.
When reading files, you use MPI_File_read_at_all with MPI_COMM_WORLD as the communicator. This is a collective call: it requires all processes in the communicator to participate in reading the same file. If each process is supposed to read a different file, this obviously is not going to work.
So there are several issues with your implementation, though I can not really explain your described behavior, I would rather first start off and try to fix them, before debugging in detail, what might go wrong.
I am under the impression, you want to have an algorithm along these lines:
Read a list of file names
Distribute that list of files equally to all processes
Have each process work on its own set of files
Do something with the data from this processing
And I would suggest to try this with the following approach:
Read the list on a single process (no MPI IO)
Scatter the list of files to all processes, such that all get around the same amount of work
Have each process work on its list of files independently and in serial (serial file access and processing)
Some data reduction with MPI, as needed
I believe, this would be the best (easiest and fastest) strategy in your scenario. Note, that no MPI IO is involved here at all. I don't think doing some complicated distributed reading of the file list in the first step would result in any advantage here, and in the actual processing it would actually be harmful. The more independent your processes are, the better your scalability usually.

Related

How to trigger fops poll function from the kernel driver

I am working on a kernel driver which logs some spi data in a virtual file using debugfs.
My main goal is to be able to "listen" for incomming data from userspace using for example $ tail -f /sys/kernel/debug/spi-logs which is using select to wait for new data on the debugfs file.
I've implemented the fops poll function in the driver and when I am trying to get the data from the userspace, the poll function is never called even though there is new data available in the kernel to be read.
I assume that the poll function never gets called because the debugfs file never gets actually written.
My question is, is there a way to trigger the poll function from the kernel space when new data is available?
EDIT: Added an example
#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/debugfs.h>
#include <linux/wait.h>
#include <linux/poll.h>
/* Per-module state handed to debugfs as the file's private data. */
struct module_ctx {
/* Wait queue that debugfs_poll() registers pollers on. */
struct wait_queue_head wq;
};
/* Single global instance; its address is passed to debugfs_create_file(). */
struct module_ctx module_ctx;
/*
 * read() handler for the debugfs file.
 *
 * Placeholder: copies nothing to `buff` and always returns 0, which the
 * caller sees as end-of-file.
 */
static ssize_t debugfs_read(struct file *filp, char __user *buff, size_t count, loff_t *off)
{
// simulate no data left to read for now
return 0;
}
/*
 * poll() handler: registers the caller on the module wait queue and reports
 * the file as readable when the external ring buffer has data.
 */
static __poll_t debugfs_poll(struct file *filp, struct poll_table_struct *wait) {
    /* Context pointer stored on the inode when the file was created. */
    struct module_ctx *ctx = filp->f_path.dentry->d_inode->i_private;
    __poll_t ready = 0;

    pr_info("CALLED!!!");
    poll_wait(filp, &ctx->wq, wait);

    if (is_data_available_from_an_external_ring_buffer()) {
        ready |= POLLIN | POLLRDNORM;
    }
    return ready;
}
/*
 * llseek() handler for the debugfs file.
 *
 * SEEK_SET and SEEK_CUR behave as usual.  SEEK_END rewinds to offset 0 and
 * ignores `offset`.  Any other `orig`, or a request that would produce a
 * negative position, fails with -EINVAL and leaves f_pos untouched.
 *
 * Returns the new file position on success.
 */
loff_t debugfs_llseek(struct file *filp, loff_t offset, int orig)
{
    loff_t pos;

    switch (orig) {
    case SEEK_SET:
        pos = offset;
        break;
    case SEEK_CUR:
        pos = filp->f_pos + offset;
        break;
    case SEEK_END:
        pos = 0; /* Going to the end => to the beginning */
        break;
    default:
        return -EINVAL;
    }

    /* A file position may never be negative. */
    if (pos < 0)
        return -EINVAL;

    filp->f_pos = pos;
    return pos;
}
/* File operations installed on the debugfs file created in rb_example_init(). */
static const struct file_operations debugfs_fops = {
.owner = THIS_MODULE,
.read = debugfs_read,
.poll = debugfs_poll,
.llseek = debugfs_llseek,
};
/* Dentry of the debugfs file, kept so it can be removed on unload. */
static struct dentry *spi_logs_dentry;

/*
 * Module init: set up the wait queue and create the "spi_logs" debugfs file
 * with &module_ctx as its private data.
 *
 * Returns 0 on success or a negative errno; a positive return value from an
 * init function is not a valid error code.
 */
static int __init rb_example_init(void)
{
    int err;

    init_waitqueue_head(&module_ctx.wq);

    spi_logs_dentry = debugfs_create_file("spi_logs", 0666, NULL, &module_ctx,
                                          &debugfs_fops);
    /* Failure is reported as NULL or as an ERR_PTR depending on kernel version. */
    if (IS_ERR_OR_NULL(spi_logs_dentry)) {
        err = spi_logs_dentry ? PTR_ERR(spi_logs_dentry) : -ENOMEM;
        spi_logs_dentry = NULL;
        pr_err("qm35: failed to create /sys/kernel/debug/spi_logs\n");
        return err;
    }
    return 0;
}

/*
 * Module exit: remove the debugfs file so that no entry pointing at this
 * module's file operations survives the unload.
 */
static void __exit
rb_example_exit(void) {
    debugfs_remove(spi_logs_dentry);
    spi_logs_dentry = NULL;
}
module_init(rb_example_init);
module_exit(rb_example_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mihai Pop");
MODULE_DESCRIPTION("A simple example Linux module.");
MODULE_VERSION("0.01");
Using tail -f /sys/kernel/debug/spi_logs, the poll function never gets called
Semantic of poll is to return whenever encoded operations (read and/or write) on a file would return without block. In case of read operation, "block" means:
If read is called in nonblocking mode (field f_flags of the struct file has flag O_NONBLOCK set), then it returns -EAGAIN.
If read is called in blocking mode, then it puts a thread into the waiting state.
As you can see, your read function doesn't follow that convention and returns 0, which means EOF. So the caller has no reason to call poll after that.
Semantic of -f option for tail:
... not stop when end of file is reached, but rather to wait ...
is about the situation, when read returns 0, but the program needs to wait.
As you can see, poll semantic is not suitable for such wait. Instead, such programs use inotify mechanism.

Reading and printing last N characters

I have a program that I want to use to read a file and output its last N characters (could be 50 or whatever that I have coded). From my piece of code, I get output that is question marks in diamond boxes,(unsupported unicode?)
I'm using lseek to set the cursor, could someone please assist me?
/*
 * Attempt to print the last characters of "myFile.txt" using raw file
 * descriptors only (no <stdio.h>).
 *
 * NOTE(review): as written this never calls read(), so `ch` is written to
 * stdout while still uninitialized; the seek also uses a positive offset
 * from SEEK_END.  The return values of open() and lseek() are not checked.
 */
int main(int argc,char *argv[]){
int fd; //file descriptor to hold open info
int count=0; //declared but never used below
char ch; //meant to hold the read char; never assigned in this function
char* outputString = "The file does not exist!\n";
// access() returns 0 when the file exists, so this branch runs when it does not
if(!access("myFile.txt",F_OK)==0){
write(2,outputString,strlen(outputString));
exit(1);
}
fd = open("myFile.txt",O_RDONLY| O_NONBLOCK);
int ret = lseek(fd,200,SEEK_END); //offset +200 relative to end-of-file; ret receives the resulting position
// loops `ret` times, writing the same (unread) byte each time
while (ret!=0) {
write(1, &ch,1);
ret--;
}
close(fd);
return(0);
}
I don't want to use <stdio.h> functions so I'm using the file descriptors not making a FILE* object.
I slightly modified your attempt. The call lseek(fd, 200, SEEK_END) positions the file offset 200 characters past the end of the file. If you want to read the last 200 characters of a file, you need to seek to 200 characters before the end of the file, i.e. lseek(fd, -200, SEEK_END).
I placed some comments in the code to help explain it.
// please include headers when posting questions on stackoverflow
// It makes it way easier to reproduce and play with the code from others
#include <unistd.h>
#include <error.h>
// I use glibc error(3) to handle errors
#include <errno.h>
#include <stdlib.h>
#include <fcntl.h>
/*
 * Print the last TAIL_LEN bytes of "myFile.txt" to stdout using raw file
 * descriptors.  A file shorter than TAIL_LEN bytes is printed in full.
 * Any failure is reported through glibc error(3), which terminates the
 * program with exit status 1.
 */
int main(int argc,char *argv[]){
    enum { TAIL_LEN = 200 };
    /* One name for both the existence check and the open. */
    const char *path = "myFile.txt";
    char buf[TAIL_LEN];

    (void)argc;
    (void)argv;

    if (access(path, F_OK) != 0) {
        error(1, errno, "The file does not exist!");
    }
    int fd = open(path, O_RDONLY | O_NONBLOCK);
    if (fd == -1) {
        error(1, errno, "Failed opening the file");
    }

    // move the file offset to TAIL_LEN bytes before the end;
    // lseek returns off_t, which may be wider than int
    off_t pos = lseek(fd, -(off_t)TAIL_LEN, SEEK_END);
    if (pos == (off_t)-1) {
        if (errno != EINVAL) {
            error(1, errno, "Failed seeking the file");
        }
        // EINVAL: the file is shorter than TAIL_LEN, so start at the beginning
        if (lseek(fd, 0, SEEK_SET) == (off_t)-1) {
            error(1, errno, "Failed seeking the file");
        }
    }

    // copy everything from the current offset to end-of-file
    for (;;) {
        ssize_t readed = read(fd, buf, sizeof(buf));
        if (readed == 0) {
            // end-of-file
            break;
        }
        if (readed == -1) {
            error(1, errno, "Error reading from file");
        }
        // write() may accept fewer bytes than requested; loop until all are out
        ssize_t done = 0;
        while (done < readed) {
            ssize_t written = write(STDOUT_FILENO, buf + done, (size_t)(readed - done));
            if (written == -1) {
                error(1, errno, "Error writing to stdout");
            }
            done += written;
        }
    }
    close(fd);
    return 0;
}

Read binary data from QProcess in Windows

I have some .exe file (say some.exe) that writes binary data to the standard output. I do not have the sources of this program. I need to run some.exe from my C++/Qt application and read the standard output of the process I created. When I try to do this with QProcess::readAll, something replaces the byte \n (0x0a) with \r\n (0x0d 0x0a).
Here is a code:
QProcess some;
some.start( "some.exe", QStringList() << "-I" << "inp.txt" );
// some.setTextModeEnabled( false ); // no effect at all
some.waitForFinished();
QByteArray output = some.readAll();
I tried in cmd.exe to redirect output to file like this:
some.exe -I inp.txt > out.bin
and viewed out.bin with hexedit: there was 0d 0a in the place where there should be just 0a.
Edit:
Here is a simple program to emulate some.exe behaviour:
#include <stdio.h>
/* Emit four raw bytes (the third is 0x0a) on stdout, as some.exe would. */
int main() {
    const char payload[] = { 0x00, 0x11, 0x0a, 0x33 };
    const size_t count = sizeof payload / sizeof payload[0];

    fwrite(payload, sizeof payload[0], count, stdout);
    return 0;
}
run:
a.exe > out.bin
//out.bin
00 11 0d 0a 33
Note that I can't modify some.exe, which is why I can't simply change my example to call something like _setmode( _fileno( stdout ), _O_BINARY ).
The question is: how can I say to QProcess or to Windows or to console do not change CR with LF CR?
OS: Windows 7
Qt: 5.6.2
how can I say to QProcess or to Windows or to console do not change CR with LF CR?
They don't change anything. some.exe is broken. That's all. It outputs the wrong thing. Whoever made it output binary data in text mode has messed up badly.
There's a way to recover, though. You have to implement a decoder that will fix the broken output of some.exe. You know that every 0a has to be preceded by 0d. So you have to parse the output, and if you find a 0a, and there's 0d before it, remove the 0d, and continue. Optionally, you can abort if a 0a is not preceded by 0d - some.exe should not produce such output since it's broken.
The appendBinFix function takes the corrupted data and appends the fixed version to a buffer.
// https://github.com/KubaO/stackoverflown/tree/master/questions/process-fix-binary-crlf-51519654
#include <QtCore>
#include <algorithm>
// Appends `size` bytes from `src` to `buf`, collapsing every CR LF pair
// into a single LF.
//
// The function may be called repeatedly on consecutive chunks: a CR that
// ends `buf` from a previous call is paired with an LF that starts `src`.
//
// Returns false if any LF in `src` was not preceded by CR (such an LF is
// copied through unchanged), true otherwise.
bool appendBinFix(QByteArray &buf, const char *src, int size) {
bool okData = true;
if (!size) return okData;
constexpr char CR = '\x0d';
constexpr char LF = '\x0a';
// Carry over a CR left at the end of the previous chunk.
bool hasCR = buf.endsWith(CR);
// Grow for the worst case (nothing removed); trimmed to the real size below.
buf.resize(buf.size() + size);
// dst: next write position in buf; lastSrc: first source byte not yet copied.
char *dst = buf.end() - size;
const char *lastSrc = src;
for (const char *const end = src + size; src != end; src++) {
char const c = *src;
if (c == LF) {
if (hasCR) {
// Flush the pending run [lastSrc, src), which ends with the CR ...
std::copy(lastSrc, src, dst);
dst += (src - lastSrc);
// ... then overwrite that CR with LF. When the LF is the first byte of
// this chunk the run is empty and dst[-1] is the CR already in buf.
dst[-1] = LF;
// Resume copying after the LF, which has now been emitted.
lastSrc = src + 1;
} else
okData = false;
}
hasCR = (c == CR);
}
// Copy whatever follows the last handled LF, then drop the unused tail.
dst = std::copy(lastSrc, src, dst);
buf.resize(dst - buf.constData());
return okData;
}
// Convenience overload: append the fixed-up contents of a whole QByteArray.
bool appendBinFix(QByteArray &buf, const QByteArray &src) {
    const char *bytes = src.data();
    const int count = src.size();
    return appendBinFix(buf, bytes, count);
}
The following test harness ensures that it does the right thing, including emulating the output of some.exe (itself):
#include <QtTest>
#include <cstdio>
#ifdef Q_OS_WIN
#include <fcntl.h>
#include <io.h>
#endif
// Expected result of running appendBinFix() over `data`.
const auto dataFixed = QByteArrayLiteral("\x00\x11\x0d\x0a\x33");
// Bytes written by writeOutput(); contains a CR CR LF sequence.
const auto data = QByteArrayLiteral("\x00\x11\x0d\x0d\x0a\x33");
// Child-process mode: write `data` to stdout (in binary mode on Windows) and
// return 0 when every byte was written, 1 otherwise.
int writeOutput() {
#ifdef Q_OS_WIN
    _setmode(_fileno(stdout), _O_BINARY);
#endif
    const auto written = fwrite(data.data(), 1, data.size(), stdout);
    qDebug() << written << data.size();
    if (written == data.size()) {
        return 0;
    }
    return 1;
}
// Qt Test fixture exercising appendBinFix(), both directly and on the output
// of a child process.
class AppendTest : public QObject {
Q_OBJECT
// Fixed-up bytes plus the combined validity flag of the calls that produced them.
struct Result {
QByteArray d;
bool ok;
bool operator==(const Result &o) const { return ok == o.ok && d == o.d; }
};
// Feeds `src` to appendBinFix() in two chunks, cut at index `split`, to
// exercise the carry-over between calls.
static Result getFixed(const QByteArray &src, int split) {
Result f;
f.ok = appendBinFix(f.d, src.data(), split);
f.ok = appendBinFix(f.d, src.data() + split, src.size() - split) && f.ok;
return f;
}
// LF followed by CR: data must pass through unchanged and be flagged invalid.
Q_SLOT void worksWithLFCR() {
const auto lf_cr = QByteArrayLiteral("\x00\x11\x0a\x0d\x33");
for (int i = 0; i < lf_cr.size(); ++i)
QCOMPARE(getFixed(lf_cr, i), (Result{lf_cr, false}));
}
// CR LF must collapse to LF at every possible split point.
Q_SLOT void worksWithCRLF() {
const auto cr_lf = QByteArrayLiteral("\x00\x11\x0d\x0a\x33");
const auto cr_lf_fixed = QByteArrayLiteral("\x00\x11\x0a\x33");
for (int i = 0; i < cr_lf.size(); ++i)
QCOMPARE(getFixed(cr_lf, i), (Result{cr_lf_fixed, true}));
}
// CR CR LF must become CR LF at every possible split point.
Q_SLOT void worksWithCRCRLF() {
for (int i = 0; i < data.size(); ++i) QCOMPARE(getFixed(data, i).d, dataFixed);
}
// Re-runs this executable with an extra argument (see main()), captures its
// stdout and checks both the raw and the fixed-up bytes.
Q_SLOT void worksWithQProcess() {
QProcess proc;
proc.start(QCoreApplication::applicationFilePath(), {"output"},
QIODevice::ReadOnly);
proc.waitForFinished(5000);
QCOMPARE(proc.exitCode(), 0);
QCOMPARE(proc.exitStatus(), QProcess::NormalExit);
QByteArray out = proc.readAllStandardOutput();
QByteArray fixed;
appendBinFix(fixed, out);
QCOMPARE(out, data);
QCOMPARE(fixed, dataFixed);
}
};
// With any extra command-line argument the program acts as the data-producing
// child (writeOutput()); otherwise it runs the AppendTest suite.
int main(int argc, char *argv[]) {
    QCoreApplication app(argc, argv);

    const bool childMode = app.arguments().size() > 1;
    if (childMode) {
        return writeOutput();
    }

    AppendTest test;
    QTEST_SET_MAIN_SOURCE_PATH
    return QTest::qExec(&test, argc, argv);
}
#include "main.moc"
Unfortunately it has nothing to do with QProcess or Windows or console. It's all about CRT. Functions like printf or fwrite are taking into account _O_TEXT flag to add an additional 0x0D (true only for Windows). So the only solution is to modify stdout's fields of your some.exe with WriteProcessMemory or call the _setmode inside an address space of your some.exe with DLL Injection technique or patch the lib. But it's a tricky job.

Shell Program in C, running executable in background

I am writing a simple shell program in C and I believe I have it just about finished. The program should continually print "Prompt>" and wait for a user to either enter the name of an executable along with any parameters the executable needs. The shell only has one built in function, quit, which ends the program. If the user were to put an '&' at the end of the line then the given executable should be run in the background. (Built-in functions and commands without the '&' should run in the foreground and wait for the child process to finish.) However when I run my code and put an '&' at the end of my line, the executable runs and finishes but I no longer see the "prompt>" show up. I can still enter the name of an executable or quit and it runs and everything but I don't understand why the prompt isn't showing up.
Also as a side question. Is my program properly handling child processes? Basically, am I not leaving zombie processes with this code?
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>
#include <string.h>
#define MAXBUFF 100
#define MAXLINE 200
int parse_line(char *buffer, char **arg_array);
void evaluate_commandline(char *commandline);
int builtin_command();
/*
 * Split `buffer` in place into space-separated arguments.
 *
 * buffer    - command line, optionally ending in '\n'; it is modified
 *             (separators are overwritten with '\0')
 * arg_array - receives pointers into `buffer`, terminated by NULL; the
 *             caller must provide room for every token plus the NULL
 *
 * A final argument starting with '&' requests background execution and is
 * removed from arg_array.
 *
 * Returns 1 for a background request or an empty line (arg_array[0] is NULL
 * in the latter case), 0 otherwise.
 */
int parse_line(char *buffer, char **arg_array){
    char *delimiter;
    int num_args = 0;
    int run_background;
    size_t len = strlen(buffer);

    /* Drop the trailing newline only if there is one; an empty buffer or a
     * last line without '\n' must not lose (or underflow past) a character. */
    if (len > 0 && buffer[len - 1] == '\n')
        buffer[len - 1] = '\0';

    for (;;) {
        while (*buffer == ' ')
            buffer++;
        if (*buffer == '\0')
            break;
        arg_array[num_args++] = buffer;
        delimiter = strchr(buffer, ' ');
        if (delimiter == NULL)
            break;              /* last token runs to end of string */
        *delimiter = '\0';
        buffer = delimiter + 1;
    }
    arg_array[num_args] = NULL;

    if (num_args == 0)
        return 1;

    run_background = (*arg_array[num_args - 1] == '&');
    if (run_background)
        arg_array[--num_args] = NULL;
    return run_background;
}
/*
 * Parse one command line and run it.
 *
 * Built-in commands are handled in-process.  Anything else is run in a
 * child via fork()/execvp(); the shell waits for that specific child unless
 * the line ended in '&'.  Background children that have already finished
 * are reaped on every call so they do not linger as zombies.
 */
void evaluate_commandline(char *commandline){
    /* A line of at most MAXLINE-1 characters holds at most MAXLINE/2
     * space-separated tokens; one more slot is needed for the NULL. */
    char *arg_array[MAXLINE / 2 + 1];
    char buffer[MAXLINE];
    int run_background;
    pid_t pid;

    /* Collect any finished background children without blocking. */
    while (waitpid(-1, NULL, WNOHANG) > 0)
        ;

    if (commandline == NULL || commandline[0] == '\0')
        return;

    /* Bounded copy: parse_line() modifies its input. */
    snprintf(buffer, sizeof buffer, "%s", commandline);
    run_background = parse_line(buffer, arg_array);
    if (arg_array[0] == NULL)
        return;
    if (builtin_command(arg_array))
        return;

    /* Flush before forking so buffered output is not duplicated in the child. */
    fflush(stdout);
    pid = fork();
    if (pid < 0) {
        perror("fork");
        return;
    }
    if (pid == 0) {
        execvp(arg_array[0], arg_array);
        /* Only reached when execvp() failed. */
        printf("%s: Command not found.\n", arg_array[0]);
        fflush(stdout);
        _exit(127);
    }
    if (!run_background) {
        int child_status;
        /* Wait for this child only; plain wait() could return for an
         * unrelated background child instead. */
        waitpid(pid, &child_status, 0);
    }
    return;
}
/*
 * Handle built-in commands.  "quit" terminates the shell with status 0;
 * every other command is reported as not built-in by returning 0.
 */
int builtin_command(char **arg_array){
    const char *cmd = arg_array[0];

    if (strcmp(cmd, "quit") == 0) {
        exit(0);
    }
    return 0;
}
/*
 * Shell main loop: print the prompt, read one line, evaluate it.
 * Exits with status 0 on end-of-file or on a read error.
 */
int main(){
    char commandline[MAXLINE];

    while(1){
        printf("prompt> ");
        /* The prompt has no newline, so push it out explicitly. */
        fflush(stdout);

        /* NULL covers both EOF and read errors; a final line that lacks a
         * trailing newline is still returned and evaluated first. */
        if (fgets(commandline, MAXLINE, stdin) == NULL)
            exit(0);

        evaluate_commandline(commandline);
    }
}
i think where you say:
if(!run_background){
you forget a "else"
else if(!run_background){

Getting process base address in Mac OSX

I'm trying to read the memory of a process using task_for_pid / vm_read.
// Out-parameters filled in by vm_read(): byte count and buffer.
uint32_t sz;
pointer_t buf;
task_t task;
// Hard-coded PID of the process to inspect.
pid_t pid = 9484;
// Obtain the target's task port; NOTE(review): `error` is not checked before use.
kern_return_t error = task_for_pid(current_task(), pid, &task);
// Read 2048 bytes starting at the base address previously found with gdb.
vm_read(task, 0x10e448000, 2048, &buf, &sz);
In this case I read the first 2048 bytes.
This works when I know the base address of the process (which I can find out using gdb "info shared" - in this case 0x10e448000), but how do I find out the base address at runtime (without looking at it with gdb)?
Answering my own question. I was able to get the base address using mach_vm_region_recurse like below. The offset lands in vmoffset. If there is another way that is more "right" - don't hesitate to comment!
#include <stdio.h>
#include <mach/mach_init.h>
#include <sys/sysctl.h>
#include <mach/mach_vm.h>
...
// NOTE(review): `task` and `vmoffset` have no initializer in this excerpt;
// presumably `task` comes from task_for_pid() in the elided code - confirm.
mach_port_name_t task;
// Receives the region address (the value the author uses as the base address).
vm_map_offset_t vmoffset;
vm_map_size_t vmsize;
uint32_t nesting_depth = 0;
struct vm_region_submap_info_64 vbr;
// NOTE(review): hard-coded info count; presumably meant to match the size of
// `vbr` - verify against the SDK's VM_REGION_SUBMAP_INFO_COUNT_64.
mach_msg_type_number_t vbrcount = 16;
kern_return_t kr;
// Query the region; on failure only a message is printed.
if ((kr = mach_vm_region_recurse(task, &vmoffset, &vmsize,
&nesting_depth,
(vm_region_recurse_info_t)&vbr,
&vbrcount)) != KERN_SUCCESS)
{
printf("FAIL");
}
Since you're calling current_task(), I assume you're aiming at your own process at runtime. So the base address you mentioned should be the dynamic base address, i.e. static base address + image slide caused by ASLR, right? Based on this assumption, you can use "Section and Segment Accessors" to get the static base address of your process, and then use the dyld functions to get the image slide. Here's a snippet:
#import <Foundation/Foundation.h>
#include </usr/include/mach-o/getsect.h>
#include <stdio.h>
#include </usr/include/mach-o/dyld.h>
#include <string.h>
/*
 * Return the link-time (pre-slide) virtual address of the executable's
 * __TEXT segment, or 0 when the segment cannot be found.
 */
uint64_t StaticBaseAddress(void)
{
    const struct segment_command_64 *text_segment = getsegbyname("__TEXT");

    /* Guard against a failed lookup instead of dereferencing NULL. */
    if (text_segment == NULL)
        return 0;
    return text_segment->vmaddr;
}
/*
 * Look up the slide of the main executable image.
 *
 * Returns the slide of the loaded image whose name equals the executable
 * path, -1 if the executable path cannot be obtained, and 0 if no image
 * name matches.
 */
intptr_t ImageSlide(void)
{
    char exePath[1024];
    uint32_t capacity = sizeof(exePath);
    uint32_t idx = 0;

    if (_NSGetExecutablePath(exePath, &capacity) != 0) {
        return -1;
    }

    while (idx < _dyld_image_count()) {
        const char *imageName = _dyld_get_image_name(idx);
        if (strcmp(imageName, exePath) == 0) {
            return _dyld_get_image_vmaddr_slide(idx);
        }
        idx++;
    }
    return 0;
}
/* Runtime base address: the static base address plus the image slide. */
uint64_t DynamicBaseAddress(void)
{
    const uint64_t staticBase = StaticBaseAddress();

    return staticBase + ImageSlide();
}
/*
 * Print the dynamic base address and its two components, then spin forever
 * so a debugger can be attached to compare the values.
 */
int main (int argc, const char *argv[])
{
    const uint64_t dynamicBase = DynamicBaseAddress();
    const uint64_t staticBase = StaticBaseAddress();
    const intptr_t slide = ImageSlide();

    printf("dynamic base address (%0llx) = static base address (%0llx) + image slide (%0lx)\n", dynamicBase, staticBase, slide);
    for (;;) {
        /* busy-wait: attach via gdb/lldb to view the base address */
    }
    return 0;
}
Hope it helps!

Resources