I am using json-c to parse json files in my project. I tried creating json_tokener_parse but this has resulted in seg-fault. could any please check and tell me the reason for segfault.
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h> // O_RDONLY
#include<stdlib.h>
#include<stdio.h>
#include<unistd.h>
#include<json-c/json.h>
int main() {
int oflag = O_RDONLY;
const char *path = "file.json";
const int fd = open(path, oflag);
// use stat to find the file size
struct stat stat;
int ret = fstat(fd, &stat);
int mflags = MAP_SHARED; // information about handling the mapped data
int mprot = PROT_READ|PROT_WRITE; // access permissions to the data being mapped
size_t size = stat.st_size;
void *addr = mmap(NULL, size, mprot, mflags, fd, 0);
const char *file = (char *)addr;
json_object * jobj = json_tokener_parse(addr);
//json_parse(jobj);
}
json_tokener_parse() takes a null-terminated string. A text file is not null-terminated. You'll have to use json_tokener_parse_ex() and specify the length.
I have an MPI program for having multiple processes read from a file that contains list of file names and based on the file names read - it reads the corresponding file and counts the frequency of words.
If one of the processes completes this and returns - to block executing MPI_Barrier(), the other processes also hang. On debugging, it could be seen that the readFile() function is not entered by the processes currently in process_files() Unable to figure out why this happens. Please find the code below:
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <ctype.h>
#include <string.h>
#include "hash.h"
void process_files(char*, int* , int, hashtable_t* );
void initialize_word(char *c,int size)
{
int i;
for(i=0;i<size;i++)
c[i]=0;
return;
}
char* readFilesList(MPI_File fh, char* file,int rank, int nprocs, char* block, const int overlap, int* length)
{
char *text;
int blockstart,blockend;
MPI_Offset size;
MPI_Offset blocksize;
MPI_Offset begin;
MPI_Offset end;
MPI_Status status;
MPI_File_open(MPI_COMM_WORLD,file,MPI_MODE_RDONLY,MPI_INFO_NULL,&fh);
MPI_File_get_size(fh,&size);
/*Block size calculation*/
blocksize = size/nprocs;
begin = rank*blocksize;
end = begin+blocksize-1;
end+=overlap;
if(rank==nprocs-1)
end = size;
blocksize = end-begin+1;
text = (char*)malloc((blocksize+1)*sizeof(char));
MPI_File_read_at_all(fh,begin,text,blocksize,MPI_CHAR, &status);
text[blocksize+1]=0;
blockstart = 0;
blockend = blocksize;
if(rank!=0)
{
while(text[blockstart]!='\n' && blockstart!=blockend) blockstart++;
blockstart++;
}
if(rank!=nprocs-1)
{
blockend-=overlap;
while(text[blockend]!='\n'&& blockend!=blocksize) blockend++;
}
blocksize = blockend-blockstart;
block = (char*)malloc((blocksize+1)*sizeof(char));
block = memcpy(block, text + blockstart, blocksize);
block[blocksize]=0;
*length = strlen(block);
MPI_File_close(&fh);
return block;
}
void calculate_term_frequencies(char* file, char* text, hashtable_t *hashtable,int rank)
{
printf("Start File %s, rank %d \n\n ",file,rank);
fflush(stdout);
if(strlen(text)!=0||strlen(file)!=0)
{
int i,j;
char w[100];
i=0,j=0;
while(text[i]!=0)
{
if((text[i]>=65&&text[i]<=90)||(text[i]>=97&&text[i]<=122))
{
w[j]=text[i];
j++; i++;
}
else
{
w[j] = 0;
if(j!=0)
{
//ht_set( hashtable, strcat(strcat(w,"#"),file),1);
}
j=0;
i++;
initialize_word(w,100);
}
}
}
return;
}
void readFile(char* filename, hashtable_t *hashtable,int rank)
{
MPI_Status stat;
MPI_Offset size;
MPI_File fx;
char* textFromFile=0;
printf("Start File %d, rank %d \n\n ",strlen(filename),rank);
fflush(stdout);
if(strlen(filename)!=0)
{
MPI_File_open(MPI_COMM_WORLD,filename,MPI_MODE_RDONLY,MPI_INFO_NULL,&fx);
MPI_File_get_size(fx,&size);
printf("Start File %s, rank %d \n\n ",filename,rank);
fflush(stdout);
textFromFile = (char*)malloc((size+1)*sizeof(char));
MPI_File_read_at_all(fx,0,textFromFile,size,MPI_CHAR, &stat);
textFromFile[size]=0;
calculate_term_frequencies(filename, textFromFile, hashtable,rank);
MPI_File_close(&fx);
}
printf("Done File %s, rank %d \n\n ",filename,rank);
fflush(stdout);
return;
}
void process_files(char* block, int* length, int rank,hashtable_t *hashtable)
{
char s[2];
s[0] = '\n';
s[1] = 0;
char *file;
if(*length!=0)
{
/* get the first file */
file = strtok(block, s);
/* walk through other tokens */
while( file != NULL )
{
readFile(file,hashtable,rank);
file = strtok(NULL, s);
}
}
return;
}
void execute_process(MPI_File fh, char* file, int rank, int nprocs, char* block, const int overlap, int * length, hashtable_t *hashtable)
{
block = readFilesList(fh,file,rank,nprocs,block,overlap,length);
process_files(block,length,rank,hashtable);
}
int main(int argc, char *argv[]){
/*Initialization*/
MPI_Init(&argc, &argv);
MPI_File fh=0;
int rank,nprocs,namelen;
char *block=0;
const int overlap = 70;
char* file = "filepaths.txt";
int *length = (int*)malloc(sizeof(int));
hashtable_t *hashtable = ht_create( 65536 );
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
char processor_name[MPI_MAX_PROCESSOR_NAME];
MPI_Get_processor_name(processor_name, &namelen);
printf("Rank %d is on processor %s\n",rank,processor_name);
fflush(stdout);
execute_process(fh,file,rank,nprocs,block,overlap,length,hashtable);
printf("Rank %d returned after processing\n",rank);
MPI_Barrier(MPI_COMM_WORLD);
MPI_Finalize();
return 0;
}
The filepaths.txt is a file that contain the absolute file names of normal text files:
eg:
/home/mpiuser/mpi/MPI_Codes/code/test1.txt
/home/mpiuser/mpi/MPI_Codes/code/test2.txt
/home/mpiuser/mpi/MPI_Codes/code/test3.txt
Your readFilesList function is pretty confusing, and I believe it doesn't do what you want it to do, but maybe I just do not understand it correctly. I believe it is supposed to collect a bunch of filenames out of the list file for each process. A different set for each process. It does not do that, but this is not the problem, even if this would do what you want it to, the subsequent MPI IO would not work.
When reading files, you use MPI_File_read_all with MPI_COMM_WORLD as communicator. This requires all processes to participate in reading this file. Now, if each process should read a different file, this obviously is not going to work.
So there are several issues with your implementation, though I can not really explain your described behavior, I would rather first start off and try to fix them, before debugging in detail, what might go wrong.
I am under the impression, you want to have an algorithm along these lines:
Read a list of file names
Distribute that list of files equally to all processes
Have each process work on its own set of files
Do something with the data from this processing
And I would suggest to try this with the following approach:
Read the list on a single process (no MPI IO)
Scatter the list of files to all processes, such that all get around the same amount of work
Have each process work on its list of files independently and in serial (serial file access and processing)
Some data reduction with MPI, as needed
I believe, this would be the best (easiest and fastest) strategy in your scenario. Note, that no MPI IO is involved here at all. I don't think doing some complicated distributed reading of the file list in the first step would result in any advantage here, and in the actual processing it would actually be harmful. The more independent your processes are, the better your scalability usually.
I just want to verify I got this right.
The copy from sr to ds2 gives an error. Is this because ds2 is considered "const"??
Thanks and hope this isn't a bore.
#include <stdio.h>
#include <string.h>
#include <malloc.h>
int main(void)
{
char *sr = "Hello World";
char *ds1 = (char*)malloc(100 * sizeof(char));
char *ds2 = "12345678901234567890";
// This statement works just fine
printf("%s\n", strcpy(ds1, sr));
// This gives error
strcpy(ds2, sr);
printf("%s\n", ds2);
return 0;
}
Here is a similar post
difference between char* and char[] with strcpy()
When you do this
char *ds2 = "12345678901234567890";
the compiler leaves the pointer pointing to a non-writable memory region.
With this line
// This gives error
strcpy(ds2, sr);
You are trying to do an strcpy into the non-writable memory.
You should also have a free for each malloc as you are allocating memory but not de-allocating it.
I have succeeded in making a modified version of ext2 (so called myext2.ko) and tested it for mount and umount, and something else; the problem occurs when I add the following code into my fs/myext2/file.c and tried to implement a simple "encryption" func, that is, negating the last bit of the read-in string :
ssize_t my_new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
{
struct iovec iov; //changed
struct kiocb kiocb;
struct iov_iter iter;
ssize_t ret;
//inserted by adward - begin
size_t i;
char buff[len];
for (i=0;i<len;i++){
buff[i] = buf[i] ^ 1;
}
iov.iov_base = (void __user *)buff;
iov.iov_len = len;
printk("Inside my_new_sync_write");
//inserted by adward - end
init_sync_kiocb(&ki_nbytesocb, filp);
kiocb.ki_pos = *ppos;
kiocb.ki_nbytes = len;
iov_iter_init(&iter, WRITE, &iov, 1, len);
ret = filp->f_op->write_iter(&kiocb, &iter);
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&kiocb);
*ppos = kiocb.ki_pos;
return ret;
}
ssize_t my_new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
{
struct iovec iov = { .iov_base = buf, .iov_len = len };
struct kiocb kiocb;
struct iov_iter iter;
ssize_t ret;
//inserted by adward - begin
size_t i;
//inserted by adward - end
init_sync_kiocb(&kiocb, filp);
kiocb.ki_pos = *ppos;
kiocb.ki_nbytes = len;
iov_iter_init(&iter, READ, &iov, 1, len);
ret = filp->f_op->read_iter(&kiocb, &iter);
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&kiocb);
*ppos = kiocb.ki_pos;
//inserted by adward - begin
for (i=0;i<len;i++){
buf[i] ^= 1;
}
printk("inside my_new_sync_read");
//inserted by adward - end
return ret;
}
The prototype of the above two functions are actually in fs/read_write.c , using by almost all file system types in the kernel code ver 3.17.6; I just copied them into fs/myext2/file.c and make some minor change as commented, so that I can do some test without having to change any Makefile.
But the moment I paste them into my file.c, "sudo make" gives the error message as following:
/home/adward/linux-3.17.6/fs/myext2/file.c:64:15: error: storage size of ‘kiocb’ isn’t known
struct kiocb kiocb;
^
/home/adward/linux-3.17.6/fs/myext2/file.c:65:18: error: storage size of ‘iter’ isn’t known
struct iov_iter iter;
^
and cc1: some warnings being treated as errors
even if I haven't refered to them by changing the func pointers in file_operations in the same source code file, or say, I haven't used them!
P.S.
My file_operation struct now looks like:
const struct file_operations myext2_file_operations = {
.llseek = generic_file_llseek,
.read = new_sync_read, //want to replace with my_new_sync_read
.write = new_sync_write, //want to replace with my_new_sync_write
...
}
Has anyone who have done something similar and crashed into some problems like this one? Please notify me if I have done something remarkable wrong, thanks.
Met the same error before. U should add <linux/aio.h> as ext2 uses asynchronous IO for reading/writing files.
Hope that helps :)
I'm trying to read the memory of a process using task_for_pid / vm_read.
uint32_t sz;
pointer_t buf;
task_t task;
pid_t pid = 9484;
kern_return_t error = task_for_pid(current_task(), pid, &task);
vm_read(task, 0x10e448000, 2048, &buf, &sz);
In this case I read the first 2048 bytes.
This works when I know the base address of the process (which I can find out using gdb "info shared" - in this case 0x10e448000), but how do I find out the base address at runtime (without looking at it with gdb)?
Answering my own question. I was able to get the base address using mach_vm_region_recurse like below. The offset lands in vmoffset. If there is another way that is more "right" - don't hesitate to comment!
#include <stdio.h>
#include <mach/mach_init.h>
#include <sys/sysctl.h>
#include <mach/mach_vm.h>
...
mach_port_name_t task;
vm_map_offset_t vmoffset;
vm_map_size_t vmsize;
uint32_t nesting_depth = 0;
struct vm_region_submap_info_64 vbr;
mach_msg_type_number_t vbrcount = 16;
kern_return_t kr;
if ((kr = mach_vm_region_recurse(task, &vmoffset, &vmsize,
&nesting_depth,
(vm_region_recurse_info_t)&vbr,
&vbrcount)) != KERN_SUCCESS)
{
printf("FAIL");
}
Since you're calling current_task(), I assume you're aiming at your own process at runtime. So the base address you mentioned should be the dynamic base address, i.e. static base address + image slide caused by ASLR, right? Based on this assumption, you can use "Section and Segment Accessors" to get the static base address of your process, and then use the dyld functions to get the image slide. Here's a snippet:
#import <Foundation/Foundation.h>
#include </usr/include/mach-o/getsect.h>
#include <stdio.h>
#include </usr/include/mach-o/dyld.h>
#include <string.h>
uint64_t StaticBaseAddress(void)
{
const struct segment_command_64* command = getsegbyname("__TEXT");
uint64_t addr = command->vmaddr;
return addr;
}
intptr_t ImageSlide(void)
{
char path[1024];
uint32_t size = sizeof(path);
if (_NSGetExecutablePath(path, &size) != 0) return -1;
for (uint32_t i = 0; i < _dyld_image_count(); i++)
{
if (strcmp(_dyld_get_image_name(i), path) == 0)
return _dyld_get_image_vmaddr_slide(i);
}
return 0;
}
uint64_t DynamicBaseAddress(void)
{
return StaticBaseAddress() + ImageSlide();
}
int main (int argc, const char *argv[])
{
printf("dynamic base address (%0llx) = static base address (%0llx) + image slide (%0lx)\n", DynamicBaseAddress(), StaticBaseAddress(), ImageSlide());
while (1) {}; // you can attach to this process via gdb/lldb to view the base address now :)
return 0;
}
Hope it helps!