My shell has to interpret the double quote - shell

I have to write a shell that can interpret double quotes.
I've written a basic shell.
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
int main ()
{
int i;
char * ligne;
char *tokens[100];
ligne=(char*)malloc(300);
printf("$ ");
fgets(ligne,256,stdin);
while (strcmp(ligne,"exit\n"))
{ i=0;
tokens[i]=strtok(ligne," \n");
while (tokens[i] != NULL) tokens[++i]=strtok(NULL," \n");
if (fork()==0)
{ execvp(tokens[0],tokens);
printf("Commande invalide\n");
exit(1);
}
wait(0);
printf("$ ");
fgets(ligne,256,stdin);
}
exit(0);
}
In a linux shell: When you enter a command like
$ echo "`a b`"
The shell interprets spaces and therefore
a b
is taken as a file.
I do not see how to remove the double quotes and keep the spaces.
Thank you.

strtokis not suited for that. Replace
tokens[i]=strtok(ligne," \n");
while (tokens[i] != NULL) tokens[++i]=strtok(NULL," \n");
e. g. with
char quot = 0, *cp;
for (cp = ligne; tokens[i] = cp += strspn(cp, " \n"), *cp; ++i)
{
do if (*cp == '"') quot ^= 1, memmove(cp, cp+1, strlen(cp));
while (cp += strcspn(cp, quot ? "\"" : " \n\""), *cp == '\"');
if (*cp) *cp++ = '\0';
}
tokens[i] = NULL;
if (quot) puts("unmatched quotation mark");
else
.

Related

Cannot access memory at address using gdb in CLION IDE. How do I setup gdb?

I get the error "Cannot access memory at address 0x100403055" when I try and set a memory value to 0x00 when stopped in the debugger.
Is there a special switch I need to set to enable the set operation?
Here is my complete C code file "main.c"
#include <stdio.h>
#include <string.h>
/*
separator - consume all non-token characters until next token. This includes:
comments: '#'
nesting: '{'
unnesting: '}'
whitespace: ' ','\t','\n'
*nest is changed according to nesting/unnesting processed
*/
static void separator(int *nest, char **tokens) {
char c, *s;
s = *tokens;
while ((c = *s)) {
/* #->eol = comment */
if (c == '#') {
s++;
while ((c = *s)) {
s++;
if (c == '\n')
break;
}
continue;
}
if (c == '{') {
(*nest)++;
s++;
continue;
}
if (c == '}') {
(*nest)--;
s++;
continue;
}
if (c == ' ' || c == '\n' || c == '\t') {
s++;
continue;
}
break;
}
*tokens = s;
}
/*
token - capture all characters until next separator, then consume separator,
return captured token, leave **tokens pointing to next token.
*/
static char *token(int *nest, char **tokens) {
char c, *s, *t;
char terminator = '\0';
s = t = *tokens;
while ((c = *s)) {
if (c == '#'
|| c == ' ' || c == '\t' || c == '\n' || c == '{' || c == '}')
break;
s++;
}
*tokens = s;
separator(nest, tokens);
/* Breakpoint here to examine and manipulate memory */
*s = '\0';
return t;
}
struct test_case {
char *input;
int nest;
char *expected_output;
};
int main() {
int nest = 0;
int TESTSEP = 0;
if (TESTSEP>0) {
char *tokens = "# this is a comment\n{nesting {example} unnesting}\n \t end";
separator(&nest, &tokens);
printf("nest: %d\n", nest);
printf("tokens: %s\n", tokens);
return 0;
} else {
struct test_case test_cases[] = {
{"hello world", 0, "hello"},
{"hello#world", 0, "hello"},
{"hello{world}", 0, "hello"},
{"hello world", 0, "hello"},
{"hello\tworld", 0, "hello"},
{"hello\nworld", 0, "hello"},
};
for (int i = 0; i < sizeof(test_cases) / sizeof(test_cases[0]); i++) {
struct test_case test_case = test_cases[i];
char *tokens = test_case.input;
char *output = token(&test_case.nest, &tokens);
if (strcmp(output, test_case.expected_output) != 0) {
printf("Test case %d failed: expected %s, got %s\n", i, test_case.expected_output, output);
}
}
return 0;
}
}
In the token function there is a comment line where I place a breakpoint and drop into the gdb debugger. The code is supposed to write a '\0' at the address of the pointer *s to truncate the string.
When I'm in the debugger and I examine the 's' variable I get the following:
(gdb) x s
0x100403055: 0x726f7720
When I try and set the variable I get:
(gdb) [![set *0x0000000100403055 = 0x726f7700][1]][1]
Cannot access memory at address 0x100403055
I'm using the CLION IDE and am a novice. I'm not sure if its an IDE problem, a user problem or some external memory protection mechanism that is preventing this.
Does anyone know how to make this work?
Here is a screenshot of the IDE:
When I run the code (without the debugger) I get this output:
./explore.exe
Test case 0 failed: expected hello, got hello world
Test case 1 failed: expected hello, got hello#world
Test case 2 failed: expected hello, got hello{world}
Test case 3 failed: expected hello, got hello world
Test case 4 failed: expected hello, got hello world
Test case 5 failed: expected hello, got hello world
Process finished with exit code 0
I this case I believe I was passing in a pointer to memory in the read only space. The struct test_case is built into the code and is read only. So that when I pass that into the token function it was trying to write to read only.
Here is the code that seems to work.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/*
separator - consume all non-token characters until next token.
This includes:
comments: '#' ... '\n'
nesting: '{'
unnesting: '}'
whitespace: ' ','\t','\n'
*nest is changed according to nesting/unnesting processed
*/
static void separator(int *nest, char **tokens) {
char c, *s;
s = *tokens;
while ((c = *s)) {
/* #->eol = comment */
if (c == '#') {
s++;
while ((c = *s)) {
s++;
if (c == '\n')
break;
}
continue;
}
if (c == '{') {
(*nest)++;
s++;
continue;
}
if (c == '}') {
(*nest)--;
s++;
continue;
}
if (c == ' ' || c == '\n' || c == '\t') {
s++;
continue;
}
break;
}
*tokens = s;
}
/*
token - capture all characters until next separator, then consume
separator,
return captured token, leave **tokens pointing to next token.
*/
static char *token(int *nest, char **tokens) {
char c, *s, *t;
char terminator = '\0';
s = t = *tokens;
while ((c = *s)) {
if (c == '#'
|| c == ' ' || c == '\t' || c == '\n' || c == '{' || c == '}')
break;
s++;
}
*tokens = s;
separator(nest, tokens);
*s = '\0';
return t;
}
struct test_case {
char *input;
int nest;
char *expected_output;
};
int main() {
int nest = 0;
int TESTSEP = 0;
char *temp_malloc_string;
if (TESTSEP>0) {
char *tokens = "# this is a comment\n{nesting {example}
unnesting}\n \t end";
temp_malloc_string = malloc(strlen(tokens)*sizeof(char));
strcpy(temp_malloc_string, tokens);
char * t = token(&nest, &temp_malloc_string);
printf("nest: %d\n", nest);
printf("tokens: %s\n", t);
separator(&nest, &temp_malloc_string);
printf("nest: %d\n", nest);
printf("tokens: %s\n", temp_malloc_string);
return 0;
} else {
struct test_case test_cases[] = {
{"hello world", 0, "hello"},
{"hello#world", 0, "hello"},
{"hello{world}", 0, "hello"},
{"hello world", 0, "hello"},
{"hello\tworld", 0, "hello"},
{"hello\nworld", 0, "hello"},
};
for (int i = 0; i < sizeof(test_cases) / sizeof(test_cases[0]); i++) {
struct test_case test_case = test_cases[i];
char *tokens = test_case.input;
printf("len of string is %d\n", strlen(tokens));
temp_malloc_string = malloc((strlen(tokens)+1)*sizeof(char));
char * tt = temp_malloc_string;
if ( temp_malloc_string==NULL ) {
printf("error!\n");
}
strcpy(temp_malloc_string, tokens);
printf("tm going in: %s\n", temp_malloc_string);
char *output = token(&test_case.nest, &temp_malloc_string);
printf("Test case %d: expected %s, got %s\n\t\ttm is now: %s\n",
i, test_case.expected_output, output, temp_malloc_string);
if (strcmp(output, test_case.expected_output) != 0) {
printf("Test case %d failed: expected %s, got %s\n",
i, test_case.expected_output, output);
}
free(tt);
temp_malloc_string = NULL;
}
return 0;
}
}
Now when I run the code I get:
./explore.exe
len of string is 11
tm going in: hello world
Test case 0: expected hello, got hello
tm is now: world
len of string is 11
tm going in: hello#world
Test case 1: expected hello, got hello
tm is now:
len of string is 12
tm going in: hello{world}
Test case 2: expected hello, got hello
tm is now: world}
len of string is 12
tm going in: hello world
Test case 3: expected hello, got hello
tm is now: world
len of string is 11
tm going in: hello world
Test case 4: expected hello, got hello
tm is now: world
len of string is 11
tm going in: hello
world
Test case 5: expected hello, got hello
tm is now: world
Process finished with exit code 0
And when I stop at the breakpoint I can write to memory.
In this modified code I malloc a char* object and copy the string from the struct into that then pass that into the token function.
I'm guess that gdb is protecting me from writing to the .text block in code.
Like I said: I'm a newbie :(

How can I convert all UTF8 Unicode characters in a string to their relevant Codepoints using bash/shell/zsh?

There seem to be loads of questions about converting 'Codepoints' to utf8, but I can't find an answer anywhere to do it the other way around.
I want to run a script which will convert a string such as My📔 to MyU+1F4D4.
I have tried something like this: DECODEDSTRING=$(echo "My📔" | iconv -f utf-8 -t ASCII//TRANSLIT) but I can't seem to figure out what is needed.
Thanks in advance.
Using perl, works in any shell as long as the arguments are encoded in UTF-8:
$ perl -CA -E 'for my $arg (#ARGV) { say map { my $cp = ord; $cp > 127 ? sprintf "U+%04X", $cp : $_ } split //, $arg }' "My📔"
MyU+1F4D4
Non-ASCII codepoints are printed as U+XXXX (0-padded, more hex digits if needed), ASCII ones as human-readable letters.
Or for maximum speed, a C program that does the same:
// Compile with: gcc -o print_unicode -std=c11 -O -Wall -Wextra print_unicode.c
#include <assert.h>
#include <inttypes.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <uchar.h>
#if __STDC_VERSION__ < 201112L
#error "Need C11 or newer"
#endif
#ifndef __STDC_UTF_32__
#error "Not using Unicode"
#endif
int main(int argc, char **argv) {
// arguments should be encoded according to locale's character set
setlocale(LC_CTYPE, "");
for (int i = 1; i < argc; i++) {
char *s = argv[i];
size_t len = strlen(argv[i]);
mbstate_t state;
memset(&state, 0, sizeof state);
while (len > 0) {
char32_t c;
size_t rc = mbrtoc32(&c, s, len, &state);
assert(rc != (size_t)-3);
if (rc == (size_t)-1) {
perror("mbrtoc32");
return EXIT_FAILURE;
} else if (rc == (size_t)-2) {
fprintf(stderr, "Argument %d is incomplete!\n", i);
return EXIT_FAILURE;
} else if (rc > 0) {
if (c > 127) {
printf("U+%04" PRIXLEAST32, c);
} else {
putchar((char)c);
}
s += rc;
len -= rc;
}
putchar('\n');
}
return 0;
}
$ ./print_unicode "My📔"
MyU+1F4D4

Why my program terminates (using pipes)?

I want to make a shell that can use pipes. When I use this code to run a pipe in my shell even though everything is in a WHILE(1) loop my shell terminates. Why? Is there a problem with the use of the dup function?
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdbool.h>
int main(void)
{
int pfds[2];
pipe(pfds);
char *ar1;
const char sp = ' ';
int temp, temp1, temp2, acc;
int i, j;
int t = 0;
char *line=(char *) malloc(1024*sizeof(char));
char *frsarg=(char *) malloc(1024*sizeof(char));
char *firstcmd=(char *) malloc(1024*sizeof(char));
char *seccmd=(char *) malloc(1024*sizeof(char));
char *scmd=(char *) malloc(1024*sizeof(char));
char *secondcmd=(char *) malloc(1024*sizeof(char));
char *secarg=(char *) malloc(1024*sizeof(char));
char *frscmd=(char *) malloc(1024*sizeof(char));
char *cmd1=(char *) malloc(1024*sizeof(char));
char *cmd=(char *) malloc(1024*sizeof(char));
char *cmdf=(char *) malloc(1024*sizeof(char));
char *arg1=(char *) malloc(1024*sizeof(char));
char *allarg=(char *) malloc(1024*sizeof(char));
char *arg2=(char *) malloc(1024*sizeof(char));
char *arg3=(char *) malloc(1024*sizeof(char));
while (1) {
/* Ektypwse to command prompt */
printf("$ ");
fflush(stdout);
fgets(line, 1024, stdin); //Reads the command.
for(i=0;i<1024;i++){
if(line[i]=='\n') //Deletes the "Enter" from the end of the string.
{
line[i]='\0'; //Replace "Enter" with \0.
}
if(line[i] == 'e' && line[i+1] == 'x' && line[i+2] == 'i' && line[i+3] == 't' ) {
exit(1);
}
}
seccmd = strchr(line, '|');
acc = 0;
for(i=0;i<1024;i++){
if(line[i]=='|'){
acc = i;
t=t+1;
} //Finds the second space.
}
/*FIRST COMMAND AND ARGUMENT*/
if(acc != 0 ){
//printf("OKIF\n");
for(j=0;j<acc;j++){
//printf("OKFOR\n");
frscmd[j]= line[j];
}
//printf("FIRST COMMAND %s\n", frscmd);
}
/*FIRST ARG*/
frsarg = strchr(frscmd, sp);
if(frsarg != NULL){
while(isspace(*frsarg)) ++frsarg;
}
for (i=0;i<1024;i++){
if (frsarg[i] == ' '){
frsarg[i] = '\0';
}
}
/*FIRST COMMAND*/
acc = 0;
for(i=0;i<1024;i++){
if(frscmd[i]==' '){
acc = i;
break;
}
}
if(acc != 0 ){
//printf("OKIF\n");
for(j=0;j<acc;j++){
//printf("OKFOR\n");
firstcmd[j]= frscmd[j];
}
}
if(firstcmd != NULL){
while(isspace(*firstcmd)) ++firstcmd;
}
printf("FIRST COMMAND TEST %s TEST\n", firstcmd);
printf("FIRST ARGUMENT TEST %s TEST\n", frsarg);
// firstcmd == "ls" ,frsarg == "-l"
/*SECOND COMMAND AND ARGUMENTS */
//seccmd = " | ws -l
//SECOND COMMAND WITHOUT "|" secondcmd = _wc_-l
secondcmd = strchr(seccmd, sp);
if(secondcmd != NULL){
while(isspace(*secondcmd)) ++secondcmd;
}
//SECCOND COMMAND scmd
acc = 0;
for(i=0;i<1024;i++){
if(secondcmd[i]==' '){
acc = i+1;
}
}
if(acc != 0 ){
for(j=0;j<acc;j++){
scmd[j]= secondcmd[j];
}
}
for (i=0;i<1024;i++){
if (scmd[i] == ' '){
scmd[i] = '\0';
}
}
printf("SECOND COMMAND TEST %s TEST\n", scmd);
//SECOND ARGUMENT secarg
secarg = strchr(secondcmd, sp);
if(secarg != NULL){
while(isspace(*secarg)) ++secarg;
}
printf("SECOND ARGUMENT TEST %s TEST\n", secarg);
//FIRST COMMAND = firstcmd____FIRST ARGUMENT = frsarg_____SECOND COMMAND = scmd_____SECOND ARGUMENT = secarg
if (!fork()) {
close(1); /* close normal stdout */
dup(pfds[1]); /* make stdout same as pfds[1] */
close(pfds[0]); /* we don't need this */
execlp(firstcmd, firstcmd, frsarg,(char*) NULL);
} else {
close(0); /* close normal stdin */
dup(pfds[0]); /* make stdin same as pfds[0] */
close(pfds[1]); /* we don't need this */
execlp(scmd, scmd, secarg,(char*) NULL);
}
return 0;
}
}
Well, this is a very long and complex piece of code, and the logic seems
rather ad hoc. It would take me way too long to dissect the whole
thing.
I do observe that you fork() only once, and each branch then calls
execlp() to run one of the two processes in your pipeline. That leaves
no process continuing to run the shell. So you really need to fork()
twice.
Let's say that the original process is A. After the first call to
fork() we have original process A and child A1. A then calls
wait() to pause until A1 terminates. A1 calls fork() again and
runs the pipelined commands.
Or something like that. Looking at my code bank, I see the last time I
experimented with this stuff was in 2000, and I wasn't doing two
subprocesses as you are. But this should be a step in the right
direction, anyway.

Main function with arguments

I'm trying to understand the main function with the arguments argc and argv. In the command line I am trying to copy the contents of multiple txt files on the screen (concatenation). When I write in the command line appname.exe something f1.txt, the content from the f1.txt prints in a loop. If f1.txt had the text "abcda" the output in console would be "abcdaabcdaabcda...". Sorry for my english; can someone help me understand what I did wrong?
#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
int main(int argc, char *argv[])
{
int i;
for (i = 2; i <= argc - 1;i+2)
{
FILE *f = fopen(argv[i], "r");
if (f == 0)
{
printf("Error\n");
}
else
{
int x;
while ((x = fgetc(f)) != EOF)
{
printf("%c", x);
}
}
fclose(f);
}
}
Here's one big problem:
for (i = 2; i <= argc - 1;i+2)
I think you mean to do:
for (i = 2; i <= argc - 1; i++)

Bash Script - Read Binary File

I'm new to scripting, but I have a lot of experience programming in languages such as C# and Java.
I have a file that contains binary data. I want to write a Bash script that reads the year, month, and day contained in that file so I can sort the associated MOD files into folders according to the date they were recorded. I'm having trouble finding a way to read binary data and parsing it in a bash script. Is there any way to do this?
You can use od (plus head and awk for a little post-processing) for this. To get the year:
year=$(od -t x2 --skip-bytes=6 --read-bytes=2 file.moi | head -1 | awk '{print $2}')
For the month:
month=$(od -t x1 --skip-bytes=8 --read-bytes=1 file.moi | head -1 | awk '{print $2}')
And the day:
day=$(od -t x1 --skip-bytes=9 --read-bytes=1 file.moi | head -1 | awk '{print $2}')
I would recommend using python for this.
However, if you insist on bash, i would try using either sed in binary mode (never tried it) or using dd for extracting specific bytes and then convert them.
If this is not too hardcore for you I suggest compiling the following C-language program:
#include <stdio.h>
#include <inttypes.h>
typedef union {
char array[sizeof(int32_t)];
int32_t val;
} int32_u;
typedef union {
char array[sizeof(uint32_t)];
uint32_t val;
} uint32_u;
typedef union {
char array[sizeof(uint64_t)];
uint64_t val;
} uint64_u;
typedef union {
char array[sizeof(int64_t)];
int64_t val;
} int64_u;
int swap(char* mem, int size) {
if (size & 1 != 0)
return -1;
int i;
for (i = 0; i < size / 2; i++) {
char tmp = mem[i];
mem[i] = mem[size - i - 1];
mem[size - i - 1] = tmp;
}
return 0;
}
int sys_big_endian() {
int x = 1;
return !(*(char*)&x);
}
int main(int argc, char** argv) {
char* file_name = NULL;
int offset = 0;
char* type = "int32";
int big_endian = 0;
int i;
for(i = 1; i < argc; i++) {
if(!strncmp("-o", argv[i], 2)) {
++i;
sscanf(argv[i], "%d", &offset);
} else if(!strncmp("-t", argv[i], 2)) {
++i;
type = argv[i];
} else if(!strncmp("-e", argv[i], 2)) {
++i;
big_endian = !strncmp("big", argv[i], 3);
} else {
file_name = argv[i];
break;
}
}
if (i < argc - 1) {
fprintf(stderr, "Ignoring extra arguments: ");
++i;
for (; i < argc; i++) {
fprintf(stderr, "%s ", argv[i]);
}
fprintf(stderr, "\n");
}
if (file_name == NULL) {
fprintf(stderr, "Syntax: readint [-o offset] [-t type] [-e endian] <filename>\n"
"Where:\n"
" type 'uint32', 'uint64', 'int32' (default), 'int64'.\n"
" endian 'big' or 'little' (default).\n"
" offset offset in a file from where the read will happen, default is 0.\n"
);
return -1;
}
FILE* fp = fopen(file_name, "rb");
if (fp == NULL) {
fprintf(stderr, "Could not open the file: %s\n", file_name);
return -1;
}
fseek(fp, offset, SEEK_SET);
if (!strncmp("uint32", type, 6)) {
uint32_u u;
fread(u.array, sizeof(u.array), 1, fp);
if (big_endian ^ sys_big_endian())
swap(u.array, sizeof(u.array));
printf("%u\n", u.val);
} else if (!strncmp("int32", type, 5)) {
int32_u u;
fread(u.array, sizeof(u.array), 1, fp);
if (big_endian ^ sys_big_endian())
swap(u.array, sizeof(u.array));
printf("%d\n", u.val);
} else if (!strncmp("uint64", type, 6)) {
uint64_u u;
fread(u.array, sizeof(u.array), 1, fp);
if (big_endian ^ sys_big_endian())
swap(u.array, sizeof(u.array));
printf("%"PRIu64"\n", u.val);
} else if (!strncmp("int64", type, 5)) {
int64_u u;
fread(u.array, sizeof(u.array), 1, fp);
if (big_endian ^ sys_big_endian())
swap(u.array, sizeof(u.array));
printf("%"PRId64"\n", u.val);
} else {
printf("Unknown type: %s\n", type);
}
fclose(fp);
return 0;
}
Then do this:
gcc -o readint readint.c
sudo mv readint /usr/local/bin
Now you have a handy tool called 'readint' with the following syntax:
readint [-o offset] [-t int32|uint32|int64|uint64 ] [-e little|big ] <filename>
you can search the net for modules to interpret MOI files (either Perl or Python). Otherwise, i don't really think you can get the date just like that from the binary file because if you look inside, its really "garbage" since its binary. Although you may also give the strings command a try to see if there are legible strings that match the date

Resources