Why do these small D programs behave differently? - algorithm

I wrote a D implementation of the nul2pfb utility from http://www.dwheeler.com/essays/filenames-in-shell.html, as the link to the source code was broken and I wanted to try to learn D. I noticed that it was rather slow (could barely keep up with the find -print0 that was passing it data, when it should be far faster as it need not do anywhere near as many system calls).
The first implementation works correctly (tested with zsh and bash printf built-ins, as well as /usr/bin/printf). The second, though much faster (probably due to far fewer calls to write()), repeats the first part of its output many times and fails to output the remainder. What is causing this difference? I am a newbie to D and do not understand.
Working code:
import std.stdio;
import std.conv;
/// Entry point: consumes standard input in 4096-byte chunks and encodes each chunk.
void main()
{
    foreach (ubyte[] chunk; chunks(stdin, 4096))
        encodebuff(chunk);
}
/**
 * Writes an escaped form of every byte in `mybuff` to standard output.
 *
 * - ASCII letters, digits and `/ . _ :` are written unchanged.
 * - A NUL byte is written as a newline.
 * - Backslash and the control characters \t \n \r \f \v \a \b are written
 *   as their two-character backslash escapes.
 * - Every other byte is written as a backslash followed by at least four
 *   octal digits.
 */
@safe void encodebuff (ubyte[] mybuff) {
    foreach (ubyte i; mybuff) {
        char b = to!char(i);
        switch (i) {
            case 'a': .. case 'z':
            case 'A': .. case 'Z':
            case '0': .. case '9':
            case '/':
            case '.':
            case '_':
            case ':': writeChar(b); break;
            default: writeOctal(b); break;
            case 0: writeChar ('\n'); break;
            case '\\': writeString(`\\`); break;
            case '\t': writeString(`\t`); break;
            case '\n': writeString(`\n`); break;
            case '\r': writeString(`\r`); break;
            case '\f': writeString(`\f`); break;
            case '\v': writeString(`\v`); break;
            case '\a': writeString(`\a`); break;
            case '\b': writeString(`\b`); break;
        }
    }
}
/// Writes the string `a` to standard output as-is.
@trusted void writeString (string a)
{
    write (a);
}
/// Writes `a` to standard output as a backslash followed by at least four
/// octal digits (for example, 32 is written as \0040).
@trusted void writeOctal (int a)
{
    writef ("\\%.4o", a); // leading 0 needed for zsh printf '%b'
}
/// Writes the single character `a` to standard output.
@trusted void writeChar (char a)
{
    write (a);
}
The broken version:
import std.stdio;
import std.conv;
import std.string;
/// Entry point: consumes standard input in 4096-byte chunks and encodes each chunk.
void main()
{
    foreach (ubyte[] chunk; chunks(stdin, 4096))
        encodebuff(chunk);
}
/**
 * Appends the escaped form of each byte in `mybuff` to a local buffer,
 * `outstring`, using the same escaping rules as the working version
 * (passthrough characters, NUL as newline, backslash escapes, octal for
 * everything else).
 *
 * writeString is called on `outstring` once per input byte, inside the loop.
 */
@safe void encodebuff (ubyte[] mybuff) {
    char[] outstring;
    foreach (ubyte i; mybuff) {
        switch (i) {
            case 'a': .. case 'z':
            case 'A': .. case 'Z':
            case '0': .. case '9':
            case '/':
            case '.':
            case '_':
            case ':': outstring ~= to!char(i); break;
            case 0: outstring ~= '\n'; break;
            default: char[5] mystring;
                formatOctal(mystring, i);
                outstring ~= mystring;
                break;
            case '\\': outstring ~= `\\`; break;
            case '\t': outstring ~= `\t`; break;
            case '\n': outstring ~= `\n`; break;
            case '\r': outstring ~= `\r`; break;
            case '\f': outstring ~= `\f`; break;
            case '\v': outstring ~= `\v`; break;
            case '\a': outstring ~= `\a`; break;
            case '\b': outstring ~= `\b`; break;
        }
        writeString (outstring);
    }
}
/// Writes the character array `a` to standard output as-is.
@trusted void writeString (char[] a)
{
    write (a);
}
/// Formats `a` into the caller-supplied buffer `b` as a backslash followed by
/// at least four octal digits. The value returned by sformat is discarded.
@trusted void formatOctal (char[] b, ubyte a)
{
    sformat (b, "\\%.4o", a); // leading 0 needed for zsh printf '%b'
}
Tests: (note that filelist.txt is a NUL-delimited list of files generated by find -print0 on my home directory, and filelist2.txt is generated from it by <filelist.txt sed -e 's/\x0/\n/g' > filelist2.txt and is thus the corresponding list of newline-delimited filenames).
# the sed script escapes the backslashes so xargs does not clobber them
diff filelist2.txt <(<filelist.txt char2code2 | sed -e 's/\\/\\\\/g' | xargs /usr/bin/printf "%b\n")
# from within zsh
bash -c 'diff filelist2.txt <(for i in "$(<filelist.txt char2code)"; do printf "%b\n" "$i"; done)'
# from within zsh and bash
diff filelist.txt <(for i in $(char2code <filelist.txt); do printf '%b\0' "$i"; done)
# from within zsh, bash, and dash
for i in $(char2code <filelist.txt); do printf '%b\0' "$i"; done | diff - filelist.txt
A script I made as an acid test:
#!/bin/bash
# Acid test: feed random NUL-delimited data to the encoder named on the
# command line, decode its output with printf '%b', and diff the round trip
# against the original data.
# this creates a completely random list of NUL-delimited strings
a=''
trap 'rm -f "$a"' EXIT
a="$(mktemp)";
</dev/urandom sed -e 's/\x0\x0/\x0/g' | dd count=2048 of="$a"
test -s "$a" || exit 1
printf '\0' >> "$a"
# "$@" expands to the encoder command (and any arguments) given to this script.
for i in $("$@" < "$a")
do
printf '%b\0' "$i"
done | diff - "$a"
What is the reason for the difference?
EDIT: I have implemented the changes suggested by @yaz and @MichalMinich and am still seeing wrong results. Specifically, find -print0 | char2code2 (the name of the program, which is in my $PATH) from my home directory results in an exit status of 1 and no output. However, it works from a subdirectory with far fewer items. My revised source is below:
import std.stdio;
import std.conv;
import std.format;
import std.array;
/// Entry point: encodes standard input chunk by chunk (4096 bytes at a time),
/// then emits a final newline.
void main()
{
    foreach (ubyte[] chunk; chunks(stdin, 4096))
        encodebuff(chunk);
    writeln();
}
/**
 * Builds the escaped form of the whole chunk `mybuff` in an appender and
 * writes it to standard output with a single writeString call.
 *
 * Escaping rules: backslash and \t \n \r \f \v \a \b become two-character
 * backslash escapes; NUL becomes a newline; ASCII letters, digits and
 * `/ . _ :` pass through; every other byte goes through formatOctal.
 */
void encodebuff (ubyte[] mybuff) {
    auto sink = appender!string();
    foreach (ubyte octet; mybuff) {
        switch (octet) {
            // Characters with a dedicated backslash escape.
            case '\\': sink.put(`\\`); break;
            case '\a': sink.put(`\a`); break;
            case '\b': sink.put(`\b`); break;
            case '\f': sink.put(`\f`); break;
            case '\n': sink.put(`\n`); break;
            case '\r': sink.put(`\r`); break;
            case '\t': sink.put(`\t`); break;
            case '\v': sink.put(`\v`); break;
            // The NUL separator turns into a line break.
            case 0: sink.put('\n'); break;
            // Characters copied through untouched.
            case '0': .. case '9':
            case 'A': .. case 'Z':
            case 'a': .. case 'z':
            case '.':
            case '/':
            case ':':
            case '_': sink.put(to!char(octet)); break;
            // Anything else is emitted as an octal escape.
            default: formatOctal(sink, octet); break;
        }
    }
    writeString (sink.data);
    // writef(stderr, "Wrote a line\n");
}
/// Writes the string `a` to standard output as-is.
@trusted void writeString (string a)
{
    write (a);
}
/// Appends `a` to the output sink `w` as a backslash followed by at least
/// four octal digits.
@trusted void formatOctal(Writer)(Writer w, ubyte a)
{
    formattedWrite(w, "\\%.4o", a); // leading 0 needed for zsh printf '%b'
}

You need to take writeString outside the foreach in encodebuff. Currently you're writing outstring on every iteration without clearing it, so everything accumulated so far is printed again each time. The issue @Michal Minich pointed out is valid too.

One reason could be that you always append all 5 chars of char[5] mystring. The sformat call in formatOctal returns the formatted string, which might be shorter than 5 chars (it is probably a slice of the buffer); you should append that returned string to outstring instead.
Performance advice: use Appender instead of ~= for better performance when building string.

The real problem turned out to be with my LDC installation. It used shared libraries, which aren't supported by its version of druntime.
Recompiling LDC to use static libraries fixed the problem.

Related

How can I remove the segmentation error in the following code?

In the following code, I am getting the segmentation fault. Whenever the query type is 1, we have to push element into the stack, if it is 2 then we have to pop from stack, and if it is 3 then print the maximum value in the stack.
My guess is that the error is present somewhere in the switch case. However, I am unable to spot it. Please help.
#include<bits/stdc++.h>
using namespace std;
// Returns the largest value held in the stack.
// The stack is passed by value, so popping here leaves the caller's stack intact.
// Precondition: st is not empty (calling top() on an empty stack is undefined).
int maxinStack(stack<int> st){
int best=st.top();
// Drain the copy completely. Comparing a rising counter against size()
// while popping would stop after only about half of the elements.
while(!st.empty()){
if(st.top()>best){
best=st.top();
}
st.pop();
}
return best;
}
// Reads querySize queries from standard input, then replays them against a stack:
// type 1 carries a value, and the switch below dispatches on types 1, 2 and 3.
int main() {
/* Enter your code here. Read input from STDIN. Print output to STDOUT */
stack<int> s;
int querySize;
cin >> querySize;
// One entry per query; the values of type-1 queries are queued separately, in order.
vector<int> queryType(querySize);
queue<int> queryData;
for(int i=0;i<querySize;i++){
cin>>queryType[i];
if(queryType[i]==1){
int x;
cin >> x;
queryData.push(x);
}
}
/*for (int j=0;j<querySize;j++){
cout << queryType.at(j)<<" ";
}
cout << endl;
while(!queryData.empty()){
cout << queryData.front()<<" ";
queryData.pop();
}
cout << endl;
*/
// Second pass: execute the queries in the order they were read.
for (int j=0;j<querySize;j++){
switch (queryType[j]){
// Push the next queued value. No break follows, so control continues into case 2.
case 1:{
int y=queryData.front();
s.push(y);
queryData.pop();
}
// Pop the top of the stack. No break follows, so control continues into case 3.
case 2: s.pop();
// Print the maximum of the current stack contents.
case 3: cout << maxinStack(s)<<endl;
}
}
return 0;
}
Assuming inputs are correct, I think you forgot to put break at the end of each case handlers.
So it should be something like:
switch (queryType[j]){
case 1:{
int y=queryData.front();
s.push(y);
queryData.pop();
break;
}
case 2: s.pop(); break;
case 3: cout << maxinStack(s)<<endl; break;
}
Otherwise when it handles case 1 it will still fall-through to the next case handlers so it also does case 2 and case 3. This means that the stack is always empty and it causes segmentation fault when it handles query type of 2 - tried to pop for an empty stack.
As pointed out above by @Hanjoung, your switch cases are missing break statements. To give a little context: when a case has no break, execution falls through, and every case after the matched one also runs until a break is reached. For example:
switch(choice){
case 1:
case 2: // Suppose this case matched
case 3: // This will also run as no break in case 2
break;
case 4: // Will not run as break in case 3
default:
}
The reason you are getting segmentation error is because your "case 2" is popping from empty stack, and reason for running of this "case 2" is absence of break statement in "case 1".

Error with fread() in C, not getting the expected output

I'm making a checksum algorithm for one of my classes, I want to read two binary files and run them through a checksum algorithm. The checksum algorithm works (I've tried inputting what I want into the terminal and it works) but I can't get my fread() to work. I've tried printing the outputs and they print the correct stuff, but then a bunch of other random numbers and letters at the end.
Here is my code:
/*
 * Reads test1.bin and test2.bin, treats their contents as strings of '0'/'1'
 * characters, adds the two strings digit by digit with carry, and prints the
 * bitwise complement of the result as the checksum.
 */
int main(int argc, char *argv[])
{
FILE *ptr1;
FILE *ptr2;
/* The fopen results are used below without being checked for NULL. */
ptr1 = fopen("test1.bin","rb");
ptr2 = fopen("test2.bin","rb");
/* NOTE: sizeof(ptr1) is the size of a FILE * pointer, not the size of the file. */
char file1[sizeof(ptr1)], file2[sizeof(ptr2)];
char sum[sizeof(ptr1)], comp[sizeof(ptr1)];
/* fread fills the buffers with raw bytes; it does not store a terminating '\0'. */
fread(file1,sizeof(file1),1,ptr1);
fread(file2,sizeof(file2),1,ptr2);
fclose(ptr1);
fclose(ptr2);
/* char file1[20], file2[20];
char sum[20], comp[20];
printf("enter 1\n");
scanf("%s",&file1);
printf("enter 2\n");
scanf("%s",&file2);*/
/* strlen scans for a '\0' terminator, which the fread calls above never write. */
if(strlen(file1)==strlen(file2)) {
/* next is the carry digit, kept as the character '0' or '1'. */
char next='0';
int length = strlen(file1);
/* Add from the last digit to the first; each branch is one row of the
   full-adder truth table (digit of file1, digit of file2, carry in). */
for(int i=length-1;i>=0;i--)
{
if(file1[i]=='0' && file2[i]=='0' && next=='0')
{
sum[i]='0';
next='0';
}
else if(file1[i]=='0' && file2[i]=='0' && next=='1')
{
sum[i]='1';
next='0';
}
else if(file1[i]=='0' && file2[i]=='1' && next=='0')
{
sum[i]='1';
next='0';
}
else if(file1[i]=='0' && file2[i]=='1' && next=='1')
{
sum[i]='0';
next='1';
}
else if(file1[i]=='1' && file2[i]=='0' && next=='0')
{
sum[i]='1';
next='0';
}
else if(file1[i]=='1' && file2[i]=='0' && next=='1')
{
sum[i]='0';
next='1';
}
else if(file1[i]=='1' && file2[i]=='1' && next=='0')
{
sum[i]='0';
next='1';
}
else if(file1[i]=='1' && file2[i]=='1' && next=='1')
{
sum[i]='1';
next='1';
}
/* Any character other than '0' or '1' stops the addition early. */
else
break;
}
/* Complement every digit of the sum. */
for (int i=0;i<length;i++)
{
if(sum[i]=='0')
comp[i]='1';
else
comp[i]='0';
}
/* The final carry is complemented too and printed as the leading digit. */
if(next=='1')
next='0';
else
next='1';
/* comp is printed with %s, but no '\0' is stored after its last digit. */
printf("\nChecksum=%c%s",next, comp);
}
else {
printf("\nInput Lengths do not match");
}
}
test1.bin and test2.bin are two files that contain 8 bytes of binary. I've tried using
printf("this is file 1 %s\n", file1)
printf("this is file 2 %s\n", file2)
to help debug and it outputs
this is file 1 01001001dL
this is file 2 01001000P5L
What is my error here? I'm not great at C so I'm sure its something simple.
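You allocate sizeof(ptr1) bytes for file1, but sizeof(ptr1) is the size of the pointer type FILE *, which is typically 4 or 8 bytes and has nothing to do with the size of the file. If you know your file contains exactly 8 bytes, write 8 there, plus one extra byte for a terminating '\0': fread() does not NUL-terminate the buffer, so strlen() and printf("%s") run past the end of the data, which is where the trailing garbage comes from.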

Uncommon syntax? ---"->"

I was looking at someone else's code and saw something that I'd only been told was a pointer. The thing is the syntax is not pointer syntax. This is the following code:
switch(control)
{
case 'w':
case 'a':
case 's':
case 'd':
execute->backup_grid();// this is the part I was talking about.
execute->fill_space();
execute->update_grid();
execute->fill_space();
execute->find_greatest_tile();
execute->display_grid();
if(execute->full()&&apocalypse)
{
response=-1;
break;
}
else if(execute->block_moves())
{
execute->spawn();
break;
}
else
{
response=0;
break;
}
That is a kind of pointer syntax. In plain C, if you have a pointer to a struct, you can access the value of a member with the "arrow" operator, like this:
struct foo {
int num ;
};
struct foo var ;
struct foo *ptr ;
var.num = 0;
ptr = &var;
printf("%d ", ptr->num); // 0
printf("%d ", (*ptr).num); //0
In essence
ptr->num
is a shorter way of
(*ptr).num
In your code, you are calling methods with a pointer to some object.

what is the binary-to-text encoding used by protoc --decode?

I am looking at the output of the protoc --decode command and I cannot fathom the encoding used when it encounters bytes :
data {
image: "\377\330\377\340\000\020JFIF\000\001[…]\242\2634G\377\331"
}
The […] was added by me to shorten the output.
What encoding is this?
Edit
So based on Bruce's answer I wrote my own utility in order to generate sample data from a shell script :
/**
 * Prints the contents of the file named by the first argument as a
 * double-quoted, escaped string on standard output.
 *
 * @param parameters command-line arguments; parameters[0] is the path of the binary input file
 * @throws IOException if the file cannot be opened or read
 */
public static void main(String[] parameters) throws IOException {
File binaryInput = new File(parameters[0]);
// try-with-resources closes the stream even if reading fails.
try (FileInputStream in = new FileInputStream(binaryInput)) {
System.out.println("\""+TextFormat.escapeBytes(ByteString.readFrom(in))+"\"");
}
}
}
That way I can serialize my binaries and insert them into the text serialization of a protobuf before calling protoc --encode on it:
# Temporary files: the escaped image line, the filled-in template, and the protoc output.
IMAGE=$(mktemp)
OUTPUT=$(mktemp)
BIN_INSTANCE=$(mktemp)
# Write the field name without a trailing newline so the Java output is appended on the same line.
echo -n 'capture: ' > $IMAGE
java -cp "$HOME/.m2/repository/com/google/protobuf/protobuf-java/3.0.0/protobuf-java-3.0.0.jar:target/protobuf-generator-1.0.0-SNAPSHOT.jar" protobuf.BinarySerializer image.jpg >> $IMAGE
# Replace the {UUID} placeholder in the template with a freshly generated UUID.
sed -e 's/{UUID}/'$(uuidgen)'/' template.protobuf > $OUTPUT
# On the line containing {IMAGE}: read in the contents of $IMAGE, then delete the placeholder line.
sed -i '/{IMAGE}/ {
r '$IMAGE'
d
}' $OUTPUT
# Feed the completed text message to protoc --encode and store its output.
cat $OUTPUT | protoc --encode=prototypesEvent.proto> $BIN_INSTANCE
with template.protobuf being :
uuid: "{UUID}"
image {
capture: "{IMAGE}"
}
I presume it is the same encoding as the one produced by the Java implementation.
basically:
* between space (0x20) and tilde (0x7e) treat it as an ascii character
* if there is a shortcut (e.g. \n, \r, \ etc) use the shortcut
* otherwise escape the character (octal)
so in the above \377 is 1 byte: 377 octal or 255 in decimal.
"\377\330\377\340 = 255 216 255 224
You should be able to copy the string into a Java/C program and convert it to bytes
The Java code looks to be:
/**
 * Returns an escaped text form of the bytes in {@code input}.
 *
 * Bytes with a named escape (\a \b \f \n \r \t \v, backslash, single quote,
 * double quote) are written as that escape; other bytes in the range
 * 0x20..0x7e are appended unchanged; every remaining byte is written as a
 * backslash followed by exactly three octal digits.
 *
 * @param input the byte sequence to escape
 * @return the escaped string
 */
static String escapeBytes(final ByteSequence input) {
final StringBuilder builder = new StringBuilder(input.size());
for (int i = 0; i < input.size(); i++) {
final byte b = input.byteAt(i);
switch (b) {
// Java does not recognize \a or \v, apparently.
case 0x07: builder.append("\\a"); break;
case '\b': builder.append("\\b"); break;
case '\f': builder.append("\\f"); break;
case '\n': builder.append("\\n"); break;
case '\r': builder.append("\\r"); break;
case '\t': builder.append("\\t"); break;
case 0x0b: builder.append("\\v"); break;
case '\\': builder.append("\\\\"); break;
case '\'': builder.append("\\\'"); break;
case '"' : builder.append("\\\""); break;
default:
// Only ASCII characters between 0x20 (space) and 0x7e (tilde) are
// printable. Other byte values must be escaped.
if (b >= 0x20 && b <= 0x7e) {
builder.append((char) b);
} else {
// b is sign-extended to int before shifting; the masks keep only the
// low bits, so negative bytes still produce their unsigned octal digits.
builder.append('\\');
builder.append((char) ('0' + ((b >>> 6) & 3)));
builder.append((char) ('0' + ((b >>> 3) & 7)));
builder.append((char) ('0' + (b & 7)));
}
break;
}
}
return builder.toString();
}
taken from com.google.protobuf.TextFormatEscaper

How to create | in one's own shell?

I'm actually doing my own shell.
I have done the following special characters:
int commande(int fin, int fout, char * com, char * param, int * bg){
// execute a command
(ex. ls –l)
int symbole;
char *mot;
pid_t pid;
symbole = parsing();
switch(symbole){
case 0: // NL
case 1: // ;
case 2: // &
case 3: // <
case 4: // >
case 5: // | (Here I have some issues when I try to redirect the output of a command).
(correspond à ctrl+D)
case 10:// Mot
default:
}
return;
}
But I have some issues to do the redirection of an output when it is piped " |", when I have two instructions that follow themselves. Indeed I have tried the following operations which have all worked:
>myShell ps > fich
>myShell ls -l | wc -l
But not this one:
>myShell ls -l | wc -l >file
Here are the two cases in detail. I think the issue is in case 5 and not in case 4, because the first command I tried (shown above) worked.
case 4: // SYMBOLE : >
if(output==0){
output=1;
execute=1;
for (l=0;l<10;l++){
eltsoutput[l]=eltsCommande[l];
}
}
break;
case 5: // SYMBOLE : |
//if(tube==0){
/*for (l=0;l<10;l++){
eltstube[l]=eltsCommande[l];
}*/
p2=fork();
if(p2==0){
if(tube==0){
freopen( "fichtmp", "w", stdout );
execvp(eltsCommande[0], eltsCommande);
}
return(0);
}
else{ if(background==0){ // SANS MOD BG ATTENDRE FIN FILS
waitpid(p2, NULL, 0);
}
tube=1;
execute=1;
}
break;
Can you help me finding a way to execute two commands at the same time with | and that allow their result to go to a file?
In my shell, the case one work in the case of a redirection with an instruction ";":
}else if(output==1){
close(1);
int filew = creat(eltsCommande[0], 0644);
execvp(eltsoutput[0], eltsoutput);
Maybe I should use this code to make it work?
Looking at the NetBSD /bin/sh source code, I see the following pipe implementation:
/*
 * Create a pipe and store its two descriptors in fds.
 *
 * Any descriptor numbered below 3 is duplicated onto a descriptor >= 3 with
 * fcntl(F_DUPFD) and the original is closed; if the duplication fails, the
 * original descriptor is kept. Presumably this keeps the pipe clear of
 * descriptors 0-2 (stdin/stdout/stderr) -- confirm against the callers.
 *
 * Returns 0 on success, or -1 if pipe() fails.
 */
static int
sh_pipe(int fds[2])
{
int nfd;
if (pipe(fds))
return -1;
/* Move the read end out of the 0-2 range. */
if (fds[0] < 3) {
nfd = fcntl(fds[0], F_DUPFD, 3);
if (nfd != -1) {
close(fds[0]);
fds[0] = nfd;
}
}
/* Move the write end out of the 0-2 range. */
if (fds[1] < 3) {
nfd = fcntl(fds[1], F_DUPFD, 3);
if (nfd != -1) {
close(fds[1]);
fds[1] = nfd;
}
}
return 0;
}
This function is called by evalpipe with 2 file descriptors:
/*
 * Evaluate a pipeline node.
 *
 * Counts the commands in n->npipe.cmdlist, creates a job for them, then for
 * each command creates a pipe to the next command (if there is one), forks,
 * and in the forked branch connects prevfd to descriptor 0 and pip[1] to
 * descriptor 1 before evaluating the command. Unless n->npipe.backgnd is set,
 * waits for the job and stores the result in exitstatus.
 */
STATIC void
evalpipe(union node *n)
{
struct job *jp;
struct nodelist *lp;
int pipelen;
int prevfd;
int pip[2];
TRACE(("evalpipe(0x%lx) called\n", (long)n));
/* Count the commands in the pipeline. */
pipelen = 0;
for (lp = n->npipe.cmdlist ; lp ; lp = lp->next)
pipelen++;
INTOFF;
jp = makejob(n, pipelen);
/* prevfd is the read end of the pipe created for the previous command; -1 means none yet. */
prevfd = -1;
for (lp = n->npipe.cmdlist ; lp ; lp = lp->next) {
prehash(lp->n);
/* pip[1] == -1 marks "no output pipe" for the last command. */
pip[1] = -1;
if (lp->next) {
if (sh_pipe(pip) < 0) {
if (prevfd >= 0)
close(prevfd);
error("Pipe call failed");
}
}
/* NOTE(review): a zero return from forkshell is presumably the child process, as with fork() -- confirm. */
if (forkshell(jp, lp->n, n->npipe.backgnd ? FORK_BG : FORK_FG) == 0) {
INTON;
/* Make the previous pipe's read end this command's descriptor 0. */
if (prevfd > 0) {
close(0);
copyfd(prevfd, 0, 1);
close(prevfd);
}
/* Make the new pipe's write end this command's descriptor 1. */
if (pip[1] >= 0) {
close(pip[0]);
if (pip[1] != 1) {
close(1);
copyfd(pip[1], 1, 1);
close(pip[1]);
}
}
evaltree(lp->n, EV_EXIT);
}
/* Outside the forked branch: drop the old read end and keep the new one for the next command. */
if (prevfd >= 0)
close(prevfd);
prevfd = pip[0];
close(pip[1]);
}
/* Foreground pipeline: wait for the job and record its exit status. */
if (n->npipe.backgnd == 0) {
exitstatus = waitforjob(jp);
TRACE(("evalpipe: job done exit status %d\n", exitstatus));
}
INTON;
}
evalpipe is called in a switch statement in evaltree as follows:
case NPIPE:
evalpipe(n);
do_etest = !(flags & EV_TESTED);
break;
... which is called by the infinite loop in evalloop, and percolates up the tree till it gets to the eval function. I hope this helps.

Resources