How can I generate my abstract syntax tree using this makefile? Why do I only see an error at line 1? - makefile

def.h
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
/* Kinds of interior (nonterminal) nodes of the abstract tree.
 * tree.c keeps a table of printable names (tabnonterm) listed in this same
 * order, so the two must be kept in step when an entry is added. */
typedef enum
{
NPROGRAM,
NVARDECLLIST,
NFUNCDECLLIST,
NVARDECL,
NIDLIST,
NOPTPARAMLIST,
NSTATLIST,
NASSIGNSTAT,
NWHILESTAT,
NIFSTAT,
NFORSTAT,
NRELEXPR,
NRETURNSTAT,
NREADSTAT,
NLOGICEXPR,
NWRITESTAT,
NNEGEXPR,
NFUNCCALL,
NEXPRLIST,
NCONDEXPR,
NPARAMDECL,
NMATHEXPR,
NCASTING
} Nonterminal;
/* Tag stored in every tree node (Node.type).
 * T_NONTERMINAL marks an interior node; the other values mark leaves.
 * tree.c keeps a table of printable names (tabtypes) in this same order. */
typedef enum
{
T_BREAK,
T_TYPE,
T_BOOLEAN,
T_INTCONST,
T_REALCONST,
T_BOOLCONST,
T_WRITEOP,
T_STRCONST,
T_ID,
T_NONTERMINAL
} Typenode;
/* Payload of a node or of the last scanned token.
 * This is a union: all members share the same storage, so only the member
 * that was written last holds a meaningful value. */
typedef union
{
int ival;
float rval;
char *sval;
enum {FALSE, TRUE} bval;
} Value;
/* One node of the abstract tree.
 * type  - node tag (see Typenode).
 * value - payload; which union member is valid depends on how the node
 *         was built.
 * p1, p2, p3 - links to other nodes. treeprint() follows p1 to reach the
 *         first child and p3 to reach the next sibling; the parser actions
 *         additionally attach subtrees through p2. */
typedef struct snode
{
Typenode type;
Value value;
struct snode *p1, *p2, *p3;
} Node;
/* Pointer-to-node type used throughout the parser and the tree printer. */
typedef Node *Pnode;
/* Defined in lexer.lex: returns a heap-allocated copy of its argument. */
char *newstring(char*);
/* Scanner entry point generated by flex. */
int yylex();
/* Node constructors, defined in the epilogue of parser.y. */
Pnode nontermnode(Nonterminal, int),
ntn(Nonterminal),
idnode(),
keynode(Typenode, int),
intconstnode(),
realconstnode(),
strconstnode(),
boolconstnode(),
newnode(Typenode);
/* yyerror is defined in parser.y, treeprint in tree.c. */
void yyerror(),
treeprint(Pnode, int);
lexer.lex
%{
#include "parser.h"
#include "def.h"
/* Current input line; advanced on every newline and read by yyerror(). */
int line = 1;
/* Semantic value of the most recently scanned constant or identifier. */
Value lexval;
%}
%option noyywrap
/*
 * Named patterns.
 *  - spacing also accepts '\r', so files with CRLF line endings do not fall
 *    through to the catch-all rule and come back as ERROR at the end of
 *    the first line.
 *  - digit is the range 0-9 (the class previously held a soft hyphen
 *    instead of '-', so it did not match the digits 1 to 8).
 *  - sugar lists the single-character tokens the grammar uses as
 *    character literals, including '='.
 */
spacing ([ \t\r])+
commento "#"(.)*\n
letter [A-Za-z]
digit [0-9]
intconst {digit}+
strconst \"([^\"])*\"
boolconst false|true
realconst {intconst}\.{digit}+
id {letter}({letter}|{digit})*
sugar [():,;.=]
%%
{spacing} ;
{commento} {line++; /* a comment swallows its terminating newline */}
\n {line++;}
integer {return(INTEGER);}
string {return(STRING);}
boolean {return(BOOLEAN);}
real {return(REAL);}
void {return(VOID);}
func {return(FUNC);}
body {return(BODY);}
end {return(END);}
else {return(ELSE);}
while {return(WHILE);}
do {return(DO);}
for {return(FOR);}
to {return(TO);}
return {return(RETURN);}
read {return(READ);}
write {return(WRITE);}
writeln {return(WRITELN);}
and {return(AND);}
or {return(OR);}
not {return(NOT);}
if {return(IF);}
then {return(THEN);}
break {return(BREAK);}
"<=" {return(LEQ);}
">=" {return(GEQ);}
"!=" {return(NEQ);}
"==" {return(EQU);}
"<" {return(LT);}
">" {return(GT);}
"+" {return(PLUS); /* the grammar uses named tokens for arithmetic */}
"-" {return(MINUS);}
"*" {return(TIMES);}
"/" {return(SLASH);}
{intconst} {lexval.ival = atoi(yytext); return(INTCONST);}
{strconst} {lexval.sval = newstring(yytext); return(STRCONST);}
{boolconst} {lexval.bval = (yytext[0] == 'f' ? FALSE : TRUE); return(BOOLCONST);}
{realconst} {lexval.rval = atof(yytext); return(REALCONST);}
{id} {lexval.sval = newstring(yytext); return(ID);}
{sugar} {return(yytext[0]);}
. {return(ERROR);}
%%
/*
 * Return a heap-allocated copy of the NUL-terminated string s.
 * The caller owns the returned buffer. If memory cannot be obtained the
 * program reports the failure on stderr and exits, instead of writing
 * through a NULL pointer.
 */
char *newstring(char *s)
{
    size_t size = strlen(s) + 1; /* +1 for the terminating NUL */
    char *p = malloc(size);

    if (p == NULL)
    {
        fprintf(stderr, "newstring: out of memory\n");
        exit(EXIT_FAILURE);
    }
    memcpy(p, s, size);
    return(p);
}
makefile
# Link the final executable from the three object files.
bup: lexer.o parser.o tree.o
	cc -g -o bup lexer.o parser.o tree.o

# lexer.c includes parser.h (token codes) and def.h.
lexer.o: lexer.c parser.h def.h
	cc -g -c lexer.c

parser.o: parser.c def.h
	cc -g -c parser.c

tree.o: tree.c def.h
	cc -g -c tree.c

# The scanner source only depends on its own specification.
lexer.c: lexer.lex
	flex -o lexer.c lexer.lex

# One bison run writes both parser.c and parser.h, so both are targets of
# this rule; previously parser.c was a prerequisite with no rule to make it.
parser.c parser.h: parser.y
	bison -vd -o parser.c parser.y
parser.y
%{
#include "def.h"
#define YYSTYPE Pnode
extern char *yytext;
extern Value lexval;
extern int line;
extern FILE *yyin;
Pnode root = NULL;
%}
%token ID FUNC BODY END BREAK IF THEN ELSE TYPE WHILE DO FOR RETURN READ WRITE WRITELN
%token AND OR INTCONST REALCONST BOOLCONST STRCONST INTEGER REAL NOT STRING BOOLEAN VOID PLUS MINUS TIMES SLASH
%token LEQ GEQ NEQ EQU GT LT TO ERROR
%%
program : var-decl-list func-decl-list body '.' {root = $$ = ntn(NPROGRAM);
root->p1 = ntn(NVARDECLLIST);
root->p2 = ntn(NFUNCDECLLIST);
root->p1->p1 = $1;
root->p2->p1 = $2;
root->p3 = $3;}
;
var-decl-list : var-decl var-decl-list {/* The list is the first declaration, with the remaining ones chained
   through p3. $$ must be assigned here: dereferencing it before it is
   set (as in "$$->p1 = $1") wrote through the pre-action default value,
   which is $1 itself, and made the node its own child. */
$$ = $1;
$1->p3=$2;}
| {$$ = NULL;}
;
var-decl : id-list ':' type ';' {$$ = ntn(NVARDECL);
$$ -> p1 = ntn(NIDLIST);
$$->p1->p1=$1; $$ -> p1 -> p3 = $3;}
;
id-list : ID {$$ = idnode();} ',' id-list {$$ = $2;
$2 -> p3 = $4;}
| ID {$$ = idnode();}
;
type : INTEGER {$$ = keynode(T_TYPE, INTEGER);}
| REAL {$$ = keynode(T_TYPE, REAL);}
| STRING {$$ = keynode(T_TYPE, STRING);}
| BOOLEAN {$$ = keynode(T_TYPE, BOOLEAN);}
| VOID {$$ = keynode(T_TYPE, VOID); }
;
func-decl-list : func-decl func-decl-list {/* The list is the first function declaration, with the remaining ones
   chained through p3. $$ is assigned rather than dereferenced, so the
   action no longer writes through an unset result value. */
$$ = $1;
$1 -> p3 = $2;}
| {$$ = NULL;}
;
/* Function declaration. Symbol positions: $3 is the value of the mid-rule
   action (the identifier node), $5 the parameter list, $8 the return type,
   $9 the local declarations and $10 the body.
   NOTE(review): the final action never assigns $$; it only writes through
   it. Before an action runs, bison's result value defaults to $1, which here
   is the value of the FUNC token - confirm, and allocate a dedicated node
   for the declaration before filling in its links. */
func-decl : FUNC ID {$$ = idnode();} '(' opt-param-list ')' ':' type var-decl-list body ';' {$$ -> p1 = $3;
$$ -> p2 = ntn(NOPTPARAMLIST);
$$ -> p2 ->p1=$5;
$$ -> p2 -> p3 = $8;
$$ -> p2 -> p3->p3 = ntn(NVARDECLLIST);
$$ -> p2 -> p3->p3->p1 = $9;
$$ -> p2 -> p3->p3->p3 = $10;}
;
opt-param-list : param-list {$$ = $1;}
| {$$ = NULL;}
;
param-list : param-decl ',' param-list {$$ = $1;
$1 -> p3 = $3;}
| param-decl
;
param-decl : ID {$$ = idnode();} ':' type {$$=ntn(NPARAMDECL);
$$ -> p1 = $2;
$$ -> p2 = $4;}
;
body : BODY stat-list END {$$ = ntn(NSTATLIST);
$$->p1=$2;}
;
stat-list : stat ';' stat-list {$$ = $1;
$1 -> p3 = $3;}
| stat ';' {$$=$1;}
;
stat : assign-stat
| if-stat
| while-stat
| for-stat
| return-stat
| read-stat
| write-stat
| func-call
| BREAK {$$ = newnode(T_BREAK);}
;
assign-stat : ID {$$ = idnode();} '=' expr {$$ = ntn(NASSIGNSTAT);
$$ -> p1 = $2;
$$ -> p2 = $4;}
;
if-stat : IF expr THEN stat-list opt-else-stat END {/* p1: condition. p2: NSTATLIST node holding the "then" statements in
   its p1 (same layout as while-stat) and the optional else part in p3.
   The "then" statements ($4) were previously not attached at all. */
$$ = ntn(NIFSTAT);
$$ -> p1 = $2;
$$ -> p2 = ntn(NSTATLIST);
$$ -> p2 -> p1 = $4;
$$ ->p2 -> p3 = $5;}
;
opt-else-stat : ELSE stat-list {$$ = ntn(NSTATLIST);
$$->p1=$2;}
| {$$ = NULL;}
;
while-stat : WHILE expr DO stat-list END {$$ = ntn(NWHILESTAT);
$$->p1=$2;
$$->p2=ntn(NSTATLIST);
$$->p2->p1=$4;}
;
for-stat : FOR ID {$$=idnode();} '=' expr TO expr DO stat-list END {$$ = ntn(NFORSTAT);
$$->p1=$3;
$$->p2=$5;
$$->p2->p3=$7;
$$->p2->p3->p3=ntn(NSTATLIST);
$$->p2->p3->p3->p1=$9;}
;
return-stat : RETURN opt-expr {$$ = ntn(NRETURNSTAT);
$$->p1=$2;}
;
opt-expr : expr {$$=$1;}
| {$$=NULL;}
;
read-stat : READ '(' id-list ')' {$$ = ntn(NREADSTAT);
$$->p1=ntn(NIDLIST);
$$->p1->p1=$3;}
;
write-stat : write-op '(' expr-list ')' {$$ = ntn(NWRITESTAT);
$$->p1=$1;
$$->p2=ntn(NEXPRLIST);
$$->p2->p1=$3;}
;
write-op : WRITE {$$ = keynode(T_WRITEOP, WRITE);}
| WRITELN {$$ = keynode(T_WRITEOP, WRITELN);}
;
expr-list : expr ',' expr-list {$$=$1;
$1->p3=$3;}
| expr
;
expr : expr logic-op bool-term { $$=$2;
$2->p1=$1;
$2->p2=$3;}
| bool-term
;
logic-op : AND {$$=nontermnode(NLOGICEXPR, AND);}
| OR {$$=nontermnode(NLOGICEXPR, OR);}
;
bool-term : rel-term rel-op rel-term {$$=$2;
$2->p1=$1;
$2->p2=$3;}
| rel-term
;
rel-op : EQU {$$=nontermnode(NRELEXPR, EQU);}
| NEQ {$$=nontermnode(NRELEXPR, NEQ);}
| GT {$$=nontermnode(NRELEXPR, GT);}
| GEQ {$$=nontermnode(NRELEXPR, GEQ);}
| LT {$$=nontermnode(NRELEXPR, LT);}
| LEQ {$$=nontermnode(NRELEXPR, LEQ);}
;
rel-term : rel-term low-prec-op low-term {$$=$2;
$2->p1=$1;
$2->p2=$3;}
| low-term
;
low-prec-op : PLUS {$$=nontermnode(NMATHEXPR, PLUS);}
| MINUS {$$=nontermnode(NMATHEXPR, MINUS);}
;
low-term : low-term high-prec-op factor {$$=$2;
$2->p1=$1;
$2->p2=$3;}
| factor
;
high-prec-op : TIMES {$$=nontermnode(NMATHEXPR, TIMES);}
| SLASH {$$=nontermnode(NMATHEXPR, SLASH);}
;
factor : unary-op factor {$$=$1;
$1->p3=$2;}
| '(' expr ')' {$$=$2;}
| ID {$$=idnode();}
| const {$$=$1;}
| func-call {$$=$1;}
| cond-expr {$$=$1;}
| cast '(' expr ')' {$$=$1;
$1->p3=$3;}
;
unary-op : MINUS {$$=nontermnode(NNEGEXPR, MINUS);}
| NOT {$$=nontermnode(NNEGEXPR, NOT);}
;
const : INTCONST {$$=intconstnode();}
| REALCONST {$$=realconstnode();}
| STRCONST {$$=strconstnode();}
| BOOLCONST {$$=boolconstnode();}
;
func-call : ID {$$=idnode();} '(' opt-expr-list ')' {$$ = ntn(NFUNCCALL);
$$->p1=$2; $$->p2=$4;}
;
opt-expr-list : expr-list {$$=ntn(NEXPRLIST); $$->p1=$1;}
| {$$=NULL;}
;
cond-expr : IF expr THEN expr ELSE expr END {$$=ntn(NCONDEXPR);
$$->p1=$2;
$$->p2=$4;
$$->p3=$6;}
;
cast : INTEGER {$$=nontermnode(NCASTING,INTEGER);}
| REAL {$$=nontermnode(NCASTING, REAL);}
;
%%
/*
 * Build an interior node of kind `nonterm` with no children.
 * The kind is stored in value.ival because that is the union member
 * treeprint() reads when it looks up the node's name; storing it through
 * the float member rval left ival holding the bit pattern of a float.
 */
Pnode ntn(Nonterminal nonterm)
{
    Pnode p = newnode(T_NONTERMINAL);
    p->value.ival = nonterm;
    return(p);
}
/*
 * Build an interior node for an operator-like construct: `nonterm` is the
 * node kind and `valore` the token code of the concrete operator.
 * NOTE(review): value is a union, so the store to ival below overwrites the
 * store to rval on the previous line - only `valore` survives and the node
 * kind is lost. treeprint() uses value.ival as an index into tabnonterm, so
 * confirm which of the two values is meant to be kept (or give Node a
 * separate field for one of them).
 */
Pnode nontermnode(Nonterminal nonterm, int valore)
{
Pnode p = newnode(T_NONTERMINAL);
p->value.rval = nonterm;
p->value.ival = valore;
return(p);
}
/* Build a T_ID leaf whose text is the string currently held in lexval. */
Pnode idnode()
{
    Pnode leaf;

    leaf = newnode(T_ID);
    leaf->value.sval = lexval.sval;
    return leaf;
}
/* Build a leaf of type `keyword` carrying the integer code `valore`. */
Pnode keynode(Typenode keyword, int valore)
{
    Pnode leaf;

    leaf = newnode(keyword);
    leaf->value.ival = valore;
    return(leaf);
}
/* Build a T_INTCONST leaf from the integer currently held in lexval. */
Pnode intconstnode()
{
    Pnode leaf;

    leaf = newnode(T_INTCONST);
    leaf->value.ival = lexval.ival;
    return leaf;
}
/* Build a T_REALCONST leaf from the real number currently held in lexval. */
Pnode realconstnode()
{
    Pnode leaf;

    leaf = newnode(T_REALCONST);
    leaf->value.rval = lexval.rval;
    return leaf;
}
/* Build a T_STRCONST leaf from the string currently held in lexval. */
Pnode strconstnode()
{
    Pnode leaf;

    leaf = newnode(T_STRCONST);
    leaf->value.sval = lexval.sval;
    return leaf;
}
/* Build a T_BOOLCONST leaf from the boolean currently held in lexval. */
Pnode boolconstnode()
{
    Pnode leaf;

    leaf = newnode(T_BOOLCONST);
    leaf->value.bval = lexval.bval;
    return leaf;
}
/*
 * Allocate a node of type `tnode` with all three links set to NULL.
 * calloc is used so that the value union starts out zeroed for node types
 * that never assign it. On allocation failure the program reports the
 * problem on stderr and exits instead of dereferencing NULL.
 */
Pnode newnode(Typenode tnode)
{
    Pnode p = calloc(1, sizeof(Node));

    if (p == NULL)
    {
        fprintf(stderr, "newnode: out of memory\n");
        exit(EXIT_FAILURE);
    }
    p->type = tnode;
    p->p1 = p->p2 = p->p3 = NULL;
    return(p);
}
/*
 * Parse standard input and, when the parse succeeds, print the resulting
 * tree. The value returned by yyparse() becomes the exit status.
 */
int main()
{
    int status;

    printf("----------------------------------------------");
    yyin = stdin;
    status = yyparse();
    if (status == 0)
    {
        treeprint(root, 0);
    }
    return status;
}
void yyerror()
{
fprintf(stderr, "Line %d: syntax error on symbol \"%s\"\n",
line, yytext);
exit(-1);
}
tree.c
#include "def.h"
/* Printable names of the Typenode values, listed in the same order as the
 * enum in def.h; treeprint() indexes this table with a node's type. */
char* tabtypes[] =
{
"T_BREAK",
"T_TYPE",
"T_BOOLEAN",
"T_INTCONST",
"T_REALCONST",
"T_BOOLCONST",
"T_WRITEOP",
"T_STRCONST",
"T_ID",
"T_NONTERMINAL"
};
/* Printable names of the Nonterminal values, listed in the same order as
 * the enum in def.h; treeprint() indexes this table with value.ival of a
 * T_NONTERMINAL node. The first entry is spelled "PROGRAM" while the enum
 * constant is NPROGRAM. */
char* tabnonterm[] =
{
"PROGRAM",
"NVARDECLLIST",
"NFUNCDECLLIST",
"NVARDECL",
"NIDLIST",
"NOPTPARAMLIST",
"NSTATLIST",
"NASSIGNSTAT",
"NWHILESTAT",
"NIFSTAT",
"NFORSTAT",
"NRELEXPR",
"NRETURNSTAT",
"NREADSTAT",
"NLOGICEXPR",
"NWRITESTAT",
"NNEGEXPR",
"NFUNCCALL",
"NEXPRLIST",
"NCONDEXPR",
"NPARAMDECL",
"NMATHEXPR",
"NCASTING"
};
/*
 * Print the subtree rooted at `root`, one node per line, indented by one
 * space per nesting level.
 *
 * root   - node to print; NULL is accepted and prints nothing.
 * indent - nesting depth of `root`.
 *
 * Each line shows the node's name, followed by its value in parentheses for
 * identifiers and constants. Children are reached through p1 and through
 * p2; within each of those chains the next sibling is reached through p3.
 * (Subtrees attached through p2 were previously never visited.)
 */
void treeprint(Pnode root, int indent)
{
    const int nnonterm = (int)(sizeof(tabnonterm) / sizeof(tabnonterm[0]));
    int i;
    Pnode p;

    if (root == NULL)
        return;

    for (i = 0; i < indent; i++)
        printf(" ");

    if (root->type == T_NONTERMINAL)
    {
        int kind = root->value.ival;

        /* Never index the name table with a value outside its bounds. */
        if (kind >= 0 && kind < nnonterm)
            printf("%s", tabnonterm[kind]);
        else
            printf("NONTERMINAL(%d)", kind);
    }
    else
        printf("%s", tabtypes[root->type]);

    if (root->type == T_ID || root->type == T_STRCONST)
        printf(" (%s)", root->value.sval);
    else if (root->type == T_INTCONST)
        printf(" (%d)", root->value.ival);
    else if (root->type == T_REALCONST)
        printf(" (%g)", root->value.rval);
    else if (root->type == T_BOOLCONST)
        printf(" (%s)", (root->value.bval == TRUE ? "true" : "false"));
    printf("\n");

    for (p = root->p1; p != NULL; p = p->p3)
        treeprint(p, indent + 1);
    for (p = root->p2; p != NULL; p = p->p3)
        treeprint(p, indent + 1);
}
prog ( File with example of grammar)
numero: integer;
func fattoriale(n: integer): integer
fact: integer;
body
if n == 0 then
fact = 1;
else
fact = n * fattoriale(n-1);
end;
return fact;
end;
func stampaFattoriali(tot: integer): void
i, f: integer;
body
for i=0 to tot do
f = fattoriale(i);
writeln("Il fattoriale di ", i, "è ", f);
end;
end;
body
read(numero);
if numero < 0 then
writeln("Il numero ", numero, "non è valido");
else
stampaFattoriali(numero);
end.
When I type make in the terminal, it creates the files: tree.o parser.o parser.h parser.c lexer.c lexer.o bup.
When I execute the bup file, the terminal shows me this error message:
"ine 1: syntax error on symbol "
So it doesn't generate the abstract tree.
I don't know whether this error comes from the prog, lexer.lex or parser.y file.

Yacc/bison assign their own numbers to terminal tokens, and assume that the lexer will use those numbers. But you provide your own numbers in the def.h header, which yacc/bison knows absolutely nothing about. It will not correctly interpret the codes returned by yylex which will make it impossible to parse correctly.
So don't do that.
Let bison generate the token codes, use the header file it generates (parser.h with your settings), and don't step on its feet by trying to define the enum values yourself.
As a hint about debugging, that is really way too much code to have written before you start debugging, and that fact is exactly illustrated by your complaint at the end of your question that you don't know where to look for the error. Instead of writing the whole project and then hoping it works as a whole, write little pieces and debug them as you go. Although you need the parser to generate the token type values, you don't need to run the parser to test your scanner. You can write a simple program which repeatedly calls yylex and prints the returned types and values. (Or you can just enable flex debugging with the -d command line option, which is even simpler.)
Similarly, you should be able to test your AST methods by writing some test functions which use these methods to build, walk and print out an AST. Make sure that they produce the expected results.
Only once you have evidence that the lexer is producing the correct tokens and that your AST construction functions work should you start to debug your parser. Again, you will find it much easier if you use the built-in debugging facilities; see the Debugging your parser section of the Bison manual for instructions.
Good luck.

Related

How do I prove that c-'a' is within [0,26)?

Suppose I have this code:
#include "share/atspre_staload.hats"
val letters = arrayref_make_elt<bool>(i2sz(26), false)
implement main0() =
begin
println!("found('a'): ", letters[0]);
println!("found('f'): ", letters[5]);
end
Which produces the output:
found('a'): false
found('f'): false
I'd like to index into letters by character, instead. Actually, given any character I'd like to index into letters only if it's a valid index.
So this almost works:
#include "share/atspre_staload.hats"
val letters = arrayref_make_elt<bool>(i2sz(26), false)
typedef letter = [c:int | c >= 'a' && c <= 'z'] char(c)
typedef letteri = [i:int | i >= 0 && i < 26] int(i)
(* fn letter2index(c: letter): letteri = c - 'a' *) (* #1 *)
fn letter2index(c: letter): letteri =
case- c of
| 'a' => 0
| 'f' => 5
fn trychar(c: char): void = (* #2 *)
if c >= 'a' && c <= 'z' then
println!("found('", c, "'): ", letters[letter2index(c)])
implement main0() =
begin
trychar('a');
trychar('f');
trychar('+'); (* #3 *)
end
If I change char to letter at #2 and remove trychar('+') at #3, then this compiles. But of course I'd rather perform the subtraction at #1 than have a big case of letters, and I'd like to apply trychar to any kind of char, not just a letter.
The code you want can be written as follows:
#include
"share/atspre_staload.hats"
stadef
isletter(c:int): bool = ('a' <= c && c <= 'z')
val
letters = arrayref_make_elt<bool>(i2sz(26), false)
(* Maps a character to its zero-based offset from 'a'.
   The static guard isletter(c) only admits characters whose code lies in
   'a'..'z', and the result type int(c-'a') ties the returned integer to
   exactly that offset. *)
fn
letter2index
{ c:int
| isletter(c)}
(c: char(c)): int(c-'a') = char2int1(c) - char2int1('a')
(* Accepts any character; when it passes the range test below, prints the
   corresponding entry of letters, otherwise does nothing.
   NOTE(review): the test combines the two comparisons with * rather than
   && - presumably so that both bounds are visible to the type checker when
   letter2index is called; confirm. *)
fn
trychar
{c:int}
(c: char(c)): void =
if
(c >= 'a') * (c <= 'z')
then
println!("found('", c, "'): ", letters[letter2index(c)])
implement main0() =
begin
trychar('a');
trychar('f');
trychar('+');
end
In your original code, quantifiers (forall and exists) were not used correctly.

Change the parsing language

I'm using a modal-SAT solver. This solver is unfortunately using Flex and Bison, both languages that I don't master...
I wanted to change one syntax to another, but I've got some issue to do it, even after tutorials about Flex-Lexer and Bison.
So here is the problem :
I want to be able to parse such modal logic formulas :
In the previous notation, such formulas were written like this :
(NOT (IMP (AND (ALL R0 (IMP C0 C1)) (ALL R0 C0)) (ALL R0 C1)))
And here are the Flex/Bisons file used to parse them :
alc.y
%{
#include "fnode.h"
#define YYMAXDEPTH 1000000
fnode_t *formula_as_tree;
%}
%union {
int l;
int i;
fnode_t *f;
}
/* Tokens and types */
%token LP RP
%token ALL SOME
%token AND IMP OR IFF NOT
%token TOP BOT
%token RULE CONC
%token <l> NUM
%type <f> formula
%type <f> boolean_expression rule_expression atomic_expression
%type <f> other
%type <i> uboolop bboolop nboolop ruleop
%type <l> rule
%% /* Grammar rules */
input: formula {formula_as_tree = $1;}
;
formula: boolean_expression {$$ = $1;}
| rule_expression {$$ = $1;}
| atomic_expression {$$ = $1;}
;
boolean_expression: LP uboolop formula RP
{$$ = Make_formula_nary($2,empty_code,$3);}
| LP bboolop formula formula RP
{$$ = Make_formula_nary($2,empty_code, Make_operand_nary($3,$4));}
| LP nboolop formula other RP
{$$ = Make_formula_nary($2,empty_code,Make_operand_nary($3,$4));}
;
rule_expression: LP ruleop rule formula RP {$$ = Make_formula_nary($2,$3,$4);}
;
atomic_expression: CONC NUM {$$ = Make_formula_nary(atom_code,$2,Make_empty());}
| TOP {$$ = Make_formula_nary(top_code,empty_code,Make_empty());}
| BOT {$$ = Make_formula_nary(bot_code,empty_code,Make_empty());}
;
other: formula other {$$ = Make_operand_nary($1,$2);}
| {$$ = Make_empty();}
;
uboolop: NOT {$$ = not_code;}
;
bboolop: IFF {$$ = iff_code;}
| IMP {$$ = imp_code;}
;
nboolop: AND {$$ = and_code;}
| OR {$$ = or_code;}
;
ruleop: SOME {$$ = dia_code;}
| ALL {$$ = box_code;}
rule: RULE NUM {$$ = $2;}
;
%% /* End of grammar rules */
/* Parser error callback: prints the message s on standard output and
 * terminates the process.
 * NOTE(review): the exit status is 0 even though parsing failed, and the
 * function is declared int but has no return statement (exit() is called
 * first) - confirm that callers do not rely on a non-zero status. */
int yyerror(char *s)
{
printf("%s\n", s);
exit(0);
}
alc.lex
%{
#include <stdio.h>
#include "fnode.h"
#include "y.tab.h"
int number;
%}
%%
[ \n\t] ;
"(" return LP;
")" return RP;
"ALL" return ALL;
"SOME" return SOME;
"AND" return AND;
"IMP" return IMP;
"OR" return OR;
"IFF" return IFF;
"NOT" return NOT;
"TOP" return TOP;
"BOTTOM" return BOT;
"R" return RULE;
"C" return CONC;
0|[1-9][0-9]* {
sscanf(yytext,"%d",&number);
yylval.l=number;
return NUM;
}
. {
/* Error function */
fprintf(stderr,"Illegal character\n");
return -1;
}
%%
Now, let's write our example but in the new syntax that I want to use :
begin
(([r0](~p0 | p1) & [r0]p0) | [r0]p1)
end
The major problems that are blocking me from parsing this new syntax correctly are:
IMP (A B) is now written ~A | B (as in boolean logic, (A => B) <=> (~A v B)).
ALL R0 is now written [r0].
SOME R0 is now written <r0>.
IFF (A B) is now written (~B | A) & (~A | B). (IFF stands for "if and only if".)
Here is the small list of what are the new symbol, even if I don't know how to parse them :
"(" return LP;
")" return RP;
"[]" return ALL;
"<>" return SOME;
"&" return AND;
"IMP" return IMP;
"|" return OR;
"IFF" return IFF;
"~" return NOT;
"true" return TOP;
"false" return BOT;
"r" return RULE;
"p" return CONC;
I assume that only these two files will change, because the solver should still be able to read the previous syntax when the source code is compiled with the other .y and .lex files.
But I'm asking for your help to know exactly how to write it down :/
Thanks in advance !
Tommi Junttila's BC Package implements a language for Boolean expressions and circuits using Bison and Flex.
To study the source files won't fully replace going through a proper Bison/Flex tutorial, but it certainly should give you a good start.
For someone who would have the exact same problem (I assume that this problem is quite rare :) )
With the good vocabulary, it's much easier to google the problem and find a solution.
The first notation
(NOT (IMP (AND (ALL R0 (IMP C0 C1)) (ALL R0 C0)) (ALL R0 C1)))
is the ALC format.
The other notation
begin
(([r0](~p0 | p1) & [r0]p0) | [r0]p1)
end
is the InToHyLo format.
And there is a tool called the formula translation tool ("ftt") developed and bundled with Spartacus (http://www.ps.uni-saarland.de/spartacus/). It can translate between all the formats of provers.
Using this tool is a little hack who avoid dealing with the Flex/Bison languages.
One just needs to translate one problem to another, problems will be equivalent and it's very fast to translate.

Is there an easy way to make Jison calculator parser return symbolic results?

Jison parsers return the calculated result:
calculator.parse("2^3"); // returns 8
calculator.parse("x^2"); // gives a parse err
I would like that it return the symbolic expression:
calculator.parse("x^2");
// should return
// "Math.pow(x,2)"
And
calculator.parse("x^(x+1)");
// should return
// "Math.pow(x,x+1)"
And
calculator.parse("cos(x)");
// should return
// "Math.cos(x)"
If what you need is simple enough, you might get by by modifying the calculator. For instance:
Add an IDENTIFIER token after NUMBER in the list of tokens:
[a-z] return 'IDENTIFIER'
This allows a single lower case letter to serve as an identifier.
Modify the e '^' e rule to return a string rather than the computed value:
| e '^' e
{$$ = "Math.pow(" + $1 + "," + $3 + ");"}
Add a new rule to the list of rules for e:
| IDENTIFIER
(No explicit action needed.)
With these changes parsing, x^2 results in "Math.pow(x,2);"
To support the operators, the other rules would have to be modified like the one for e '^' e to return strings rather than the result of the math.
This is extremely primitive and won't optimize things that could be optimized. For instance, 1^2 will be output as "Math.pow(1, 2)" when it could be optimized to 1.
(Based on #Louis answer and comments)
Here is the code for a basic symbolic calculator using jison:
/* description: Parses and executes mathematical expressions. */
/* lexical grammar */
/* Lexical grammar. In the FUNCTION alternation "atan2" is listed before
   "atan": the alternatives are tried left to right, so with "atan" first
   the input "atan2" was split into FUNCTION "atan" followed by NUMBER 2. */
%lex
%%
\s+ /* skip whitespace */
(acos|asin|atan2|atan|cos|log|sin|sqrt|tan) return 'FUNCTION'
[0-9]+("."[0-9]+)?\b return 'NUMBER'
[a-z] return 'IDENTIFIER'
"|" return '|'
"*" return '*'
"/" return '/'
"-" return '-'
"+" return '+'
"^" return '^'
"!" return '!'
"%" return '%'
"(" return '('
")" return ')'
"PI" return 'PI'
"E" return 'E'
<<EOF>> return 'EOF'
. return 'INVALID'
/lex
/* operator associations and precedence */
%left '+' '-'
%left '*' '/'
%left '^'
%right '!'
%right '%'
%left UMINUS
%start expressions
%% /* language grammar */
expressions
: e EOF
{ typeof console !== 'undefined' ? console.log($1) : print($1);
return $1; }
;
/* Expression rule. Every alternative yields a string of JavaScript source
   rather than a computed number, so that identifiers can appear anywhere.
   - No statement terminator is appended, so results can be nested inside
     other calls (e.g. as an argument of Math.pow).
   - Parentheses written by the user are kept; dropping them changed the
     meaning of inputs such as (a+b)*c.
   - Factorial, percent and unary minus build source text as well; applying
     arithmetic to the operand strings produced NaN for symbolic input. */
e
: e '+' e
{$$ = $1 + " + " + $3;}
| e '-' e
{$$ = $1 + "-" + $3;}
| e '*' e
{$$ = $1 + "*" + $3;}
| e '/' e
{$$ = $1 + "/" + $3;}
| e '^' e
{$$ = "Math.pow(" + $1 + ", " + $3 + ")";}
| e '!'
{{
$$ = "(function fact (n) { return n==0 ? 1 : fact(n-1) * n })(" + $1 + ")";
}}
| e '%'
{$$ = "(" + $1 + "/100)";}
| '-' e %prec UMINUS
{$$ = "(-" + $2 + ")";}
| '(' e ')'
{$$ = "(" + $2 + ")";}
| FUNCTION '(' e ')'
{$$ = "Math." + $1 + "(" + $3 + ")";}
| '|' e '|'
{$$ = "Math.abs(" + $2 + ")";}
| NUMBER
{$$ = Number(yytext);}
| E
{$$ = "Math.E";}
| PI
{$$ = "Math.PI";}
| IDENTIFIER
| FUNCTION
;
No.
Not an easy way. Depending on what you call 'easy'. You need to define tokens. And then symbolic manipulation of those tokens. And stuff.
I don't think that calculator would be a good starting point, and its such a trivial thing anyway that throwing it away and writing your JS symbolic parser grammar from scratch wouldn't be too much more of a problem.
Just to save some google time for everyone, this is what we are talking about: http://zaach.github.io/jison/demos/calc/

syntactic predicates - Upgrading from Antlr 3 to Antlr 4

I have syntactic predicates that I have to convert to ANTLR 4. The grammar was not written by me, so I have no idea how to convert them in a meaningful way. These are the main variations in the grammar that I have to convert.
1.
simpleSelector
: elementName
((esPred)=>elementSubsequent)*
| ((esPred)=>elementSubsequent)+
;
esPred
: HASH | DOT | LBRACKET | COLON
;
elementSubsequent
: HASH
| cssClass
| attrib
| pseudo
;
2.
fragment EMS :; // 'em'
fragment EXS :; // 'ex'
fragment LENGTH :; // 'px'. 'cm', 'mm', 'in'. 'pt', 'pc'
fragment ANGLE :; // 'deg', 'rad', 'grad'
fragment TIME :; // 'ms', 's'
fragment FREQ :; // 'khz', 'hz'
fragment DIMENSION :; // nnn'Somethingnotyetinvented'
fragment PERCENTAGE :; // '%'
NUMBER
:(
'0'..'9' ('.' '0'..'9'+)?
| '.' '0'..'9'+
)
(
(E (M|X))=>
E
(
M { $type = EMS; } //action in lexer rule 'NUMBER' must be last element of single outermost alt
| X { $type = EXS; }
)
| (P(X|T|C))=>
P
(
X
| T
| C
)
{ $type = LENGTH; }
| (C M)=>
C M { $type = LENGTH; }
| (M (M|S))=>
M
(
M { $type = LENGTH; }
| S { $type = TIME; }
)
| (I N)=>
I N { $type = LENGTH; }
| (D E G)=>
D E G { $type = ANGLE; }
| (R A D)=>
R A D { $type = ANGLE; }
| (S)=>S { $type = TIME; }
| (K? H Z)=>
K? H Z { $type = FREQ; }
| IDENT { $type = DIMENSION; }
| '%' { $type = PERCENTAGE; }
| // Just a number
)
;
3.
URI : U R L
'('
((WS)=>WS)? (URL|STRING) WS?
')'
;
some guidance is greatly appreciated.
Edit:
Is it as below.
simpleSelector
: elementName
(elementSubsequent)*
| (elementSubsequent)+
;
Syntactic predicates were only used to work around a prediction weakness in ANTLR 3 that is not present in ANTLR 4. You can simply remove them during your transition to ANTLR 4.
Edit:
A syntactic predicate in ANTLR 3 had the following form:
(stuff) =>
Wherever you see that form in your grammar, just remove it. Here's what your second example looks like with the predicates removed.
NUMBER
:(
'0'..'9' ('.' '0'..'9'+)?
| '.' '0'..'9'+
)
(
E
(
M { $type = EMS; }
| X { $type = EXS; }
)
| P
(
X
| T
| C
)
{ $type = LENGTH; }
| C M { $type = LENGTH; }
| M
(
M { $type = LENGTH; }
| S { $type = TIME; }
)
| I N { $type = LENGTH; }
| D E G { $type = ANGLE; }
| R A D { $type = ANGLE; }
| S { $type = TIME; }
| K? H Z { $type = FREQ; }
| IDENT { $type = DIMENSION; }
| '%' { $type = PERCENTAGE; }
| // Just a number
)
;

Hashtable indexed on several fields

I'm currently programming an OCaml module defining a type corresponding to a CPU register. The interface of this module is the following :
(*
* Defines a type which represents a R3000 register.
*)
type t =
| R0 (* Always 0 *)
| AT (* Assembler temporary *)
| V0 | V1 (* Subroutine return values *)
| A0 | A1 | A2 | A3 (* Subroutine arguments *)
| T0 | T1 | T2 | T3 | T4 | T5 | T6 | T7 (* Temporary registers *)
| S0 | S1 | S2 | S3 | S4 | S5 | S6 | S7 (* Register variables *)
| T8 | T9 (* Temporary registers *)
| K0 | K1 (* Reserved for kernels *)
| GP | SP | FP (* Global/Stack/Frame pointer *)
| RA (* Return address *)
(*
* Conversion from/to [|0, 31|].
*)
val of_int : int -> t
val to_int : t -> int
(*
* Conversion to string for display.
*)
val of_string : string -> t
val to_string : t -> string
However, I would like the implementation to be fast and not too repetitive. For example, I could code the of_int function like this :
let of_int = function
| 0 -> R0
| 1 -> AT
(* ... *)
But it would be awful and unmaintainable. I do not want to do this as it conflicts with my programming religion. Moreover, I would need to do this kind of dirty code not only one time, but for the four functions.
The first solution I found would be to use a preprocessor (either Camlp4 or cpp) to generate the code I want. I find this to be overkill but would use this method if you can't help me with my second idea.
After a bit of thought, I thought I could do something like this :
type regdescr = {
reg : t ;
name : string ;
index : int
}
let regs =
let htbl = Hashtbl.create 32 in
let li = [ (* regdescr defs here *) ] in
List.iter (Hashtbl.add htbl) li ;
htbl
However, in this case, I must choose what field I want to hash. Is there another solution than using three different hashtables in this case ? Maybe a data-structure I do not know about is able to hash over three fields and perform searches on the three of them.
Sorry for the long question for which the answer may be trivial :) .
Looks like a perfect fit for deriving.
(*
* Defines a type which represents a R3000 register.
*)
type t =
| R0 (* Always 0 *)
| AT (* Assembler temporary *)
| V0 | V1 (* Subroutine return values *)
| A0 | A1 | A2 | A3 (* Subroutine arguments *)
| T0 | T1 | T2 | T3 | T4 | T5 | T6 | T7 (* Temporary registers *)
| S0 | S1 | S2 | S3 | S4 | S5 | S6 | S7 (* Register variables *)
| T8 | T9 (* Temporary registers *)
| K0 | K1 (* Reserved for kernels *)
| GP | SP | FP (* Global/Stack/Frame pointer *)
| RA (* Return address *)
deriving (Enum,Show)
let of_int x = Enum.to_enum<t>(x)
let to_int x = Enum.from_enum<t>(x)
let to_string x = Show.show<t>(x)
let pr = Printf.printf
let () =
pr "%i %i %i\n" (to_int R0) (to_int RA) (to_int T8);
pr "%s %s %s\n"
(to_string (of_int 0)) (to_string (of_int 31)) (to_string (of_int 24));
pr "%s %s %s\n"
(to_string (Enum.pred<t>(A1))) (to_string A1) (to_string (Enum.succ<t>(A1)));
()
Output :
0 31 24
R0 RA T8
A0 A1 A2
Compile with :
ocamlc -pp deriving -I ~/work/contrib/deriving/0.1.1-3.11.1-orig/lib deriving.cma q.ml -o q
Just have three separate hash tables?
Instead of using a hashtable for going from one partial representation of a register to another, have you thought of forcing yourself to always manipulate only pointers to complete descriptions, so that you can access any aspect you like (index, string representation, ...) with just a pointer dereference?
You can use the representation (your type regdescr) as the register.
How often do you need to pattern-match a value of type register?
If you never do, you can even do away with the reg field completely.
(* A register is represented by its complete description, so the name and
   the index are both one field access away. The record type is private:
   code outside the module can read the fields and pattern-match on them,
   but only the module itself can build values. *)
module Register :
sig
  type t = private { name : string ; index : int }
  val r0 : t
  val at : t
  val equal : t -> t -> bool
  val hash : t -> int
  val compare : t -> t -> int
end =
struct
  type t = { name : string ; index : int }
  let r0 = { name = "R0" ; index = 0 }
  let at = { name = "AT" ; index = 1 }
  (* Two registers are the same register when their indices agree. *)
  let equal r1 r2 = r1.index = r2.index
  let hash r1 = Hashtbl.hash (r1.index)
  (* This is a non-recursive let, so the compare on the right-hand side is
     still the standard library's polymorphic comparison. It is used
     unqualified because the Pervasives module no longer exists in recent
     OCaml releases. *)
  let compare r1 r2 = compare r1.index r2.index
end
Note: you can make the whole thing more readable by using files register.ml and register.mli to define the Register module.
If you sometimes need pattern-matching, you can keep the constructor field so that it is possible to write nice pattern-matchings:
match r.reg with
R0 -> ...
| AT -> ...
But force yourself to write only functions that accept (and pass their callees) the full Register.t.
EDIT: For indexing, first write the generic function below:
let all_registers = [ r0 ; at ]
(* Builds a lookup function keyed on one aspect of a register.
   [projection] extracts the key from a register; every element of
   all_registers is stored in a fresh table under that key, and the result
   is Hashtbl.find partially applied to the table. The table is therefore
   filled once, when [index] is applied to a projection, not on every
   lookup. Hashtbl.find raises Not_found for a key that was never added. *)
let index projection =
let htbl = Hashtbl.create 32 in
let f r =
let key = projection r in
Hashtbl.add htbl key r
in
List.iter f all_registers ;
Hashtbl.find htbl
Then pass it all the projections you need:
let of_int = index (fun r -> r.index)
let of_name = index (fun r -> r.name)

Resources