Change the parsing language - logic

I'm using a modal-SAT solver. Unfortunately, this solver uses Flex and Bison, two tools that I haven't mastered...
I want to change its input syntax to another one, but I'm having trouble doing so, even after going through tutorials on Flex and Bison.
So here is the problem :
I want to be able to parse such modal logic formulas :
In the previous notation, such formulas were written like this :
(NOT (IMP (AND (ALL R0 (IMP C0 C1)) (ALL R0 C0)) (ALL R0 C1)))
And here are the Flex/Bisons file used to parse them :
alc.y
%{
#include "fnode.h"
#define YYMAXDEPTH 1000000
fnode_t *formula_as_tree;
%}
%union {
int l;
int i;
fnode_t *f;
}
/* Tokens and types */
%token LP RP
%token ALL SOME
%token AND IMP OR IFF NOT
%token TOP BOT
%token RULE CONC
%token <l> NUM
%type <f> formula
%type <f> boolean_expression rule_expression atomic_expression
%type <f> other
%type <i> uboolop bboolop nboolop ruleop
%type <l> rule
%% /* Grammar rules */
input: formula {formula_as_tree = $1;}
;
formula: boolean_expression {$$ = $1;}
| rule_expression {$$ = $1;}
| atomic_expression {$$ = $1;}
;
boolean_expression: LP uboolop formula RP
{$$ = Make_formula_nary($2,empty_code,$3);}
| LP bboolop formula formula RP
{$$ = Make_formula_nary($2,empty_code, Make_operand_nary($3,$4));}
| LP nboolop formula other RP
{$$ = Make_formula_nary($2,empty_code,Make_operand_nary($3,$4));}
;
rule_expression: LP ruleop rule formula RP {$$ = Make_formula_nary($2,$3,$4);}
;
atomic_expression: CONC NUM {$$ = Make_formula_nary(atom_code,$2,Make_empty());}
| TOP {$$ = Make_formula_nary(top_code,empty_code,Make_empty());}
| BOT {$$ = Make_formula_nary(bot_code,empty_code,Make_empty());}
;
other: formula other {$$ = Make_operand_nary($1,$2);}
| {$$ = Make_empty();}
;
uboolop: NOT {$$ = not_code;}
;
bboolop: IFF {$$ = iff_code;}
| IMP {$$ = imp_code;}
;
nboolop: AND {$$ = and_code;}
| OR {$$ = or_code;}
;
ruleop: SOME {$$ = dia_code;}
| ALL {$$ = box_code;}
rule: RULE NUM {$$ = $2;}
;
%% /* End of grammar rules */
/*
 * Error callback invoked by the generated parser.
 *
 * s - diagnostic message supplied by the parser.
 *
 * Writes the message to stderr and terminates the program with a non-zero
 * status so that a failed parse can be told apart from a successful run.
 * This function does not return.
 */
int yyerror(char *s)
{
    fprintf(stderr, "%s\n", s);
    exit(1);
}
alc.lex
%{
#include <stdio.h>
#include "fnode.h"
#include "y.tab.h"
int number;
%}
%%
[ \n\t] ;
"(" return LP;
")" return RP;
"ALL" return ALL;
"SOME" return SOME;
"AND" return AND;
"IMP" return IMP;
"OR" return OR;
"IFF" return IFF;
"NOT" return NOT;
"TOP" return TOP;
"BOTTOM" return BOT;
"R" return RULE;
"C" return CONC;
0|[1-9][0-9]* {
sscanf(yytext,"%d",&number);
yylval.l=number;
return NUM;
}
. {
/* Error function */
fprintf(stderr,"Illegal character\n");
return -1;
}
%%
Now, let's write our example but in the new syntax that I want to use :
begin
(([r0](~p0 | p1) & [r0]p0) | [r0]p1)
end
The major problems that are blocking me from parsing this new syntax correctly are:
IMP (A B) is now written ~A | B (as in Boolean logic, (A => B) <=> (~A v B)).
ALL R0 is now written [r0].
SOME R0 is now written <r0>.
IFF (A B) is now written (~B | A) & (~A | B). (IFF stands for "if and only if".)
Here is the small list of what are the new symbol, even if I don't know how to parse them :
"(" return LP;
")" return RP;
"[]" return ALL;
"<>" return SOME;
"&" return AND;
"IMP" return IMP;
"|" return OR;
"IFF" return IFF;
"~" return NOT;
"true" return TOP;
"false" return BOT;
"r" return RULE;
"p" return CONC;
I assume that only these 2 files will change, because the solver should still be able to read the previous syntax when the source code is compiled with the other .y and .lex files.
But I'm asking for your help to know exactly how to write them down :/
Thanks in advance !

Tommi Junttila's BC Package implements a language for Boolean expressions and circuits using Bison and Flex.
To study the source files won't fully replace going through a proper Bison/Flex tutorial, but it certainly should give you a good start.

For someone who would have the exact same problem (I assume that this problem is quite rare :) )
With the good vocabulary, it's much easier to google the problem and find a solution.
The first notation
(NOT (IMP (AND (ALL R0 (IMP C0 C1)) (ALL R0 C0)) (ALL R0 C1)))
is the ALC format.
The other notation
begin
(([r0](~p0 | p1) & [r0]p0) | [r0]p1)
end
is the InToHyLo format.
And there is a tool called the formula translation tool ("ftt") developed and bundled with Spartacus (http://www.ps.uni-saarland.de/spartacus/). It can translate between all the formats of provers.
Using this tool is a little hack that avoids dealing with Flex/Bison.
One just needs to translate one problem to another, problems will be equivalent and it's very fast to translate.

Related

How could I generate my abstract tree using this makefile? Why I see only an error at 1 line?

def.h
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
typedef enum
{
NPROGRAM,
NVARDECLLIST,
NFUNCDECLLIST,
NVARDECL,
NIDLIST,
NOPTPARAMLIST,
NSTATLIST,
NASSIGNSTAT,
NWHILESTAT,
NIFSTAT,
NFORSTAT,
NRELEXPR,
NRETURNSTAT,
NREADSTAT,
NLOGICEXPR,
NWRITESTAT,
NNEGEXPR,
NFUNCCALL,
NEXPRLIST,
NCONDEXPR,
NPARAMDECL,
NMATHEXPR,
NCASTING
} Nonterminal;
typedef enum
{
T_BREAK,
T_TYPE,
T_BOOLEAN,
T_INTCONST,
T_REALCONST,
T_BOOLCONST,
T_WRITEOP,
T_STRCONST,
T_ID,
T_NONTERMINAL
} Typenode;
typedef union
{
int ival;
float rval;
char *sval;
enum {FALSE, TRUE} bval;
} Value;
typedef struct snode
{
Typenode type;
Value value;
struct snode *p1, *p2, *p3;
} Node;
typedef Node *Pnode;
char *newstring(char*);
int yylex();
Pnode nontermnode(Nonterminal, int),
ntn(Nonterminal),
idnode(),
keynode(Typenode, int),
intconstnode(),
realconstnode(),
strconstnode(),
boolconstnode(),
newnode(Typenode);
void yyerror(),
treeprint(Pnode, int);
lexer.lex
%{
#include "parser.h"
#include "def.h"
int line = 1;
Value lexval;
%}
%option noyywrap
/* Blank characters skipped between tokens; \r is included so that CRLF-terminated sources do not fall through to the ERROR rule. */
spacing ([ \t\r])+
commento "#"(.)*\n
letter [A-Za-z]
/* A single decimal digit. */
digit [0-9]
intconst {digit}+
strconst \"([^\"])*\"
boolconst false|true
realconst {intconst}\.{digit}+
id {letter}({letter}|{digit})*
/* Single-character punctuation handed to the parser as its own character code; '=' is required by the assignment and for-loop rules of the grammar. */
sugar [\(\):,;\.\+\-\*/=]
%%
{spacing} ;
\n {line++;}
integer {return(INTEGER);}
string {return(STRING);}
boolean {return(BOOLEAN);}
real {return(REAL);}
void {return(VOID);}
func {return(FUNC);}
body {return(BODY);}
end {return(END);}
else {return(ELSE);}
while {return(WHILE);}
do {return(DO);}
for {return(FOR);}
to {return(TO);}
return {return(RETURN);}
read {return(READ);}
write {return(WRITE);}
writeln {return(WRITELN);}
and {return(AND);}
or {return(OR);}
not {return(NOT);}
if {return(IF);}
then {return(THEN);}
break {return(BREAK);}
"<=" {return(LEQ);}
">=" {return(GEQ);}
"!=" {return(NEQ);}
"==" {return(EQU);}
"<" {return(LT);}
">" {return(GT);}
{intconst} {lexval.ival = atoi(yytext); return(INTCONST);}
{strconst} {lexval.sval = newstring(yytext); return(STRCONST);}
{boolconst} {lexval.bval = (yytext[0] == 'f' ? FALSE : TRUE); return(BOOLCONST);}
{realconst} {lexval.rval = atof(yytext); return(REALCONST);}
{id} {lexval.sval = newstring(yytext); return(ID);}
"+" { /* the grammar refers to the arithmetic operators by token name, not by character literal */ return(PLUS); }
"-" {return(MINUS);}
"*" {return(TIMES);}
"/" {return(SLASH);}
{sugar} {return(yytext[0]);}
. {return(ERROR);}
%%
/*
 * Returns a freshly allocated copy of the NUL-terminated string s.
 *
 * s - string to duplicate; NULL yields NULL.
 *
 * The caller owns the returned buffer. If the allocation fails, a message is
 * written to stderr and the program exits, because no caller in this scanner
 * can continue without the copy.
 */
char *newstring(char *s)
{
    char *p;
    size_t size;

    if (s == NULL)
        return(NULL);
    size = strlen(s) + 1; /* include the terminating NUL */
    p = malloc(size);
    if (p == NULL)
    {
        fprintf(stderr, "newstring: out of memory\n");
        exit(-1);
    }
    memcpy(p, s, size);
    return(p);
}
makefile
# Builds the "bup" executable from the flex scanner, the bison parser and the
# tree printer.
bup: lexer.o parser.o tree.o
	cc -g -o bup lexer.o parser.o tree.o

lexer.o: lexer.c parser.h def.h
	cc -g -c lexer.c

parser.o: parser.c def.h
	cc -g -c parser.c

tree.o: tree.c def.h
	cc -g -c tree.c

# flex only reads the scanner specification.
lexer.c: lexer.lex
	flex -o lexer.c lexer.lex

# A single bison run produces both files (-d writes parser.h next to
# parser.c). Naming parser.c here keeps make from falling back to its
# built-in yacc rule to create it.
parser.c parser.h: parser.y def.h
	bison -vd -o parser.c parser.y
parser.y
%{
#include "def.h"
#define YYSTYPE Pnode
extern char *yytext;
extern Value lexval;
extern int line;
extern FILE *yyin;
Pnode root = NULL;
%}
%token ID FUNC BODY END BREAK IF THEN ELSE TYPE WHILE DO FOR RETURN READ WRITE WRITELN
%token AND OR INTCONST REALCONST BOOLCONST STRCONST INTEGER REAL NOT STRING BOOLEAN VOID PLUS MINUS TIMES SLASH
%token LEQ GEQ NEQ EQU GT LT TO ERROR
%%
program : var-decl-list func-decl-list body '.' {root = $$ = ntn(NPROGRAM);
root->p1 = ntn(NVARDECLLIST);
root->p2 = ntn(NFUNCDECLLIST);
root->p1->p1 = $1;
root->p2->p1 = $2;
root->p3 = $3;}
;
/* Variable declarations chained through p3; the value is the first declaration, or NULL for an empty list. */
var-decl-list : var-decl var-decl-list {$$ = $1;      /* the list head is this declaration */
                                        $1->p3 = $2;} /* the remaining declarations follow as siblings */
              | {$$ = NULL;}
              ;
var-decl : id-list ':' type ';' {$$ = ntn(NVARDECL);
$$ -> p1 = ntn(NIDLIST);
$$->p1->p1=$1; $$ -> p1 -> p3 = $3;}
;
id-list : ID {$$ = idnode();} ',' id-list {$$ = $2;
$2 -> p3 = $4;}
| ID {$$ = idnode();}
;
type : INTEGER {$$ = keynode(T_TYPE, INTEGER);}
| REAL {$$ = keynode(T_TYPE, REAL);}
| STRING {$$ = keynode(T_TYPE, STRING);}
| BOOLEAN {$$ = keynode(T_TYPE, BOOLEAN);}
| VOID {$$ = keynode(T_TYPE, VOID); }
;
/* Function declarations chained through p3; the value is the first declaration, or NULL for an empty list. */
func-decl-list : func-decl func-decl-list {$$ = $1;      /* the list head is this declaration */
                                           $1->p3 = $2;} /* the remaining declarations follow as siblings */
               | {$$ = NULL;}
               ;
func-decl : FUNC ID {$$ = idnode();} '(' opt-param-list ')' ':' type var-decl-list body ';' {$$ -> p1 = $3;
$$ -> p2 = ntn(NOPTPARAMLIST);
$$ -> p2 ->p1=$5;
$$ -> p2 -> p3 = $8;
$$ -> p2 -> p3->p3 = ntn(NVARDECLLIST);
$$ -> p2 -> p3->p3->p1 = $9;
$$ -> p2 -> p3->p3->p3 = $10;}
;
opt-param-list : param-list {$$ = $1;}
| {$$ = NULL;}
;
param-list : param-decl ',' param-list {$$ = $1;
$1 -> p3 = $3;}
| param-decl
;
param-decl : ID {$$ = idnode();} ':' type {$$=ntn(NPARAMDECL);
$$ -> p1 = $2;
$$ -> p2 = $4;}
;
body : BODY stat-list END {$$ = ntn(NSTATLIST);
$$->p1=$2;}
;
stat-list : stat ';' stat-list {$$ = $1;
$1 -> p3 = $3;}
| stat ';' {$$=$1;}
;
stat : assign-stat
| if-stat
| while-stat
| for-stat
| return-stat
| read-stat
| write-stat
| func-call
| BREAK {$$ = newnode(T_BREAK);}
;
assign-stat : ID {$$ = idnode();} '=' expr {$$ = ntn(NASSIGNSTAT);
$$ -> p1 = $2;
$$ -> p2 = $4;}
;
/* if-statement: p1 is the condition, p2 an NSTATLIST holding the then-branch, followed (via p3) by the optional else part. */
if-stat : IF expr THEN stat-list opt-else-stat END {$$ = ntn(NIFSTAT);
                                                    $$->p1 = $2;
                                                    $$->p2 = ntn(NSTATLIST);
                                                    $$->p2->p1 = $4; /* attach the then-branch, as while-stat does for its body */
                                                    $$->p2->p3 = $5;}
        ;
opt-else-stat : ELSE stat-list {$$ = ntn(NSTATLIST);
$$->p1=$2;}
| {$$ = NULL;}
;
while-stat : WHILE expr DO stat-list END {$$ = ntn(NWHILESTAT);
$$->p1=$2;
$$->p2=ntn(NSTATLIST);
$$->p2->p1=$4;}
;
for-stat : FOR ID {$$=idnode();} '=' expr TO expr DO stat-list END {$$ = ntn(NFORSTAT);
$$->p1=$3;
$$->p2=$5;
$$->p2->p3=$7;
$$->p2->p3->p3=ntn(NSTATLIST);
$$->p2->p3->p3->p1=$9;}
;
return-stat : RETURN opt-expr {$$ = ntn(NRETURNSTAT);
$$->p1=$2;}
;
opt-expr : expr {$$=$1;}
| {$$=NULL;}
;
read-stat : READ '(' id-list ')' {$$ = ntn(NREADSTAT);
$$->p1=ntn(NIDLIST);
$$->p1->p1=$3;}
;
write-stat : write-op '(' expr-list ')' {$$ = ntn(NWRITESTAT);
$$->p1=$1;
$$->p2=ntn(NEXPRLIST);
$$->p2->p1=$3;}
;
write-op : WRITE {$$ = keynode(T_WRITEOP, WRITE);}
| WRITELN {$$ = keynode(T_WRITEOP, WRITELN);}
;
expr-list : expr ',' expr-list {$$=$1;
$1->p3=$3;}
| expr
;
expr : expr logic-op bool-term { $$=$2;
$2->p1=$1;
$2->p2=$3;}
| bool-term
;
logic-op : AND {$$=nontermnode(NLOGICEXPR, AND);}
| OR {$$=nontermnode(NLOGICEXPR, OR);}
;
bool-term : rel-term rel-op rel-term {$$=$2;
$2->p1=$1;
$2->p2=$3;}
| rel-term
;
rel-op : EQU {$$=nontermnode(NRELEXPR, EQU);}
| NEQ {$$=nontermnode(NRELEXPR, NEQ);}
| GT {$$=nontermnode(NRELEXPR, GT);}
| GEQ {$$=nontermnode(NRELEXPR, GEQ);}
| LT {$$=nontermnode(NRELEXPR, LT);}
| LEQ {$$=nontermnode(NRELEXPR, LEQ);}
;
rel-term : rel-term low-prec-op low-term {$$=$2;
$2->p1=$1;
$2->p2=$3;}
| low-term
;
low-prec-op : PLUS {$$=nontermnode(NMATHEXPR, PLUS);}
| MINUS {$$=nontermnode(NMATHEXPR, MINUS);}
;
low-term : low-term high-prec-op factor {$$=$2;
$2->p1=$1;
$2->p2=$3;}
| factor
;
high-prec-op : TIMES {$$=nontermnode(NMATHEXPR, TIMES);}
| SLASH {$$=nontermnode(NMATHEXPR, SLASH);}
;
factor : unary-op factor {$$=$1;
$1->p3=$2;}
| '(' expr ')' {$$=$2;}
| ID {$$=idnode();}
| const {$$=$1;}
| func-call {$$=$1;}
| cond-expr {$$=$1;}
| cast '(' expr ')' {$$=$1;
$1->p3=$3;}
;
unary-op : MINUS {$$=nontermnode(NNEGEXPR, MINUS);}
| NOT {$$=nontermnode(NNEGEXPR, NOT);}
;
const : INTCONST {$$=intconstnode();}
| REALCONST {$$=realconstnode();}
| STRCONST {$$=strconstnode();}
| BOOLCONST {$$=boolconstnode();}
;
func-call : ID {$$=idnode();} '(' opt-expr-list ')' {$$ = ntn(NFUNCCALL);
$$->p1=$2; $$->p2=$4;}
;
opt-expr-list : expr-list {$$=ntn(NEXPRLIST); $$->p1=$1;}
| {$$=NULL;}
;
cond-expr : IF expr THEN expr ELSE expr END {$$=ntn(NCONDEXPR);
$$->p1=$2;
$$->p2=$4;
$$->p3=$6;}
;
cast : INTEGER {$$=nontermnode(NCASTING,INTEGER);}
| REAL {$$=nontermnode(NCASTING, REAL);}
;
%%
/*
 * Creates a T_NONTERMINAL node of the given kind.
 *
 * nonterm - which non-terminal the node represents.
 *
 * Returns the new node with all child links set to NULL by newnode().
 */
Pnode ntn(Nonterminal nonterm)
{
    Pnode p = newnode(T_NONTERMINAL);

    /* treeprint() looks the name up through value.ival, so the kind has to
       be stored in the int member of the union, not the float one. */
    p->value.ival = nonterm;
    return(p);
}
/*
 * Creates a T_NONTERMINAL node for an operator-style non-terminal.
 *
 * nonterm - which non-terminal the node represents.
 * valore  - integer payload; the grammar actions pass a token code here
 *           (e.g. AND, EQU, PLUS).
 *
 * NOTE(review): Value is a union, so the ival store below overwrites the
 * rval store above it and only valore survives. treeprint() later uses
 * value.ival as an index into tabnonterm - confirm where the non-terminal
 * kind is meant to be kept alongside the operator code.
 */
Pnode nontermnode(Nonterminal nonterm, int valore)
{
Pnode p = newnode(T_NONTERMINAL);
p->value.rval = nonterm; /* written to the float member of the union */
p->value.ival = valore; /* shares storage with rval: replaces the value written above */
return(p);
}
/* Builds a T_ID leaf whose string value is the one currently held in lexval. */
Pnode idnode()
{
    Pnode leaf = newnode(T_ID);

    leaf->value.sval = lexval.sval;
    return leaf;
}
/*
 * Builds a leaf of type keyword carrying an integer payload.
 *
 * keyword - node type to create.
 * valore  - value stored in the node's ival field.
 */
Pnode keynode(Typenode keyword, int valore)
{
    Pnode leaf = newnode(keyword);

    leaf->value.ival = valore;
    return leaf;
}
/* Builds a T_INTCONST leaf from the integer currently held in lexval. */
Pnode intconstnode()
{
    Pnode leaf = newnode(T_INTCONST);

    leaf->value.ival = lexval.ival;
    return leaf;
}
/* Builds a T_REALCONST leaf from the real number currently held in lexval. */
Pnode realconstnode()
{
    Pnode leaf = newnode(T_REALCONST);

    leaf->value.rval = lexval.rval;
    return leaf;
}
/* Builds a T_STRCONST leaf that shares the string pointer currently held in lexval. */
Pnode strconstnode()
{
    Pnode leaf = newnode(T_STRCONST);

    leaf->value.sval = lexval.sval;
    return leaf;
}
/* Builds a T_BOOLCONST leaf from the boolean currently held in lexval. */
Pnode boolconstnode()
{
    Pnode leaf = newnode(T_BOOLCONST);

    leaf->value.bval = lexval.bval;
    return leaf;
}
/*
 * Allocates a tree node of the given type.
 *
 * tnode - type tag stored in the node.
 *
 * Returns a node whose value is zero-filled and whose three links (p1, p2,
 * p3) are NULL. If the allocation fails, a message is written to stderr and
 * the program exits, since every caller dereferences the result immediately.
 */
Pnode newnode(Typenode tnode)
{
    Pnode p = calloc(1, sizeof(Node)); /* zero-fill so value never holds garbage */

    if (p == NULL)
    {
        fprintf(stderr, "newnode: out of memory\n");
        exit(-1);
    }
    p->type = tnode;
    p->p1 = p->p2 = p->p3 = NULL;
    return(p);
}
/*
 * Entry point: prints a separator, parses standard input and, when the
 * parse succeeds, prints the resulting tree. Returns yyparse()'s status.
 */
int main()
{
    int status;

    printf("----------------------------------------------");
    yyin = stdin;
    status = yyparse();
    if (status == 0)
    {
        treeprint(root, 0);
    }
    return status;
}
void yyerror()
{
fprintf(stderr, "Line %d: syntax error on symbol \"%s\"\n",
line, yytext);
exit(-1);
}
tree.c
#include "def.h"
char* tabtypes[] =
{
"T_BREAK",
"T_TYPE",
"T_BOOLEAN",
"T_INTCONST",
"T_REALCONST",
"T_BOOLCONST",
"T_WRITEOP",
"T_STRCONST",
"T_ID",
"T_NONTERMINAL"
};
char* tabnonterm[] =
{
"PROGRAM",
"NVARDECLLIST",
"NFUNCDECLLIST",
"NVARDECL",
"NIDLIST",
"NOPTPARAMLIST",
"NSTATLIST",
"NASSIGNSTAT",
"NWHILESTAT",
"NIFSTAT",
"NFORSTAT",
"NRELEXPR",
"NRETURNSTAT",
"NREADSTAT",
"NLOGICEXPR",
"NWRITESTAT",
"NNEGEXPR",
"NFUNCCALL",
"NEXPRLIST",
"NCONDEXPR",
"NPARAMDECL",
"NMATHEXPR",
"NCASTING"
};
/*
 * Prints the subtree rooted at root, one node per line.
 *
 * root   - node to print; must not be NULL.
 * indent - number of spaces written before the node's label.
 *
 * Non-terminals are labelled through tabnonterm (indexed by value.ival),
 * every other node through tabtypes (indexed by its type). Identifier,
 * string, integer and boolean leaves also show their value. The children
 * visited are root->p1 and the nodes reachable from it through p3 links,
 * each printed one level deeper.
 */
void treeprint(Pnode root, int indent)
{
    int col = 0;
    Pnode child;

    while (col < indent)
    {
        printf(" ");
        col++;
    }

    if (root->type == T_NONTERMINAL)
        printf("%s", tabnonterm[root->value.ival]);
    else
        printf("%s", tabtypes[root->type]);

    switch (root->type)
    {
    case T_ID:
    case T_STRCONST:
        printf(" (%s)", root->value.sval);
        break;
    case T_INTCONST:
        printf(" (%d)", root->value.ival);
        break;
    case T_BOOLCONST:
        printf(" (%s)", (root->value.ival == TRUE ? "true" : "false"));
        break;
    default:
        break;
    }
    printf("\n");

    child = root->p1;
    while (child != NULL)
    {
        treeprint(child, indent + 1);
        child = child->p3;
    }
}
prog ( File with example of grammar)
numero: integer;
func fattoriale(n: integer): integer
fact: integer;
body
if n == 0 then
fact = 1;
else
fact = n * fattoriale(n-1);
end;
return fact;
end;
func stampaFattoriali(tot: integer): void
i, f: integer;
body
for i=0 to tot do
f = fattoriale(i);
writeln("Il fattoriale di ", i, "è ", f);
end;
end;
body
read(numero);
if numero < 0 then
writeln("Il numero ", numero, "non è valido");
else
stampaFattoriali(numero);
end.
When I type make in the terminal, it creates the files: tree.o parser.o parser.h parser.c lexer.c lexer.o bup.
When I execute the bup file, the terminal shows me this error message:
"ine 1: syntax error on symbol "
So it doesn't generate the abstract tree.
I don't know whether this error comes from the prog, lexer.lex or parser.y file.
Yacc/bison assign their own numbers to terminal tokens, and assume that the lexer will use those numbers. But you provide your own numbers in the def.h header, which yacc/bison knows absolutely nothing about. It will not correctly interpret the codes returned by yylex which will make it impossible to parse correctly.
So don't do that.
Let bison generate the token codes, use the header file it generates (parser.h with your settings), and don't step on its feet by trying to define the enum values yourself.
As a hint about debugging, that is really way too much code to have written before you start debugging, and that fact is exactly illustrated by your complaint at the end of your question that you don't know where to look for the error. Instead of writing the whole project and then hoping it works as a whole, write little pieces and debug them as you go. Although you need the parser to generate the token type values, you don't need to run the parser to test your scanner. You can write a simple program which repeatedly calls yylex and prints the returned types and values. (Or you can just enable flex debugging with the -d command line option, which is even simpler.)
Similarly, you should be able to test your AST methods by writing some test functions which use these methods to build, walk and print out an AST. Make sure that they produce the expected results.
Only once you have evidence that the lexer is producing the correct tokens and that your AST construction functions work should you start to debug your parser. Again, you will find it much easier if you use the built-in debugging facilities; see the Debugging your parser section of the Bison manual for instructions.
Good luck.

Is there an easy way to make Jison calculator parser return symbolic results?

Jison parsers return the calculated result:
calculator.parse("2^3"); // returns 8
calculator.parse("x^2"); // gives a parse err
I would like that it return the symbolic expression:
calculator.parse("x^2");
// should return
// "Math.pow(x,2)"
And
calculator.parse("x^(x+1)");
// should return
// "Math.pow(x,x+1)"
And
calculator.parse("cos(x)");
// should return
// "Math.cos(x)"
If what you need is simple enough, you might get by by modifying the calculator. For instance:
Add an IDENTIFIER token after NUMBER in the list of tokens:
[a-z] return 'IDENTIFIER'
This allows a single lower case letter to serve as an identifier.
Modify the e '^' e rule to return a string rather than the computed value:
| e '^' e
{$$ = "Math.pow(" + $1 + "," + $3 + ");"}
Add a new rule to the list of rules for e:
| IDENTIFIER
(No explicit action needed.)
With these changes, parsing x^2 results in "Math.pow(x,2);"
To support the operators, the other rules would have to be modified like the one for e '^' e to return strings rather than the result of the math.
This is extremely primitive and won't optimize things that could be optimized. For instance, 1^2 will be output as "Math.pow(1, 2)" when it could be optimized to 1.
(Based on #Louis answer and comments)
Here is the code for a basic symbolic calculator using jison:
/* description: Parses mathematical expressions and returns the equivalent
   JavaScript source text instead of a computed value.

   Every action yields text, so identifiers can appear anywhere:
     x^2        -> Math.pow(x, 2)
     a-(b+c)    -> a - (b + c)
     cos(x)     -> Math.cos(x)
   In the FUNCTION pattern "atan2" is listed before "atan": alternatives are
   tried left to right, so the shorter name would otherwise always win. */

/* lexical grammar */
%lex
%%

\s+                   /* skip whitespace */
(acos|asin|atan2|atan|cos|log|sin|sqrt|tan)  return 'FUNCTION'
[0-9]+("."[0-9]+)?\b  return 'NUMBER'
[a-z]                 return 'IDENTIFIER'
"|"                   return '|'
"*"                   return '*'
"/"                   return '/'
"-"                   return '-'
"+"                   return '+'
"^"                   return '^'
"!"                   return '!'
"%"                   return '%'
"("                   return '('
")"                   return ')'
"PI"                  return 'PI'
"E"                   return 'E'
<<EOF>>               return 'EOF'
.                     return 'INVALID'

/lex

/* operator associations and precedence */

%left '+' '-'
%left '*' '/'
%left '^'
%right '!'
%right '%'
%left UMINUS

%start expressions

%% /* language grammar */

expressions
    : e EOF
        { typeof console !== 'undefined' ? console.log($1) : print($1);
          return $1; }
    ;

e
    : e '+' e
        {$$ = $1 + " + " + $3;}
    | e '-' e
        /* spaces keep "a - -b" from collapsing into the "--" operator */
        {$$ = $1 + " - " + $3;}
    | e '*' e
        {$$ = $1 + "*" + $3;}
    | e '/' e
        {$$ = $1 + "/" + $3;}
    | e '^' e
        /* no trailing ";" so the call can be nested in a larger expression */
        {$$ = "Math.pow(" + $1 + ", " + $3 + ")";}
    | e '!'
        /* emit a self-contained factorial call; the operand may be symbolic */
        {{
          $$ = "(function fact(n) { return n == 0 ? 1 : fact(n - 1) * n; })(" + $1 + ")";
        }}
    | e '%'
        {$$ = "(" + $1 + "/100)";}
    | '-' e %prec UMINUS
        {$$ = "(-" + $2 + ")";}
    | '(' e ')'
        /* keep the parentheses: the output is text, so grouping must survive */
        {$$ = "(" + $2 + ")";}
    | FUNCTION '(' e ')'
        {$$ = "Math." + $1 + "(" + $3 + ")";}
    | '|' e '|'
        {$$ = "Math.abs(" + $2 + ")";}
    | NUMBER
        {$$ = Number(yytext);}
    | E
        {$$ = "Math.E";}
    | PI
        {$$ = "Math.PI";}
    | IDENTIFIER
    | FUNCTION
    ;
No.
Not an easy way, depending on what you call 'easy'. You need to define tokens, and then the symbolic manipulation of those tokens, and so on.
I don't think that calculator would be a good starting point, and it's such a trivial thing anyway that throwing it away and writing your JS symbolic parser grammar from scratch wouldn't be too much more of a problem.
Just to save some google time for everyone, this is what we are talking about: http://zaach.github.io/jison/demos/calc/

mirror bits in char, limited operators +,<<,& no loops allowed, C language

Preparing for exam and got stuck at this question:
Allowed operators are <<,+,& no loops allowed and minimum temp variables.
Write a function in C, that gets 4-bit number (char) and returns mirrored (relative to center) bits.
Example: given b4,b3,b2,b1 return b1,b2,b3,b4
O_o thanks!
it might be not clear, but general language tools are allowed ('==',if,>,< etc..)
This is not possible given the constraints of only the operators <<, +, & and no other constructs besides return.
To move b3 from the 3rd position to the 2nd position, you will need a way to shift to the right which requires something like >> or /. Of the operators provided, none can be used with b3 to set the 2nd or 1st bit position.
if you can use if statements and the assignment operator =, it is possible. You can then write a messy solution such as
/*
 * Mirrors the low four bits of c around their centre:
 * b4 b3 b2 b1 -> b1 b2 b3 b4.
 *
 * c - value whose bits 0..3 are mirrored; higher bits are ignored.
 *
 * Returns the mirrored 4-bit value. Only <<, + and & are used, plus if and
 * assignment.
 */
char flip(char c)
{
    char f;

    /* '+' binds tighter than '<<', so each shift needs its own parentheses */
    f = ((c & 1) << 3) + ((c & 2) << 1);
    if (c & 4)
        f = f + 2; /* bit 2 lands on bit 1 */
    if (c & 8)
        f = f + 1; /* bit 3 lands on bit 0 */
    return f;
}
A more ugly but shorter one liner if you can use the similar to if ? operator.
/*
 * Mirrors the low four bits of c around their centre
 * (b4 b3 b2 b1 -> b1 b2 b3 b4) in a single expression.
 *
 * c - value whose bits 0..3 are mirrored; higher bits are ignored.
 */
char flip(char c)
{
    /* every term is parenthesised because '+' binds tighter than '<<' */
    return ((c & 1) << 3) + ((c & 2) << 1) + ((c & 4) ? 2 : 0) + ((c & 8) ? 1 : 0);
}

Hashtable indexed on several fields

I'm currently programming an OCaml module defining a type corresponding to a CPU register. The interface of this module is the following :
(*
* Defines a type which represents a R3000 register.
*)
type t =
| R0 (* Always 0 *)
| AT (* Assembler temporary *)
| V0 | V1 (* Subroutine return values *)
| A0 | A1 | A2 | A3 (* Subroutine arguments *)
| T0 | T1 | T2 | T3 | T4 | T5 | T6 | T7 (* Temporary registers *)
| S0 | S1 | S2 | S3 | S4 | S5 | S6 | S7 (* Register variables *)
| T8 | T9 (* Temporary registers *)
| K0 | K1 (* Reserved for kernels *)
| GP | SP | FP (* Global/Stack/Frame pointer *)
| RA (* Return address *)
(*
* Conversion from/to [|0, 31|].
*)
val of_int : int -> t
val to_int : t -> int
(*
* Conversion to string for display.
*)
val of_string : string -> t
val to_string : t -> string
However, I would like the implementation to be fast and not too repetitive. For example, I could code the of_int function like this :
let of_int = function
| 0 -> R0
| 1 -> AT
(* ... *)
But it would be awful and unmaintainable. I do not want to do this as it conflicts with my programming religion. Moreover, I would need to do this kind of dirty code not only one time, but for the four functions.
The first solution I found would be to use a preprocessor (either Camlp4 or cpp) to generate the code I want. I find this to be overkill but would use this method if you can't help me with my second idea.
After a bit of thought, I thought I could do something like this :
type regdescr = {
reg : t ;
name : string ;
index : int
}
let regs =
let htbl = Hashtbl.create 32 in
let li = [ (* regdescr defs here *) ] in
List.iter (Hashtbl.add htbl) li ;
htbl
However, in this case, I must choose what field I want to hash. Is there another solution than using three different hashtables in this case ? Maybe a data-structure I do not know about is able to hash over three fields and perform searches on the three of them.
Sorry for the long question for which the answer may be trivial :) .
Looks like a perfect fit for deriving.
(*
* Defines a type which represents a R3000 register.
*)
type t =
| R0 (* Always 0 *)
| AT (* Assembler temporary *)
| V0 | V1 (* Subroutine return values *)
| A0 | A1 | A2 | A3 (* Subroutine arguments *)
| T0 | T1 | T2 | T3 | T4 | T5 | T6 | T7 (* Temporary registers *)
| S0 | S1 | S2 | S3 | S4 | S5 | S6 | S7 (* Register variables *)
| T8 | T9 (* Temporary registers *)
| K0 | K1 (* Reserved for kernels *)
| GP | SP | FP (* Global/Stack/Frame pointer *)
| RA (* Return address *)
deriving (Enum,Show)
let of_int x = Enum.to_enum<t>(x)
let to_int x = Enum.from_enum<t>(x)
let to_string x = Show.show<t>(x)
let pr = Printf.printf
let () =
pr "%i %i %i\n" (to_int R0) (to_int RA) (to_int T8);
pr "%s %s %s\n"
(to_string (of_int 0)) (to_string (of_int 31)) (to_string (of_int 24));
pr "%s %s %s\n"
(to_string (Enum.pred<t>(A1))) (to_string A1) (to_string (Enum.succ<t>(A1)));
()
Output :
0 31 24
R0 RA T8
A0 A1 A2
Compile with :
ocamlc -pp deriving -I ~/work/contrib/deriving/0.1.1-3.11.1-orig/lib deriving.cma q.ml -o q
Just have three separate hash tables?
Instead of using a hashtable for going from one partial representation of a register to another, have you thought of forcing yourself to always manipulate only pointers to complete descriptions, so that you can access any aspect you like (index, string representation, ...) with just a pointer dereference?
You can use the representation (your type regdescr) as the register.
How often do you need to pattern-match a value of type register?
If you never do, you can even do away with the reg field completely.
(*
 * A register represented by its full description. The record is exposed as
 * [private]: fields can be read and pattern-matched from outside the
 * module, but new values can only be built here.
 *)
module Register :
sig
  type t = private { name : string ; index : int }
  val r0 : t
  val at : t
  (* Equality, hashing and ordering all look at [index] only. *)
  val equal : t -> t -> bool
  val hash : t -> int
  val compare : t -> t -> int
end =
struct
  type t = { name : string ; index : int }
  let r0 = { name = "R0" ; index = 0 }
  let at = { name = "AT" ; index = 1 }
  let equal r1 r2 = r1.index = r2.index
  let hash r1 = Hashtbl.hash (r1.index)
  (* This [let] is not recursive, so [compare] on the right-hand side is the
     standard library's polymorphic comparison. Leaving it unqualified avoids
     the deprecated [Pervasives] module name. *)
  let compare r1 r2 = compare r1.index r2.index
end
Note: you can make the whole thing more readable by using files register.ml and register.mli to define the Register module.
If you sometimes need pattern-matching, you can keep the constructor field so that it is possible to write nice pattern-matchings:
match r.reg with
R0 -> ...
| AT -> ...
But force yourself to write only functions that accept (and pass their callees) the full Register.t.
EDIT: For indexing, first write the generic function below:
let all_registers = [ r0 ; at ]
(*
 * [index projection] builds a hash table that maps [projection r] to [r]
 * for every register in [all_registers], and returns the lookup function
 * over that table. The table is filled once, when [index] is applied to the
 * projection; the returned function raises [Not_found] for unknown keys,
 * as [Hashtbl.find] does.
 *)
let index projection =
  let table = Hashtbl.create 32 in
  List.iter (fun reg -> Hashtbl.add table (projection reg) reg) all_registers ;
  Hashtbl.find table
Then pass it all the projections you need:
let of_int = index (fun r -> r.index)
let of_name = index (fun r -> r.name)

Equation (expression) parser with precedence?

I've developed an equation parser using a simple stack algorithm that will handle binary (+, -, |, &, *, /, etc) operators, unary (!) operators, and parenthesis.
Using this method, however, leaves me with everything having the same precedence - it's evaluated left to right regardless of operator, although precedence can be enforced using parenthesis.
So right now "1+11*5" returns 60, not 56 as one might expect.
While this is suitable for the current project, I want to have a general purpose routine I can use for later projects.
Edited for clarity:
What is a good algorithm for parsing equations with precedence?
I'm interested in something simple to implement and understand that I can code myself to avoid licensing issues with available code.
Grammar:
I don't understand the grammar question - I've written this by hand. It's simple enough that I don't see the need for YACC or Bison. I merely need to calculate strings with equations such as "2+3 * (42/13)".
Language:
I'm doing this in C, but I'm interested in an algorithm, not a language specific solution. C is low level enough that it'll be easy to convert to another language should the need arise.
Code Example
I posted the test code for the simple expression parser I was talking about above. The project requirements altered and so I never needed to optimize the code for performance or space as it wasn't incorporated into the project. It's in the original verbose form, and should be readily understandable. If I do anything further with it in terms of operator precedence, I'll probably choose the macro hack because it matches the rest of the program in simplicity. If I ever use this in a real project, though, I'll be going for a more compact/speedy parser.
Related question
Smart design of a math parser?
-Adam
The shunting yard algorithm is the right tool for this. Wikipedia is really confusing about this, but basically the algorithm works like this:
Say, you want to evaluate 1 + 2 * 3 + 4. Intuitively, you "know" you have to do the 2 * 3 first, but how do you get this result? The key is to realize that when you're scanning the string from left to right, you will evaluate an operator when the operator that follows it has a lower (or equal to) precedence. In the context of the example, here's what you want to do:
Look at: 1 + 2, don't do anything.
Now look at 1 + 2 * 3, still don't do anything.
Now look at 1 + 2 * 3 + 4, now you know that 2 * 3 has to be evaluated because the next operator has lower precedence.
How do you implement this?
You want to have two stacks, one for numbers, and another for operators. You push numbers onto the stack all the time. You compare each new operator with the one at the top of the stack; if the one on top of the stack has higher priority (or equal priority, for left-associative operators), you pop it off the operator stack, pop the operands off the number stack, apply the operator and push the result onto the number stack. Now you repeat the comparison with the top of stack operator.
Coming back to the example, it works like this:
N = [ ]
Ops = [ ]
Read 1. N = [1], Ops = [ ]
Read +. N = [1], Ops = [+]
Read 2. N = [1 2], Ops = [+]
Read *. N = [1 2], Ops = [+ *]
Read 3. N = [1 2 3], Ops = [+ *]
Read +. N = [1 2 3], Ops = [+ *]
Pop 3, 2 and execute 2*3, and push result onto N. N = [1 6], Ops = [+]
+ is left associative, so you want to pop 1, 6 off as well and execute the +. N = [7], Ops = [].
Finally push the [+] onto the operator stack. N = [7], Ops = [+].
Read 4. N = [7 4]. Ops = [+].
You've run out of input, so you want to empty the stacks now. Upon which you will get the result 11.
There, that's not so difficult, is it? And it makes no invocations to any grammars or parser generators.
The hard way
You want a recursive descent parser.
To get precedence you need to think recursively, for example, using your sample string,
1+11*5
to do this manually, you would have to read the 1, then see the plus and start a whole new recursive parse "session" starting with 11... and make sure to parse the 11 * 5 into its own factor, yielding a parse tree with 1 + (11 * 5).
This all feels so painful even to attempt to explain, especially with the added powerlessness of C. See, after parsing the 11, if the * was actually a + instead, you would have to abandon the attempt at making a term and instead parse the 11 itself as a factor. My head is already exploding. It's possible with the recursive descent strategy, but there is a better way...
The easy (right) way
If you use a GPL tool like Bison, you probably don't need to worry about licensing issues since the C code generated by bison is not covered by the GPL (IANAL but I'm pretty sure GPL tools don't force the GPL on generated code/binaries; for example Apple compiles code like say, Aperture with GCC and they sell it without having to GPL said code).
Download Bison (or something equivalent, ANTLR, etc.).
There is usually some sample code that you can just run bison on and get your desired C code that demonstrates this four function calculator:
http://www.gnu.org/software/bison/manual/html_node/Infix-Calc.html
Look at the generated code, and see that this is not as easy as it sounds. Also, the advantages of using a tool like Bison are 1) you learn something (especially if you read the Dragon book and learn about grammars), 2) you avoid NIH trying to reinvent the wheel. With a real parser-generator tool, you actually have a hope at scaling up later, showing other people you know that parsers are the domain of parsing tools.
Update:
People here have offered much sound advice. My only warning against skipping the parsing tools or just using the Shunting Yard algorithm or a hand rolled recursive descent parser is that little toy languages [1] may someday turn into big actual languages with functions (sin, cos, log) and variables, conditions and for loops.
Flex/Bison may very well be overkill for a small, simple interpreter, but a one off parser+evaluator may cause trouble down the line when changes need to be made or features need to be added. Your situation will vary and you will need to use your judgement; just don't punish other people for your sins [2] and build a less than adequate tool.
My favorite tool for parsing
The best tool in the world for the job is the Parsec library (for recursive descent parsers) which comes with the programming language Haskell. It looks a lot like BNF, or like some specialized tool or domain specific language for parsing (sample code [3]), but it is in fact just a regular library in Haskell, meaning that it compiles in the same build step as the rest of your Haskell code, and you can write arbitrary Haskell code and call that within your parser, and you can mix and match other libraries all in the same code. (Embedding a parsing language like this in a language other than Haskell results in loads of syntactic cruft, by the way. I did this in C# and it works quite well but it is not so pretty and succinct.)
Notes:
1 Richard Stallman says, in Why you should not use Tcl
The principal lesson of Emacs is that
a language for extensions should not
be a mere "extension language". It
should be a real programming language,
designed for writing and maintaining
substantial programs. Because people
will want to do that!
[2] Yes, I am forever scarred from using that "language".
Also note that when I submitted this entry, the preview was correct, but SO's less than adequate parser ate my close anchor tag on the first paragraph, proving that parsers are not something to be trifled with because if you use regexes and one off hacks you will probably get something subtle and small wrong.
[3] Snippet of a Haskell parser using Parsec: a four function calculator extended with exponents, parentheses, whitespace for multiplication, and constants (like pi and e).
aexpr = expr `chainl1` toOp
expr = optChainl1 term addop (toScalar 0)
term = factor `chainl1` mulop
factor = sexpr `chainr1` powop
sexpr = parens aexpr
<|> scalar
<|> ident
powop = sym "^" >>= return . (B Pow)
<|> sym "^-" >>= return . (\x y -> B Pow x (B Sub (toScalar 0) y))
toOp = sym "->" >>= return . (B To)
mulop = sym "*" >>= return . (B Mul)
<|> sym "/" >>= return . (B Div)
<|> sym "%" >>= return . (B Mod)
<|> return . (B Mul)
addop = sym "+" >>= return . (B Add)
<|> sym "-" >>= return . (B Sub)
scalar = number >>= return . toScalar
ident = literal >>= return . Lit
parens p = do
lparen
result <- p
rparen
return result
http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm
Very good explanation of different approaches:
Recursive-descent recognition
The shunting yard algorithm
The classic solution
Precedence climbing
Written in simple language and pseudo-code.
I like 'precedence climbing' one.
There's a nice article here about combining a simple recursive-descent parser with operator-precedence parsing. If you've been recently writing parsers, it should be very interesting and instructive to read.
Long time ago, I made up my own parsing algorithm, that I couldn't find in any books on parsing (like the Dragon Book). Looking at the pointers to the Shunting Yard algorithm, I do see the resemblance.
About 2 years ago, I made a post about it, complete with Perl source code, on http://www.perlmonks.org/?node_id=554516. It's easy to port to other languages: the first implementation I did was in Z80 assembler.
It's ideal for direct calculation with numbers, but you can use it to produce a parse tree if you must.
Update Because more people can read (or run) Javascript, I've reimplemented my parser in Javascript, after the code has been reorganized. The whole parser is under 5k of Javascript code (about 100 lines for the parser, 15 lines for a wrapper function) including error reporting, and comments.
You can find a live demo at http://users.telenet.be/bartl/expressionParser/expressionParser.html.
// operator table: one entry per operator token, holding the token itself,
// its precedence (a higher number binds tighter), its associativity
// ('L' or 'R') and the function that evaluates it
var ops = {};
[
    ['+',  10, 'L', function(a, b) { return a + b; }],
    ['-',  10, 'L', function(a, b) { return a - b; }],
    ['*',  20, 'L', function(a, b) { return a * b; }],
    ['/',  20, 'L', function(a, b) { return a / b; }],
    ['**', 30, 'R', function(a, b) { return Math.pow(a, b); }]
].forEach(function(row) {
    ops[row[0]] = {op: row[0], precedence: row[1], assoc: row[2], exec: row[3]};
});
// named constants or variables that an expression may refer to
var vars = {};
vars.e = Math.exp(1);
vars.pi = Math.atan2(1, 1) * 4;
// input for parsing
// var r = { string: '123.45+33*8', offset: 0 };
// r is passed by reference: any change in r.offset is returned to the caller
// functions return the parsed/calculated value
/**
 * Parse a single value (operand) starting at r.offset and advance r.offset
 * past it.
 *
 * Recognised forms: a decimal number ("12", "1.5", ".5", "7."), a unary
 * plus/minus followed by a value, a parenthesised expression, or the name of
 * an entry in `vars`.
 *
 * On failure, r.error is set to a human readable message and a short string
 * code is thrown ('parseError', 'unknownVar', 'valueMissing' or
 * 'valueNotParsed'); the wrapper `parse` relies on these being strings.
 *
 * @param {{string: string, offset: number, error?: string}} r parser state, mutated in place
 * @returns {number} the parsed (and, where applicable, calculated) value
 */
function parseVal(r) {
    var startOffset = r.offset;
    var digitCount = 0;
    var value;
    var m;

    // true when position i holds a decimal digit; the length guard comes
    // first because indexOf("") would otherwise report a match at end of input
    function isDigitAt(i) {
        return i < r.string.length && "0123456789".indexOf(r.string.charAt(i)) >= 0;
    }

    // floating point number
    // example of parsing ("lexing") without aid of regular expressions
    while(isDigitAt(r.offset)) { r.offset++; digitCount++; }
    if(r.string.charAt(r.offset) === ".") {
        r.offset++;
        while(isDigitAt(r.offset)) { r.offset++; digitCount++; }
    }
    if(digitCount > 0) { // did that work?
        // OK, so I'm lazy...
        return parseFloat(r.string.substr(startOffset, r.offset - startOffset));
    }
    // Nothing or only a lone "." was consumed: that is not a number (it used
    // to come back as NaN), so rewind and let the other branches decide.
    r.offset = startOffset;

    var ch = r.string.charAt(r.offset);
    if(ch === "+") { // unary plus
        r.offset++;
        return parseVal(r);
    }
    if(ch === "-") { // unary minus
        r.offset++;
        return negate(parseVal(r));
    }
    if(ch === "(") { // expression in parens
        r.offset++; // eat "("
        value = parseExpr(r);
        if(r.string.charAt(r.offset) === ")") {
            r.offset++;
            return value;
        }
        r.error = "Parsing error: ')' expected";
        throw 'parseError';
    }
    m = /^[a-z_][a-z0-9_]*/i.exec(r.string.substr(r.offset));
    if(m) { // variable/constant name
        var name = m[0]; // matched string
        r.offset += name.length;
        if(name in vars) return vars[name]; // I know that thing!
        r.error = "Semantic error: unknown variable '" + name + "'";
        throw 'unknownVar';
    }
    if(r.string.length == r.offset) {
        r.error = 'Parsing error at end of string: value expected';
        throw 'valueMissing';
    }
    r.error = "Parsing error: unrecognized value";
    throw 'valueNotParsed';
}
/**
 * Arithmetic negation, used by parseVal for the unary minus.
 * @param {number} value
 * @returns {number} value with its sign flipped
 */
function negate (value) {
    var negated = -value;
    return negated;
}
/**
 * Try to read a binary operator at r.offset.
 *
 * The two-character '**' is tested before the single-character operators so
 * that it is not mistaken for '*'.
 *
 * @param {{string: string, offset: number}} r parser state; r.offset is
 *        advanced only when an operator was actually consumed
 * @returns {object|null} the matching entry of `ops`, or null when the input
 *          at r.offset is not an operator (including end of input)
 */
function parseOp(r) {
    if(r.string.substr(r.offset, 2) === '**') {
        r.offset += 2;
        return ops['**'];
    }
    var ch = r.string.charAt(r.offset);
    // At end of input ch is "", and indexOf("") is 0 for every string, so an
    // explicit emptiness check is needed; without it the offset was moved
    // past the end and undefined was returned.
    if(ch !== '' && "+-*/".indexOf(ch) >= 0) {
        r.offset++;
        return ops[ch];
    }
    return null;
}
/**
 * Parse and evaluate an infix expression starting at r.offset.
 *
 * Values are read with parseVal and operators with parseOp. Operators that
 * cannot be applied yet are kept on a stack together with their left operand;
 * an incoming operator first forces evaluation of every stacked operator that
 * binds tighter, or equally tight when the incoming one is left-associative.
 * A precedence-0 sentinel at the bottom of the stack, and a precedence-0
 * pseudo operator standing in for "no more operators", end the parse.
 *
 * @param {{string: string, offset: number}} r parser state, mutated in place
 * @returns {number} the calculated value
 */
function parseExpr(r) {
    var pending = [{precedence: 0, assoc: 'L'}];
    var value = parseVal(r); // first value on the left
    var op;
    var top;
    var mustReduce;
    while(true) {
        op = parseOp(r);
        if(!op) op = {precedence: 0, assoc: 'L'};
        for(;;) {
            top = pending[pending.length-1];
            mustReduce = op.precedence < top.precedence ||
                (op.precedence == top.precedence && op.assoc == 'L');
            if(!mustReduce) break;
            pending.pop();
            if(!top.exec) return value; // popped the sentinel: end reached
            // "reduce": combine the stored left operand with the current value
            value = top.exec(top.value, value);
        }
        // "shift": remember the operator with its left operand, then read the right one
        pending.push({op: op.op, precedence: op.precedence, assoc: op.assoc, exec: op.exec, value: value});
        value = parseVal(r); // value on the right
    }
}
/**
 * Wrapper: evaluate a complete expression string.
 *
 * Anything left over after the expression counts as a syntax error. Every
 * failure is reported through alert(), showing the message, the thrown code
 * and the input with "<*>" inserted at the failing offset.
 *
 * @param {string} string the expression to evaluate
 * @returns {number|undefined} the value, or undefined after an error was reported
 */
function parse (string) { // wrapper
    var r = {string: string, offset: 0};
    var value;
    try {
        value = parseExpr(r);
        if(r.offset >= r.string.length) {
            return value;
        }
        r.error = 'Syntax error: junk found at offset ' + r.offset;
        throw 'trailingJunk';
    } catch(e) {
        var consumed = r.string.substr(0, r.offset);
        var remaining = r.string.substr(r.offset);
        alert(r.error + ' (' + e + '):\n' + consumed + '<*>' + remaining);
        return;
    }
}
It would help if you could describe the grammar you are currently using to parse. Sounds like the problem might lie there!
Edit:
The fact that you don't understand the grammar question and that 'you've written this by hand' very likely explains why you're having problems with expressions of the form '1+11*5' (i.e., with operator precedence). Googling for 'grammar for arithmetic expressions', for example, should yield some good pointers. Such a grammar need not be complicated:
<Exp> ::= <Exp> + <Term> |
<Exp> - <Term> |
<Term>
<Term> ::= <Term> * <Factor> |
<Term> / <Factor> |
<Factor>
<Factor> ::= x | y | ... |
( <Exp> ) |
- <Factor> |
<Number>
would do the trick for example, and can be trivially augmented to take care of some more complicated expressions (including functions for example, or powers,...).
I suggest you have a look at this thread, for example.
Almost all introductions to grammars/parsing treat arithmetic expressions as an example.
Note that using a grammar does not at all imply using a specific tool (a la Yacc, Bison,...). Indeed, you most certainly are already using the following grammar:
<Exp> :: <Leaf> | <Exp> <Op> <Leaf>
<Op> :: + | - | * | /
<Leaf> :: <Number> | (<Exp>)
(or something of the kind) without knowing it!
Have you thought about using Boost Spirit? It allows you to write EBNF-like grammars in C++ like this:
// A four-function arithmetic grammar written as Boost Spirit rule assignments.
// In Spirit's notation `>>` means "followed by", `|` is alternation and a
// prefix `*` means "zero or more".
// NOTE(review): the rule objects (group, factor, term, expression, integer)
// are declared outside this excerpt.

// group: a parenthesised sub-expression
group = '(' >> expression >> ')';
// factor: the atoms of the grammar, an integer or a group
factor = integer | group;
// term: factors joined by '*' or '/'
term = factor >> *(('*' >> factor) | ('/' >> factor));
// expression: terms joined by '+' or '-'
expression = term >> *(('+' >> term) | ('-' >> term));
As you put your question there is no need for recursion whatsoever. The answer is three things: Postfix notation plus Shunting Yard algorithm plus Postfix expression evaluation:
1). Postfix notation = invented to eliminate the need for explicit precedence specification. Read more on the net but here is the gist of it: infix expression ( 1 + 2 ) * 3 while easy for humans to read and process not very efficient for computing via machine. What is? Simple rule that says "rewrite expression by caching in precedence,then always process it left-to-right". So infix ( 1 + 2 ) * 3 becomes a postfix 12+3*. POST because operator is placed always AFTER the operands.
2). Evaluating a postfix expression. Easy. Read numbers off the postfix string. Push them on a stack until an operator is seen. Check the operator type - unary? binary? ternary? Pop as many operands off the stack as needed to evaluate this operator. Evaluate. Push the result back on the stack! And you are almost done. Keep doing so until the input is exhausted and the stack has only one entry = the value you are looking for.
Let's do ( 1 + 2 ) * 3, which in postfix is "12+3*". Read the first number = 1. Push it on the stack. Read next. Number = 2. Push it on the stack. Read next. Operator. Which one? +. What kind? Binary = needs two operands. Pop the stack twice = argright is 2 and argleft is 1. 1 + 2 is 3. Push 3 back on the stack. Read next from the postfix string. It's a number. 3. Push. Read next. Operator. Which one? *. What kind? Binary = needs two numbers -> pop the stack twice. First pop into argright, second time into argleft. Evaluate the operation - 3 times 3 is 9. Push 9 on the stack. Read the next postfix char. It's null. End of input. Pop the stack once = that's your answer.
3). Shunting Yard is used to transform human (easily) readable infix expression into postfix expression (also human easily readable after some practice). Easy to code manually. See comments above and net.
I would suggest cheating and using the Shunting Yard Algorithm. It's an easy means of writing a simple calculator-type parser and takes precedence into account.
If you want to properly tokenise things and have variables, etc. involved then I would go ahead and write a recursive descent parser as suggested by others here, however if you simply require a calculator-style parser then this algorithm should be sufficient :-)
Another resource for precedence parsing is the Operator-precedence parser entry on Wikipedia. Covers Dijkstra's shunting yard algorithm, and a tree alternate algorithm, but more notably covers a really simple macro replacement algorithm that can be trivially implemented in front of any precedence ignorant parser:
#include <stdio.h>
/*
 * Fully parenthesise an infix expression given as separate command-line
 * arguments, so that a precedence-ignorant parser reads it with the usual
 * precedence (^ binds tightest, then * and /, then + and -).
 *
 * Every single-character operator argument is replaced by a closing/opening
 * parenthesis sequence whose depth reflects its precedence; every other
 * argument is printed unchanged. The whole output is wrapped in four pairs
 * of parentheses so the sequences always balance.
 */
int main(int argc, char *argv[]){
  printf("((((");
  for(int i=1;i!=argc;i++){
    /* Only one-character arguments can be operators. Testing argv[i][0]
       first keeps the argv[i][1] read inside the string when the argument
       is empty. */
    if(argv[i][0] && !argv[i][1]){
      /* switch on the character itself; a char* is not a valid switch operand */
      switch(argv[i][0]){
        case '^': printf(")^("); continue;
        case '*': printf("))*(("); continue;
        case '/': printf("))/(("); continue;
        case '+': printf(")))+((("); continue;
        case '-': printf(")))-((("); continue;
      }
    }
    printf("%s", argv[i]);
  }
  printf("))))\n");
  return 0;
}
Invoke it as:
$ cc -o parenthesise parenthesise.c
$ ./parenthesise a \* b + c ^ d / e
((((a))*((b)))+(((c)^(d))/((e))))
Which is awesome in its simplicity, and very understandable.
Is there a language you want to use? ANTLR will let you do this from a Java perspective. Adrian Kuhn has an excellent writeup on how to write an executable grammar in Ruby; in fact, his example is almost exactly your arithmetic expression example.
It depends on how "general" you want it to be.
If you want it to be really really general such as be able to parse mathematical functions as well like sin(4+5)*cos(7^3) you will probably need a parse tree.
In that case, I do not think a complete implementation is appropriate to paste here. I'd suggest that you check out the famous "Dragon Book".
But if you just want precedence support, then you could do that by first converting the expression to postfix form in which an algorithm that you can copy-and-paste should be available from google or I think you can code it up yourself with a binary tree.
When you have it in postfix form, then it's piece of cake from then on since you already understand how the stack helps.
I found this on the PIClist about the Shunting Yard algorithm:
Harold writes:
I remember reading, a long time ago, of an algorithm that converted
algebraic expressions to RPN for easy evaluation. Each infix value or
operator or parenthesis was represented by a railroad car on a
track. One
type of car split off to another track and the other continued straight
ahead. I don't recall the details (obviously!), but always thought it
would be interesting to code. This is back when I was writing 6800 (not
68000) assembly code.
This is the "shunting yard algorythm"
and it is what most machine parsers
use. See the article on parsing in
Wikipedia. An easy way to code the
shunting yard algorythm is to use two
stacks. One is the "push" stack and
the other the "reduce" or "result"
stack. Example:
pstack = () // empty rstack = ()
input: 1+2*3 precedence = 10 // lowest
reduce = 0 // don't reduce
start: token '1': isnumber, put in
pstack (push) token '+': isoperator
set precedence=2 if precedence <
previous_operator_precedence then
reduce() // see below put '+' in
pstack (push) token '2': isnumber,
put in pstack (push) token '*':
isoperator, set precedence=1, put in
pstack (push) // check precedence as
// above token '3': isnumber, put in
pstack (push) end of input, need to
reduce (goal is empty pstack) reduce()
//done
to reduce, pop elements from the push
stack and put them into the result
stack, always swap the top 2 items on
pstack if they are of the form
'operator' 'number':
pstack: '1' '+' '2' '*' '3' rstack: ()
... pstack: () rstack: '3' '2' '*' '1'
'+'
if the expression would have been:
1*2+3
then the reduce trigger would have
been the reading of the token '+'
which has lower precendece than the
'*' already pushed, so it would have
done:
pstack: '1' '*' '2' rstack: () ...
pstack: () rstack: '1' '2' '*'
and then pushed '+' and then '3' and
then finally reduced:
pstack: '+' '3' rstack: '1' '2' '*'
... pstack: () rstack: '1' '2' '*' '3'
'+'
So the short version is: push numbers,
when pushing operators check the
precedence of the previous operator.
If it was higher than the operator's
that is to be pushed now, first
reduce, then push the current
operator. To handle parens simply save
the precedence of the 'previous'
operator, and put a mark on the pstack
that tells the reduce algorythm to
stop reducing when solving the inside
of a paren pair. The closing paren
triggers a reduction as does the end
of input, and also removes the open
paren mark from the pstack, and
restores the 'previous operation'
precedence so parsing can continue
after the close paren where it left
off. This can be done with recursion
or without (hint: use a stack to store
the previous precedence when
encountering a '(' ...). The
generalized version of this is to use
a parser generator implemented
shunting yard algorythm, f.ex. using
yacc or bison or taccle (tcl analog of
yacc).
Peter
-Adam
I have posted source for an ultra compact (1 class, < 10 KiB) Java Math Evaluator on my web site. This is a recursive descent parser of the type that caused the cranial explosion for the poster of the accepted answer.
It supports full precedence, parenthesis, named variables and single-argument functions.
i released an expression parser based on Dijkstra's Shunting Yard algorithm, under the terms of the Apache License 2.0:
http://projects.congrace.de/exp4j/index.html
I've implemented a recursive descent parser in Java in the MathEclipse Parser project. It could also be used in as a Google Web Toolkit module
I'm currently working on a series of articles building a regular expression parser as a learning tool for design patterns and readable programing. You can take a look at readablecode. The article presents a clear use of shunting yards algorithm.
I wrote an expression parser in F# and blogged about it here. It uses the shunting yard algorithm, but instead of converting from infix to RPN, I added a second stack to accumulate the results of calculations. It correctly handles operator precedence, but doesn't support unary operators. I wrote this to learn F#, not to learn expression parsing, though.
A Python solution using pyparsing can be found here. Parsing infix notation with various operators with precedence is fairly common, and so pyparsing also includes the infixNotation (formerly operatorPrecedence) expression builder. With it you can easily define boolean expressions using "AND", "OR", "NOT", for example. Or you can expand your four-function arithmetic to use other operators, such as ! for factorial, or '%' for modulus, or add P and C operators to compute permutations and combinations. You could write an infix parser for matrix notation, that includes handling of '-1' or 'T' operators (for inversion and transpose). The operatorPrecedence example of a 4-function parser (with '!' thrown in for fun) is here and a more fully featured parser and evaluator is here.
I know this is a late answer, but I've just written a tiny parser that allows all operators (prefix, postfix and infix-left, infix-right and nonassociative) to have arbitrary precedence.
I'm going to expand this for a language with arbitrary DSL support, but I just wanted to point out that one doesn't need custom parsers for operator precedence, one can use a generalized parser that doesn't need tables at all, and just looks up the precedence of each operator as it appears. People have been mentioning custom Pratt parsers or shunting yard parsers that can accept illegal inputs - this one doesn't need to be customized and (unless there's a bug) won't accept bad input. It isn't complete in a sense, it was written to test the algorithm and its input is in a form that will need some preprocessing, but there are comments that make it clear.
Note some common kinds of operators are missing for instance the sort of operator used for indexing ie table[index] or calling a function function(parameter-expression, ...)
I'm going to add those, but think of both as postfix operators where what comes between the delimeters '[' and ']' or '(' and ')' is parsed with a different instance of the expression parser. Sorry to have left that out, but the postfix part is in - adding the rest will probably almost double the size of the code.
Since the parser is just 100 lines of racket code, perhaps I should just paste it here, I hope this isn't longer than stackoverflow allows.
A few details on arbitrary decisions:
If a low precedence postfix operator is competing for the same infix blocks as a low precedence prefix operator the prefix operator wins. This doesn't come up in most languages since most don't have low precedence postfix operators.
- for instance: ((data a) (left 1 +) (pre 2 not)(data b)(post 3 !) (left 1 +) (data c))
is a+not b!+c where not is a prefix operator and ! is postfix operator and both have lower
precedence than + so they want to group in incompatible ways either as
(a+not b!)+c
or as
a+(not b!+c)
in these cases the prefix operator always wins, so the second is the way it parses
Nonassociative infix operators are really there so that you don't have to pretend that operators that return different types than they take make sense together, but without having different expression types for each it's a kludge. As such, in this algorithm, non-associative operators refuse to associate not just with themselves but with any operator with the same precedence. That's a common case as < <= == >= etc don't associate with each other in most languages.
The question of how different kinds of operators (left, prefix etc) break ties on precedence is one that shouldn't come up, because it doesn't really make sense to give operators of different types the same precedence. This algorithm does something in those cases, but I'm not even bothering to figure out exactly what because such a grammar is a bad idea in the first place.
#lang racket
;cool the algorithm fits in 100 lines!
(define MIN-PREC -10000)
;format (pre prec name) (left prec name) (right prec name) (nonassoc prec name) (post prec name) (data name) (grouped exp)
;for example "not a*-7+5 < b*b or c >= 4"
;which groups as: not ((((a*(-7))+5) < (b*b)) or (c >= 4))"
;is represented as '((pre 0 not)(data a)(left 4 *)(pre 5 -)(data 7)(left 3 +)(data 5)(nonassoc 2 <)(data b)(left 4 *)(data b)(right 1 or)(data c)(nonassoc 2 >=)(data 4))
;higher numbers are higher precedence
;"(a+b)*c" is represented as ((grouped (data a)(left 3 +)(data b))(left 4 *)(data c))
;parser state: a stack of parsed operands/sub-trees (data-stack) and a stack
;of operators waiting to be applied (op-stack); both fields are mutable and
;start out as the empty list via #:auto-value
(struct prec-parse ([data-stack #:mutable #:auto]
[op-stack #:mutable #:auto])
#:auto-value '())
;removes the top element of the data stack and returns it
(define (pop-data stacks)
  (define current (prec-parse-data-stack stacks))
  (begin0 (car current)
          (set-prec-parse-data-stack! stacks (cdr current))))
;removes the top element of the operator stack and returns it
(define (pop-op stacks)
  (define current (prec-parse-op-stack stacks))
  (begin0 (car current)
          (set-prec-parse-op-stack! stacks (cdr current))))
;puts data on top of the data stack
(define (push-data! stacks data)
  (let ([extended (cons data (prec-parse-data-stack stacks))])
    (set-prec-parse-data-stack! stacks extended)))
;puts op on top of the operator stack
(define (push-op! stacks op)
  (let ([extended (cons op (prec-parse-op-stack stacks))])
    (set-prec-parse-op-stack! stacks extended)))
;applies pending operators from the top of the op stack for as long as the
;top operator's precedence (its cadr) is >= min-prec; stops at the first
;operator with lower precedence or when the op stack is empty
(define (process-prec min-prec stacks)
(let [(op-stack (prec-parse-op-stack stacks))]
(cond ((not (null? op-stack))
(let [(op (car op-stack))]
(cond ((>= (cadr op) min-prec)
;apply-op only touches the data stack, so the applied operator is
;removed from the op stack here
(apply-op op stacks)
(set-prec-parse-op-stack! stacks (cdr op-stack))
(process-prec min-prec stacks))))))))
;like process-prec, but for a non-associative operator: pending operators
;with strictly higher precedence than min-prec are applied, and meeting a
;pending operator with exactly min-prec raises an error
(define (process-nonassoc min-prec stacks)
(let [(op-stack (prec-parse-op-stack stacks))]
(cond ((not (null? op-stack))
(let [(op (car op-stack))]
(cond ((> (cadr op) min-prec)
(apply-op op stacks)
(set-prec-parse-op-stack! stacks (cdr op-stack))
(process-nonassoc min-prec stacks))
((= (cadr op) min-prec) (error "multiply applied non-associative operator"))
))))))
;replaces operands on the data stack with a tree node for op:
;a 'post operator consumes one operand and pushes (op operand);
;anything else is treated as infix, consumes two operands and pushes
;(op left right)
(define (apply-op op stacks)
(let [(op-type (car op))]
(cond ((eq? op-type 'post)
(push-data! stacks `(,op ,(pop-data stacks) )))
(else ;assume infix
;the right operand is on top, so it is popped first and kept in tos
(let [(tos (pop-data stacks))]
(push-data! stacks `(,op ,(pop-data stacks) ,tos)))))))
;ends the current (sub)expression: applies every pending operator whose
;precedence is >= min-prec and returns the input that was not consumed
(define (finish input min-prec stacks)
(process-prec min-prec stacks)
input
)
;parser state "an operand has just been read": expects a postfix operator,
;an infix operator, or the end of the expression.
;input    - remaining items, in the format described at the top of the file
;min-prec - operators with lower precedence than this end the current
;           (sub)expression and are left in the input for the caller
;stacks   - the prec-parse state, mutated in place
;returns the unconsumed input
(define (post input min-prec stacks)
  (if (null? input)
      (finish input min-prec stacks)
      (let* [(cur (car input))
             (input-type (car cur))]
        (cond ((eq? input-type 'post)
               (cond ((< (cadr cur) min-prec)
                      (finish input min-prec stacks))
                     (else
                      ;apply everything that binds at least as tightly, then
                      ;wrap the resulting operand in the postfix operator
                      (process-prec (cadr cur) stacks)
                      (push-data! stacks (cons cur (list (pop-data stacks))))
                      (post (cdr input) min-prec stacks))))
              (else
               (let [(handle-infix
                      ;proc-fn reduces the pending operators; inc is 1 for
                      ;right-associative operators so that pending operators
                      ;of equal precedence are not reduced yet
                      (lambda (proc-fn inc)
                        (cond ((< (cadr cur) min-prec)
                               (finish input min-prec stacks))
                              (else
                               (proc-fn (+ inc (cadr cur)) stacks)
                               (push-op! stacks cur)
                               (start (cdr input) min-prec stacks)))))]
                 (cond ((eq? input-type 'left) (handle-infix process-prec 0))
                       ((eq? input-type 'right) (handle-infix process-prec 1))
                       ((eq? input-type 'nonassoc) (handle-infix process-nonassoc 0))
                       ;the call must be parenthesised: a bare `error "..."`
                       ;in a cond clause just evaluates to the string and
                       ;raises nothing
                       (else (error "post op, infix op or end of expression expected here")))))))))
;parser state "an operand is expected": reads a prefix operator, a data item
;or a grouped sub-expression, pushes the resulting operand on the data stack
;and continues with post
;alters the stacks and returns the input
(define (start input min-prec stacks)
(if (null? input) (error "expression expected")
(let* [(cur (car input))
(input-type (car cur))]
(set! input (cdr input))
;pre could clearly work with new stacks, but could it reuse the current one?
(cond ((eq? input-type 'pre)
;the operand of a prefix operator is parsed on fresh stacks, using the
;operator's own precedence as the lower bound
(let [(new-stack (prec-parse))]
(set! input (start input (cadr cur) new-stack))
(push-data! stacks
(cons cur (list (pop-data new-stack))))
;we might want to assert here that the cdr of the new stack is null
(post input min-prec stacks)))
((eq? input-type 'data)
(push-data! stacks cur)
(post input min-prec stacks))
((eq? input-type 'grouped)
;a group is parsed on its own stacks starting from MIN-PREC; only its
;resulting operand is carried over
(let [(new-stack (prec-parse))]
(start (cdr cur) MIN-PREC new-stack)
(push-data! stacks (pop-data new-stack)))
;we might want to assert here that the cdr of the new stack is null
(post input min-prec stacks))
(else (error "bad input"))))))
;entry point: parses a whole input list on fresh stacks and returns the
;parse tree left on top of the data stack
(define (op-parse input)
  (define stacks (prec-parse))
  (start input MIN-PREC stacks)
  (pop-data stacks))
;reads one datum (the expression in the list format described above) from
;the current input port and parses it
(define (main)
(op-parse (read)))
;run the parser when the module is loaded
(main)
Here is a simple case recursive solution written in Java. Note it does not handle negative numbers but you can do add that if you want to:
/**
 * Minimal recursive evaluator for arithmetic expressions built from
 * non-negative numbers, the binary operators + - * / and parentheses.
 */
public class ExpressionParser {
    /**
     * Evaluates an expression by splitting it at its lowest-precedence
     * operator and evaluating both halves recursively.
     *
     * The split happens at the LAST top-level '+' or '-', or, when there is
     * none, at the last top-level '*' or '/'. Taking the last occurrence is
     * what makes the operators left-associative: "8-3-2" is evaluated as
     * (8-3)-2. Splitting at the first occurrence would give 8-(3-2).
     *
     * Negative numbers and unary operators are not supported.
     *
     * @param exp the expression text
     * @return the value of the expression
     * @throws NumberFormatException if a leaf of the expression is not a number
     */
    public double eval(String exp){
        int depth = 0;
        int lastAdditive = -1;
        int lastMultiplicative = -1;
        for(int i=0; i<exp.length(); i++){
            char c = exp.charAt(i);
            if(c == '(') depth++;
            else if(c == ')') depth--;
            else if(depth == 0){
                // only operators outside every pair of parentheses may split
                if(c == '+' || c == '-') lastAdditive = i;
                else if(c == '*' || c == '/') lastMultiplicative = i;
            }
        }
        int operatorIndex = lastAdditive >= 0 ? lastAdditive : lastMultiplicative;

        if(operatorIndex < 0){
            // no top-level operator: either a parenthesised group or a number
            exp = exp.trim();
            if(exp.charAt(0) == '(' && exp.charAt(exp.length()-1) == ')')
                return eval(exp.substring(1, exp.length()-1));
            else
                return Double.parseDouble(exp);
        }

        double left = eval(exp.substring(0, operatorIndex));
        double right = eval(exp.substring(operatorIndex+1));
        switch(exp.charAt(operatorIndex)){
            case '+':
                return left + right;
            case '-':
                return left - right;
            case '*':
                return left * right;
            case '/':
                return left / right;
            default:
                // unreachable: operatorIndex always points at one of + - * /
                return 0;
        }
    }
}
Algorithm could be easily encoded in C as recursive descent parser.
#include <stdio.h>
#include <ctype.h>
/*
* expression -> sum
* sum -> product | product "+" sum
* product -> term | term "*" product
* term -> number | expression
* number -> [0..9]+
*/
/*
 * Result of one parsing step: the value computed so far, together with a
 * pointer to the first character of the input that has not been consumed.
 */
typedef struct {
int value;
const char* context;
} expression_t;
expression_t expression(int value, const char* context) {
return (expression_t) { value, context };
}
/* begin: parsers */
expression_t eval_expression(const char* symbols);
/*
 * number -> [0..9]+
 * Reads consecutive decimal digits. When the input does not start with a
 * digit, the returned context equals symbols (nothing was consumed).
 */
expression_t eval_number(const char* symbols) {
    double accumulated = 0;
    const char* cursor;
    for (cursor = symbols; isdigit(*cursor); cursor++)
        accumulated = 10 * accumulated + (*cursor - '0');
    return expression(accumulated, cursor);
}
expression_t eval_term(const char* symbols) {
// term -> number | expression
expression_t number = eval_number(symbols);
return number.context != symbols ? number : eval_expression(symbols);
}
/*
 * Evaluates a product. The rule is right-recursive: the first term is
 * evaluated, and if a '*' follows it, the rest of the input is evaluated as
 * another product and the two values are multiplied.
 */
expression_t eval_product(const char* symbols) {
// product -> term | term "*" product
expression_t term = eval_term(symbols);
// left alternative: a lone term, no '*' follows
if (*term.context != '*')
return term;
// right alternative: skip the '*' and evaluate the remaining product
expression_t product = eval_product(term.context + 1);
return expression(term.value * product.value, product.context);
}
/*
 * sum -> product | product "+" sum
 * Evaluates the leading product; when a '+' follows, the remainder is
 * evaluated as another sum and added to it.
 */
expression_t eval_sum(const char* symbols) {
    expression_t left = eval_product(symbols);
    if (*left.context == '+') {
        expression_t rest = eval_sum(left.context + 1);
        return expression(left.value + rest.value, rest.context);
    }
    return left;
}
/*
 * expression -> sum
 * Top-level rule of the grammar; delegates to eval_sum.
 */
expression_t eval_expression(const char* symbols) {
    expression_t result = eval_sum(symbols);
    return result;
}
/* end: parsers */
int main() {
const char* expression = "1+11*5";
printf("eval(\"%s\") == %d\n", expression, eval_expression(expression).value);
return 0;
}
next libs might be useful:
yupana - strictly arithmetic operations;
tinyexpr - arithmetic operations + C math functions + one provided by user;
mpc - parser combinators
Explanation
Let's capture sequence of symbols that represent algebraic expression.
First one is a number, that is a decimal digit repeated one or more times.
We will refer such notation as production rule.
number -> [0..9]+
Addition operator with its operands is another rule.
It is either a number or any sequence of symbols that represents sum "+" sum.
sum -> number | sum "+" sum
Try substitute number into sum "+" sum that will be number "+" number which in turn could be expanded into [0..9]+ "+" [0..9]+ that finally could be reduced to 1+8 which is correct addition expression.
Other substitutions will also produce correct expression: sum "+" sum -> number "+" sum -> number "+" sum "+" sum -> number "+" sum "+" number -> number "+" number "+" number -> 12+3+5
Bit by bit we could resemble set of production rules aka grammar that express all possible algebraic expression.
expression -> sum
sum -> difference | difference "+" sum
difference -> product | difference "-" product
product -> fraction | fraction "*" product
fraction -> term | fraction "/" term
term -> "(" expression ")" | number
number -> digit+
To control operator precedence alter position of its production rule against others. Look at grammar above and note that production rule for * is placed below + this will force product evaluate before sum.
Implementation just combines pattern recognition with evaluation and thus closely mirrors production rules.
expression_t eval_product(const char* symbols) {
// product -> term | term "*" product
expression_t term = eval_term(symbols);
// left alternative of the rule: no '*' follows, so the term is the product
if (*term.context != '*')
return term;
// right alternative: evaluate everything after the '*' as another product
expression_t product = eval_product(term.context + 1);
return expression(term.value * product.value, product.context);
}
Here we evaluate term first and return it if there is no * character after it; this is the left choice in our production rule. Otherwise we evaluate the symbols after the * and return term.value * product.value; this is the right choice in our production rule, i.e. term "*" product.
Actually there's a way to do this without recursion, which allows you to go through the entire expression once, character by character. This is O(n) for time and space. It takes all of 5 milliseconds to run even for a medium-sized expression.
First, you'd want to do a check to ensure that your parens are balanced. I'm not doing it here for simplicity. Also, I'm acting as if this were a calculator. Calculators do not apply precedence unless you wrap an expression in parens.
I'm using two stacks, one for the operands and another for the operators. I increase the priority of the operation whenever I reach an opening '(' paren and decrease the priority whenever I reach a closing ')' paren. I've even revised the code to add in numbers with decimals. This is in c#.
NOTE: This doesn't work for signed numbers like negative numbers. Probably is just a simple revision.
/// <summary>
/// Evaluates an arithmetic expression in a single left-to-right pass, using
/// one stack for numbers and one for operators. Each opening paren raises
/// the priority given to the operators that follow and each closing paren
/// lowers it again.
/// </summary>
/// <param name="sequence">The expression text.</param>
/// <returns>
/// The computed value, or 0 when the stacks do not end up holding exactly
/// one number and no operator.
/// </returns>
/// <exception cref="ApplicationException">The parens are not balanced.</exception>
internal double Compute(string sequence)
{
    int priority = 0;
    int sequenceCount = sequence.Length;
    for (int i = 0; i < sequenceCount; i++) {
        char s = sequence[i];
        if (Char.IsDigit(s)) {
            double value = ParseNextNumber(sequence, i);
            numberStack.Push(value);
            // Skip the literal as it is written in the input. The length of
            // value.ToString() is not reliable for this: "9.50" or "007" are
            // longer than their formatted value, and the formatted text
            // depends on the current culture.
            // NOTE(review): assumes ParseNextNumber consumes digits and '.'
            // starting at i - confirm against its implementation.
            int end = i + 1;
            while (end < sequenceCount && (Char.IsDigit(sequence[end]) || sequence[end] == '.')) {
                end++;
            }
            i = end - 1;
        } else if (s == '+' || s == '-' || s == '*' || s == '/') {
            Operator op = ParseNextOperator(sequence, i, priority);
            CollapseTop(op, numberStack, operatorStack);
            operatorStack.Push(op);
        } else if (s == '(') {
            priority++;
        } else if (s == ')') {
            priority--;
        }
    }
    if (priority != 0) { throw new ApplicationException("Parens not balanced"); }
    // A final pseudo operator of priority 0 is passed to CollapseTop to
    // evaluate whatever is still pending on the stacks.
    CollapseTop(new Operator(' ', 0), numberStack, operatorStack);
    if (numberStack.Count == 1 && operatorStack.Count == 0) {
        return numberStack.Pop();
    }
    return 0;
}
Then to test this out:
// Usage example: evaluate a nested expression and print the result.
// 89.8 + ((9*3)+8) + (9*2) + 1 = 89.8 + 35 + 18 + 1 = 143.8
Calculator c = new Calculator();
double value = c.Compute("89.8+((9*3)+8)+(9*2)+1");
Console.WriteLine(string.Format("The sum of the expression is: {0}", (float)value));
//prints out The sum of the expression is: 143.8
Pure javascript, no dependencies needed
I really like bart's answer.
I made some modifications to make it easier to read, and also added support for some functions (which are easy to extend).
/**
 * Evaluate an expression string after removing all space characters.
 * A failure is reported through alert() with the error's message, and the
 * function then returns undefined.
 *
 * @param {string} str expression text
 * @returns {*} whatever parseExpr returns, or undefined after an error
 */
function Parse(str) {
  let result
  try {
    const withoutSpaces = str.replaceAll(" ", "")
    result = parseExpr(withoutSpaces) // Implement? See full code.
  } catch (err) {
    alert(err.message)
  }
  return result
}
Parse("123.45+3*22*4")
It can support as below
// Supported expressions as [expression, expected value] pairs.
// NOTE(review): an empty-string expected value appears to mean "compare
// against the JavaScript result of the same expression" - that is how the
// test loop in the full code below treats it.
const testArray = [
// Basic tests
["(3+5)*4", ""],
["123.45+3*22*4", ""],
["8%2", ""],
["8%3", ""],
["7/3", ""],
["2*pi*e", 2 * Math.atan2(0, -1) * Math.exp(1)],
["2**3", ""],
// Unary operator tests
["3+(-5)", ""],
["3+(+5)", ""],
// Function tests (arguments are written in curly braces)
["pow{2,3}*2", 16],
["4*sqrt{16}", 16],
["round{3.4}", 3],
["round{3.5}", 4],
["((1+e)*3/round{3.5})%2", ((1 + Math.exp(1)) * 3 / Math.round(3.5)) % 2],
["round{3.5}+pow{2,3}", Math.round(3.5)+Math.pow(2,3)],
]
Full code
// Main: browser-only test harness. Once the page has loaded it (1) mirrors
// console.error output into the page, (2) evaluates the input field when
// Enter is pressed and (3) runs the table of test expressions below.
(() => {
window.onload = () => {
// keep the original console.error so it can still be called
const nativeConsoleLogFunc = window.console.error
window.console.error = (...data) => { // Override native function, just for test.
// NOTE(review): data is interpolated into markup without escaping; fine for
// this test page only.
const range = document.createRange()
const frag = range.createContextualFragment(`<div>${data}</div>`)
document.querySelector("body").append(frag)
nativeConsoleLogFunc(...data)
}
// Evaluate the expression in the #expr element when Enter is released
document.querySelector(`input`).onkeyup = (keyboardEvent) => {
if (keyboardEvent.key === "Enter") {
const result = Parse(document.getElementById('expr').value)
if (result !== undefined) {
alert(result)
}
}
}
// [expression, expected] pairs; expected is a number, "" (meaning: compute
// the expectation with eval, see the loop below) or an Error subclass
const testArray = [
// Basic tests
["(3+5)*4", ""],
["123.45+3*22*4", ""],
["8%2", ""],
["8%3", ""],
["7/3", ""],
["2*pi*e", 2 * Math.atan2(0, -1) * Math.exp(1)],
["2**3", ""],
// Unary operator tests
["3+(-5)", ""],
["3+(+5)", ""],
// Function tests
["pow{2,3}*2", 16],
["4*sqrt{16}", 16],
["round{3.4}", 3],
["round{3.5}", 4],
["((1+e)*3/round{3.5})%2", ((1 + Math.exp(1)) * 3 / Math.round(3.5)) % 2],
["round{3.5}+pow{2,3}", Math.round(3.5) + Math.pow(2, 3)],
// Error tests: expected is the error class
["21+", ValueMissingError],
["21+*", ParseError],
["(1+2", ParseError], // the closing ")" is missing
["round(3.12)", MissingParaError], // should be round{3.12}
["help", UnknownVarError],
]
for (let [testString, expected] of testArray) {
if (expected === "") {
// eval is used here only to produce the reference value. The parser itself
// exists because eval may be unavailable, e.g. when a Content Security
// Policy forbids it: https://content-security-policy.com/
expected = eval(testString) // Why don't you use eval instead of writing the function yourself? Because the browser may disable eval due to policy considerations. [CSP](https://content-security-policy.com/)
}
// NOTE(review): the meaning of the second argument is not visible in this
// excerpt; the instanceof check below suggests that Parse then returns the
// error instead of alerting - confirm against the full Parse implementation.
const actual = Parse(testString, false)
if (actual !== expected) {
// an error of the expected class counts as a pass
if (actual instanceof Error && actual instanceof expected) {
continue
}
console.error(`${testString} = ${actual}, value <code>${expected}</code> expected`)
}
}
}
})()
// Script: error types raised by the parser. Each class sets `name`, so that
// logs and String(error) show the specific type instead of the generic "Error".

/** A name was used that is not defined in the variable table. */
class UnknownVarError extends Error {
  constructor(...args) {
    super(...args)
    this.name = "UnknownVarError"
  }
}

/** A value was expected but the input ended. */
class ValueMissingError extends Error {
  constructor(...args) {
    super(...args)
    this.name = "ValueMissingError"
  }
}

/** The input is not a well-formed expression. */
class ParseError extends Error {
  constructor(...args) {
    super(...args)
    this.name = "ParseError"
  }
}

/** A function was used without its `{...}` argument list. */
class MissingParaError extends Error {
  constructor(...args) {
    super(...args)
    this.name = "MissingParaError"
  }
}
/**
 * @description Describes one binary operator of the expression language.
 * @param {string} sign the operator token: "+", "-", "*", "/", ...
 * @param {number} precedence
 * @param {"L"|"R"} assoc associativity, left or right
 * @param {function} exec evaluates the operator for a left and a right operand
 * */
function Op(sign, precedence, assoc, exec = undefined) {
  Object.assign(this, {sign, precedence, assoc, exec})
}
// All supported binary operators as [sign, precedence, associativity, exec]
// rows, each turned into an Op instance.
const OpArray = [
  ["+", 10, "L", (l, r) => l + r],
  ["-", 10, "L", (l, r) => l - r],
  ["*", 20, "L", (l, r) => l * r],
  ["/", 20, "L", (l, r) => l / r],
  ["%", 20, "L", (l, r) => l % r],
  ["**", 30, "R", (l, r) => Math.pow(l, r)],
].map(([sign, precedence, assoc, exec]) => new Op(sign, precedence, assoc, exec))
// Names an expression may refer to: numeric constants and callable functions.
const VarTable = {
  e: Math.exp(1),
  pi: Math.atan2(0, -1), // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Math/atan2
  pow: (base, exponent) => {
    return Math.pow(base, exponent)
  },
  sqrt: (radicand) => {
    return Math.sqrt(radicand)
  },
  round: (value) => {
    return Math.round(value)
  },
}
/**
 * One stack entry: an operator together with the value stored alongside it.
 * @param {Op} op
 * @param {Number} value
 * */
function Item(op, value = undefined) {
  Object.assign(this, {op, value})
}
/**
 * Array-backed stack that always receives a sentinel Item on construction.
 * The sentinel's operator has an empty sign, precedence 0 and no `exec`.
 */
class Stack extends Array {
  // Array methods that build a new array (map, filter, slice, ...) create the
  // result through the species constructor. Routing them through this class
  // would run the constructor below and leave a stray sentinel in the result,
  // so derived arrays are plain Arrays.
  static get [Symbol.species]() {
    return Array
  }

  /**
   * @param {...*} items  initial entries; the sentinel is pushed after them
   */
  constructor(...items) {
    // `super(...items)` would treat a single numeric item as a length
    // (`new Array(3)` is three empty slots), so start empty and push instead.
    super()
    this.push(...items, new Item(new Op("", 0, "L")))
  }

  /**
   * @return {*} the top entry, without removing it
   */
  GetLastItem() {
    return this[this.length - 1] // faster than pop + push // https://stackoverflow.com/a/61839489/9935654
  }
}
/**
 * Read position inside an expression string. Intended to be called with `new`.
 * @param {string} str  the text being scanned
 * @param {number} pos  index of the next unread character
 */
function Cursor(str, pos) {
  this.str = str
  this.pos = pos

  /** Advances the position by `step` characters (default 1). */
  this.MoveRight = (step = 1) => {
    this.pos += step
  }

  /**
   * @return {string} up to `step` characters starting at the current position,
   * without advancing; "" once the end of the string is reached
   */
  this.PeekRightChar = (step = 1) => {
    return this.str.substring(this.pos, this.pos + step)
  }

  /**
   * Consumes the operator located at the current position, if there is one.
   * @return {Op|null} the matched operator, or null (position unchanged)
   */
  this.MoveToNextOp = () => {
    // Array.prototype.sort works in place, so sort a copy: OpArray is shared and
    // must keep its declared order. Candidates are tried longest sign first so
    // that an operator is never shadowed by a shorter one that is its prefix
    // ("**" versus "*"), whatever their precedences are.
    const candidates = [...OpArray].sort((a, b) => b.sign.length - a.sign.length)
    for (const op of candidates) {
      if (this.PeekRightChar(op.sign.length) === op.sign) {
        this.MoveRight(op.sign.length)
        return op
      }
    }
    return null
  }
}
/**
 * Parses a single operand at the cursor and advances the cursor past it.
 *
 * Accepted operands:
 *  - a numeric literal made of digits with at most one "." ("12", "3.5", ".5", "5.")
 *  - a unary "+" or "-" followed by an operand
 *  - a parenthesised expression "( ... )"
 *  - a variable name found in VarTable
 *  - a function call "name{arg, arg, ...}"; each argument is a full expression
 *    (so it may itself contain function calls) and is passed as a number
 *
 * @param {Cursor} cursor
 * @return {number} the operand's value
 * @throws {ParseError} malformed number, missing ")", bad argument list,
 *   braces after a non-function, or unrecognized input
 * @throws {UnknownVarError} the identifier is not an own entry of VarTable
 * @throws {MissingParaError} a function name is not followed by "{"
 * @throws {ValueMissingError} the string ends where an operand is required
 */
function parseVal(cursor) {
  const rest = cursor.str.substring(cursor.pos)

  // 👇 Numeric literal
  const numberMatch = /^[\d.]+/.exec(rest)
  if (numberMatch) {
    const literal = numberMatch[0]
    // parseFloat stops at the first character it cannot use, so "1.2.3" would
    // silently become 1.2 and "." would become NaN; reject those up front.
    if (!/^(?:\d+\.?\d*|\.\d+)$/.test(literal)) {
      throw new ParseError(`Parsing error: malformed number ${literal}`)
    }
    cursor.MoveRight(literal.length)
    return parseFloat(literal)
  }

  // 👇 Unary sign or parenthesised sub-expression
  switch (rest.charAt(0)) {
    case "+": // unary plus, for example: (+5)
      cursor.MoveRight()
      return parseVal(cursor)
    case "-":
      cursor.MoveRight()
      return -(parseVal(cursor))
    case "(": {
      cursor.MoveRight()
      const value = parseExpr(cursor)
      if (cursor.PeekRightChar() !== ")") {
        throw new ParseError("Parsing error: ')' expected")
      }
      cursor.MoveRight()
      return value
    }
  }

  // 👇 Variable or function
  const nameMatch = /^[a-z_][a-z0-9_]*/i.exec(rest)
  if (nameMatch) {
    const varName = nameMatch[0]
    cursor.MoveRight(varName.length)
    // Own-property test: `in` would also accept names inherited from
    // Object.prototype such as "toString" or "constructor".
    if (!Object.prototype.hasOwnProperty.call(VarTable, varName)) {
      throw new UnknownVarError(`unknown variable ${varName}`)
    }
    const isFunction = typeof VarTable[varName] === "function"

    if (cursor.PeekRightChar() !== "{") {
      if (isFunction) {
        throw new MissingParaError(`${varName} is a function, it needs big curly brackets`)
      }
      return VarTable[varName]
    }

    // 👇 Function call: the "{" sits directly at the cursor
    if (!isFunction) {
      throw new ParseError(`Parsing error: ${varName} is not a function`)
    }
    cursor.MoveRight() // "{"
    const args = []
    if (cursor.PeekRightChar() === "}") { // empty argument list
      cursor.MoveRight()
      return VarTable[varName]()
    }
    for (;;) {
      // parseExpr stops at the first character that is not an operator,
      // which must be the "," or "}" that ends this argument.
      args.push(parseExpr(cursor))
      const delimiter = cursor.PeekRightChar()
      if (delimiter === "}") {
        cursor.MoveRight()
        return VarTable[varName](...args)
      }
      if (delimiter !== ",") {
        throw new ParseError("Parsing error: ',' or '}' expected")
      }
      cursor.MoveRight() // ","
    }
  }

  // 👇 Nothing matched
  if (cursor.pos >= cursor.str.length) { // example: 1+2+
    throw new ValueMissingError(`Parsing error at end of string: value expected.`)
  }
  // example: 1+2+*
  throw new ParseError("Parsing error: unrecognized value")
}
/**
 * Evaluates a run of operands joined by binary operators, honouring operator
 * precedence and associativity, and returns the resulting value. Evaluation
 * stops at the first position where no operator follows an operand.
 * @param {string|Cursor} expr  expression text, or a Cursor to continue from
 * */
function parseExpr(expr) {
  const pending = new Stack()
  const cursor = (expr instanceof Cursor) ? expr : new Cursor(expr, 0)

  // True while the operator on top of the stack has to be applied before
  // `incoming`: it has higher precedence, or the same precedence and
  // `incoming` is left-associative.
  const mustReduce = (incoming) => {
    const topPrecedence = pending.GetLastItem().op.precedence
    if (incoming.precedence < topPrecedence) {
      return true
    }
    return incoming.precedence === topPrecedence && incoming.assoc === 'L'
  }

  for (;;) {
    let operand = parseVal(cursor)
    // No operator ahead: substitute a precedence-0 operator so everything pending is reduced.
    const incoming = cursor.MoveToNextOp() ?? new Op("", 0, "L")
    while (mustReduce(incoming)) {
      const {op: topOp, value: leftOperand} = pending.pop()
      if (!topOp.exec) { // popped an entry whose operator has no exec: end reached
        return operand
      }
      operand = topOp.exec(leftOperand, operand)
    }
    pending.push(new Item(incoming, operand))
  }
}
/**
 * Evaluates an arithmetic expression given as text.
 *
 * Every space character is removed before parsing. The whole remaining text
 * has to be consumed: input such as "1+2)" or "2x" is reported as a ParseError
 * instead of returning the value of the leading part.
 *
 * @param {string} str  the expression
 * @param {boolean} [alertError=true]  true: show the error message with
 *   `alert` and return undefined; false: return the caught error object
 * @return {number|Error|undefined} the value, or the failure as described above
 */
function Parse(str, alertError = true) {
  try {
    // Own the cursor here so the end position can be checked after parsing.
    const cursor = new Cursor(str.replaceAll(" ", ""), 0)
    const result = parseExpr(cursor)
    if (cursor.pos < cursor.str.length) {
      throw new ParseError(`Parsing error: unexpected "${cursor.str.substring(cursor.pos)}"`)
    }
    return result
  } catch (e) {
    if (alertError) {
      alert(e.message)
      return undefined
    }
    return e
  }
}
<input type="text" id="expr" name="expr" placeholder="123.45+3*22*4">
<button onclick="const x = Parse(document.getElementById('expr').value); if(x != null) alert(x);">
Calculate!
</button>

Resources