Ruby:parslet for a system verilog interface parser - ruby

I am using Ruby::Parslet.
I am parsing a document similar to an SV interface, eg:
interface my_intf;
protocol validonly;
transmit [Bool] valid;
transmit [Bool] pipeid;
transmit [5:0] incr;
transmit [Bool] sample;
endinterface
Here is my parser:
# Parslet grammar for a SystemVerilog-like interface description: an
# `interface <name>;` header followed by `protocol` / `transmit` lines.
# NOTE(review): Ruby class names must be constants (capitalised first letter),
# so `myParse` will not load as written -- confirm the intended name.
class myParse < Parslet::Parser
# Punctuation tokens. Each one swallows optional whitespace on BOTH sides.
rule(:lparen) { space? >> str('(') >> space? }
rule(:rparen) { space? >> str(')') >> space? }
rule(:lbox) { space? >> str('[') >> space? }
rule(:rbox) { space? >> str(']') >> space? }
rule(:lcurly) { space? >> str('{') >> space? }
rule(:rcurly) { space? >> str('}') >> space? }
rule(:comma) { space? >> str(',') >> space? }
rule(:semicolon) { space? >> str(';') >> space? }
# Succeeds only when no input is left.
rule(:eof) { any.absent? }
# A single tab or space ("\s" inside a double-quoted Ruby string is a space).
rule(:space) { match["\t\s"] }
# `repeat` without arguments means zero or more, so `whitespace` may match
# nothing at all and `space?` is then "maybe zero-or-more".
rule(:whitespace) { space.repeat }
rule(:space?) { whitespace.maybe }
# Optional indentation followed by one or more newlines.
rule(:blank_line) { space? >> newline.repeat(1) }
rule(:newline) { str("\n") }
# Things
rule(:integer) { space? >> match('[0-9]').repeat(1).as(:int) >> space? }
rule(:identifier) { match['a-z'].repeat(1) }
# Header line: `interface <name>;` terminated by a newline; the name is captured as :intf_name.
rule(:intf_start) { space? >> str('interface') >> space? >> (match['a-zA-Z_'].repeat(1,1) >> match['[:alnum:]_'].repeat(0)).as(:intf_name) >> space? >> str(';') >> space? >> str("\n") }
# `protocol [validonly];` line; the (possibly empty) keyword is captured as :protocol.
rule(:protocol) { space? >> str('protocol') >> whitespace >> (str('validonly').maybe).as(:protocol) >> space? >> str(';') >> space? >> str("\n") }
# `[Bool]` type marker.
rule(:bool) { lbox >> space? >> str('Bool').as(:bool) >> space? >> rbox }
# `[msb:lsb]` range; both numbers use `repeat` (zero or more digits), so they may be empty.
rule(:transmit_width) { lbox >> space? >> match('[0-9]').repeat.as(:msb) >> space? >> str(':') >> space? >> match('[0-9]').repeat.as(:lsb) >> space? >> rbox }
# `transmit <type> <name>;` line; the signal name is captured as :transmit_name.
rule(:transmit) { space? >> str('transmit') >> whitespace >> (bool | transmit_width) >> whitespace >> (match['a-zA-Z_'].repeat(1,1) >> match['[:alnum:]_'].repeat(0)).as(:transmit_name) >> space? >> str(';') >> space? >> str("\n") }
# NOTE(review): the body matches at most one protocol line or one optional
# blank line; `transmit` is never referenced and nothing matches `endinterface`.
rule(:interface_body) { (protocol | blank_line.maybe) }
rule(:interface) { intf_start >> interface_body }
# Root: zero or more interfaces.
rule(:expression) { ( interface ).repeat }
root :expression
end
I am having an issue making the rule for interface_body.
It can have 0 or more transmit lines and 0 or 1 protocol line and multiple blanks, comments etc.
Can someone help me out please? The rules I have written in the code snippet work with a single transmit line and a single protocol line, i.e. they match properly, but when I parse a whole interface it does not work.
Thanks in advance.

Ok... this parses the file you mentioned. I don't understand the desired format so I can't say it will work for all your files, but hopefully this will get you started.
require 'parslet'
# Parslet grammar for a SystemVerilog-like interface description, e.g.
#
#   interface my_intf;
#   protocol validonly;
#   transmit [Bool] valid;
#   transmit [5:0] incr;
#   endinterface
#
# Convention: a token only consumes the whitespace in FRONT of it, never the
# whitespace behind it, so a following rule can still require a separator.
class MyParse < Parslet::Parser
  # --- Punctuation (optional leading whitespace only) ---
  rule(:lparen) { space? >> str('(') }
  rule(:rparen) { space? >> str(')') }
  rule(:lbox) { space? >> str('[') }
  rule(:rbox) { space? >> str(']') }
  rule(:lcurly) { space? >> str('{') }
  rule(:rcurly) { space? >> str('}') }
  rule(:comma) { space? >> str(',') }
  rule(:semicolon) { space? >> str(';') }
  # Succeeds only when no input is left.
  rule(:eof) { any.absent? }

  # --- Whitespace ---
  # A single tab or space ("\s" inside a double-quoted Ruby string is a space).
  rule(:space) { match["\t\s"] }
  # Mandatory separator: one or more spaces/tabs.
  rule(:whitespace) { space.repeat(1) }
  # Optional separator: zero or more spaces/tabs.
  rule(:space?) { space.repeat(0) }
  # Optional indentation followed by one or more newlines (never matches empty).
  rule(:blank_line) { space? >> newline.repeat(1) }
  rule(:newline) { str("\n") }

  # Things
  rule(:integer) { space? >> match('[0-9]').repeat(1).as(:int) >> space? }
  rule(:identifier) { match['a-z'].repeat(1) }

  # Wraps +expression+ into a full statement line: optional indentation, the
  # expression, a terminating ';' and the end-of-line newline.
  #
  # @param expression [Parslet::Atoms::Base] parser for the line's content
  # @return [Parslet::Atoms::Base] parser for the complete line
  def line(expression)
    space? >>
      expression >>
      space? >>
      str(';') >>
      space? >>
      str("\n")
  end

  # Root: zero or more interfaces.
  rule(:expression?) { interface.repeat(0) }

  # A header, any number of body lines and the closing keyword. Blank lines
  # directly after the header and after `endinterface` are consumed here so a
  # file that ends with a newline (or separates interfaces with empty lines)
  # still parses completely.
  rule(:interface) do
    intf_start >>
      blank_line.repeat(0) >>
      interface_body.repeat(0) >>
      intf_end >>
      blank_line.repeat(0)
  end

  # One body line followed by optional blank lines. The explicit
  # `intf_end.absent?` guard makes a malformed body line fail with an error
  # instead of letting the repetition silently match nothing.
  rule(:interface_body) do
    intf_end.absent? >>
      interface_bodyline >>
      blank_line.repeat(0)
  end

  # `interface <name>;` -- the keyword must be separated from the name by
  # whitespace, exactly like `protocol` and `transmit` below.
  rule(:intf_start) do
    line(str('interface') >> whitespace >> name.as(:intf_name))
  end

  rule(:interface_bodyline) do
    line(protocol | transmit)
  end

  # `protocol [validonly]`; the (possibly empty) keyword is captured as :protocol.
  rule(:protocol) do
    str('protocol') >> whitespace >>
      (str('validonly').maybe).as(:protocol)
  end

  # `transmit <type> <name>`; the signal name is captured as :transmit_name.
  rule(:transmit) do
    str('transmit') >> whitespace >>
      (bool | transmit_width) >> whitespace >>
      name.as(:transmit_name)
  end

  # Identifier: a letter or underscore followed by alphanumerics/underscores.
  rule(:name) do
    match('[a-zA-Z_]') >>
      (match['[:alnum:]'] | str("_")).repeat(0)
  end

  # `[Bool]` type marker.
  rule(:bool) { lbox >> str('Bool').as(:bool) >> rbox }

  # `[msb:lsb]` range; both bounds need at least one digit.
  rule(:transmit_width) do
    lbox >>
      space? >>
      match('[0-9]').repeat(1).as(:msb) >>
      space? >>
      str(':') >>
      space? >>
      match('[0-9]').repeat(1).as(:lsb) >>
      space? >>
      rbox
  end

  rule(:intf_end) { str('endinterface') }

  root :expression?
end
require 'rspec'
require 'parslet/rig/rspec'
# Bottom-up specs for the grammar: each example feeds one rule a minimal
# input and expects it to parse (matcher from parslet/rig/rspec).
RSpec.describe MyParse do
let(:parser) { MyParse.new }
context "simple_rule" do
# Full statement lines include the indentation, the ';' and the newline,
# which is why these literals span two source lines.
it "should consume protocol line" do
expect(parser.interface_bodyline).to parse(' protocol validonly;
')
end
it 'name' do
expect(parser.name).to parse('valid')
end
it "bool" do
expect(parser.bool).to parse('[Bool]')
end
# The bare `transmit` rule covers only the statement, without ';' or newline.
it "transmit line" do
expect(parser.transmit).to parse('transmit [Bool] valid')
end
it "transmit as bodyline'" do
expect(parser.interface_bodyline).to parse(' transmit [Bool] valid;
')
end
end
end
# Run the specs defined above, then try the grammar on a real input file and
# print Parslet's failure tree when the file does not parse.
RSpec::Core::Runner.run(['--format', 'documentation'])
begin
  doc = File.read("test.txt")
  MyParse.new.parse(doc)
rescue Parslet::ParseFailed => error
  # Newer Parslet releases expose the failure tree as #parse_failure_cause,
  # because Ruby's built-in Exception#cause (nil here) shadows the old
  # accessor. Fall back to #cause for older Parslet versions.
  failure = error.respond_to?(:parse_failure_cause) ? error.parse_failure_cause : error.cause
  puts failure.ascii_tree
end
The main changes...
Don't consume whitespace both side of your tokens.
You had expressions that parsed "[Bool] valid" as LBOX BOOL RBOX SPACE? then expected another WHITESPACE but couldn't find one (as the previous rule had consumed it).
When an expression can validly parse as zero length (e.g. something with repeat(0)) and there is a problem with how it's written, you get an odd error. The rule passes and matches nothing, then the next rule will typically fail. I explicitly matched 'body lines' as 'not the end line' so it would fail with an error.
'repeat' defaults to (0) which I would love to change. I see mistakes around this all the time.
x.repeat(1,1) means make one match. That's the same as having x. :)
there were more whitespace problems
so....
Write your parser from the top down. Write tests from the bottom up.
When your tests get to the top you are done! :)
Good luck.

Related

shell script,how to print the newline in default position always?

i am using tput sc/rc/ed and printf '\E[n<A|B|C|D>'|printf '\E[y;xH',here two ex:
# Emit an escape sequence: ESC '[' followed by the given body.
#   $1 - sequence body, e.g. "2J" or "4;10H"
tty_esc(){ printf "\e[%s" "$1"; }
# Move the cursor to an absolute position.
#   $1 - column, $2 - row (emitted as "row;column" followed by 'H')
tty_cursor_locate(){ tty_esc "${2};${1}H"; }
# Move the cursor right.
#   $1 - number of columns (quoted so it is never word-split or globbed)
tty_cursor_right(){ tty_esc "${1}C"; }
# Print a string horizontally centred on a given terminal row.
#   $1 - text to print (emitted with one space of padding on each side)
#   $2 - row to print on (defaults to 1)
print_center()
{
    local _width _cols
    local _str=$1
    local _row=$2
    # Assign separately from `local` so a failing `tput` is not masked, and
    # fall back to 80 columns when the width cannot be determined.
    _width=$(tput cols 2>/dev/null) || _width=
    _cols=$(( (${_width:-80} - ${#_str}) / 2 ))
    # Text wider than the terminal would yield a negative column; clamp it.
    (( _cols < 0 )) && _cols=0
    tty_cursor_locate "${_cols}" "${_row:-1}"
    printf "%s\n" " ${_str} "
}
# Print a heading and one line per network adapter that has an address.
#   $1 - horizontal origin (column) used for cursor placement; 0 when empty
# Side effect: the global iface_line_count is reset and then incremented once
# per adapter line printed.
# Relies on get_net_adapter, get_net_addr, print_fill and print_line, which
# are defined elsewhere.
show_net_adapter()
{
local _addr _iface _count
local _origin=$1
# Reset the global counter of printed adapter lines.
iface_line_count=
# Heading goes to column _origin, row 4.
tty_cursor_locate ${_origin:-0} 4
printf "%s\n" "Current connected adapter(s):"
for _iface in $(get_net_adapter);do
# The interface named "lo" is skipped.
if [[ "${_iface}" != "lo" ]];then
_addr=$(get_net_addr ${_iface})
# Adapters for which get_net_addr prints nothing are not listed.
test -z "${_addr}" && continue
let _count+=1
let iface_line_count+=1
# _count alternates between 1 and unset, so every second listed adapter is
# preceded by ${tty_rever} (presumably a reverse-video sequence -- confirm).
if [[ ${_count} != 1 ]];then
unset _count
printf '%s' "${tty_rever}"
fi
tty_cursor_right ${_origin:-0}
print_fill 50 ${_iface} ${_addr:--}
# NOTE(review): tty_reset is used as the printf format string here, unlike
# tty_rever above -- confirm it never contains '%'.
printf "${tty_reset}"
fi
done
# NOTE(review): line_origin is not set in this function; presumably a global.
print_line -s "=" ${line_origin}
}
as above, I should locate the cursor before I print something.
BTW
I use trap "myfunc" WINCH, it only works once. when I try again to change my crt. size, it doesn't work.
Not really sure what you mean by "in default position always" (either indentation or overwriting the text), but the following function can do both.
#!/bin/sh
# preent - wrap a text to the terminal width and print it, optionally
# indenting from a given line onwards and erasing earlier output first.
#
# The text is the concatenation of arguments 6 through 10 and is handed to awk
# through the `text` environment variable; the width is taken from $COLUMNS.
# awk looks for a space to break at within the last quarter of each line
# (lt = col/4) and hard-breaks at the full width when it finds none.
# Lines from number $1 onwards are prefixed with $2 spaces.
#
# With $5=true awk exits with `ln-1+cp` (lines printed plus the running
# cursor count), and that status is stored in _CURSOR_POS for the next call.
# NOTE(review): inside the `then` branch `$?` is the status of the `if`
# condition (0), so a non-zero count only ever arrives through `else`.
preent () { # $1=indent start line, $2=spaces, $3=reset cursor, $4=update (1=itself, 2=delete previous lines, 0=newline), $5=save cursor, $6=text
if text="$6""$7""$8""$9""${10}" awk -v col="$COLUMNS" -v cp="$_CURSOR_POS" -v id="$1" -v spc="$2" -v rc="$3" -v u="$4" -v sc="$5" '
BEGIN { t=ENVIRON["text"]; lt=sprintf("%d",col/4); idx=1; len=length(t); y=0; sp=sprintf("% " spc "s",""); delete A;
for(ln=1;idx<len;ln++) { if(ln==id) { col=col-spc; y=1 } bd=col-lt; f=0
for(i=idx+col-1;i>bd;i--) { c=substr(t,i,1);
if(c==" ") {
if(y) { A[ln]=sprintf("%s%s",sp,substr(t,idx,i-idx)); idx=i+1; f=1; break
} else { A[ln]=substr(t,idx,i-idx); idx=i+1; f=1; break }
} else if(c=="") {
if(y) { A[ln]=sprintf("%s%s",sp,substr(t,idx,i-idx)); idx=i; f=1; break
} else { A[ln]=substr(t,idx,i-idx); idx=i; f=1; break; }
}
}
if(!f) {
if(y) { A[ln]=sprintf("%s%s",sp,substr(t,idx,col)); idx=idx+col
} else { A[ln]=substr(t,idx,col); idx=idx+col }
}
} if(rc=="true") cp=0;
if(u=="1") { for(i=1;i<ln;i++) printf("\x1B[1A\x1B[K"); cp=cp+1-i
} else if(u=="2") { for(i=0;i<cp;i++) printf("\x1B[1A\x1B[K"); cp=0 }
for(i=1;i<ln;i++) printf("%s\n",A[i]);
if(sc=="true") { exit ln-1+cp } else { exit 0 }
}'; then _CURSOR_POS="$?"; else _CURSOR_POS="$?"; fi
}
Example 1 (Long text, 5 spaces indent, indent starts from 2nd line)
preent 2 5 'false' 0 'false' 'You never say good-bye Handong-an monghani udukoni anja Dashi saenggakhatjiman ' \
'Momchul sun optkesso Ontong kudae saenggak hal subakke omnun ' \
"Nae jashini miwo Don't you let me go Baby don't you let me down"
Output
You never say good-bye Handong-an monghani
udukoni anja Dashi saenggakhatjiman Momchul
sun optkesso Ontong kudae saenggak hal
subakke omnun Nae jashini miwo Don't you
let me go Baby don't you let me down
Example 2 (Long text with no spaces)
preent 2 5 'false' 0 'false' 'Youneversaygood-byeHandong-anmonghaniudukonianjaDashisaenggakhatjiman' \
'MomchulsunoptkessoOntongkudaesaenggakhalsubakkeomnun' \
"NaejashinimiwoDon'tyouletmegoBabydon'tyouletmedown"
Output
Youneversaygood-byeHandong-anmonghaniudukonianjaD
ashisaenggakhatjimanMomchulsunoptkessoOntong
kudaesaenggakhalsubakkeomnunNaejashinimiwoDo
n'tyouletmegoBabydon'tyouletmedown

SystemStackError: when parsing SCIM 2.0 filter query using Parslet

I am writing a SCIM 2.0 filter parser using Parslet. When I try to parse the following query, I end up with a SystemStackError.
'title pr or userType eq "Intern"'
I have converted the ABNF notation from https://www.rfc-editor.org/rfc/rfc7644#page-21 into a Parslet parser shown in the example code.
# Parslet grammar for SCIM 2.0 filter expressions, transcribed rule by rule
# from the ABNF quoted in the comments below.
#
# NOTE(review): `filter` lists `logical_expression` as an alternative and
# `logical_expression` starts with `filter`, so the two rules can re-enter
# each other without consuming any input (left recursion).
class Filter < Parslet::Parser
root :filter
# FILTER = attrExp / logExp / valuePath / *1"not" "(" FILTER ")"
rule(:filter) do
attribute_expression | logical_expression | value_path | not_op >> lparen >> filter >> rparen
end
# valuePath = attrPath "[" valFilter "]" ; FILTER uses sub-attributes of a parent attrPath
rule(:value_path) do
attribute_path.as(:attribute) >> lbracket >> value_filter >> rbracket
end
# valFilter = attrExp / logExp / *1"not" "(" valFilter ")"
rule(:value_filter) do
attribute_expression | logical_expression | not_op >> lparen >> value_filter >> rparen
end
# attrExp = (attrPath SP "pr") / (attrPath SP compareOp SP compValue)
rule(:attribute_expression) do
(attribute_path.as(:attribute) >> space >> presence) | attribute_path.as(:attribute) >> space >> comparison_operator.as(:comparison_operator) >> space >> comparison_value.as(:comparison_value)
end
# logExp = FILTER SP ("and" / "or") SP FILTER
# NOTE(review): begins with `filter` itself -- see the class-level note.
rule(:logical_expression) do
filter >> space >> (and_op | or_op) >> space >> filter
end
# compValue = false / null / true / number / string ; rules from JSON (RFC 7159)
rule(:comparison_value) do
falsey | null | truthy | number | string
end
# compareOp = "eq" / "ne" / "co" / "sw" / "ew" / "gt" / "lt" / "ge" / "le"
rule(:comparison_operator) do
equal | not_equal | contains | starts_with | ends_with |
greater_than | less_than | less_than_equals | greater_than_equals
end
# attrPath = [URI ":"] ATTRNAME *1subAttr ; SCIM attribute name ; URI is SCIM "schema" URI
rule(:attribute_path) do
(uri >> colon).repeat(0, 1) >> attribute_name >> sub_attribute.repeat(0, 1)
end
# ATTRNAME = ALPHA *(nameChar)
rule(:attribute_name) do
alpha >> name_character.repeat(0, nil)
end
# nameChar = "-" / "_" / DIGIT / ALPHA
rule(:name_character) { hyphen | underscore | digit | alpha }
# subAttr = "." ATTRNAME ; a sub-attribute of a complex attribute
rule(:sub_attribute) { dot >> attribute_name }
# uri = 1*ALPHA 1*(":" 1*ALPHA)
# The generic form is commented out; only the fixed SCIM schema URNs listed
# here (core User/Group, or an extension with a version) are accepted.
rule(:uri) do
# alpha.repeat(1, nil) >> (colon >> (alpha.repeat(1, nil) | version)).repeat(1, nil)
str('urn:ietf:params:scim:schemas:') >> (
str('core:2.0:User') |
str('core:2.0:Group') | (
str('extension') >>
colon >>
alpha.repeat(1) >>
colon >>
version >>
colon >>
alpha.repeat(1)
)
)
end
# Keyword operators, each captured under its own key.
rule(:presence) { str('pr').as(:presence) }
rule(:and_op) { str('and').as(:and) }
rule(:or_op) { str('or').as(:or) }
# Zero or one "not": this rule is already optional and may match nothing.
rule(:not_op) { str('not').repeat(0, 1).as(:not) }
rule(:falsey) { str('false').as(:false) }
rule(:truthy) { str('true').as(:true) }
rule(:null) { str('null').as(:null) }
# JSON-style number: optional sign, integer part without leading zeros,
# optional fraction and optional exponent (each needing at least one digit).
rule(:number) do
str('-').maybe >> (
str('0') | (match('[1-9]') >> digit.repeat)
) >> (
str('.') >> digit.repeat(1)
).maybe >> (
match('[eE]') >> (str('+') | str('-')).maybe >> digit.repeat(1)
).maybe
end
# Comparison operator keywords.
rule(:equal) { str('eq') }
rule(:not_equal) { str('ne') }
rule(:contains) { str('co') }
rule(:starts_with) { str('sw') }
rule(:ends_with) { str('ew') }
rule(:greater_than) { str('gt') }
rule(:less_than) { str('lt') }
rule(:greater_than_equals) { str('ge') }
rule(:less_than_equals) { str('le') }
# Double-quoted string; a backslash escapes the character that follows it.
rule(:string) do
quote >> (str('\\') >> any | str('"').absent? >> any).repeat >> quote
end
# Single-character tokens and character classes.
rule(:lparen) { str('(') }
rule(:rparen) { str(')') }
rule(:lbracket) { str('[') }
rule(:rbracket) { str(']') }
rule(:digit) { match('\d') }
rule(:quote) { str('"') }
rule(:single_quote) { str("'") }
rule(:space) { match('\s') }
rule(:alpha) { match['a-zA-Z'] }
rule(:dot) { str('.') }
rule(:colon) { str(':') }
rule(:hyphen) { str('-') }
rule(:underscore) { str('_') }
# Schema version of the form <digit>.<digit>, e.g. "2.0".
rule(:version) { digit >> dot >> digit }
end
I am expecting that Parslet should be able to handle the recursion safely instead of raising a SystemStackError. The base atom sets the def cached? to return true by default. If I monkey patch the Atom base class to return false from the cached? method, then I am able to parse this query. I am not sure what I am doing wrong.
Always consume something before you recurse.
For example:
Don't define a list of numbers as
NumList = NumList >> "," >> NumList | Number
Define it as
NumList = Number >> ("," >> NumList).maybe
or even
NumList = Number >> ("," >> Number).repeat(0)
So for logical_expression...
# logExp = FILTER SP ("and" / "or") SP FILTER
rule(:logical_expression) do
filter >> space >> (and_op | or_op) >> space >> filter
end
You need the first filter to be something that can't be a logical_expression.
# FILTER = attrExp / logExp / valuePath / *1"not" "(" FILTER ")"
# A logical expression is tried first; it starts with a filter_atom, so some
# input is always consumed before the grammar recurses back into `filter`.
rule(:filter) do
  logical_expression | filter_atom
end
# Any filter that is NOT a logical expression. `not_op` already matches zero
# or one "not", so no separate optional variant is needed.
rule(:filter_atom) do
  (not_op >> lparen >> filter >> rparen) | attribute_expression | value_path
end
# logExp = FILTER SP ("and" / "or") SP FILTER
# The left operand is restricted to filter_atom to avoid left recursion; the
# right operand may be any filter, which chains "a and b or c" to the right.
rule(:logical_expression) do
  filter_atom >> space >> (and_op | or_op) >> space >> filter
end

Boost Spirit failing on empty string input

I am trying to parse the following string and extract the parts inside the parenthesis.
This string fails:
_FIND('Something', '')_
Should return
part1 = 'Something'
part2 = ''
This string passes:
_FIND('Something', '*')_
Returns
part1 = 'Something'
part2 = '*'
I assume the problem lies with the "quoted_string"
// Grammar constructor: builds the `quoted_string` and `start` rules for
// inputs of the form _FIND('a', 'b')_ with an optional "$(" ... ")" wrapper.
find_parser() : find_parser::base_type(start)
{
using qi::lit;
using qi::lexeme;
using standard_wide::char_;
/// simple quoted string.
// `+` requires one or more characters between the quotes, so an empty
// string '' does not match this rule.
quoted_string %= lexeme['\'' >> +(char_ - '\'') >> '\''];
start %=
-(lit("$(")) // optional
>> lit("_FIND")
>> '('
>> quoted_string
>> -(',' >> quoted_string) // 2nd parameter optional
>> ")_"
>> -(lit(")")) // optional
;
}
I tried adding an "empty" string lexeme like this, but it does not work.
quoted_string %= lexeme['\'' >> +(char_ - '\'') >> '\''];
empty_quoted_string %= lexeme['\'' >> +(qi::space - '\'') >> '\''];
start %=
lit("_FIND")
>> '('
>> (quoted_string|empty_quoted_string)
>> -(',' >> (quoted_string|empty_quoted_string)) // 2nd parameter optional
>> ")_"
;
I know it must be a simple thing, but I cannot put my finger on it.
Thanks for any inputs, hints or tips.
lexeme['\'' >> +(char_ - '\'') >> '\''];
+p means that p must match one-or-more times. If an empty string must be accepted, use the Kleene-star operator, which allows zero-or-more matches.
lexeme['\'' >> *(char_ - '\'') >> '\''];
Live Demo
Some inefficiencies/style issues resolved
Also, an incorrectness, where "$(_FIND('')" or "_FIND('')" would parse as "correct"
Live On Coliru
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
#include <iostream>
#include <string>
#include <utility>
using Params = std::pair<std::string, std::string>;
namespace qi = boost::spirit::qi;
// Grammar for _FIND('first'[, 'second'])_ with an optional "$(" ... ")"
// wrapper. The attribute is a Params pair holding the two quoted strings; the
// second one is left empty when the optional parameter is absent.
template <typename It>
struct find_parser : qi::grammar<It, Params()> {
find_parser() : find_parser::base_type(start)
{
using namespace qi;
// The skipper is applied inside the grammar, so callers use plain parse().
// Either the wrapped form "$(" find ")" or the bare form is accepted; the
// closing ")" is only accepted together with the opening "$(".
start = skip(space) [ "$(" >> find >> ")" | find ];
find
= '_' >> lit("FIND") >> lit('(')
>> quoted_string >> -(',' >> quoted_string) // 2nd parameter optional
>> ')' >> '_'
;
// Kleene star: zero or more non-quote characters, so '' is accepted.
quoted_string = "'" >> *~char_("'") >> "'";
BOOST_SPIRIT_DEBUG_NODES((start)(find)(quoted_string))
}
private:
qi::rule<It, Params()> start;
// rules with skipper
qi::rule<It, Params(), qi::space_type> find;
// implicit lexemes
// (declared without a skipper, so whitespace inside the quotes is kept)
qi::rule<It, std::string()> quoted_string;
};
int main() {
using It = std::string::const_iterator;
find_parser<It> const p;
for (std::string const input : {
"_FIND('Something', 'Something else')_",
"_ FIND('Something', 'Something else') _",
"$(_FIND('Something', 'Something else')_)",
"$( _FIND( 'Something', 'Something else' )_ )",
// second arg empty
"_FIND('Something', '')_",
"_ FIND('Something', '') _",
"$(_FIND('Something', '')_)",
"$( _FIND( 'Something', '' )_ )",
// optional args omitted
"_FIND('Something')_",
"_ FIND('Something') _",
"$(_FIND('Something')_)",
"$( _FIND( 'Something' )_ )",
})
{
std::cout << "-------- " << input << " ------------\n";
It f = input.begin(), l = input.end();
Params parsed;
if (parse(f, l, p, parsed))
std::cout << "Parsed: '" << parsed.first << "', '" << parsed.second << "'\n";
else
std::cout << "Parsing failed\n";
if (f!=l)
std::cout << " -- Remaining unparsed: '" << std::string(f,l) << "'\n";
}
}

How to test for function return false?

Can someone tell me what
if(!IsNumeric(aNumber))
{
do something
}
is in valid VBScript? I've tried
!IsNumeric(aNumber)
already.
The logical negation operator is called Not in VBScript:
>> b = 1 = 1
>> WScript.Echo TypeName(b), CStr(b), CStr(Not b)
>>
Boolean True False
>>
2nd sample:
>> For Each e In Split("1 a 2")
>> If Not IsNumeric(e) Then
>> WScript.Echo e, "not a number"
>> Else
>> WScript.Echo e, "a numerical string"
>> End If
>> Next
>>
1 a numerical string
a not a number
2 a numerical string
>>

Ruby parse through string

I have a string that looks like the one below, and I have to remove everything between the first bracket and the last bracket. All bets are off on what's in between (regarding other brackets). What would be the best approach? Thanks.
'[
{ "foo":
{"bar":"foo",
"bar": {
["foo":"bar", "foo":"bar"]
}
}
}
],
"foo":"bar","foo":"bar"'
result:
',
"foo":"bar","foo":"bar"'
If your data really does look like that and you don't have any brackets in the bit at the end then:
s.gsub(/\[.*\]/m, '')
If you want to be a little more paranoid, then you can look for ], followed by an end-of-line:
s.gsub(/\[.*\],$/m, ',')
Hard to say any more than that without a specification of your data format.
Here you go:
string.gsub(/\[.*\]/m, '')
You need to use the m flag for the . to match newline characters. .* is already greedy, so it will match any number of brackets in between.
It's difficult to tell what you're trying to achieve, but that looks like JSON to me so it would probably be much easier to parse it and then manipulate it that way.
you need multi-line mode:
str.gsub(/\[.*\]/m, '')
You could use something like Parslet to write a parser.
Here's an example I wrote, based on the JSON grammar from http://www.json.org/
require 'parslet'
#This needs a few more 'as' calls to annotate the output
# Parslet grammar for a comma-separated list of JSON values, following the
# grammar published on json.org. Each top-level value is captured under the
# string key 'value'.
class JSONParser < Parslet::Parser
  rule(:space) { match('[\s\n]').repeat(1) }
  rule(:space?) { space.maybe }
  rule(:digit) { match('[0-9]') }
  rule(:hexdigit) { match('[0-9a-fA-F]') }
  # Optional sign, integer part without leading zeros, optional fraction and
  # optional exponent. The fraction and exponent each need at least one
  # digit, so inputs such as "1." or "1e" are not accepted as numbers.
  rule(:number) do
    space? >> str('-').maybe >>
      (str('0') | (match('[1-9]') >> digit.repeat)) >>
      (str('.') >> digit.repeat(1)).maybe >>
      ((str('e') | str('E')) >> (str('+') | str('-')).maybe >> digit.repeat(1)).maybe
  end
  # Backslash followed by one of " \ / b f n r t, or by u and four hex digits.
  rule(:escaped_character) { str('\\') >> (match('["\\\\/bfnrt]') | (str('u') >> hexdigit.repeat(4,4))) }
  # Double-quoted string of escapes and any characters other than " and \.
  rule(:string) { space? >> str('"') >> (match('[^\"\\\\]') | escaped_character).repeat >> str('"') }
  rule(:value) { space? >> (string | number | object | array | str('true') | str('false') | str('null')) }
  rule(:pair) { string >> str(":") >> value }
  rule(:pair_list) { pair >> (space? >> str(',') >> pair).repeat }
  rule(:object) { str('{') >> space? >> pair_list.maybe >> space? >> str('}') }
  rule(:value_list) { value >> (space? >> str(',') >> value).repeat }
  rule(:array) { space? >> str('[') >> space? >> value_list.maybe >> space? >> str(']') >> space? }
  # Root: one value followed by any number of ", value" repetitions.
  rule(:json) { value.as('value') >> (space? >> str(',') >> value.as('value')).repeat }
  root(:json)
end
# I've changed your doc to be a list of JSON values
doc = '[
{ "foo":
{"bar":"foo",
"bar": [
{"foo":"bar", "foo":"bar"}
]
}
}
],
{"foo":"bar"},{"foo":"bar"}'
puts JSONParser.new.parse(doc)[1..-1].map{|value| value["value"]}.join(",")
# => {"foo":"bar"},{"foo":"bar"}
However as your document isn't valid JSON (as far as I know).. then you can change the above...
require 'parslet'
# Relaxed variant of a JSON grammar for the asker's not-quite-JSON document:
# `"key":value` pairs are accepted wherever a plain value is, i.e. inside
# arrays, inside objects and at the top level. Each top-level item is
# captured under the string key 'value'.
class YourFileParser < Parslet::Parser
  rule(:space) { match('[\s\n]').repeat(1) }
  rule(:space?) { space.maybe }
  rule(:digit) { match('[0-9]') }
  rule(:hexdigit) { match('[0-9a-fA-F]') }
  # Optional sign, integer part without leading zeros, optional fraction and
  # optional exponent. The fraction and exponent each need at least one
  # digit, so inputs such as "1." or "1e" are not accepted as numbers.
  rule(:number) do
    space? >> str('-').maybe >>
      (str('0') | (match('[1-9]') >> digit.repeat)) >>
      (str('.') >> digit.repeat(1)).maybe >>
      ((str('e') | str('E')) >> (str('+') | str('-')).maybe >> digit.repeat(1)).maybe
  end
  # Backslash followed by one of " \ / b f n r t, or by u and four hex digits.
  rule(:escaped_character) { str('\\') >> (match('["\\\\/bfnrt]') | (str('u') >> hexdigit.repeat(4,4))) }
  # Double-quoted string of escapes and any characters other than " and \.
  rule(:string) { space? >> str('"') >> (match('[^\"\\\\]') | escaped_character).repeat >> str('"') }
  rule(:value) { space? >> (string | number | object | array | str('true') | str('false') | str('null')) }
  rule(:pair) { string >> str(":") >> value }
  # `pair` is tried before `value` so that `"k":v` is not cut short after "k".
  rule(:pair_list) { (pair|value) >> (space? >> str(',') >> (pair|value)).repeat }
  rule(:object) { str('{') >> space? >> pair_list.maybe >> space? >> str('}') }
  rule(:value_list) { (pair|value) >> (space? >> str(',') >> (pair|value)).repeat }
  rule(:array) { space? >> str('[') >> space? >> value_list.maybe >> space? >> str(']') >> space? }
  # Root: one item followed by any number of ", item" repetitions.
  rule(:yourdoc) { (pair|value).as('value') >> (space? >> str(',') >> (pair|value).as('value')).repeat }
  root(:yourdoc)
end
doc = '[
{ "foo":
{"bar":"foo",
"bar": {
["foo":"bar", "foo":"bar"]
}
}
}
],
"foo":"bar","foo":"bar"'
puts YourFileParser.new.parse(doc)[1..-1].map{|value| value["value"]}.join(",")

Resources