Suppose:
The events are A perceived, B perceived or Ping perceived.
A possible sequence of events could be A,A,A,B,Ping.
The states are InA, InB, PingMissing.
The rules are:
No Ping anywhere in the events -> PingMissing.
A -> InA
B -> InB
(Only Ping events -> InA)
I would like to have one recommended action/ state.
I see three possibilities for the transition function f(s,e)->s:
Create a pseudo event like PingMissing perceived. Hence everything is handled in one function.
Two separate transition functions and combining the result.
One transition function with two states as a tuple and combining the result.
Any thoughts? Best practices?
Implementation of 2. in F# (language doesn't really matter):
type Event =
    | A
    | B
    | Ping

type State1 =
    | InA
    | InB

type State2 =
    | PingReceived
    | PingMissing

type StateCombined =
    | InA'
    | InB'
    | PingMissing'

let f1 s e : State1 =
    match s, e with
    | _, A -> InA
    | _, B -> InB
    | _, _ -> s

let f2 s e : State2 =
    match s, e with
    | _, Ping -> PingReceived
    | _, _ -> s

let fCombined events =
    let finalState1 = events |> Seq.fold f1 InA
    let finalState2 = events |> Seq.fold f2 PingMissing
    match finalState1, finalState2 with
    | _, PingMissing -> PingMissing'
    | InA, _ -> InA'
    | InB, _ -> InB'
fCombined [A;A;A;B]
// PingMissing'
fCombined [A;A;A;B;Ping]
// InB'
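For comparison, option 3 (one transition function over a tuple of both substates, reusing f1 and f2 from above) might look like this sketch:
// Option 3 (sketch): fold a single function over a tuple of substates
let f (s1, s2) e = f1 s1 e, f2 s2 e

let fCombined' events =
    let s1, s2 = events |> Seq.fold f (InA, PingMissing)
    match s1, s2 with
    | _, PingMissing -> PingMissing'
    | InA, _ -> InA'
    | InB, _ -> InB'

fCombined' [A; A; A; B; Ping]
// InB'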
I would tend to model the unified state as a tuple of the two substates (broadly in this case: "has a ping been received" and "if a ping has been received, was the last perception an A or a B"). A convenience function can then distill that into a recommendation.
This has the advantage of not reusing the sequence of events, so is a bit more compatible with a view of the events as a stream: at the very least this results in not having to refetch the events from an event store or keep the entire sequence of events in memory.
For example, in Scala (and explicitly modeling the situation where no A nor B has been perceived yet):
sealed trait Event
case object A extends Event
case object B extends Event
case object Ping extends Event

sealed trait PingState
case object PingReceived extends PingState // Don't strictly need...
case object PingMissing extends PingState

sealed trait LastPerceivedState
case object InA extends LastPerceivedState
case object InB extends LastPerceivedState
// ... could just as well be (Option[PingMissing], Option[LastPerceivedState])...
type State = (PingState, Option[LastPerceivedState])
// ... in which case, this is (Some(PingMissing), None)
val InitialState: State = PingMissing -> None
def distilledState(state: State): Either[PingMissing.type, Option[LastPerceivedState]] =
  state match {
    case (PingMissing, _) => Left(PingMissing)
    case (_, lpsOpt) => Right(lpsOpt)
  }
The transition function could then be written directly (taking advantage of the fact that the events can be partitioned into events which affect PingState or LastPerceivedState but never both):
val transitionF = { (state: State, evt: Event) =>
  val (ps, lpsOpt) = state

  evt match {
    case A    => ps -> Some(InA)
    case B    => ps -> Some(InB)
    case Ping => PingReceived -> lpsOpt
  }
}
In the event that there are events which affect both, then decomposing into subhandlers might simplify the code (at the expense of some possibly redundant invocations):
val pingStateTransition = { (ps: PingState, evt: Event) =>
  if (ps == PingReceived) PingReceived
  else if (evt == Ping) PingReceived
  else ps
}

val lastPerceivedStateTransition = { (lpsOpt: Option[LastPerceivedState], evt: Event) =>
  evt match {
    case A => Some(InA)
    case B => Some(InB)
    case _ => lpsOpt
  }
}

val transitionF = { (state: State, evt: Event) =>
  pingStateTransition(state._1, evt) -> lastPerceivedStateTransition(state._2, evt)
}
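Either way, folding a stream of events and distilling a recommendation then looks like this (a small usage sketch reusing the definitions above):
val events = List(A, A, A, B, Ping)
val finalState = events.foldLeft(InitialState)(transitionF)
distilledState(finalState) // Right(Some(InB))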
What are the current community preferred libraries to parse and work with YAML and how do you use them to serialize and deserialize a record like this:
type book = {
title: string;
authors: string list
}
This is how I got string -> record and back.
$ opam update
$ opam install yaml ppx_deriving_yaml
Update dune with the preprocess clause:
; `dune` file
(executable
 (name main)
 (libraries yaml)
 (preprocess
  (pps ppx_deriving_yaml)))
Short version:
(* Assumes result helpers in scope, e.g. from Base/Core:
   `map_error ~f` over result and the `>>=` bind on result. *)
let serialize_book (book_rec : book) : (string, string) result =
  let res = Yaml.to_string (book_to_yaml book_rec) in
  map_error ~f:(fun (`Msg m) -> m) res

let deserialize_book (book_str : string) : (book, string) result =
  let res =
    Yaml.of_string book_str >>= fun yaml_value -> book_of_yaml yaml_value
  in
  map_error ~f:(fun (`Msg m) -> m) res
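A quick round trip with these helpers (a sketch; assumes the book record with [@@deriving yaml] shown below is in scope):
let () =
  let b = { title = "Cryptonomicon"; authors = [ "Neal Stephenson" ] } in
  (match serialize_book b with
   | Ok s -> print_string s
   | Error e -> prerr_endline ("serialize error: " ^ e));
  match deserialize_book "title: Cryptonomicon\nauthors:\n- Neal Stephenson" with
  | Ok b' -> print_endline b'.title
  | Error e -> prerr_endline ("deserialize error: " ^ e)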
More verbose/descriptive version:
(* Define a record *)
(* `[@@deriving yaml]` generates a bunch of functions, one being `book_to_yaml` to convert the record into a Yaml type, another `book_of_yaml` to convert the Yaml type back into a record *)
type book = {
  title: string;
  authors: string list
} [@@deriving yaml]
let serialize =
  let (v : book) = { title = "Cryptonomicon"; authors = [ "Neal Stephenson" ] } in
  (* `book_to_yaml` converts the record into a `Yaml.value` *)
  let yaml_structure = book_to_yaml v in
  (* `Yaml.to_string` converts a `yaml`-typed data structure to a string *)
  match Yaml.to_string yaml_structure with
  | Ok s ->
    print_endline "Serialize:";
    print_endline s
  | Error (`Msg e) -> print_endline e
let deserialize =
  let str = "title: Cryptonomicon\nauthors:\n- Neal Stephenson" in
  (* `Yaml.of_string` converts a string to a `yaml res` data structure, where `res` is a Result *)
  match Yaml.of_string str with
  | Ok yaml_value ->
    (* `book_of_yaml` is generated by `[@@deriving yaml]`; it converts the `yaml` type to `book res`, where `res` is a Result *)
    (match book_of_yaml yaml_value with
     | Ok t ->
       print_endline "Deserialize:";
       print_endline ("Title: " ^ t.title);
       print_endline ("Authors: " ^ String.concat ", " t.authors)
     | Error (`Msg e) -> print_endline ("Error - convert to book: " ^ e))
  | Error (`Msg e) -> print_endline ("Error - parsing: " ^ e)
In the Protocol Buffers Version 3 Language Specification, the EBNF syntax for an option is:
option = "option" optionName "=" constant ";"
optionName = ( ident | "(" fullIdent ")" ) { "." ident }
constant = fullIdent | ( [ "-" | "+" ] intLit ) | ( [ "-" | "+" ] floatLit ) | strLit | boolLit
ident = letter { letter | decimalDigit | "_" }
fullIdent = ident { "." ident }
strLit = ( "'" { charValue } "'" ) | ( '"' { charValue } '"' )
charValue = hexEscape | octEscape | charEscape | /[^\0\n\\]/
hexEscape = '\' ( "x" | "X" ) hexDigit hexDigit
octEscape = '\' octalDigit octalDigit octalDigit
charEscape = '\' ( "a" | "b" | "f" | "n" | "r" | "t" | "v" | '\' | "'" | '"' )
Or in plain English, an option may be assigned a dotted.notation.identifier, an integer, a float, a boolean, or a single- or double-quoted string, which MUST NOT have "raw" newline characters.
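For instance, all of the following fit that grammar (illustrative examples of mine, not taken from any particular .proto file):
option java_package = "com.example.project"; // strLit
option optimize_for = SPEED;                 // fullIdent
option (my.custom.opt) = -1.5;               // custom option, signed floatLit
option deprecated = true;                    // boolLit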
And yet, I'm encountering .proto files in various projects, such as grpc-gateway and googleapis, where the right-hand side of the assignment is not quoted and spans multiple lines. For example, in googleapis/google/api/http.proto there is this service definition in a comment block:
// service Messaging {
//   rpc UpdateMessage(Message) returns (Message) {
//     option (google.api.http) = {
//       patch: "/v1/messages/{message_id}"
//       body: "*"
//     };
//   }
// }
In other files, the use of semicolons (and occasionally commas) as separators seems somewhat arbitrary, and I have also seen keys repeated, which in JSON or JavaScript would result in loss of data due to overwriting.
Are there any canonical extensions to the language specification, or are people just Microsofting? (Yes, that's a verb now.)
I posted a similar question on the Protocol Buffers Google Group, and received a private message from a fellow at Google stating the following:
This syntax is correct and valid for setting fields on a proto option field which is itself a field referencing a message type. This form is based on the TextFormat spec which I'm unclear if its super well documented, but here's an implementation of it: https://developers.google.com/protocol-buffers/docs/reference/cpp/google.protobuf.text_format
When I have time, I will try to unpack what I learn from analyzing TextFormat.
UPDATE:
I received an answer on the Groups forum:
I think for better or worse, "what protoc implements" takes precedence over whatever the spec says. The spec came later and as far as I know we have not put a lot of effort into ensuring that it comprehensively matches the format that protoc expects. I believe the syntax you are looking at is missing from the .proto file format spec but is mentioned here as the "aggregate syntax."
The link above is to a section titled Custom Options in the Language Guide (proto2) page. If you scroll all the way to the end of that section, there is the following snippet that mentions TextFormat:
message FooOptions {
  optional int32 opt1 = 1;
  optional string opt2 = 2;
}

extend google.protobuf.FieldOptions {
  optional FooOptions foo_options = 1234;
}

// usage:
message Bar {
  optional int32 a = 1 [(foo_options).opt1 = 123, (foo_options).opt2 = "baz"];
  // alternative aggregate syntax (uses TextFormat):
  optional int32 b = 2 [(foo_options) = { opt1: 123 opt2: "baz" }];
}
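This also accounts for the separator variation noted above: in TextFormat, fields in an aggregate value are separated by whitespace, a trailing comma or semicolon after a field is optional, and repeating a key appends to a repeated field rather than overwriting it. As an illustration (my example, so acceptance is ultimately up to protoc), the following field options should all be equivalent:
// all three aggregate values are equivalent in TextFormat:
optional int32 c = 3 [(foo_options) = { opt1: 123 opt2: "baz" }];
optional int32 d = 4 [(foo_options) = { opt1: 123, opt2: "baz", }];
optional int32 e = 5 [(foo_options) = {
  opt1: 123;
  opt2: "baz";
}];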
I've been trying to change the match_char function to accept only JSON messages when reading data from a socket.
I have two implementations: one does not work, and the other works but I don't think it's efficient.
1 - First approach (working)
typedef boost::asio::buffers_iterator<boost::asio::streambuf::const_buffers_type> buffer_iterator;

static std::pair<buffer_iterator, bool> match_json2(const buffer_iterator begin,
                                                    const buffer_iterator end) {
    buffer_iterator i = begin;
    while (i != end) {
        if ((*i == ']') || (*i == '}')) {
            return std::make_pair(i, true);
        }
        ++i;
    }
    return std::make_pair(i, false);
}
With this definition, I read in a loop and reconstruct the JSON. This version works, but if I receive a message that is not valid JSON, I stay in the loop, can't clear tmp_response, and never recover from it...
std::string read_buffer_string() {
    std::string response;
    bool keepReading = true;
    while (keepReading) {
        std::string tmp_response;
        async_read_until(s, ba::dynamic_buffer(tmp_response), match_json2, yc);
        if (!tmp_response.empty()) {
            response += tmp_response;
            if (nlohmann::json::accept(response)) {
                keepReading = false;
            }
        }
    }
    return response;
}
2 - Second approach (not working). Ideally I would like something like this one. This implementation doesn't work because the begin iterator doesn't always point to the start of the message (I guess some data has already been transferred to the buffer), and therefore match_json returns invalid values.
static std::pair<buffer_iterator, bool> match_json(const buffer_iterator begin,
                                                   const buffer_iterator end) {
    buffer_iterator i = begin;
    while (i != end) {
        if ((*i == ']') || (*i == '}')) {
            std::string _message(begin, i);
            std::cout << _message << std::endl;
            if (nlohmann::json::accept(_message)) {
                return std::make_pair(i, true);
            }
        }
        ++i;
    }
    return std::make_pair(i, false);
}
And then call it like this:
std::string read_buffer_string() {
    std::string response;
    async_read_until(s, ba::dynamic_buffer(response), match_json, yc);
    return response;
}
Does anybody know a more efficient way to do it?
Thanks in advance! :)
Of course, right after posting my other answer I remembered that Boost has accepted Boost JSON in 1.75.0.
It does stream parsing way more gracefully: https://www.boost.org/doc/libs/1_75_0/libs/json/doc/html/json/ref/boost__json__stream_parser.html#json.ref.boost__json__stream_parser.usage
It actually deals with trailing data as well!
stream_parser p; // construct a parser
std::size_t n; // number of characters used
n = p.write_some( "[1,2" ); // parse some of a JSON
assert( n == 4 ); // all characters consumed
n = p.write_some( ",3,4] null" ); // parse the remainder of the JSON
assert( n == 6 ); // only some characters consumed
assert( p.done() ); // we have a complete JSON
value jv = p.release(); // take ownership of the value
I would also submit that this could be a better match for a CompletionCondition: see https://www.boost.org/doc/libs/1_75_0/doc/html/boost_asio/reference/read/overload3.html
Here's an implementation that I tested with:
template <typename Buffer, typename SyncReadStream>
static size_t read_json(SyncReadStream& s, Buffer buf,
                        boost::json::value& message, boost::json::parse_options options = {})
{
    boost::json::stream_parser p{{}, options};
    size_t total_parsed = 0;

    boost::asio::read(s, buf, [&](boost::system::error_code ec, size_t /*n*/) {
        size_t parsed = 0;
        for (auto& contiguous : buf.data()) {
            parsed += p.write_some(
                boost::asio::buffer_cast<char const*>(contiguous),
                contiguous.size(), ec);
        }
        buf.consume(parsed);
        total_parsed += parsed;
        return ec || p.done(); // true means done
    });

    message = p.release(); // throws if incomplete
    return total_parsed;
}
Adding a delegating overload for streambufs:
template <typename SyncReadStream, typename Alloc>
static size_t read_json(SyncReadStream& s,
                        boost::asio::basic_streambuf<Alloc>& buf,
                        boost::json::value& message,
                        boost::json::parse_options options = {})
{
    return read_json(s, boost::asio::basic_streambuf_ref<Alloc>(buf), message, options);
}
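Usage could then look like this (a sketch; the endpoint, port, and surrounding setup are my assumptions, not part of the demo above):
#include <boost/asio.hpp>
#include <boost/json.hpp>
#include <iostream>

int main() {
    boost::asio::io_context io;
    boost::asio::ip::tcp::socket sock(io);
    sock.connect({boost::asio::ip::make_address("127.0.0.1"), 8989}); // hypothetical server

    std::string storage; // backing store for the dynamic buffer
    boost::json::value msg;
    read_json(sock, boost::asio::dynamic_buffer(storage), msg);
    std::cout << msg << "\n";
}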
Demo Program
This demo program adds the test cases from earlier, as well as a socket client with some benchmark statistics. Arguments:
test to run the tests instead of the socket client
streambuf to use the streambuf overload instead of std::string dynamic buffer
comments to allow comments in the JSON
trailing_commas to allow trailing commas in the JSON
invalid_utf8 to allow invalid utf8 in the JSON
Live On Compiler Explorer¹
With test prints:
----- valid test cases
Testing {} -> Success {}
Testing {"a":4, "b":5} -> Success {"a":4,"b":5}
Testing [] -> Success []
Testing [4, "b"] -> Success [4,"b"]
----- incomplete test cases
Testing { -> (incomplete...)
Testing {"a":4, "b" -> (incomplete...)
Testing [ -> (incomplete...)
Testing [4, " -> (incomplete...)
----- invalid test cases
Testing } -> syntax error
Testing "a":4 } -> Success "a" -- remaining `:4 }`
Testing ] -> syntax error
----- excess input test cases
Testing {}{"a":4, "b":5} -> Success {} -- remaining `{"a":4, "b":5}`
Testing []["a", "b"] -> Success [] -- remaining `["a", "b"]`
Testing {} bogus trailing data -> Success {} -- remaining `bogus trailing data`
With the socket client, some demos:
Mean packet size: 16 in 2 packets
Request: 28 bytes
Request: {"a":4,"b":"5"} bytes
Remaining data: "bye
"
took 0.000124839s, ~0.213899MiB/s
With a large (448MiB) location_history.json:
Mean packet size: 511.999 in 917791 packets
Request: 469908167 bytes
(large request output suppressed)
took 3.30509s, ~135.59MiB/s
¹ linking non-header-only libraries is not supported on Compiler Explorer
TL;DR:
Seriously, just add framing to your wire protocol. E.g. even HTTP responses do this (e.g. via the content length headers, and maybe chunked encoding)
UPDATE:
Instead of hand-rolling, you can go with Boost JSON, as I added in another answer.
The first approach is flawed because you are using async_read_until yet treat the operation as if it were synchronous.
The second problem is that neither json::parse nor json::accept can report the location of a complete/broken parse. This means that you really do need framing in your wire protocol, because you CANNOT detect message boundaries.
The rest of this answer first dives in to show how the limitations of the nlohmann::json library make your task impossible¹.
So even though it's commendable for you to use an existing library, we then look for alternatives.
Making It Work(?)
You could use the approach that Beast uses (http::read(s, buf, m) with an http::message<>). That is: have a reference to the entire buffer.
flat_buffer buf;
http::request<http::empty_body> m;
read(s, buf, m); // s is a SyncStream, like a socket
Here, read is a composed operation over the message as well as the buffer. This makes it easy to check the completion criteria. In our case, let's make a reader that also serves as a match-condition:
template <typename DynamicBuffer_v1>
struct JsonReader {
    DynamicBuffer_v1 _buf;
    nlohmann::json message;

    JsonReader(DynamicBuffer_v1 buf) : _buf(buf) {}

    template <typename It>
    auto operator()(It dummy, It) {
        using namespace nlohmann;
        auto f = buffers_begin(_buf.data());
        auto l = buffers_end(_buf.data());

        bool ok = json::accept(f, l);
        if (ok) {
            auto n = [&] {
                std::istringstream iss(std::string(f, l));
                message = json::parse(iss);
                return iss.tellg(); // detect consumed
            }();
            _buf.consume(n);
            assert(n);
            std::advance(dummy, n);
            return std::pair(dummy, ok);
        } else {
            return std::pair(dummy, ok);
        }
    }
};

namespace boost::asio {
    template <typename T>
    struct is_match_condition<JsonReader<T>> : public boost::true_type {};
}
This is peachy and works on the happy path. But you run into big trouble on edge/error cases:
you can't distinguish incomplete data from invalid data, so you MUST assume that unaccepted input is just incomplete (otherwise you would never wait for data to be complete)
you will wait forever for data to become "valid" if it's actually just invalid, or
worse still: keep reading indefinitely, possibly running out of memory (unless you limit the buffer size; this could lead to a DoS)
perhaps worst of all, if you read more data than the single JSON message (which you cannot in general prevent in the context of stream sockets), the original message will be rejected due to "excess input". Oops
Testing It
Here are the test cases that confirm what the analysis predicted:
Live On Compiler Explorer
#include <boost/asio.hpp>
#include <nlohmann/json.hpp>
#include <iostream>
#include <iomanip>

template <typename Buffer>
struct JsonReader {
    static_assert(boost::asio::is_dynamic_buffer_v1<Buffer>::value);
    Buffer _buf;
    nlohmann::json message;

    JsonReader() = default;
    JsonReader(Buffer buf) : _buf(buf) {}

    template <typename It>
    auto operator()(It dummy, It) {
        using namespace nlohmann;
        auto f = buffers_begin(_buf.data());
        auto l = buffers_end(_buf.data());

        bool ok = json::accept(f, l);
        if (ok) {
            auto n = [&] {
                std::istringstream iss(std::string(f, l));
                message = json::parse(iss);
                return iss.tellg(); // detect consumed
            }();
            _buf.consume(n);
            assert(n);
            //std::advance(dummy, n);
            return std::pair(dummy, ok);
        } else {
            return std::pair(dummy, ok);
        }
    }
};

namespace boost::asio {
    template <typename T>
    struct is_match_condition<JsonReader<T>> : public boost::true_type {};
}
static inline void run_tests() {
    std::vector<std::string> valid {
        R"({})",
        R"({"a":4, "b":5})",
        R"([])",
        R"([4, "b"])",
    },
    incomplete {
        R"({)",
        R"({"a":4, "b")",
        R"([)",
        R"([4, ")",
    },
    invalid {
        R"(})",
        R"("a":4 })",
        R"(])",
    },
    excess {
        R"({}{"a":4, "b":5})",
        R"([]["a", "b"])",
        R"({} bogus trailing data)",
    };

    auto run_tests = [&](auto& cases) {
        for (std::string buf : cases) {
            std::cout << "Testing " << std::left << std::setw(22) << buf;
            bool ok = JsonReader { boost::asio::dynamic_buffer(buf) }
                (buf.begin(), buf.end())
                .second;
            std::cout << " -> " << std::boolalpha << ok << std::endl;
            if (ok && !buf.empty()) {
                std::cout << " -- remaining buffer " << std::quoted(buf) << "\n";
            }
        }
    };

    std::cout << " ----- valid test cases \n";
    run_tests(valid);
    std::cout << " ----- incomplete test cases \n";
    run_tests(incomplete);
    std::cout << " ----- invalid test cases \n";
    run_tests(invalid);
    std::cout << " ----- excess input test cases \n";
    run_tests(excess);
}
template <typename SyncReadStream, typename Buffer>
static void read(SyncReadStream& s, Buffer bufarg, nlohmann::json& message) {
    using boost::asio::buffers_begin;
    using boost::asio::buffers_end;
    JsonReader reader{bufarg};
    read_until(s, bufarg, reader);
    message = reader.message;
}

int main() {
    run_tests();
}
Prints
----- valid test cases
Testing {} -> true
Testing {"a":4, "b":5} -> true
Testing [] -> true
Testing [4, "b"] -> true
----- incomplete test cases
Testing { -> false
Testing {"a":4, "b" -> false
Testing [ -> false
Testing [4, " -> false
----- invalid test cases
Testing } -> false
Testing "a":4 } -> false
Testing ] -> false
----- excess input test cases
Testing {}{"a":4, "b":5} -> false
Testing []["a", "b"] -> false
Testing {} bogus trailing data -> false
Looking For Alternatives
You could roll your own as I did in the past:
Parse a substring as JSON using QJsonDocument
Or we can look at another library that DOES allow us to either detect boundaries of valid JSON fragments OR detect and leave trailing input.
Hand-Rolled Approach
Here's a simplistic translation of that linked answer to more modern Spirit X3:
// Note: first iterator gets updated
// throws on known invalid input (like starting with `]' or '%')
template <typename It>
bool tryParseAsJson(It& f, It l)
{
    try {
        return detail::x3::parse(f, l, detail::json);
    } catch (detail::x3::expectation_failure<It> const& ef) {
        throw std::runtime_error("invalid JSON data");
    }
}
The crucial point is that, in addition to returning true/false, this updates the start iterator according to how far it consumed the input.
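For example (a sketch, assuming the grammar below is in scope):
std::string input = R"({"a":1} trailing)";
auto f = input.begin();
bool ok = tryParseAsJson(f, input.end());
// ok == true; f now points at " trailing"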
namespace JsonDetect {
    namespace detail {
        namespace x3 = boost::spirit::x3;

        static const x3::rule<struct value_> value{"value"};

        static auto primitive_token
            = x3::lexeme[ x3::lit("false") | "null" | "true" ];

        static auto expect_value
            = x3::rule<struct expect_value_> { "expect_value" }
            // array, object, string, number or other primitive_token
            = x3::expect[&(x3::char_("[{\"0-9.+-") | primitive_token | x3::eoi)]
            >> value
            ;

        // 2.4. Numbers
        // Note our spirit grammar takes a shortcut, as the RFC specification is more restrictive:
        //
        //     Numeric values that cannot be represented as sequences of digits
        //     (such as Infinity and NaN) are not permitted.
        //     number        = [ minus ] int [ frac ] [ exp ]
        //     decimal-point = %x2E ; .
        //     digit1-9      = %x31-39 ; 1-9
        //     e             = %x65 / %x45 ; e E
        //     exp           = e [ minus / plus ] 1*DIGIT
        //     frac          = decimal-point 1*DIGIT
        //     int           = zero / ( digit1-9 *DIGIT )
        //     minus         = %x2D ; -
        //     plus          = %x2B ; +
        //     zero          = %x30 ; 0
        //
        // However, none of the above affects any structure characters (:,{}[] and double quotes),
        // so it doesn't matter for the current purpose. For full compliance, this remains TODO.
        static auto number = x3::double_; // shortcut :)

        // 2.5 Strings
        static const x3::uint_parser<uint32_t, 16, 4, 4> _4HEXDIG;
        static auto char_ = ~x3::char_("\"\\") |
            x3::char_(R"(\)") >> (              // \ (reverse solidus)
                x3::char_(R"(")") |             // " quotation mark U+0022
                x3::char_(R"(\)") |             // \ reverse solidus U+005C
                x3::char_(R"(/)") |             // / solidus U+002F
                x3::char_(R"(b)") |             // b backspace U+0008
                x3::char_(R"(f)") |             // f form feed U+000C
                x3::char_(R"(n)") |             // n line feed U+000A
                x3::char_(R"(r)") |             // r carriage return U+000D
                x3::char_(R"(t)") |             // t tab U+0009
                x3::char_(R"(u)") >> _4HEXDIG ) // uXXXX U+XXXX
            ;
        static auto string = x3::lexeme [ '"' >> *char_ >> '"' ];

        // 2.2 objects
        static auto member
            = x3::expect [ &(x3::eoi | '"') ]
            >> string
            >> x3::expect [ x3::eoi | ':' ]
            >> expect_value;
        static auto object
            = '{' >> ('}' | (member % ',') >> '}');

        // 2.3 Arrays
        static auto array
            = '[' >> (']' | (expect_value % ',') >> ']');

        // 2.1 values
        static auto value_def = primitive_token | object | array | number | string;
        BOOST_SPIRIT_DEFINE(value)

        // entry point
        static auto json = x3::skip(x3::space)[expect_value];
    } // namespace detail
} // namespace JsonDetect
Obviously you would put the implementation in a TU, but on Compiler Explorer we can't: Live On Compiler Explorer. Using an adjusted JsonReader, it prints:
SeheX3Detector
==============
----- valid test cases
Testing {} -> true
Testing {"a":4, "b":5} -> true
Testing [] -> true
Testing [4, "b"] -> true
----- incomplete test cases
Testing { -> false
Testing {"a":4, "b" -> false
Testing [ -> false
Testing [4, " -> false
----- invalid test cases
Testing } -> invalid JSON data
Testing "a":4 } -> true -- remaining `:4 }`
Testing ] -> invalid JSON data
----- excess input test cases
Testing {}{"a":4, "b":5} -> true -- remaining `{"a":4, "b":5}`
Testing []["a", "b"] -> true -- remaining `["a", "b"]`
Testing {} bogus trailing data -> true -- remaining ` bogus trailing data`
NlohmannDetector
================
----- valid test cases
Testing {} -> true
Testing {"a":4, "b":5} -> true
Testing [] -> true
Testing [4, "b"] -> true
----- incomplete test cases
Testing { -> false
Testing {"a":4, "b" -> false
Testing [ -> false
Testing [4, " -> false
----- invalid test cases
Testing } -> false
Testing "a":4 } -> false
Testing ] -> false
----- excess input test cases
Testing {}{"a":4, "b":5} -> false
Testing []["a", "b"] -> false
Testing {} bogus trailing data -> false
Note how we now achieved some of the goals.
accepting trailing data - so we don't clobber any data after our message
failing early on some inputs that cannot possibly become valid JSON
However, we can't fix the problem of waiting indefinitely on /possibly/ incomplete valid data
Interestingly, one of our "invalid" test cases was wrong (!). (It is always a good sign when test cases fail). This is because "a" is actually a valid JSON value on its own.
Conclusion
In the general case it is impossible to make such a "complete message" detection work without at least limiting buffer size. E.g. a valid input could start with a million spaces. You don't want to wait for that.
Also, a valid input could open a string, object or array², and not terminate that within a few gigabytes. If you stop parsing beforehand you'll never know whether it was ultimately a valid message.
Though you'll inevitably have to deal with network timeouts anyway, you will prefer to be proactive about knowing what to expect. E.g. send the size of the payload ahead of time, so you can use boost::asio::transfer_exactly and validate precisely what you expected to get.
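A minimal sketch of such framing (the 4-byte big-endian length prefix is my assumption; any convention both peers agree on works):
#include <boost/asio.hpp>
#include <boost/endian/conversion.hpp>
#include <boost/json.hpp>
#include <cstdint>
#include <string>

// read one length-prefixed JSON message from a sync stream
template <typename SyncReadStream>
boost::json::value read_framed_json(SyncReadStream& s) {
    std::uint32_t len_be = 0; // length prefix, big-endian on the wire
    boost::asio::read(s, boost::asio::buffer(&len_be, sizeof(len_be)),
                      boost::asio::transfer_exactly(sizeof(len_be)));
    auto len = boost::endian::big_to_native(len_be);

    std::string payload(len, '\0');
    boost::asio::read(s, boost::asio::buffer(payload),
                      boost::asio::transfer_exactly(len));

    return boost::json::parse(payload); // throws on invalid JSON
}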
¹ practically. If you don't care about performance, you could iteratively run accept on increasing lengths of buffer
² god forbid, a number like 0000....00001 though that's subject to parser implementation differences
I have a problem with the if statement.
I have a program with command-line arguments and UTF-8 (the € symbol).
The error is in works_not, in the if statement.
class EURO

insert ARGUMENTS

create {ANY}
   make

feature {ANY}

   make
      do
         works_not
         works
      end

   works_not
      local
         ok: BOOLEAN
      do
         print ("%N%NAnzahl Argumente : " + argument_count.to_string + "%N")
         print ("%NArgument -> Programmname : " + argument(0))
         print ("%NArgument -> Wert : " + argument(1))
         print ("%NArgument -> Währung : " + argument(2) + "%N")
         ok := argument(2) = "€"
         print ("%NArgument(2) ist Euro ? " + ok.to_string + "%N%N")
         print ("don't work")
         io.put_new_line
         if argument(2) = "€" then
            euro_in_dm (argument(1).to_real)
         else
            dm_in_euro (argument(1).to_real)
         end
      end

   works
      do
         print ("works ")
         io.put_new_line
         if argument_count /= 2 then
            print ("%N%N Error (1) %N%N")
         else
            inspect argument(2)
            when "€" then
               euro_in_dm (argument(1).to_real)
            when "DM", "dm" then
               dm_in_euro (argument(1).to_real)
            else
               print ("%N%N Error (2) %N%N")
            end
         end
      end

feature

   euro_in_dm (a: REAL)
      do
         io.put_string ("%N Euro -> DM ")
         io.put_real (a * 1.95583)
         io.put_string ("%N%N")
      end

   dm_in_euro (a: REAL)
      do
         io.put_string ("%N DM -> Euro ")
         io.put_real (a / 1.95583)
         io.put_string ("%N%N")
      end

end
The issue is in the comparison argument(2) = "€".
In Eiffel, strings have a reference type, so the equality operator = compares references to the string objects, not their contents. If you want to compare string values instead, you need to use the operator ~, which internally calls is_equal after checking that the types of both operands are exactly the same, or the more robust same_string (provided it is available in your version of the environment). To summarize, you can try one of the following instead of the equality:
argument(2).same_string ("€")
argument(2) ~ "€"
argument(2).is_equal ("€")
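Applied to works_not, the test would then read (a sketch of the fix):
if argument(2).same_string ("€") then
   euro_in_dm (argument(1).to_real)
else
   dm_in_euro (argument(1).to_real)
end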