Using Boost Xpressive (static expressions), I noticed that pattern searching is much slower when the expression is built from sub-expressions that are themselves regexes.
Did I miss something, or is it inherent in the design? The Xpressive docs say
https://www.boost.org/doc/libs/1_80_0/doc/html/xpressive/user_s_guide.html#boost_xpressive.user_s_guide.grammars_and_nested_matches.embedding_a_regex_by_value
it is as if the regex were embedded by value; that is, a copy of the nested regex is stored by the enclosing regex. The inner regex is invoked by the outer regex during pattern matching. The inner regex participates fully in the match, back-tracking as needed to make the match succeed.
Consider these 2 ways of defining a regexp matching a URI (probably sub-optimal and not 100% correct, but that is not the point here).
If the expression is defined in one go, execution is around 6x faster than if the same regex is built from 3 sub regex.
Consider this code snippet
#include <iostream>
#include <string>
#include <chrono>
#include <boost/xpressive/xpressive.hpp>
using namespace boost::xpressive;
void bench_regex(const sregex& regex)
{
std::string positive = "asdas http://www.foo.io/bar asdp https://www.bar.io ";
std::string negative = "sdaoas dof jdfjo fds dsf http:/www.nonono .sa ";
const int nb_iterations = 100'000;
int nmatch = 0;
smatch what;
std::chrono::steady_clock::time_point begin0 = std::chrono::steady_clock::now();
for (int i = 0 ; i < nb_iterations; ++i)
{
if (regex_search( positive, what, regex ))
nmatch++;
if (regex_search( negative, what, regex ))
nmatch++;
}
std::chrono::steady_clock::time_point end0 = std::chrono::steady_clock::now();
std::cout << "nb matchs " << nmatch << std::endl;
std::cout << "search time " << std::chrono::duration_cast<std::chrono::microseconds>(end0-begin0).count()/1000.0f <<"ms" << std::endl << std::endl;
}
int main()
{
{
std::cout << "regex in one piece" << std::endl;
const sregex regex_uri_standalone = alpha >> *alnum >> "://" >> + ~(set= ' ','/') >> !( *('/' >> ~(set=' ')));
bench_regex(regex_uri_standalone);
}
{
std::cout << "regex built from part" << std::endl;
const sregex scheme = alpha >> *alnum;
const sregex hostname = + ~(set= ' ','/');
const sregex path = !( *('/' >> ~(set=' ')));
const sregex regex_uri_built_from_subregex = scheme >> "://" >> hostname >> path;
bench_regex(regex_uri_built_from_subregex);
}
}
This is particularly annoying because a main strength of Xpressive is the ability to construct complex regexps from simpler ones, something that can quickly become a nightmare when using PCRE or an equivalent.
But if it comes with such a performance cost, the benefit is largely negated.
By the way, is the library still maintained? According to the Boost changelog, there has been no change since Boost 1.55 (11 Nov 2013!).
https://www.boost.org/users/history/
No, you can't get around the function invocation / type erasure overhead of an sregex instance, because the expression template has already been compiled.
What you can do instead, is use deduced type for the sub-expressions:
using boost::proto::deep_copy;
std::cout << "regex built from auto" << std::endl;
const auto scheme = deep_copy(xp::alpha >> *xp::alnum);
const auto hostname = deep_copy(+~(xp::set = ' ', '/'));
const auto path = deep_copy(!(*('/' >> ~(xp::set = ' '))));
Be aware that the deep_copy is absolutely necessary to avoid dangling references to temporaries since you're naming the expressions now. The good news is that on my system, the result is slightly faster than before:
Live On Coliru
Printing
regex in one piece nb matchs 100000 search time 180.01ms
regex built from auto nb matchs 100000 search time 170.172ms
But Let's Speed It Up
There's Boost Spirit, which has a very similar parser expression language. I think it's just simply more modern. Let's try and compare!
Live On Coliru
#include <chrono>
#include <iostream>
#include <string>
double do_xp_test();
double do_x3_test();
// Runs the Xpressive benchmark, then the Spirit X3 benchmark, and reports the
// X3 time as a percentage of the Xpressive time.
int main() {
    const double xpressive_ms = do_xp_test();
    const double spirit_ms = do_x3_test();
    const double percent = spirit_ms / xpressive_ms * 100.0;
    std::cout << "x3 took ~" << percent << "% of the xpressive time\n";
}
// Shorthand for reading the steady (monotonic) clock; the benchmarks call it as now().
auto now = std::chrono::steady_clock::now;
// Enables duration literals such as the 1.0ms used to convert elapsed time to milliseconds.
using namespace std::chrono_literals;
// Benchmark input that contains two well-formed URIs.
static std::string const positive = "asdas http://www.foo.io/bar asdp https://www.bar.io ";
// Benchmark input without a well-formed URI (note the single slash in "http:/").
static std::string const negative = "sdaoas dof jdfjo fds dsf http:/www.nonono .sa ";
// Number of benchmark rounds; each round searches both inputs once.
constexpr int nb_iterations = 100'000;
#include <boost/xpressive/xpressive.hpp>
namespace xp = boost::xpressive;
// Searches both benchmark inputs with `regex` nb_iterations times, prints the
// number of successful searches and the elapsed time, and returns the elapsed
// time in milliseconds.
double bench_regex(const xp::sregex& regex) {
    xp::smatch captures;
    unsigned hits = 0;

    auto const started = now();
    for (int round = 0; round != nb_iterations; ++round) {
        hits += regex_search(positive, captures, regex) ? 1u : 0u;
        hits += regex_search(negative, captures, regex) ? 1u : 0u;
    }
    // Dividing by a floating-point 1ms duration yields fractional milliseconds.
    double const elapsed_ms = (now() - started) / 1.0ms;

    std::cout << "nb matchs " << hits << "\telapsed " << elapsed_ms << "ms\n";
    return elapsed_ms;
}
double do_xp_test() {
using boost::proto::deep_copy;
std::cout << "regex built from auto\t";
const auto scheme = deep_copy(xp::alpha >> *xp::alnum);
const auto hostname = deep_copy(+~(xp::set = ' ', '/'));
const auto path = deep_copy(!(*('/' >> ~(xp::set = ' '))));
const xp::sregex regex_uri_built_from_subregex =
scheme >> "://" >> hostname >> path;
return bench_regex(regex_uri_built_from_subregex);
}
#include <boost/spirit/home/x3.hpp>
namespace x3 = boost::spirit::x3;
// Wraps the given X3 parser in seek[raw[...]] and runs it over both benchmark
// inputs nb_iterations times. Prints the number of successful parses and the
// elapsed time, and returns the elapsed time in milliseconds.
double bench_x3(auto parser_expression) {
    // Placeholder attribute target; the parse calls below currently pass no attribute.
    [[maybe_unused]] std::string what;
    auto const searcher = x3::seek[x3::raw[parser_expression]];
    unsigned hits = 0;

    auto const started = now();
    for (int round = 0; round != nb_iterations; ++round) {
        hits += parse(begin(positive), end(positive), searcher/*, what*/) ? 1u : 0u;
        hits += parse(begin(negative), end(negative), searcher/*, what*/) ? 1u : 0u;
    }
    double const elapsed_ms = (now() - started) / 1.0ms;

    std::cout << "nb matchs " << hits << "\telapsed " << elapsed_ms << "ms\n";
    return elapsed_ms;
}
// Composes a URI parser from three Spirit X3 sub-parsers and benchmarks it.
// Returns the elapsed time reported by bench_x3.
double do_x3_test() {
    std::cout << "spirit x3 from subs\t";

    auto const scheme_rule = x3::alpha >> *x3::alnum;
    auto const host_rule = +~x3::char_(" /");
    auto const path_rule = *('/' >> +~x3::char_(' '));

    auto const uri_rule = scheme_rule >> "://" >> host_rule >> path_rule;
    return bench_x3(uri_rule);
}
Prints
regex built from auto nb matchs 100000 elapsed 156.939ms
spirit x3 from subs nb matchs 100000 elapsed 93.3622ms
x3 took ~59.4897% of the xpressive time
Or on my system,
Related: Boost URL
For actual handling of URLs (not detection) I suggest using the newly accepted Boost URL library.
I want to read a chunk of data which is just one frame of many frames stored in one dataset. The shape of the whole dataset is (10, 11214, 3): 10 frames, each with 11214 rows and 3 columns. Here is the file. The chunk I want to read would have the shape (11214, 3). I can print a predefined array using the code below, but I'm not sure how to read the data from an HDF5 file. Here is my code:
#include <h5xx/h5xx.hpp>
#include <boost/multi_array.hpp>
#include <iostream>
#include <vector>
#include <cstdio>
// Two-dimensional integer array type used by the write/read demo.
typedef boost::multi_array<int, 2> array_2d_t;
// Extents of the demo array, which main() creates as NJ x NI (both 10).
const int NI=10;
const int NJ=NI;
// Prints a 2-D integer array to stdout, one line per row (first dimension),
// each element formatted with "%2d ".
//
// The outer loop is bounded by shape()[0] and the inner loop by shape()[1], so
// every index stays within the extent of the dimension it addresses. The
// previous version bounded the first index by shape()[1] and the second by
// shape()[0], which only worked for square arrays.
void print_array(array_2d_t const& array)
{
    const array_2d_t::size_type row_count = array.shape()[0];
    const array_2d_t::size_type col_count = array.shape()[1];

    for (array_2d_t::size_type row = 0; row < row_count; ++row)
    {
        for (array_2d_t::size_type col = 0; col < col_count; ++col)
        {
            printf("%2d ", array[row][col]);
        }
        printf("\n");
    }
}
void write_int_data(std::string const& filename, array_2d_t const& array)
{
h5xx::file file(filename, h5xx::file::trunc);
std::string name;
{
// --- create dataset and fill it with the default array data (positive values)
name = "integer array";
h5xx::create_dataset(file, name, array);
h5xx::write_dataset(file, name, array);
// --- create a slice object (aka hyperslab) to specify the location in the dataset to be overwritten
std::vector<int> offset; int offset_raw[2] = {4,4}; offset.assign(offset_raw, offset_raw + 2);
std::vector<int> count; int count_raw[2] = {2,2}; count.assign(count_raw, count_raw + 2);
h5xx::slice slice(offset, count);
}
}
void read_int_data(std::string const& filename)
{
h5xx::file file(filename, h5xx::file::in);
std::string name = "integer array";
// read and print the full dataset
{
array_2d_t array;
// --- read the complete dataset into array, the array is resized and overwritten internally
h5xx::read_dataset(file, name, array);
printf("original integer array read from file, negative number patch was written using a slice\n");
print_array(array);
printf("\n");
}
}
int main(int argc, char** argv)
{
std::string filename = argv[0];
filename.append(".h5");
// --- do a few demos/tests using integers
{
array_2d_t array(boost::extents[NJ][NI]);
{
const int nelem = NI*NJ;
int data[nelem];
for (int i = 0; i < nelem; i++)
data[i] = i;
array.assign(data, data + nelem);
}
write_int_data(filename, array);
read_int_data(filename);
}
return 0;
}
I'm using the h5xx — a template-based C++ wrapper for the HDF5 library link and boost library.
The datasets are stored in particles/lipids/box/positions path. The dataset name value holds the frames.
argv[0] is not what you want (arguments start at 1, 0 is the program name). Consider bounds checking as well:
std::vector<std::string> const args(argv, argv + argc);
std::string const filename = args.at(1) + ".h5";
the initialization can be done directly, without a temporary array (what is multi_array for, otherwise?)
for (size_t i = 0; i < array.num_elements(); i++)
array.data()[i] = i;
Or indeed, make it an algorithm:
std::iota(array.data(), array.data() + array.num_elements(), 0);
same with vectors:
std::vector<int> offset; int offset_raw[2] = {4,4}; offset.assign(offset_raw, offset_raw + 2);
std::vector<int> count; int count_raw[2] = {2,2}; count.assign(count_raw, count_raw + 2);
besides being a formatting mess can be simply
std::vector offset{4,4}, count{2,2};
h5xx::slice slice(offset, count);
On To The Real Question
The code has no relevance to the file. At all. I created some debug/tracing code to dump the file contents:
// Recursively lists the datasets below group `g`. Each dataset is printed as
// "<indent> ds:<name>"; `indent` accumulates the "/"-separated names of the
// groups descended into so far.
void dump(h5xx::group const& g, std::string indent = "") {
    auto datasets = g.datasets();
    auto subgroups = g.groups();

    // Datasets directly inside this group.
    for (auto ds_it = datasets.begin(); ds_it != datasets.end(); ++ds_it) {
        std::cout << indent << " ds:" << ds_it.get_name() << "\n";
    }

    // Descend into every child group, extending the printed path prefix.
    for (auto grp_it = subgroups.begin(); grp_it != subgroups.end(); ++grp_it) {
        std::string const child_prefix = indent + "/" + grp_it.get_name();
        dump(*grp_it, child_prefix);
    }
}
int main()
{
h5xx::file xaa("xaa.h5", h5xx::file::mode::in);
dump(xaa);
}
Prints
/particles/lipids/box/edges ds:box_size
/particles/lipids/box/edges ds:step
/particles/lipids/box/edges ds:time
/particles/lipids/box/edges ds:value
/particles/lipids/box/positions ds:step
/particles/lipids/box/positions ds:time
/particles/lipids/box/positions ds:value
Now we can drill down to the dataset. Let's see whether we can figure out the correct type. It certainly is NOT array_2d_t:
h5xx::dataset ds(xaa, "particles/lipids/box/positions/value");
array_2d_t a;
h5xx::datatype detect(a);
std::cout << "type: " << std::hex << ds.get_type() << std::dec << "\n";
std::cout << "detect: " << std::hex << detect.get_type_id() << std::dec << "\n";
Prints
type: 30000000000013b
detect: 30000000000000c
That's a type mismatch. I guess I'll have to learn to read that gibberish as well...
Let's add some diagnostics:
// Prints a set of properties of the HDF5 datatype identified by `type` to
// std::cout, one per line: class, size, sign, byte order, precision, number
// of array dimensions and number of members.
// Each property is queried with the corresponding H5Tget_* call and streamed
// as returned; in the sample output the queries that do not apply to the
// type show up as -1.
void diag_type(hid_t type)
{
std::cout << " Class " << ::H5Tget_class(type) << std::endl;
std::cout << " Size " << ::H5Tget_size(type) << std::endl;
std::cout << " Sign " << ::H5Tget_sign(type) << std::endl;
std::cout << " Order " << ::H5Tget_order(type) << std::endl;
std::cout << " Precision " << ::H5Tget_precision(type) << std::endl;
std::cout << " NDims " << ::H5Tget_array_ndims(type) << std::endl;
std::cout << " NMembers " << ::H5Tget_nmembers(type) << std::endl;
}
int main()
{
h5xx::file xaa("xaa.h5", h5xx::file::mode::in);
// dump(xaa);
{
h5xx::group g(xaa, "particles/lipids/box/positions");
h5xx::dataset ds(g, "value");
std::cout << "dataset: " << std::hex << ds.get_type() << std::dec << std::endl;
diag_type(ds.get_type());
}
{
array_2d_t a(boost::extents[NJ][NI]);
h5xx::datatype detect(a);
std::cout << "detect: " << std::hex << detect.get_type_id() << std::dec << std::endl;
diag_type(detect.get_type_id());
}
}
Prints
dataset: 30000000000013b
Class 1
Size 4
Sign -1
Order 0
Precision 32
NDims -1
NMembers -1
detect: 30000000000000c
Class 0
Size 4
Sign 1
Order 0
Precision 32
NDims -1
NMembers -1
At least we know that H5T_FLOAT (class 1) is required. Let's modify array_2d_t:
using array_2d_t = boost::multi_array<float, 2>;
array_2d_t a(boost::extents[11214][3]);
This at least makes the data appear similarly. Let's ... naively try to read:
h5xx::read_dataset(ds, a);
Oops, that predictably throws
terminate called after throwing an instance of 'h5xx::error'
what(): /home/sehe/Projects/stackoverflow/deps/h5xx/h5xx/dataset/boost_multi_array.hpp:176:read_dataset(): dataset "/particles/lipi
ds/box/positions/value" and target array have mismatching dimensions
No worries, we can guess:
using array_3d_t = boost::multi_array<float, 3>;
array_3d_t a(boost::extents[10][11214][3]);
h5xx::read_dataset(ds, a);
At least this does work. Adapting the print function:
// Prints any range of ranges to stdout: one output line per inner range, with
// every element formatted as "%5f " (so elements are expected to be
// floating-point values).
template <typename T> void print_array(T const& array) {
    for (auto&& line : array) {
        for (auto const value : line) {
            printf("%5f ", value);
        }
        printf("\n");
    }
}
Now we can print the first frame:
h5xx::read_dataset(ds, a);
print_array(*a.begin()); // print the first frame
This prints:
80.480003 35.360001 4.250000
37.450001 3.920000 3.960000
18.530001 -9.690000 4.680000
55.389999 74.339996 4.600000
22.110001 68.709999 3.850000
-4.130000 24.040001 3.730000
40.160000 6.390000 4.730000
-5.400000 35.730000 4.850000
36.669998 22.450001 4.080000
-3.680000 -10.660000 4.180000
(...)
That checks out with h5ls -r -d xaa.h5/particles/lipids/box/positions/value:
particles/lipids/box/positions/value Dataset {75/Inf, 11214, 3}
Data:
(0,0,0) 80.48, 35.36, 4.25, 37.45, 3.92, 3.96, 18.53, -9.69, 4.68,
(0,3,0) 55.39, 74.34, 4.6, 22.11, 68.71, 3.85, -4.13, 24.04, 3.73,
(0,6,0) 40.16, 6.39, 4.73, -5.4, 35.73, 4.85, 36.67, 22.45, 4.08, -3.68,
(0,9,1) -10.66, 4.18, 35.95, 36.43, 5.15, 57.17, 3.88, 5.08, -23.64,
(0,12,1) 50.44, 4.32, 6.78, 8.24, 4.36, 21.34, 50.63, 5.21, 16.29,
(0,15,1) -1.34, 5.28, 22.26, 71.25, 5.4, 19.76, 10.38, 5.34, 78.62,
(0,18,1) 11.13, 5.69, 22.14, 59.7, 4.92, 15.65, 47.28, 5.22, 82.41,
(0,21,1) 2.09, 5.24, 16.87, -11.68, 5.35, 15.54, -0.63, 5.2, 81.25,
(...)
The Home Stretch: Adding The Slice
// Reads a single frame from the dataset "value" in the group
// "particles/lipids/box/positions" of xaa.h5 and returns it as an 11214 x 3
// array.
// NOTE(review): frame_no is passed to the slice unchecked; behaviour for an
// out-of-range frame depends on h5xx/HDF5 — confirm.
array_2d_t read_frame(int frame_no) {
    h5xx::file input("xaa.h5", h5xx::file::mode::in);
    h5xx::group positions(input, "particles/lipids/box/positions");
    h5xx::dataset values(positions, "value");

    array_2d_t frame(boost::extents[11214][3]);

    // Select exactly one frame: start at (frame_no, 0, 0), extent 1 x 11214 x 3.
    std::vector<int> start{frame_no, 0, 0};
    std::vector<int> extent{1, 11214, 3};
    h5xx::slice selection(start, extent);

    h5xx::read_dataset(values, frame, selection);
    return frame;
}
There you have it. Now we can print any frame:
print_array(read_frame(0));
Printing the same as before. Let's try the last frame:
print_array(read_frame(9));
Prints
79.040001 36.349998 3.990000
37.250000 3.470000 4.140000
18.600000 -9.270000 4.900000
55.669998 75.070000 5.370000
21.920000 67.709999 3.790000
-4.670000 24.770000 3.690000
40.000000 6.060000 5.240000
-5.340000 36.320000 5.410000
36.369999 22.490000 4.130000
-3.520000 -10.430000 4.280000
(...)
Checking again with h5ls -r -d xaa.h5/particles/lipids/box/positions/value |& grep '(9' | head confirms:
(9,0,0) 79.04, 36.35, 3.99, 37.25, 3.47, 4.14, 18.6, -9.27, 4.9, 55.67,
(9,3,1) 75.07, 5.37, 21.92, 67.71, 3.79, -4.67, 24.77, 3.69, 40, 6.06,
(9,6,2) 5.24, -5.34, 36.32, 5.41, 36.37, 22.49, 4.13, -3.52, -10.43,
(9,9,2) 4.28, 35.8, 36.43, 4.99, 56.6, 4.09, 5.04, -23.37, 49.42, 3.81,
(9,13,0) 6.31, 8.83, 4.56, 22.01, 50.38, 5.43, 16.3, -2.92, 5.4, 22.02,
(9,16,1) 70.09, 5.36, 20.23, 11.12, 5.66, 78.48, 11.34, 6.09, 20.26,
(9,19,1) 61.45, 5.35, 14.25, 48.32, 5.35, 79.95, 1.71, 5.38, 17.56,
(9,22,1) -11.61, 5.39, 15.64, -0.19, 5.06, 80.43, 71.77, 5.29, 75.54,
(9,25,1) 35.14, 5.26, 22.45, 56.86, 5.56, 16.47, 52.97, 6.16, 20.62,
(9,28,1) 65.12, 5.26, 19.68, 71.2, 5.52, 23.39, 49.84, 5.28, 22.7,
Full Listing
#include <boost/multi_array.hpp>
#include <h5xx/h5xx.hpp>
#include <iostream>
using array_2d_t = boost::multi_array<float, 2>;
// Prints any range of ranges to stdout: one output line per inner range, with
// every element formatted as "%5f " (so elements are expected to be
// floating-point values).
template <typename T> void print_array(T const& array)
{
    for (auto&& line : array) {
        for (auto const value : line) {
            printf("%5f ", value);
        }
        printf("\n");
    }
}
// Recursively lists the datasets below group `g`. Each dataset is printed as
// "<indent> ds:<name>"; `indent` accumulates the "/"-separated names of the
// groups descended into so far.
void dump(h5xx::group const& g, std::string indent = "") {
    auto datasets = g.datasets();
    auto subgroups = g.groups();

    // Datasets directly inside this group.
    for (auto ds_it = datasets.begin(); ds_it != datasets.end(); ++ds_it) {
        std::cout << indent << " ds:" << ds_it.get_name() << std::endl;
    }

    // Descend into every child group, extending the printed path prefix.
    for (auto grp_it = subgroups.begin(); grp_it != subgroups.end(); ++grp_it) {
        std::string const child_prefix = indent + "/" + grp_it.get_name();
        dump(*grp_it, child_prefix);
    }
}
// Reads a single frame from the dataset "value" in the group
// "particles/lipids/box/positions" of xaa.h5 and returns it as an 11214 x 3
// float array.
// NOTE(review): frame_no is passed to the slice unchecked; behaviour for an
// out-of-range frame depends on h5xx/HDF5 — confirm.
array_2d_t read_frame(int frame_no) {
    h5xx::file input("xaa.h5", h5xx::file::mode::in);
    h5xx::group positions(input, "particles/lipids/box/positions");
    h5xx::dataset values(positions, "value");

    array_2d_t frame(boost::extents[11214][3]);

    // Select exactly one frame: start at (frame_no, 0, 0), extent 1 x 11214 x 3.
    std::vector<int> start{frame_no, 0, 0};
    std::vector<int> extent{1, 11214, 3};
    h5xx::slice selection(start, extent);

    h5xx::read_dataset(values, frame, selection);
    return frame;
}
// Reads frame 9 of the positions dataset and prints it.
int main()
{
    auto const frame = read_frame(9);
    print_array(frame);
}