Почитать про boost::spirit::x3.
x3::rule<ID, Attribute>
std::string
и std::vector<char>
)
int_
, short_
, long_
, int_(42)
, … — парсеры целых чисел
double_
, float_
, double_(42.2)
, … — парсеры действительных чисел
bool_
, true_
, false_
— булевы парсеры
lit("abc")
, char_
, char_("A-Za-z")
, … — литеральные парсеры (точное соответствие строке)
alnum
, blank
, space
, lower
, … — классификаторы
parse
— разбор выражения
phrase_parse
— разбор выражения с пропуском разделителей (принимает дополнительный skip-парсер, например x3::space)
namespace x3 = boost::spirit::x3;

// Minimal x3::parse call: match the input against the integer parser.
std::string_view text{"123546"};
auto first = text.begin();
bool parsed = x3::parse(first, text.end(), x3::int_);
Description | PEG | Spirit X3 | Example |
---|---|---|---|
Sequence |
a b |
a >> b |
|
Alternative |
a | b |
a | b |
|
Zero or more (Kleene) |
a* |
*a |
|
One or more (Plus) |
a+ |
+a |
|
Optional |
a? |
-a |
|
And-predicate |
&a |
&a |
|
Not-predicate |
!a |
!a |
|
Difference |
a - b |
|
|
Expectation |
a > b |
|
|
List |
a % b |
|
on_error
)
on_success
)
#include <iostream> #include <string> #include <boost/spirit/home/x3.hpp> int main() { namespace x3 = boost::spirit::x3; std::string_view text{"123 45.6"}; bool parsed = x3::parse(text.begin(), text.end(), x3::int_ >> ' ' >> x3::double_); std::cout << "Parsed value is " << std::boolalpha << parsed << "\n"; }
Parsed value is true
#include <iostream> #include <string> #include <boost/spirit/home/x3.hpp> int main() { namespace x3 = boost::spirit::x3; std::string_view text{R"(foo: bar, gorp : smart , falcou : "crazy frenchman", name:sam)"}; auto name = x3::rule<class name, std::string>{} // явное прописывание тэга и атрибута = x3::alpha >> *x3::alnum; // переменные в C-стиле auto quote = x3::lexeme['"' >> *~x3::char_('"') >> '"']; // строковые значения в кавычках auto it = text.begin(); bool parsed = x3::phrase_parse(it, text.end(), (name >> ':' >> (quote | name)) % ',', x3::space); std::cout << "Is parsed: " << std::boolalpha << parsed << "\n" << "Position: " << std::distance(text.begin(), it) << " == " << text.size() << "\n"; }
Is parsed: true
Position: 85 == 85
auto name = x3::rule<class name, std::string>{} // явное прописывание тэга и атрибута = x3::alpha >> *x3::alnum; // переменные в C-стиле auto quote = x3::lexeme['"' >> *~x3::char_('"') >> '"']; // строковые значения в кавычках
int_
, double_
, …) имеют примитивные по типу атрибуты (int
, double
, …)
x3::rule<ID, A>
имеют атрибут A
Оператор | Его синтезируемый атрибут |
---|---|
|
|
|
|
|
|
|
|
|
нет атрибута |
|
|
#include <iostream> #include <string> #include <boost/spirit/home/x3.hpp> #include <boost/fusion/adapted/std_pair.hpp> int main() { namespace x3 = boost::spirit::x3; std::string_view text1{"pizza"}; std::string result; // совместимо с атрибутом std::vector<char> x3::parse(text1.begin(), text1.end(), *x3::char_, result); std::cout << "Result: " << result << "\n"; std::string_view text2{"cosmic pizza"}; std::pair<std::string, std::string> presult; x3::parse(text2.begin(), text2.end(), *~x3::char_(' ') >> ' ' >> *x3::char_, presult); std::cout << "Result of pair: " << presult.first << ", " << presult.second << "\n"; }
Result: pizza
Result of pair: cosmic, pizza
std::string_view text{R"(foo: bar, gorp : smart , falcou : "crazy frenchman", name:sam)"};

// Bare identifier: a letter followed by letters or digits.
auto name = x3::alpha >> *x3::alnum;
// Double-quoted value; the quotes themselves carry no attribute.
auto quote = '"' >> x3::lexeme[*(~x3::char_('"'))] >> '"';
// One `key : value` item exposed as a pair of strings.
auto item = x3::rule<class item, std::pair<std::string, std::string>>{}
          = name >> ':' >> (quote | name);

// The list of pairs is stored directly into the map.
std::map<std::string, std::string> dict;
x3::phrase_parse(text.begin(), text.end(), item % ',', x3::space, dict);
a: char, b: vector<char> → (a >> b): tuple<char, vector<char>> → vector<char> → string
a: unused, b: vector<char>, c: unused → (a >> b >> c): vector<char> → string
a: string, b: string → (a | b): variant<string, string> → string
a: string, b: unused, c: string → (a >> b >> c): tuple<string, string>
a: pair<string, string>, b: unused → (a % b): vector<pair<string, string>> → map<string, string>
// Attribute type produced by the rule (contents elided in the notes).
struct my_type { ... };

// Tag type that uniquely identifies the rule.
struct my_rule_class;

// Rule declaration; the string is the rule's name.
const x3::rule<my_rule_class, my_type> my_rule = "my_rule";

// Rule definition: BOOST_SPIRIT_DEFINE(my_rule) looks up `my_rule_def`,
// so the declaration above must be named `my_rule`.
const auto my_rule_def = x3::lexeme[(x3::alpha | '_') >> *(x3::alnum | '_')];

BOOST_SPIRIT_DEFINE(my_rule);
boost::forward_ast
>
"abc" > x3::attr(10)
x3
??
![]() Рисунок 1. Структура
boost::spirit |
![]() Рисунок 2. Разница между qi и karma
|
csv
/* An example of CSV: * kind,of,header * abc,with space,"quote" * "comma , inside",132, spaces dot * * CSV (comma separated value) EBNF appr. specification (http://www.rfc-editor.org/rfc/rfc4180.txt) * string := [^,\n]+ * qstring := " [^"]* " * cell := qstring | string * row := cell (, cell)* \n * csv := row+ */
x3::phrase_parse
template<class Container> auto parse(const Container& cnt) { types::csv::csv result; auto it = cnt.begin(); if (!x3::phrase_parse(it, cnt.end(), csv, x3::ascii::space, result)) throw std::runtime_error("Invalid input data"); if (it != cnt.end()) throw std::out_of_range("Parsing is not complete"); return result; }
std::string
std::vector<std::string>
std::vector<std::vector<std::string>>
/*
 * Sample input:
 *   kind,of,header
 *   abc,with space,"quote"
 *   "comma , inside",132,  spaces dot
 */
namespace types::csv
{
    // A table is a list of rows; each row is a list of cell strings.
    using csv = std::vector<std::vector<std::string>>;
}
x3::rule
/*
 * EBNF:
 *   string  := [^,\n]+
 *   qstring := " [^"]* "
 *   cell    := qstring | string
 *   row     := cell (, cell)* \n
 *   csv     := row+
 */

// Unquoted cell: one or more characters other than ',' and '\n'.
const auto string = x3::lexeme[+~x3::char_(",\n")];

// Quoted cell: a backslash followed by any character contributes that
// character; otherwise anything except '"' is taken.
const auto qstring = x3::lexeme['"' >> *(('\\' >> x3::char_) | ~x3::char_("\"")) >> '"'];

// A cell is tried as quoted first, then as unquoted.
const auto cell = qstring | string;

// Comma-separated cells terminated by an end-of-line matched without skipping.
const auto row = x3::rule<class row, std::vector<std::string>>{}
               = (cell % ',') >> x3::no_skip[x3::eol];

// The whole document: one or more rows.
const auto csv = x3::rule<class csv, types::csv::csv>{}
               = +row;
#include <iostream> #include <stdexcept> #include <string> #include <vector> #include <boost/spirit/home/x3.hpp> #include <boost/range/adaptor/indexed.hpp> /* An example of CSV: * kind,of,header * abc,with space,"quote" * "comma , inside",132, spaces dot * * CSV (comma separated value) EBNF appr. specification (http://www.rfc-editor.org/rfc/rfc4180.txt) * string := [^,\n]+ * qstring := " [^"]* " * cell := qstring | string * row := cell (, cell)* \n * csv := row+ */ namespace types::csv { using csv = std::vector<std::vector<std::string>>; } namespace parser::csv { namespace x3 = boost::spirit::x3; const auto string = x3::lexeme[+~x3::char_(",\n")]; const auto qstring = x3::lexeme['"' >> *(('\\' >> x3::char_) | ~x3::char_("\"")) >> '"']; const auto cell = qstring | string; const auto row = x3::rule<class row, std::vector<std::string>>{} = (cell % ',') >> x3::no_skip[x3::eol]; const auto csv = x3::rule<class csv, types::csv::csv>{} = +row; template<class Container> auto parse(const Container& cnt) { types::csv::csv result; auto it = cnt.begin(); if (!x3::phrase_parse(it, cnt.end(), csv, x3::ascii::space, result)) throw std::runtime_error("Invalid input data"); if (it != cnt.end()) throw std::out_of_range("Parsing is not complete"); return result; } } int main() { std::string csv_data = R"( kind,of,header abc,with space,"quote" "comma , inside",132, spaces dot new text, quote \"escaped, " similar\" " empty,"","" )"; auto result = parser::csv::parse(csv_data); for (const auto& row : result | boost::adaptors::indexed()) { std::cout << "Row #" << row.index() << ":"; for (const auto& cell : row.value() | boost::adaptors::indexed()) std::cout << " F#" << cell.index() << ": [" << cell.value() << "]"; std::cout << "\n"; } }
Row #0: F#0: [kind] F#1: [of] F#2: [header]
Row #1: F#0: [abc] F#1: [with space] F#2: [quote]
Row #2: F#0: [comma , inside] F#1: [132] F#2: [spaces dot]
Row #3: F#0: [new text] F#1: [quote \"escaped] F#2: [ similar" ]
Row #4: F#0: [empty] F#1: [] F#2: []
\n
, а не \r\n
\
для экранирования кавычек, а не парные двойные кавычки
empty,,
должно выдавать 3 ячейки