Adobe Source Libraries 1.49.0
A collection of C++ libraries.
|
A relatively lightweight and simple xml (subset) parser. More...
#include <adobe/xml_parser.hpp>
Public Types | |
using | callback_proc_t |
using | preorder_predicate_t |
using | token_type |
Public Member Functions | |
xml_parser_t (uchar_ptr_t first, uchar_ptr_t last, const line_position_t &position, preorder_predicate_t predicate, callback_proc_t callback, O output) | |
xml_parser_t (const xml_parser_t &rhs) | |
xml_parser_t & | operator= (const xml_parser_t &rhs) |
virtual | ~xml_parser_t () |
const line_position_t & | next_position () |
void | set_preorder_predicate (preorder_predicate_t pred) |
void | parse_element_sequence () |
void | parse_content () |
void | parse_document () |
Protected Member Functions | |
const token_type & | get_token () |
void | putback () |
bool | is_token (xml_lex_token_set_t name, token_range_t &value) |
bool | is_token (xml_lex_token_set_t name) |
void | require_token (xml_lex_token_set_t name, token_range_t &value) |
void | require_token (xml_lex_token_set_t name) |
void | throw_exception (const char *error_string) |
void | throw_exception (xml_lex_token_set_t found, xml_lex_token_set_t expected) |
bool | is_element (token_range_t &element) |
bool | is_content (token_range_t &element) |
bool | is_e_tag (token_range_t &name, token_range_t &close_tag) |
bool | is_attribute_set (attribute_set_t &attribute_set) |
bool | is_attribute (token_range_t &name, token_range_t &value) |
bool | is_prolog () |
bool | is_bom (token_range_t &bom) |
bool | is_xml_decl (token_range_t &xml_decl) |
void | content_callback (token_range_t &result_element, const token_range_t &old_element, const token_range_t &start_tag, const attribute_set_t attribute_set, const token_range_t &content, bool preorder_parent) |
Protected Attributes | |
preorder_predicate_t | pred_m |
callback_proc_t | callback_m |
O | output_m |
Related Symbols | |
(Note that these are not member symbols.) | |
template<typename O> | |
xml_parser_t< O > | make_xml_parser (uchar_ptr_t first, uchar_ptr_t last, const line_position_t &position, typename xml_parser_t< O >::preorder_predicate_t predicate, typename xml_parser_t< O >::callback_proc_t callback, O output) |
Create an object that will parse the indicated content range using the preorder and content functions indicated. |
adobe::make_xml_parser(start_of_xml_document, end_of_xml_document, line_position_t("sample document"), my_preorder_predicate, my_content_callback, my_output_iterator).parse_document();
Note that in this usage, the parser is never stored in a local variable. Instead, parse_document() is called immediately on the result of make_xml_parser. This is a very common coding pattern for creating and using xml_parser_t.
<parent id="node1"> <simple-child id="node2"/> <complex-child id="node3"> <grandchild id="node4"/> </complex-child> </parent>
static const token_range_t target_tag_k( static_token_range("replace-me") ); token_range_t lookup_replacement_text(const token_range_t&); bool my_preorder_predicate(const token_range_t& tag_name) { return token_range_equal(tag_name, target_tag_k); } token_range_t my_content_callback( const token_range_t& /* entire_element_range */, const token_range_t& /* name */, const attribute_set_t& attribute_set, const token_range_t& value) { static const token_range_t id_attr_k( static_token_range("id") ); const token_range_t id( attribute_set[id_addr_k] ); if (0 == adobe::token_range_size(id)) { throw std::runtime_error("replace-me tags require an id attribute"); } return lookup_replacement_text(id); }
With this application (and appropriate pre-population of a replacement dictionary), xml input data like this:
Dear <replace-me id="their-name"/>, Thank you for your recent letter of <replace-me id="date"/>. Yadda Yadda Yadda. Sincerely, <replace-me id="my-name"/>
might come out looking like this:
Dear Mr. Smith, Thank you for your recent letter of 17 June. Yadda Yadda Yadda. Sincerely, John Q. Public
std::string perform_markup_replacement(const std::string& input) { std::string result; make_xml_parser( input.begin(), input.end(), line_position_t("markup replacement string"), my_preorder_predicate, my_content_callback, std::back_inserter(result)).parse_content(); return result; }
<canvas> <rect sides="0 0 100 100"/> <circle center="5 5" radius="10"/> <polygon vertices="1 2 6 8 1 8"/> </canvas>
Assuming that the parser's preorder predicate can be convinced to return true for all tags in the document (we'll do this later), this document can be parsed with a simple content callback.
token_range_t my_content_callback( const token_range_t& /* entire_element_range */, const token_range_t& name, const attribute_set_t& attribute_set, const token_range_t& value, graphic_context_t& graphics) { static const token_range_t canvas_tag_k( static_token_range("canvas") ); static const token_range_t rect_tag_k( static_token_range("rect") ); static const token_range_t circle_tag_k( static_token_range("circle") ); static const token_range_t polygon_tag_k( static_token_range("polygon") ); if (token_range_equal(canvas_tag_k, name)) { make_xml_document(value.first, value.second, line_position_t("canvas"), adobe::always_true<token_range_t>(), std::bind(my_simple_content_callback, _1, _2, _3, _4, std::ref(graphics)), adobe::null_output_t()).parse_content(); } else if (token_range_equal(rect_tag_k, name)) { if (0 != adobe::token_range_size(value)) { throw std::runtime_error("rect elements must be empty"); } draw_rectangle(attribute_set, graphics); } else if (token_range_equal(circle_tag_k, name)) { if (0 != adobe::token_range_size(value)) { throw std::runtime_error("circle elements must be empty"); } draw_circle(attribute_set, graphics); } else if (token_range_equal(polygon_tag_k, name)) { if (0 != adobe::token_range_size(value)) { throw std::runtime_error("polygon elements must be empty"); } draw_polygon(attribute_set, graphics); } else { throw std::runtime_error("encountered unrecognized tag"); } return token_range_t(); } void draw_graphics(const std::string& xml_shape, graphics_context_t& graphics) { make_xml_document(xml_shape.begin(), xml_shape.end(), line_position_t("xml shape"), adobe::always_true<token_range_t>(), std::bind(my_content_callback, _1, _2, _3, _4, std::ref(graphics)), adobe::null_output_t()).parse_document(); }With this content callback function and external entry function, we have implemented a simple system that draws graphics based on the contents of an xml document and provides a modicum of error checking on the document contents (e.g. 
tags that are not allowed to have content are checked to be empty). One potential problem is that this application does not guarantee that the root element is a canvas. Indeed, a document with a single <rect ... /> element is conformant with the application, as written above. This may be either good or bad, depending on the design of your specific application.
<canvas> <rect sides="0 0 100 100"/> <circle center="5 5" radius="10"/> <group translation="5 10"> <polygon> <vertex xy="1 2"/> <vertex xy="6 8"/> <vertex xy="1 8"/> </polygon> <rect sides="3 3 10 10"/> </group> </canvas>
Here, the polygon element's vertices are elements within the polygon's content instead of attributes. This requires polygon's content to be parsed to create an appropriate primitive to draw. Similarly, the document grammar has added a group element that groups primitives together within a coordinate transformation.
token_range_t my_polygon_callback( const token_range_t& /* entire_element_range */, const token_range_t& name, const attribute_set_t& attribute_set, const token_range_t& /* value */, polygon_t& polygon) { static const token_range_t vertex_tag_k( static_token_range("vertex") ); if (token_range_equal(vertex_tag_k, name)) { polygon.add_vertex( make_vertex(attribute_set) ); } else { throw std::runtime_error("encountered expected tag inside polygon content"); } return token_range_t(); } token_range_t my_group_callback( const token_range_t& /* entire_element_range */, const token_range_t& name, const attribute_set_t& attribute_set, const token_range_t& value, graphic_context_t& graphics) { static const token_range_t rect_tag_k( static_token_range("rect") ); static const token_range_t circle_tag_k( static_token_range("circle") ); static const token_range_t group_tag_k( static_token_range("group") ); static const token_range_t polygon_tag_k( static_token_range("polygon") ); if (token_range_equal(group_tag_k, name)) { graphic_context_t translated_graphics( graphics, attribute_set ); make_xml_document(value.first, value.second, line_position_t("group"), adobe::always_true<token_range_t>(), std::bind(my_group_callback, _1, _2, _3, _4, std::ref(translated_graphics)), adobe::null_output_t()).parse_content(); } else if (token_range_equal(rect_tag_k, name)) { if (0 != adobe::token_range_size(value)) { throw std::runtime_error("rect elements must be empty"); } draw_rectangle(attribute_set, graphics); } else if (token_range_equal(circle_tag_k, name)) { if (0 != adobe::token_range_size(value)) { throw std::runtime_error("circle elements must be empty"); } draw_circle(attribute_set, graphics); } else if (token_range_equal(polygon_tag_k, name)) { polygon_t polygon; make_xml_document(value.first, value.second, line_position_t("polygon"), adobe::always_true<token_range_t>(), std::bind(my_polygon_callback, _1, _2, _3, _4, std::ref(polygon)), adobe::null_output_t()).parse_content(); 
draw_polygon(polygon, graphics); } else { throw std::runtime_error("encountered unrecognized tag in group"); } return token_range_t(); } token_range_t my_canvas_callback( const token_range_t& /* entire_element_range */, const token_range_t& name, const attribute_set_t& attribute_set, const token_range_t& value, graphic_context_t& graphics) { static const token_range_t canvas_tag_k( static_token_range("canvas") ); if (token_range_equal(canvas_tag_k, name)) { make_xml_document(value.first, value.second, line_position_t("canvas"), adobe::always_true<token_range_t>(), std::bind(my_group_callback, _1, _2, _3, _4, std::ref(graphics)), adobe::null_output_t()).parse_content(); } else { throw std::runtime_error("encountered unrecognized tag in document"); } return token_range_t(); } void draw_graphics(const std::string& xml_shape, graphics_context_t& graphics) { make_xml_document(xml_shape.begin(), xml_shape.end(), line_position_t("xml shape"), adobe::always_true<token_range_t>(), std::bind(my_canvas_callback, _1, _2, _3, _4, std::ref(graphics)), adobe::null_output_t()).parse_document(); }
Note that this solution guarantees that the root element is a canvas, accommodates groups that contain graphic primitives and other groups, and parses elements within polygon content as vertices. An interesting side effect of the refactoring that produced this system is that the functions tend to be smaller and more composable than the single monolithic function in our first example.
Definition at line 417 of file xml_parser.hpp.
using callback_proc_t |
Definition at line 419 of file xml_parser.hpp.
using preorder_predicate_t |
Definition at line 420 of file xml_parser.hpp.
using token_type |
Definition at line 421 of file xml_parser.hpp.
xml_parser_t | ( | uchar_ptr_t | first, |
uchar_ptr_t | last, | ||
const line_position_t & | position, | ||
preorder_predicate_t | predicate, | ||
callback_proc_t | callback, | ||
O | output ) |
Definition at line 423 of file xml_parser.hpp.
xml_parser_t | ( | const xml_parser_t< O > & | rhs | ) |
Definition at line 428 of file xml_parser.hpp.
|
virtual |
Definition at line 442 of file xml_parser.hpp.
xml_parser_t & operator= | ( | const xml_parser_t< O > & | rhs | ) |
Definition at line 432 of file xml_parser.hpp.
const line_position_t & next_position | ( | ) |
Definition at line 444 of file xml_parser.hpp.
void set_preorder_predicate | ( | preorder_predicate_t | pred | ) |
Allows the client to specify a different preorder predicate after object instantiation
[in] | pred | predicate that indicates whether the client wants a given element to be parsed pre-order or in-order |
Definition at line 452 of file xml_parser.hpp.
void parse_element_sequence | ( | ) |
Parses the content range as a sequence of xml elements. Each element encountered in the content range is processed by the application. Character data between top-level elements in the content range is ignored by the parser and is not processed.
<top-level type="simple">element 1</top-level> these characters are ignored <top-level type="complex">element 2<embedded/></top-level>
Definition at line 1056 of file xml_parser.hpp.
void parse_content | ( | ) |
Parses the content range as the content of an xml element.
<?xml encoding="UTF-8" version="1.0" ?> <root> <content>sample document content</content> </root>
Definition at line 1070 of file xml_parser.hpp.
void parse_document | ( | ) |
Parses the content range as a well-formed xml document.
Definition at line 1093 of file xml_parser.hpp.
|
protected |
Definition at line 546 of file xml_parser.hpp.
|
protected |
Definition at line 547 of file xml_parser.hpp.
|
protected |
Definition at line 644 of file xml_parser.hpp.
|
protected |
Definition at line 661 of file xml_parser.hpp.
|
protected |
Definition at line 675 of file xml_parser.hpp.
|
protected |
Definition at line 687 of file xml_parser.hpp.
|
protected |
Definition at line 557 of file xml_parser.hpp.
|
protected |
Definition at line 560 of file xml_parser.hpp.
|
protected |
Definition at line 743 of file xml_parser.hpp.
|
protected |
Definition at line 845 of file xml_parser.hpp.
|
protected |
Definition at line 943 of file xml_parser.hpp.
|
protected |
Definition at line 957 of file xml_parser.hpp.
|
protected |
Definition at line 1041 of file xml_parser.hpp.
|
protected |
Definition at line 970 of file xml_parser.hpp.
|
protected |
Definition at line 991 of file xml_parser.hpp.
|
protected |
Definition at line 1027 of file xml_parser.hpp.
|
protected |
Definition at line 697 of file xml_parser.hpp.
|
[in] | first | the start of the content range (analogous to a begin iterator) |
[in] | last | the end of the content range (analogous to an end iterator) |
[in] | position | an annotation of the line number at which the content range begins. Used when errors are encountered while parsing the content range. |
[in] | predicate | a predicate that indicates whether the application's content callback will be called pre-order or in-order for a given element |
[in] | callback | the application's content callback function |
[in] | output | an object that models OutputIterator to which the parser will insert the result of processing the content range |
Definition at line 1125 of file xml_parser.hpp.
|
protected |
Definition at line 577 of file xml_parser.hpp.
|
protected |
Definition at line 578 of file xml_parser.hpp.
|
protected |
Definition at line 579 of file xml_parser.hpp.