8#ifndef ADOBE_XML_PARSER_HPP
9#define ADOBE_XML_PARSER_HPP
20#include <adobe/implementation/parser_shared.hpp>
21#include <adobe/implementation/xml_lex.hpp>
22#include <adobe/implementation/xml_token.hpp>
27#include <boost/iterator/iterator_facade.hpp>
28#include <boost/noncopyable.hpp>
29#include <boost/operators.hpp>
59 return token_range_less(x.first, y.first) ||
60 (!token_range_less(y.first, x.first) && token_range_less(x.second, y.second));
71 return token_range_less(x.first, y.first);
89 return result != set_m.write().end() && token_range_equal(result->first, attribute.first);
113 return result != set_m->end() && token_range_equal(result->first, attribute.first);
133 set_type::const_iterator result;
136 return result->second;
159 adobe::set_union(*set_m, *other_set.set_m, std::back_inserter(merged.set_m.write()),
177 set_type::iterator result;
180 result->second = attribute.second;
182 set_m.write().insert(result, attribute);
195 template <
typename I>
197 for (; first != last; ++first)
// Reports whether the attribute set currently holds no entries.
// Const member: it only reads through set_m and forwards to the underlying
// container's empty(); nothing is modified.
// Returns true when set_m->empty() is true, false otherwise.
254 inline bool empty()
const {
return set_m->empty(); }
// Removes every entry from the attribute set by calling clear() on the object
// returned by set_m.write().
// NOTE(review): set_m.write() presumably yields a mutable instance of the
// underlying storage before it is cleared; confirm against set_m's declaration.
280 void clear() { set_m.write().clear(); }
304 return x.set_m->size() == y.set_m->size() && x.
count_same(y) == x.set_m->size();
321 attribute_set_t::set_type::const_iterator first(attribute_set.set_m->begin());
322 attribute_set_t::set_type::const_iterator last(attribute_set.set_m->end());
323 bool not_first(
false);
325 for (; first != last; ++first) {
331 adobe::copy(first->first, std::ostream_iterator<char>(s));
335 adobe::copy(first->second, std::ostream_iterator<char>(s));
346 bool mapped_matters)
const {
347 std::size_t result(0);
359 std::cerr <<
" count_same:\n"
360 <<
" orig: " << *
this <<
"\n"
361 <<
" test: " << other_set <<
"\n"
362 <<
" result: " << result << std::endl;
371 attribute_set_t::set_type::const_iterator first(set_m->begin());
372 attribute_set_t::set_type::const_iterator last(set_m->end());
374 for (; first != last; ++first) {
375 set_type::const_iterator result;
378 !token_range_equal(result->second, first->second))
388 attribute_set_t::set_type::const_iterator first(set_m->begin());
389 attribute_set_t::set_type::const_iterator last(set_m->end());
390 std::size_t collision_count(0);
392 for (; first != last; ++first) {
393 set_type::const_iterator result;
395 if (other_set.
lower_bound(*first, result) && result->second != first->second)
399 return collision_count;
407 const token_range_t& name,
409 const token_range_t& value);
426 token_stream_m(first, last, position), preorder_mode_m(false) {}
430 token_stream_m(rhs.token_stream_m), preorder_mode_m(rhs.preorder_mode_m) {}
436 token_stream_m = rhs.token_stream_m;
437 preorder_mode_m = rhs.preorder_mode_m;
549 bool is_token(xml_lex_token_set_t name, token_range_t& value);
561 throw_parser_exception(token_to_string(found), token_to_string(expected),
next_position());
566 bool is_e_tag(token_range_t& name, token_range_t& close_tag);
575 const token_range_t& content,
bool preorder_parent);
582 xml_lex_t token_stream_m;
583 bool preorder_mode_m;
589 const token_range_t& ,
591 const token_range_t& ) {
592 return entire_element_range;
598 const token_range_t& ,
600 const token_range_t& value) {
607 const token_range_t& name,
609 const token_range_t& value) {
610 if (token_range_equal(name, static_token_range(
"br")) && attribute_set.
empty() &&
611 adobe::token_range_size(value) == 0) {
612#if ADOBE_PLATFORM_WIN
613 return static_token_range(
"&cr;&lf;");
614#elif ADOBE_PLATFORM_MAC
615 return static_token_range(
"&cr;");
616#elif ADOBE_PLATFORM_UNIX || ADOBE_PLATFORM_LINUX || ADOBE_PLATFORM_BSD || \
617 ADOBE_PLATFORM_SOLARIS || ADOBE_PLATFORM_IRIX || ADOBE_PLATFORM_HPUX || \
618 ADOBE_PLATFORM_CYGWIN || ADOBE_PLATFORM_AIX
619 return static_token_range(
"&lf;");
622 "Line ending for platform unknown - please configure and report the results to stlab.adobe.com"
631namespace implementation {
635token_range_t transform_reference(
const token_range_t& reference);
647 if (result.enum_m == token_name) {
648 token_range = result.range_m;
664 if (result.enum_m == token_name)
678 if (result.enum_m != token_name)
681 token_range = result.range_m;
690 if (result.enum_m != token_name)
698 const token_range_t& old_element,
699 const token_range_t& start_tag,
701 const token_range_t& content,
bool preorder_parent) {
702 if (preorder_parent) {
708 token_range_t new_content(
callback_m(old_element, start_tag, attribute_set, content));
710 if (old_element == new_content) {
729 preorder_mode_m =
false;
736 result_element = old_element;
748 token_range_t open_tag;
749 token_range_t close_tag;
751 if (!
is_token(xml_token_open_tag_k, open_tag))
754 token_range_t start_tag;
755 token_range_t end_tag;
759 bool preorder_parent(
false);
769 if (!preorder_mode_m &&
pred_m) {
773 preorder_mode_m =
pred_m(start_tag);
780 preorder_parent = preorder_mode_m;
785 if (
is_token(xml_token_slash_close_tag_k, close_tag)) {
786 if (preorder_mode_m) {
788 attribute_set, token_range_t(), preorder_parent);
793 token_range_t result(
callback_m(token_range_t(open_tag.first, close_tag.second),
794 start_tag, attribute_set, token_range_t()));
802 token_range_t close_of_open_tag;
806 token_range_t content;
812 if (!preorder_mode_m)
813 std::copy(open_tag.first, close_of_open_tag.second,
output_m);
816 throw std::runtime_error(
"Content expected but not found.");
819 throw std::runtime_error(
"End tag expected but not found.");
821 if (!token_range_equal(start_tag, end_tag))
822 throw std::runtime_error(
"Start tag and end tag do not have the same name.");
824 if (!preorder_mode_m) {
836 attribute_set, content, preorder_parent);
846 content = token_range_t();
848 token_range_t char_data;
856 if (
is_token(xml_token_char_data_k, char_data)) {
861 if (preorder_mode_m) {
873 token_range_t result;
875 if (
is_token(xml_token_reference_k, result)) {
876 if (adobe::token_range_size(result)) {
877 if (preorder_mode_m) {
883 content.first = result.first;
885 content.second = result.second;
895 if (adobe::token_range_size(result)) {
896 if (preorder_mode_m) {
902 content.first = result.first;
904 content.second = result.second;
912 }
else if (
is_token(xml_token_comment_k, result)) {
923 if (
is_token(xml_token_char_data_k, char_data)) {
929 if (preorder_mode_m) {
930 content.second = char_data.second;
944 if (!
is_token(xml_token_open_slash_tag_k))
958 token_range_t att_name;
959 token_range_t att_value;
962 attribute_set.
insert(att_name, att_value);
972 token_range_t xml_decl;
992 const token_range_t utf8_bom = static_token_range(
"\xEF\xBB\xBF");
993 const token_range_t utf16_be_bom = static_token_range(
"\xFE\xFF");
994 const token_range_t utf16_le_bom = static_token_range(
"\xFF\xFE");
999 token_stream_m.set_skip_white_space(
false);
1001 if (
is_token(xml_token_char_data_k, bom)) {
1002 if (adobe::token_range_size(utf8_bom) <= adobe::token_range_size(bom) &&
1004 bom.second = bom.first;
1005 std::advance(bom.second, adobe::token_range_size(utf8_bom));
1008 }
else if (adobe::token_range_size(utf16_be_bom) <= adobe::token_range_size(bom) &&
1011 throw_exception(
"utf16be bom encountered; xml_parser_t only supports utf8 encoding");
1012 }
else if (adobe::token_range_size(utf16_le_bom) <= adobe::token_range_size(bom) &&
1015 throw_exception(
"utf16le bom encountered; xml_parser_t only supports utf8 encoding");
1019 token_stream_m.set_skip_white_space(
true);
1026template <
typename O>
1028 if (
is_token(xml_token_processing_instruction_k, xml_decl)) {
1040template <
typename O>
1042 if (
is_token(xml_token_name_k, name)) {
1055template <
typename O>
1059 token_range_t dummy;
1061 token_stream_m.set_skip_white_space(
false);
1069template <
typename O>
1071 token_range_t content;
1073 token_stream_m.set_skip_white_space(
false);
1079 if (adobe::token_range_size(content)) {
1080 token_range_t result(
1092template <
typename O>
1094 token_range_t dummy;
1096 token_stream_m.set_skip_white_space(
true);
1123template <
typename O>
1128 return xml_parser_t<O>(first, last, position, predicate, callback, output);
1137template <
typename Result,
typename InputIterator>
1138InputIterator
xatoi(InputIterator first, InputIterator last, Result& result) {
1141 while (first != last && std::isxdigit(*first)) {
1142 typename std::iterator_traits<InputIterator>::value_type c(*first);
1146 if (std::isdigit(c)) {
1149 c = std::use_facet<std::ctype<char>>(std::locale()).tolower(c);
1151 result += c -
'a' + 10;
1166template <
typename Result,
typename InputIterator>
1167InputIterator
datoi(InputIterator first, InputIterator last, Result& result) {
1170 while (first != last && std::isdigit(*first)) {
1173 result += *first -
'0';
A relatively lightweight and simple xml (subset) parser.
xml_lex_t::token_type token_type
xml_parser_t & operator=(const xml_parser_t &rhs)
void throw_exception(xml_lex_token_set_t found, xml_lex_token_set_t expected)
bool is_token(xml_lex_token_set_t name, token_range_t &value)
bool is_content(token_range_t &element)
void content_callback(token_range_t &result_element, const token_range_t &old_element, const token_range_t &start_tag, const attribute_set_t attribute_set, const token_range_t &content, bool preorder_parent)
bool is_bom(token_range_t &bom)
std::function< bool(const token_range_t &)> preorder_predicate_t
void parse_element_sequence()
bool is_element(token_range_t &element)
void require_token(xml_lex_token_set_t name)
void require_token(xml_lex_token_set_t name, token_range_t &value)
xml_parser_t(const xml_parser_t &rhs)
xml_element_proc_t callback_proc_t
callback_proc_t callback_m
bool is_attribute(token_range_t &name, token_range_t &value)
const token_type & get_token()
void throw_exception(const char *error_string)
bool is_attribute_set(attribute_set_t &attribute_set)
bool is_xml_decl(token_range_t &xml_decl)
void set_preorder_predicate(preorder_predicate_t pred)
xml_parser_t(uchar_ptr_t first, uchar_ptr_t last, const line_position_t &position, preorder_predicate_t predicate, callback_proc_t callback, O output)
preorder_predicate_t pred_m
bool is_e_tag(token_range_t &name, token_range_t &close_tag)
xml_parser_t< O > make_xml_parser(uchar_ptr_t first, uchar_ptr_t last, const line_position_t &position, typename xml_parser_t< O >::preorder_predicate_t predicate, typename xml_parser_t< O >::callback_proc_t callback, O output)
Create an object that will parse the indicated content range using the preorder and content functions...
const line_position_t & next_position()
bool is_token(xml_lex_token_set_t name)
std::function< implementation_xml_element_proc_t > xml_element_proc_t
OutputIterator copy(const InputRange &range, OutputIterator result)
copy implementation
bool equal(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, BinaryPredicate pred)
std::tuple_element< I, T > element
Deprecated, use std::tuple_element instead.
OutputIterator set_union(const InputRange1 &range1, const InputRange2 &range2, OutputIterator result)
set implementation
OutputIterator set_intersection(const InputRange1 &range1, const InputRange2 &range2, OutputIterator result)
set implementation
I lower_bound(I f, I l, const T &x)
InputIterator xatoi(InputIterator first, InputIterator last, Result &result)
InputIterator datoi(InputIterator first, InputIterator last, Result &result)
token_range_t xml_element_echo(const token_range_t &entire_element_range, const token_range_t &, const attribute_set_t &, const token_range_t &)
token_range_t xml_element_linefeed(const token_range_t &, const token_range_t &name, const attribute_set_t &attribute_set, const token_range_t &value)
stlab::copy_on_write< T > copy_on_write
token_range_t implementation_xml_element_proc_t(const token_range_t &entire_element_range, const token_range_t &name, const attribute_set_t &attribute_set, const token_range_t &value)
token_range_t xml_element_strip(const token_range_t &, const token_range_t &, const attribute_set_t &, const token_range_t &value)
bool operator()(const value_type &x, const value_type &y) const
bool operator()(const value_type &x, const value_type &y) const
An associative array based on adobe::token_range_t. A utility class for the xml_parser_t.
std::size_t count_same(const attribute_set_t &other_set, bool mapped_matters=true) const
bool lower_bound(const key_type &key, set_type::iterator &result)
const_iterator begin() const
bool has_collisions(const attribute_set_t &other_set) const
token_range_t mapped_type
bool lower_bound(const value_type &attribute, set_type::const_iterator &result) const
std::pair< key_type, mapped_type > value_type
std::size_t count_collisions(const attribute_set_t &other_set) const
friend std::ostream & operator<<(std::ostream &s, const attribute_set_t &attribute_set)
mapped_type operator[](const key_type &key) const
std::vector< value_type > set_type
set_type::size_type size_type
void insert(I first, I last)
bool lower_bound(const key_type &key, set_type::const_iterator &result) const
void insert(const value_type &attribute)
const_iterator end() const
void insert(const key_type &key, const mapped_type &value)
set_type::const_iterator const_iterator
friend bool operator==(const attribute_set_t &x, const attribute_set_t &y)
attribute_set_t merge(const attribute_set_t &other_set) const
bool lower_bound(const value_type &attribute, set_type::iterator &result)
A type detailing parser position information.