Adobe Source Libraries 1.49.0
A collection of C++ libraries.
Loading...
Searching...
No Matches
xml_parser.hpp
Go to the documentation of this file.
1/*
2 Copyright 2013 Adobe
3 Distributed under the Boost Software License, Version 1.0.
4 (See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
5*/
6/**************************************************************************************************/
7
8#ifndef ADOBE_XML_PARSER_HPP
9#define ADOBE_XML_PARSER_HPP
10
11/**************************************************************************************************/
12
13#include <adobe/config.hpp>
14
16#include <adobe/any_regular.hpp>
17#include <adobe/array.hpp>
19#include <adobe/dictionary.hpp>
20#include <adobe/implementation/parser_shared.hpp>
21#include <adobe/implementation/xml_lex.hpp>
22#include <adobe/implementation/xml_token.hpp>
23#include <adobe/istream.hpp>
24#include <adobe/name.hpp>
25#include <adobe/string.hpp>
26
27#include <boost/iterator/iterator_facade.hpp>
28#include <boost/noncopyable.hpp>
29#include <boost/operators.hpp>
30
31#include <cassert>
32#include <functional>
33#include <utility>
34
35/**************************************************************************************************/
36
37namespace adobe {
38
39/**************************************************************************************************/
40
41// NOTE (fbrereto) : Class declaration for the documentation is in xml_parser.dox
// Collection of XML attributes stored as (name, value) pairs of token_range_t in a std::vector.
// insert() places each new pair at the position reported by lower_bound(), so the vector stays
// ordered by attribute name and lookups go through adobe::lower_bound.
42struct attribute_set_t : public boost::equality_comparable<attribute_set_t> {
43 typedef token_range_t key_type;
44 typedef token_range_t mapped_type;
45 typedef std::pair<key_type, mapped_type> value_type;
46 typedef std::vector<value_type> set_type;
47 typedef set_type::size_type size_type;
48 typedef set_type::const_iterator const_iterator;
50
// Strict ordering over whole attributes: by name first, then by value when the names tie.
57 struct less_t {
58 bool operator()(const value_type& x, const value_type& y) const {
59 return token_range_less(x.first, y.first) ||
60 (!token_range_less(y.first, x.first) && token_range_less(x.second, y.second));
61 }
62 };
63
// Ordering by attribute name only.
// NOTE(review): the header of the enclosing struct is missing from this listing; presumably it
// is `struct less_key_only_t {`, the comparator name used by lower_bound() below - confirm.
70 bool operator()(const value_type& x, const value_type& y) const {
71 return token_range_less(x.first, y.first);
72 }
73 };
74
// Finds the first stored attribute whose name is not less than attribute.first and stores its
// position in result. Returns true only when that position holds an attribute with an equal name.
86 bool lower_bound(const value_type& attribute, set_type::iterator& result) {
87 result = adobe::lower_bound(set_m.write(), attribute, less_key_only_t());
88
89 return result != set_m.write().end() && token_range_equal(result->first, attribute.first);
90 }
91
// Name-only convenience overload; the mapped part of the probe is default constructed.
103 bool lower_bound(const key_type& key, set_type::iterator& result) {
104 return lower_bound(value_type(key, mapped_type()), result);
105 }
106
// Read-only counterpart of the overload above.
110 bool lower_bound(const value_type& attribute, set_type::const_iterator& result) const {
111 result = adobe::lower_bound(*set_m, attribute, less_key_only_t());
112
113 return result != set_m->end() && token_range_equal(result->first, attribute.first);
114 }
115
// Read-only, name-only convenience overload.
119 bool lower_bound(const key_type& key, set_type::const_iterator& result) const {
120 return lower_bound(value_type(key, mapped_type()), result);
121 }
122
// Returns the value stored for key, or a default-constructed mapped_type when key is absent.
132 mapped_type operator[](const key_type& key) const {
133 set_type::const_iterator result;
134
135 if (lower_bound(key, result))
136 return result->second;
137
138 return mapped_type();
139 }
140
// Builds a new set from the union of this set and other_set.
// NOTE(review): the set_union call below is cut off in this listing (its last argument and the
// closing `);` are missing), so the comparator that decides which duplicate wins is not visible.
155 attribute_set_t merge(const attribute_set_t& other_set) const {
156
157 attribute_set_t merged;
158
159 adobe::set_union(*set_m, *other_set.set_m, std::back_inserter(merged.set_m.write()),
161
162 return merged;
163 }
164
// Adds attribute; when an attribute with the same name already exists only its value is replaced.
176 void insert(const value_type& attribute) {
177 set_type::iterator result;
178
179 if (lower_bound(attribute, result))
180 result->second = attribute.second;
181 else
182 set_m.write().insert(result, attribute);
183 }
184
// Inserts every attribute in [first, last), one at a time, with the single-attribute rules.
195 template <typename I> // I models InputIterator
196 inline void insert(I first, I last) {
197 for (; first != last; ++first)
198 insert(*first);
199 }
200
// Convenience overload taking the name and value separately.
209 inline void insert(const key_type& key, const mapped_type& value) {
210 insert(value_type(key, value));
211 }
212
// Number of attributes shared with other_set; mapped_matters selects whether values must match
// as well as names (see the definition further down).
223 std::size_t count_same(const attribute_set_t& other_set, bool mapped_matters = true) const;
224
// True when some name is present in both sets with different values.
238 bool has_collisions(const attribute_set_t& other_set) const;
239
// Number of names present in both sets with different values.
249 std::size_t count_collisions(const attribute_set_t& other_set) const;
250
254 inline bool empty() const { return set_m->empty(); }
255
260 inline size_type size() const { return set_m->size(); }
261
266 const_iterator begin() const { return set_m->begin(); }
267
272 const_iterator end() const { return set_m->end(); }
273
// Removes every attribute.
280 void clear() { set_m.write().clear(); }
281
282private:
283 friend bool operator==(const attribute_set_t& x, const attribute_set_t& y);
284 friend std::ostream& operator<<(std::ostream& s, const attribute_set_t& attribute_set);
285
// NOTE(review): the declaration of the set_m data member used throughout this struct is missing
// from this listing (it would sit here, on listing line 286).
287};
288
289/**************************************************************************************************/
290
303inline bool operator==(const attribute_set_t& x, const attribute_set_t& y) {
304 return x.set_m->size() == y.set_m->size() && x.count_same(y) == x.set_m->size();
305}
306
307/**************************************************************************************************/
308
320inline std::ostream& operator<<(std::ostream& s, const attribute_set_t& attribute_set) {
321 attribute_set_t::set_type::const_iterator first(attribute_set.set_m->begin());
322 attribute_set_t::set_type::const_iterator last(attribute_set.set_m->end());
323 bool not_first(false);
324
325 for (; first != last; ++first) {
326 if (not_first)
327 s << " ";
328 else
329 not_first = true;
330
331 adobe::copy(first->first, std::ostream_iterator<char>(s));
332
333 s << "='";
334
335 adobe::copy(first->second, std::ostream_iterator<char>(s));
336
337 s << "'";
338 }
339
340 return s;
341}
342
343/**************************************************************************************************/
344
// Counts the attributes this set has in common with other_set by running adobe::set_intersection
// into a counting_output_iterator and reading back its count().
// With mapped_matters the comparison uses less_t, so both name and value take part.
// NOTE(review): the comparator argument of the second call (listing line 355) is missing from
// this listing; presumably it is the name-only comparator - confirm against the real header.
345inline std::size_t attribute_set_t::count_same(const attribute_set_t& other_set,
346 bool mapped_matters) const {
347 std::size_t result(0);
348
349 if (mapped_matters)
350 result =
351 adobe::set_intersection(*set_m, *other_set.set_m, counting_output_iterator(), less_t())
352 .count();
353 else
354 result = adobe::set_intersection(*set_m, *other_set.set_m, counting_output_iterator(),
356 .count();
357
// Disabled debugging trace of both sets and the computed count.
358#if 0
359 std::cerr << " count_same:\n"
360 << " orig: " << *this << "\n"
361 << " test: " << other_set << "\n"
362 << " result: " << result << std::endl;
363#endif
364
365 return result;
366}
367
368/**************************************************************************************************/
369
370inline bool attribute_set_t::has_collisions(const attribute_set_t& other_set) const {
371 attribute_set_t::set_type::const_iterator first(set_m->begin());
372 attribute_set_t::set_type::const_iterator last(set_m->end());
373
374 for (; first != last; ++first) {
375 set_type::const_iterator result;
376
377 if (other_set.lower_bound(*first, result) &&
378 !token_range_equal(result->second, first->second))
379 return true;
380 }
381
382 return false;
383}
384
385/**************************************************************************************************/
386
387inline std::size_t attribute_set_t::count_collisions(const attribute_set_t& other_set) const {
388 attribute_set_t::set_type::const_iterator first(set_m->begin());
389 attribute_set_t::set_type::const_iterator last(set_m->end());
390 std::size_t collision_count(0);
391
392 for (; first != last; ++first) {
393 set_type::const_iterator result;
394
395 if (other_set.lower_bound(*first, result) && result->second != first->second)
396 ++collision_count;
397 }
398
399 return collision_count;
400}
401
402/**************************************************************************************************/
403
404// REVISIT (sparent) : Extra typedef just for the doxygen tool.
405
406typedef token_range_t(implementation_xml_element_proc_t)(const token_range_t& entire_element_range,
407 const token_range_t& name,
408 const attribute_set_t& attribute_set,
409 const token_range_t& value);
410
411using xml_element_proc_t = std::function<implementation_xml_element_proc_t>;
412
413/**************************************************************************************************/
414
415// NOTE (fbrereto) : Class declaration for the documentation is in xml_parser.dox
// Parser over a range of characters that drives an xml_lex_t token stream, hands elements to a
// client callback and copies the resulting ranges to an output iterator of type O.
// NOTE(review): several lines of this class are missing from this listing: the callback_proc_t
// typedef, the headers of the copy constructor and assignment operator, the public parse entry
// points, and the pred_m / callback_m / output_m data members. The comments below describe only
// what is visible.
416template <typename O> // O models OutputIterator
417class xml_parser_t : public boost::noncopyable {
418public:
// Predicate called with an element's start-tag name; returning true switches the parser into
// preorder mode for that element (see is_element).
420 using preorder_predicate_t = std::function<bool(const token_range_t&)>;
421 using token_type = xml_lex_t::token_type;
422
// Stores the predicate, callback and output, and builds the token stream over [first, last)
// starting at position. Preorder mode starts switched off.
423 xml_parser_t(uchar_ptr_t first, uchar_ptr_t last, const line_position_t& position,
424 preorder_predicate_t predicate, callback_proc_t callback, O output)
425 : pred_m(predicate), callback_m(callback), output_m(output),
426 token_stream_m(first, last, position), preorder_mode_m(false) {}
427
// NOTE(review): tail of the copy constructor's initializer list; its header is not in the listing.
430 token_stream_m(rhs.token_stream_m), preorder_mode_m(rhs.preorder_mode_m) {}
431
// NOTE(review): body of the copy assignment operator; its header and the callback_m assignment
// (listing line 434) are not in the listing.
433 pred_m = rhs.pred_m;
435 output_m = rhs.output_m;
436 token_stream_m = rhs.token_stream_m;
437 preorder_mode_m = rhs.preorder_mode_m;
438
439 return *this;
440 }
441
442 virtual ~xml_parser_t() {}
443
// Position reported by the token stream for the next token; used when raising parse errors.
444 const line_position_t& next_position() { return token_stream_m.next_position(); }
445
453
482
534
539
540 /*
541 REVISIT (sparent) : We should provide a protected call to get the token stream and allow
542 subclasses to access it directly - but for now we'll stick with the Law of Demeter.
543 */
544
545protected:
// Thin forwards to the token stream: fetch the next token / push the last one back.
546 const token_type& get_token() { return token_stream_m.get(); }
547 void putback() { token_stream_m.putback(); }
548
// is_token: consume the next token if it has the given kind, otherwise put it back.
// require_token: consume the next token and raise a parse error if it has a different kind.
549 bool is_token(xml_lex_token_set_t name, token_range_t& value);
550 bool is_token(xml_lex_token_set_t name);
551 void require_token(xml_lex_token_set_t name, token_range_t& value);
552 void require_token(xml_lex_token_set_t name);
553
554 /* REVISIT (sparent) : Should these be const? And is there a way to specify the class to throw?
555 */
556
// Both overloads forward to throw_parser_exception together with the current stream position.
557 void throw_exception(const char* error_string) {
558 throw_parser_exception(error_string, next_position());
559 }
560 void throw_exception(xml_lex_token_set_t found, xml_lex_token_set_t expected) {
561 throw_parser_exception(token_to_string(found), token_to_string(expected), next_position());
562 }
563
// Recognizers for the individual grammar productions; each is defined later in this header.
564 bool is_element(token_range_t& element);
565 bool is_content(token_range_t& element);
566 bool is_e_tag(token_range_t& name, token_range_t& close_tag);
567 bool is_attribute_set(attribute_set_t& attribute_set);
568 bool is_attribute(token_range_t& name, token_range_t& value);
569 bool is_prolog();
570 bool is_bom(token_range_t& bom);
571 bool is_xml_decl(token_range_t& xml_decl);
572
// Finishes an element that was parsed while preorder mode was active.
573 void content_callback(token_range_t& result_element, const token_range_t& old_element,
574 const token_range_t& start_tag, const attribute_set_t attribute_set,
575 const token_range_t& content, bool preorder_parent);
576
580
581private:
582 xml_lex_t token_stream_m;  // lexer producing the tokens consumed by the recognizers
583 bool preorder_mode_m;      // true while inside an element selected by the preorder predicate
584};
585
586/**************************************************************************************************/
587
588inline token_range_t xml_element_echo(const token_range_t& entire_element_range,
589 const token_range_t& /*name*/,
590 const attribute_set_t& /*attribute_set*/,
591 const token_range_t& /*value*/) {
592 return entire_element_range;
593}
594
595/**************************************************************************************************/
596
597inline token_range_t xml_element_strip(const token_range_t& /*entire_element_range*/,
598 const token_range_t& /*name*/,
599 const attribute_set_t& /*attribute_set*/,
600 const token_range_t& value) {
601 return value;
602}
603
604/**************************************************************************************************/
605
606inline token_range_t xml_element_linefeed(const token_range_t& /*entire_element_range*/,
607 const token_range_t& name,
608 const attribute_set_t& attribute_set,
609 const token_range_t& value) {
610 if (token_range_equal(name, static_token_range("br")) && attribute_set.empty() &&
611 adobe::token_range_size(value) == 0) {
612#if ADOBE_PLATFORM_WIN
613 return static_token_range("&cr;&lf;");
614#elif ADOBE_PLATFORM_MAC
615 return static_token_range("&cr;");
616#elif ADOBE_PLATFORM_UNIX || ADOBE_PLATFORM_LINUX || ADOBE_PLATFORM_BSD || \
617 ADOBE_PLATFORM_SOLARIS || ADOBE_PLATFORM_IRIX || ADOBE_PLATFORM_HPUX || \
618 ADOBE_PLATFORM_CYGWIN || ADOBE_PLATFORM_AIX
619 return static_token_range("&lf;");
620#else
621#error \
622 "Line ending for platform unknown - please configure and report the results to stlab.adobe.com"
623#endif
624 }
625
626 return value;
627}
628
629/**************************************************************************************************/
630
631namespace implementation {
632
633/**************************************************************************************************/
634
635token_range_t transform_reference(const token_range_t& reference);
636
637/**************************************************************************************************/
638
639} // namespace implementation
640
641/**************************************************************************************************/
642
643template <typename O> // O models OutputIterator
644bool xml_parser_t<O>::is_token(xml_lex_token_set_t token_name, token_range_t& token_range) {
645 const token_type& result(get_token());
646
647 if (result.enum_m == token_name) {
648 token_range = result.range_m;
649
650 return true;
651 }
652
653 putback();
654
655 return false;
656}
657
658/**************************************************************************************************/
659
660template <typename O> // O models OutputIterator
661bool xml_parser_t<O>::is_token(xml_lex_token_set_t token_name) {
662 const token_type& result(get_token());
663
664 if (result.enum_m == token_name)
665 return true;
666
667 putback();
668
669 return false;
670}
671
672/**************************************************************************************************/
673
674template <typename O> // O models OutputIterator
675void xml_parser_t<O>::require_token(xml_lex_token_set_t token_name, token_range_t& token_range) {
676 const token_type& result(get_token());
677
678 if (result.enum_m != token_name)
679 throw_exception(result.enum_m, token_name);
680
681 token_range = result.range_m;
682}
683
684/**************************************************************************************************/
685
686template <typename O> // O models OutputIterator
687void xml_parser_t<O>::require_token(xml_lex_token_set_t token_name) {
688 const token_type& result(get_token());
689
690 if (result.enum_m != token_name)
691 throw_exception(result.enum_m, token_name);
692}
693
694/**************************************************************************************************/
695
// Completes an element that was parsed while preorder mode was active.
//  - preorder_parent == true: the client callback is invoked with the whole element; its result
//    is either echoed verbatim (when it equals old_element) or parsed again by a nested parser,
//    and preorder mode is then switched off.
//  - preorder_parent == false: nothing is emitted; the whole element range is handed back to the
//    caller through result_element.
// NOTE(review): the nested parser construction below is cut off in this listing (listing line
// 721, carrying the remaining constructor arguments, is missing).
696template <typename O> // O models OutputIterator
697void xml_parser_t<O>::content_callback(token_range_t& result_element,
698 const token_range_t& old_element,
699 const token_range_t& start_tag,
700 const attribute_set_t attribute_set,
701 const token_range_t& content, bool preorder_parent) {
702 if (preorder_parent) {
703 // if we are in preorder mode and we are the preorder_parent,
704 // we send the content to the client callback function.
705 // We get back a single token_range, which we then parse all
706 // over again in a content parser all its own.
707
708 token_range_t new_content(callback_m(old_element, start_tag, attribute_set, content));
709
710 if (old_element == new_content) {
711 // In the case when the new content is the same as the old element,
712 // the user has opted to echo the element to the output unchanged.
713
714 adobe::copy(old_element, output_m);
715 } else {
716 // otherwise we need to parse the new content before we can move on to
717 // the rest of the parse. The new parser has the same predicate and
718 // output iterator as this one
719
720 xml_parser_t<O>(new_content.first, new_content.second, next_position(), pred_m,
722 .parse_content();
723 }
724
725 // once the token_range from the client has been parsed, we can turn off
726 // preorder mode and resume parsing the original token stream from where we
727 // left off.
728
729 preorder_mode_m = false; // only the preorder_parent can turn off preorder mode
730 } else {
731 // in the case we are in preorder mode but we are not the initiator of
732 // the mode, we are within the context of another preorder parse. In
733 // this case we use the entire contents of the element as the token range
734 // and hand it back as the return value of this function.
735
736 result_element = old_element;
737 }
738}
739
740/**************************************************************************************************/
741
742template <typename O> // O models OutputIterator
744 element = token_range_t();
745
746 attribute_set_t attribute_set;
747
748 token_range_t open_tag;
749 token_range_t close_tag;
750
751 if (!is_token(xml_token_open_tag_k, open_tag))
752 return false;
753
754 token_range_t start_tag;
755 token_range_t end_tag;
756
757 require_token(xml_token_name_k, start_tag);
758
759 bool preorder_parent(false); // explained below
760
761 // Preorder mode is a state for the entire parser. In this state the
762 // client processing callback is never called until the end of the
763 // current element is found. This precludes the processing of elements
764 // and other entities nested within this element from being handled until
765 // this containing element is processed. This is useful in the case when
766 // the content of the element could potentially be replaced, in which
767 // case processing the nested elements first would be a moot point.
768
769 if (!preorder_mode_m && pred_m) {
770 // preorder mode is only set when the predicate is defined and
771 // returns true for the start_tag of this element.
772
773 preorder_mode_m = pred_m(start_tag);
774
775
776 // preorder_parent is used to denote which frame in the stack began
777 // the preorder traversal, as it is this frame alone that can turn
778 // it back off again.
779
780 preorder_parent = preorder_mode_m;
781 }
782
783 is_attribute_set(attribute_set);
784
785 if (is_token(xml_token_slash_close_tag_k, close_tag)) {
786 if (preorder_mode_m) {
787 content_callback(element, token_range_t(open_tag.first, close_tag.second), start_tag,
788 attribute_set, token_range_t(), preorder_parent);
789 } else {
790 // in the case when we are not in preorder mode at all, we pass the element
791 // to the client callback and output the token_range we receive back.
792
793 token_range_t result(callback_m(token_range_t(open_tag.first, close_tag.second),
794 start_tag, attribute_set, token_range_t()));
795
796 adobe::copy(result, output_m);
797 }
798
799 return true;
800 }
801
802 token_range_t close_of_open_tag;
803
804 require_token(xml_token_close_tag_k, close_of_open_tag);
805
806 token_range_t content;
807
808 // In the case of inorder parsing we want to output the tags
809 // as we see them; in this case we need to output the opening
810 // tag before we can go on to the content parsing.
811
812 if (!preorder_mode_m)
813 std::copy(open_tag.first, close_of_open_tag.second, output_m);
814
815 if (!is_content(content))
816 throw std::runtime_error("Content expected but not found.");
817
818 if (!is_e_tag(end_tag, close_tag))
819 throw std::runtime_error("End tag expected but not found.");
820
821 if (!token_range_equal(start_tag, end_tag))
822 throw std::runtime_error("Start tag and end tag do not have the same name.");
823
824 if (!preorder_mode_m) {
825 // in the case when we are not in preorder mode
826 // we output the content we have immediately,
827 // then we need to output the closing tag before
828 // we can go on to the rest of the parse.
829
830 adobe::copy(content, output_m);
831 adobe::copy(token_range_t(end_tag.first - 2, end_tag.second + 1), output_m);
832 } else {
833 // In this instance we are continuing a preorder parse...
834
835 content_callback(element, token_range_t(open_tag.first, close_tag.second), start_tag,
836 attribute_set, content, preorder_parent);
837 }
838
839 return true;
840}
841
842/**************************************************************************************************/
843
// Consumes a run of content: optional leading character data followed by any number of
// references, nested elements and comments, each optionally followed by more character data.
// Outside preorder mode every piece is copied to the output as it is read; in preorder mode
// nothing is written and `content` is grown to span the pieces instead. Always returns true,
// so callers detect "nothing parsed" by checking whether `content` is empty.
844template <typename O> // O models OutputIterator
845bool xml_parser_t<O>::is_content(token_range_t& content) {
846 content = token_range_t();
847
848 token_range_t char_data;
849
850 // NOTE (fbrereto) : The content parser can never initiate a preorder mode.
851 // It can only be initiated by the parsing of a preorder
852 // element, which isn't handled here. So for the content
853 // parse we are either in preorder mode or not; we need
854 // not worry about managing it.
855
856 if (is_token(xml_token_char_data_k, char_data)) {
857 // in the case when we are in preorder mode, we are part of a nested
858 // content, and we want to use this beginning char_data token as the
859 // start of the overall content token_range.
860
861 if (preorder_mode_m) {
862 content = char_data;
863 }
864
865 // in the case when we are not in preorder mode this range of char_data
866 // needs to be sent directly to the output.
867 else {
868 adobe::copy(char_data, output_m);
869 }
870 }
871
872 while (true) {
873 token_range_t result;
874
875 if (is_token(xml_token_reference_k, result)) {
876 if (adobe::token_range_size(result)) {
877 if (preorder_mode_m) {
878 // Again, if we're in preorder mode we're not outputting
879 // but extending (possibly even starting, too) the token_range
880 // for the preorder element.
881
882 if (!content.first)
883 content.first = result.first;
884
885 content.second = result.second;
886 } else {
887 // if we're not in preorder mode, we pass the element's
888 // reference-transformed token_range result directly to
889 // the output.
890
891 adobe::copy(implementation::transform_reference(result), output_m);
892 }
893 }
894 } else if (is_element(result)) {
895 if (adobe::token_range_size(result)) {
896 if (preorder_mode_m) {
897 // Again, if we're in preorder mode we're not outputting
898 // but extending (possibly even starting, too) the token_range
899 // for the preorder element.
900
901 if (!content.first)
902 content.first = result.first;
903
904 content.second = result.second;
905 } else {
906 // if we're not in preorder mode, we pass the element's
907 // token_range result directly to the output.
908
909 adobe::copy(result, output_m);
910 }
911 }
912 } else if (is_token(xml_token_comment_k, result)) {
913 // Comments are not parsed by any client functions.
914 // They are merely ignored by the parser.
915
916 // REVISIT eberdahl - Because some clients may want to
917 // handle comments, we may want to extend the client
918 // callback system to permit a comment callback.
919 } else {
920 break;
921 }
922
923 if (is_token(xml_token_char_data_k, char_data)) {
924 // if we find more char_data at the end of the content, we
925 // either extend the preorder content data or we output
926 // the contents of the char_data directly to the output (in
927 // fullorder mode).
928
929 if (preorder_mode_m) {
930 content.second = char_data.second;
931 } else {
932 adobe::copy(char_data, output_m);
933 }
934 }
935 }
936
937 return true;
938}
939
940/**************************************************************************************************/
941
942template <typename O> // O models OutputIterator
943bool xml_parser_t<O>::is_e_tag(token_range_t& name, token_range_t& close_tag) {
944 if (!is_token(xml_token_open_slash_tag_k))
945 return false;
946
947 require_token(xml_token_name_k, name);
948
949 require_token(xml_token_close_tag_k, close_tag);
950
951 return true;
952}
953
954/**************************************************************************************************/
955
956template <typename O> // O models OutputIterator
958 token_range_t att_name;
959 token_range_t att_value;
960
961 while (is_attribute(att_name, att_value))
962 attribute_set.insert(att_name, att_value);
963
964 return true;
965}
966
967/**************************************************************************************************/
968
969template <typename O> // O models OutputIterator
971 token_range_t bom;
972 token_range_t xml_decl;
973
974 if (is_bom(bom)) {
975 // REVISIT eberdahl 2006 Jun 18 - sanity check the bom
976 }
977
978 if (is_xml_decl(xml_decl)) {
979 // REVISIT eberdahl 2006 Jun 18 - sanity check the encoding
980 // of the XMLDecl
981
982 return true;
983 }
984
985 return false;
986}
987
988/**************************************************************************************************/
989
990template <typename O> // O models OutputIterator
991bool xml_parser_t<O>::is_bom(token_range_t& bom) {
992 const token_range_t utf8_bom = static_token_range("\xEF\xBB\xBF");
993 const token_range_t utf16_be_bom = static_token_range("\xFE\xFF");
994 const token_range_t utf16_le_bom = static_token_range("\xFF\xFE");
995
996 bool result = false;
997
998 // whitespace skipping should be off when sniffing for a bom
999 token_stream_m.set_skip_white_space(false);
1000
1001 if (is_token(xml_token_char_data_k, bom)) {
1002 if (adobe::token_range_size(utf8_bom) <= adobe::token_range_size(bom) &&
1003 adobe::equal(utf8_bom, bom.first)) {
1004 bom.second = bom.first;
1005 std::advance(bom.second, adobe::token_range_size(utf8_bom));
1006
1007 result = true;
1008 } else if (adobe::token_range_size(utf16_be_bom) <= adobe::token_range_size(bom) &&
1009 adobe::equal(utf16_be_bom, bom.first)) {
1010 // it's a bom, but it's not a format the parser supports
1011 throw_exception("utf16be bom encountered; xml_parser_t only supports utf8 encoding");
1012 } else if (adobe::token_range_size(utf16_le_bom) <= adobe::token_range_size(bom) &&
1013 adobe::equal(utf16_le_bom, bom.first)) {
1014 // it's a bom, but it's not a format the parser supports
1015 throw_exception("utf16le bom encountered; xml_parser_t only supports utf8 encoding");
1016 }
1017 }
1018
1019 token_stream_m.set_skip_white_space(true);
1020
1021 return result;
1022}
1023
1024/**************************************************************************************************/
1025
1026template <typename O> // O models OutputIterator
1027bool xml_parser_t<O>::is_xml_decl(token_range_t& xml_decl) {
1028 if (is_token(xml_token_processing_instruction_k, xml_decl)) {
1029 // REVISIT eberdahl 2006 Jun 18 - sanity check that the PI
1030 // encountered is, in fact, targeted at the xml application
1031
1032 return true;
1033 }
1034
1035 return false;
1036}
1037
1038/**************************************************************************************************/
1039
1040template <typename O> // O models OutputIterator
1041bool xml_parser_t<O>::is_attribute(token_range_t& name, token_range_t& value) {
1042 if (is_token(xml_token_name_k, name)) {
1043 require_token(xml_token_equals_k);
1044
1045 require_token(xml_token_att_value_k, value);
1046
1047 return true;
1048 }
1049
1050 return false;
1051}
1052
1053/**************************************************************************************************/
1054
// Parses elements one after another for as long as is_element() succeeds, consuming any
// character data that follows each element. White-space skipping is switched off first, and a
// callback must be set (asserted).
// NOTE(review): the signature line (listing line 1056) is missing from this listing, so this
// member's name and return type are not visible here.
1055template <typename O> // O models OutputIterator
1057 assert(callback_m);
1058
1059 token_range_t dummy;
1060
1061 token_stream_m.set_skip_white_space(false);
1062
1063 while (is_element(dummy))
1064 is_token(xml_token_char_data_k);
1065}
1066
1067/**************************************************************************************************/
1068
// Repeatedly parses content with white-space skipping off. Each non-empty range handed back by
// is_content() is passed to the client callback (as both the element range and the value, with
// an empty name and attribute set) and the callback's result is copied to the output. The loop
// ends at the first empty content range.
// NOTE(review): the signature line (listing line 1070) is missing from this listing; this is
// presumably the parse_content() member invoked by content_callback() - confirm.
1069template <typename O> // O models OutputIterator
1071 token_range_t content;
1072
1073 token_stream_m.set_skip_white_space(false);
1074
1075 while (true) {
1076 // always returns true; have to test results
1077 is_content(content);
1078
1079 if (adobe::token_range_size(content)) {
1080 token_range_t result(
1081 this->callback_m(content, token_range_t(), attribute_set_t(), content));
1082
1083 adobe::copy(result, this->output_m);
1084 } else {
1085 break;
1086 }
1087 }
1088}
1089
1090/**************************************************************************************************/
1091
// Parses an optional prolog followed by a single element, with white-space skipping switched on.
// The results of is_prolog() and is_element() are not checked here.
// NOTE(review): the signature line (listing line 1093) is missing from this listing, so this
// member's name and return type are not visible here.
1092template <typename O> // O models OutputIterator
1094 token_range_t dummy;
1095
1096 token_stream_m.set_skip_white_space(true);
1097
1098 is_prolog();
1099 is_element(dummy);
1100}
1101
1102/**************************************************************************************************/
1103
1123template <typename O> // O models OutputIterator
1124inline xml_parser_t<O>
1125make_xml_parser(uchar_ptr_t first, uchar_ptr_t last, const line_position_t& position,
1126 typename xml_parser_t<O>::preorder_predicate_t predicate,
1127 typename xml_parser_t<O>::callback_proc_t callback, O output) {
1128 return xml_parser_t<O>(first, last, position, predicate, callback, output);
1129}
1130
1131/**************************************************************************************************/
// Converts the leading run of hexadecimal digits in [first, last) to an integer.
//
// `result` is reset to 0 and then accumulates one digit at a time; both 'a'-'f' and 'A'-'F' are
// accepted. Returns the iterator to the first character that is not a hexadecimal digit (or
// `last`). No overflow check is made on `result`.
//
// Digits are classified by direct comparison rather than std::isxdigit / std::isdigit: those
// functions have undefined behaviour for negative `char` values, and the comparison also avoids
// constructing a std::locale for every letter digit.
template <typename Result, typename InputIterator>
InputIterator xatoi(InputIterator first, InputIterator last, Result& result) {
    typedef typename std::iterator_traits<InputIterator>::value_type char_type;

    result = 0;

    for (; first != last; ++first) {
        const char_type c(*first);
        int digit;

        if (c >= '0' && c <= '9')
            digit = c - '0';
        else if (c >= 'a' && c <= 'f')
            digit = c - 'a' + 10;
        else if (c >= 'A' && c <= 'F')
            digit = c - 'A' + 10;
        else
            break; // first non-hex character ends the number

        result <<= 4;
        result += digit;
    }

    return first;
}
1159
1160/**************************************************************************************************/
// Converts the leading run of decimal digits in [first, last) to an integer.
//
// `result` is reset to 0 and then accumulates one digit at a time. Returns the iterator to the
// first character that is not a decimal digit (or `last`). No overflow check is made on `result`.
//
// Digits are recognised by direct comparison rather than std::isdigit, which has undefined
// behaviour for negative `char` values.
template <typename Result, typename InputIterator>
InputIterator datoi(InputIterator first, InputIterator last, Result& result) {
    result = 0;

    for (; first != last; ++first) {
        const auto c = *first;

        if (c < '0' || c > '9')
            break; // first non-digit ends the number

        result *= 10;
        result += c - '0';
    }

    return first;
}
1180
1181/**************************************************************************************************/
1182
1183} // namespace adobe
1184
1185/**************************************************************************************************/
1186
1187#endif
1188
1189/**************************************************************************************************/
A relatively lightweight and simple xml (subset) parser.
xml_lex_t::token_type token_type
xml_parser_t & operator=(const xml_parser_t &rhs)
void throw_exception(xml_lex_token_set_t found, xml_lex_token_set_t expected)
bool is_token(xml_lex_token_set_t name, token_range_t &value)
bool is_content(token_range_t &element)
void content_callback(token_range_t &result_element, const token_range_t &old_element, const token_range_t &start_tag, const attribute_set_t attribute_set, const token_range_t &content, bool preorder_parent)
bool is_bom(token_range_t &bom)
std::function< bool(const token_range_t &)> preorder_predicate_t
bool is_element(token_range_t &element)
void require_token(xml_lex_token_set_t name)
void require_token(xml_lex_token_set_t name, token_range_t &value)
xml_parser_t(const xml_parser_t &rhs)
xml_element_proc_t callback_proc_t
callback_proc_t callback_m
bool is_attribute(token_range_t &name, token_range_t &value)
const token_type & get_token()
void throw_exception(const char *error_string)
bool is_attribute_set(attribute_set_t &attribute_set)
bool is_xml_decl(token_range_t &xml_decl)
void set_preorder_predicate(preorder_predicate_t pred)
xml_parser_t(uchar_ptr_t first, uchar_ptr_t last, const line_position_t &position, preorder_predicate_t predicate, callback_proc_t callback, O output)
preorder_predicate_t pred_m
bool is_e_tag(token_range_t &name, token_range_t &close_tag)
xml_parser_t< O > make_xml_parser(uchar_ptr_t first, uchar_ptr_t last, const line_position_t &position, typename xml_parser_t< O >::preorder_predicate_t predicate, typename xml_parser_t< O >::callback_proc_t callback, O output)
Create an object that will parse the indicated content range using the preorder and content functions...
const line_position_t & next_position()
bool is_token(xml_lex_token_set_t name)
std::function< implementation_xml_element_proc_t > xml_element_proc_t
OutputIterator copy(const InputRange &range, OutputIterator result)
copy implementation
Definition copy.hpp:42
bool equal(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, BinaryPredicate pred)
Definition equal.hpp:36
std::tuple_element< I, T > element
Deprecated, use std::tuple_element instead.
OutputIterator set_union(const InputRange1 &range1, const InputRange2 &range2, OutputIterator result)
set implementation
Definition set.hpp:76
OutputIterator set_intersection(const InputRange1 &range1, const InputRange2 &range2, OutputIterator result)
set implementation
Definition set.hpp:112
I lower_bound(I f, I l, const T &x)
InputIterator xatoi(InputIterator first, InputIterator last, Result &result)
InputIterator datoi(InputIterator first, InputIterator last, Result &result)
token_range_t xml_element_echo(const token_range_t &entire_element_range, const token_range_t &, const attribute_set_t &, const token_range_t &)
token_range_t xml_element_linefeed(const token_range_t &, const token_range_t &name, const attribute_set_t &attribute_set, const token_range_t &value)
stlab::copy_on_write< T > copy_on_write
token_range_t implementation_xml_element_proc_t(const token_range_t &entire_element_range, const token_range_t &name, const attribute_set_t &attribute_set, const token_range_t &value)
token_range_t xml_element_strip(const token_range_t &, const token_range_t &, const attribute_set_t &, const token_range_t &value)
bool operator()(const value_type &x, const value_type &y) const
bool operator()(const value_type &x, const value_type &y) const
An associated array based on adobe::token_range_t. A utility class for the xml_parser_t.
std::size_t count_same(const attribute_set_t &other_set, bool mapped_matters=true) const
bool lower_bound(const key_type &key, set_type::iterator &result)
const_iterator begin() const
bool has_collisions(const attribute_set_t &other_set) const
token_range_t mapped_type
bool lower_bound(const value_type &attribute, set_type::const_iterator &result) const
std::pair< key_type, mapped_type > value_type
size_type size() const
token_range_t key_type
std::size_t count_collisions(const attribute_set_t &other_set) const
friend std::ostream & operator<<(std::ostream &s, const attribute_set_t &attribute_set)
mapped_type operator[](const key_type &key) const
std::vector< value_type > set_type
set_type::size_type size_type
void insert(I first, I last)
bool lower_bound(const key_type &key, set_type::const_iterator &result) const
void insert(const value_type &attribute)
const_iterator iterator
const_iterator end() const
void insert(const key_type &key, const mapped_type &value)
set_type::const_iterator const_iterator
friend bool operator==(const attribute_set_t &x, const attribute_set_t &y)
attribute_set_t merge(const attribute_set_t &other_set) const
bool lower_bound(const value_type &attribute, set_type::iterator &result)
A type detailing parser position information.
Definition istream.hpp:127