8#ifndef INCLUDED_ORCUS_SAX_PARSER_HPP
9#define INCLUDED_ORCUS_SAX_PARSER_HPP
11#include "sax_parser_base.hpp"
98 (void)val; (void)
transient;
119template<
typename _Handler,
typename _Config = sax_parser_default_config>
123 typedef _Handler handler_type;
124 typedef _Config config_type;
126 sax_parser(
const char* content,
const size_t size, handler_type& handler);
127 sax_parser(
const char* content,
const size_t size,
bool transient_stream, handler_type& handler);
141 void element_open(std::ptrdiff_t begin_pos);
142 void element_close(std::ptrdiff_t begin_pos);
144 void declaration(
const char* name_check);
151 handler_type& m_handler;
154template<
typename _Handler,
typename _Config>
156 const char* content,
const size_t size, handler_type& handler) :
162template<
typename _Handler,
typename _Config>
163sax_parser<_Handler,_Config>::sax_parser(
164 const char* content,
const size_t size,
bool transient_stream, handler_type& handler) :
165 sax::parser_base(content, size, transient_stream),
170template<
typename _Handler,
typename _Config>
171sax_parser<_Handler,_Config>::~sax_parser()
175template<
typename _Handler,
typename _Config>
176void sax_parser<_Handler,_Config>::parse()
181 skip_space_and_control();
184 assert(m_buffer_pos == 0);
187template<
typename _Handler,
typename _Config>
188void sax_parser<_Handler,_Config>::header()
192 skip_space_and_control();
193 if (!has_char() || cur_char() !=
'<')
194 throw sax::malformed_xml_error(
"xml file must begin with '<'.", offset());
196 if (config_type::baseline_version >= 11)
200 if (next_char_checked() !=
'?')
201 throw sax::malformed_xml_error(
"xml file must begin with '<?'.", offset());
207template<
typename _Handler,
typename _Config>
208void sax_parser<_Handler,_Config>::body()
212 if (cur_char() ==
'<')
215 if (!m_root_elem_open)
219 else if (m_nest_level)
227template<
typename _Handler,
typename _Config>
228void sax_parser<_Handler,_Config>::element()
230 assert(cur_char() ==
'<');
231 std::ptrdiff_t pos = offset();
232 char c = next_char_checked();
242 declaration(
nullptr);
249template<
typename _Handler,
typename _Config>
250void sax_parser<_Handler,_Config>::element_open(std::ptrdiff_t begin_pos)
252 sax::parser_element elem;
253 element_name(elem, begin_pos);
257 skip_space_and_control();
262 if (next_and_char() !=
'>')
263 throw sax::malformed_xml_error(
"expected '/>' to self-close the element.", offset());
265 elem.end_pos = offset();
266 m_handler.start_element(elem);
268 m_handler.end_element(elem);
270 m_root_elem_open =
false;
271#if ORCUS_DEBUG_SAX_PARSER
272 cout <<
"element_open: ns='" << elem.ns <<
"', name='" << elem.name <<
"' (self-closing)" << endl;
280 elem.end_pos = offset();
282 m_handler.start_element(elem);
284#if ORCUS_DEBUG_SAX_PARSER
285 cout <<
"element_open: ns='" << elem.ns <<
"', name='" << elem.name <<
"'" << endl;
294template<
typename _Handler,
typename _Config>
295void sax_parser<_Handler,_Config>::element_close(std::ptrdiff_t begin_pos)
297 assert(cur_char() ==
'/');
300 sax::parser_element elem;
301 element_name(elem, begin_pos);
303 if (cur_char() !=
'>')
304 throw sax::malformed_xml_error(
"expected '>' to close the element.", offset());
306 elem.end_pos = offset();
308 m_handler.end_element(elem);
309#if ORCUS_DEBUG_SAX_PARSER
310 cout <<
"element_close: ns='" << elem.ns <<
"', name='" << elem.name <<
"'" << endl;
313 m_root_elem_open =
false;
316template<
typename _Handler,
typename _Config>
317void sax_parser<_Handler,_Config>::special_tag()
319 assert(cur_char() ==
'!');
321 size_t len = remains();
323 throw sax::malformed_xml_error(
"special tag too short.", offset());
325 switch (next_and_char())
330 if (next_and_char() !=
'-')
331 throw sax::malformed_xml_error(
"comment expected.", offset());
335 throw sax::malformed_xml_error(
"malformed comment.", offset());
344 expects_next(
"CDATA[", 6);
352 expects_next(
"OCTYPE", 6);
353 skip_space_and_control();
359 throw sax::malformed_xml_error(
"failed to parse special tag.", offset());
363template<
typename _Handler,
typename _Config>
364void sax_parser<_Handler,_Config>::declaration(
const char* name_check)
366 assert(cur_char() ==
'?');
370 std::string_view decl_name;
372#if ORCUS_DEBUG_SAX_PARSER
373 cout <<
"sax_parser::declaration: start name='" << decl_name <<
"'" << endl;
376 if (name_check && decl_name != name_check)
378 std::ostringstream os;
379 os <<
"declaration name of '" << name_check <<
"' was expected, but '" << decl_name <<
"' was found instead.";
380 throw sax::malformed_xml_error(os.str(), offset());
383 m_handler.start_declaration(decl_name);
384 skip_space_and_control();
387 while (cur_char_checked() !=
'?')
390 skip_space_and_control();
392 if (next_char_checked() !=
'>')
393 throw sax::malformed_xml_error(
"declaration must end with '?>'.", offset());
395 m_handler.end_declaration(decl_name);
398#if ORCUS_DEBUG_SAX_PARSER
399 cout <<
"sax_parser::declaration: end name='" << decl_name <<
"'" << endl;
403template<
typename _Handler,
typename _Config>
404void sax_parser<_Handler,_Config>::cdata()
406 size_t len = remains();
410 const char* p0 = mp_char;
411 size_t i = 0, match = 0;
412 for (
char c = cur_char(); i < len; ++i, c = next_and_char())
426 else if (c ==
'>' && match == 2)
429 size_t cdata_len = i - 2;
430 m_handler.characters(std::string_view(p0, cdata_len), transient_stream());
437 throw sax::malformed_xml_error(
"malformed CDATA section.", offset());
440template<
typename _Handler,
typename _Config>
441void sax_parser<_Handler,_Config>::doctype()
444 sax::doctype_declaration param;
445 name(param.root_element);
446 skip_space_and_control();
449 size_t len = remains();
451 throw sax::malformed_xml_error(
"DOCTYPE section too short.", offset());
453 param.keyword = sax::doctype_declaration::keyword_type::dtd_private;
457 if (next_and_char() !=
'U' || next_and_char() !=
'B' || next_and_char() !=
'L' || next_and_char() !=
'I' || next_and_char() !=
'C')
458 throw sax::malformed_xml_error(
"malformed DOCTYPE section.", offset());
460 param.keyword = sax::doctype_declaration::keyword_type::dtd_public;
464 if (next_and_char() !=
'Y' || next_and_char() !=
'S' || next_and_char() !=
'T' || next_and_char() !=
'E' || next_and_char() !=
'M')
465 throw sax::malformed_xml_error(
"malformed DOCTYPE section.", offset());
469 skip_space_and_control();
470 has_char_throw(
"DOCTYPE section too short.");
473 value(param.fpi,
false);
475 has_char_throw(
"DOCTYPE section too short.");
476 skip_space_and_control();
477 has_char_throw(
"DOCTYPE section too short.");
479 if (cur_char() ==
'>')
482#if ORCUS_DEBUG_SAX_PARSER
483 cout <<
"sax_parser::doctype: root='" << param.root_element <<
"', fpi='" << param.fpi <<
"'" << endl;
485 m_handler.doctype(param);
491 value(param.uri,
false);
493 has_char_throw(
"DOCTYPE section too short.");
494 skip_space_and_control();
495 has_char_throw(
"DOCTYPE section too short.");
497 if (cur_char() !=
'>')
498 throw sax::malformed_xml_error(
"malformed DOCTYPE section - closing '>' expected but not found.", offset());
500#if ORCUS_DEBUG_SAX_PARSER
501 cout <<
"sax_parser::doctype: root='" << param.root_element <<
"', fpi='" << param.fpi <<
"' uri='" << param.uri <<
"'" << endl;
503 m_handler.doctype(param);
507template<
typename _Handler,
typename _Config>
508void sax_parser<_Handler,_Config>::characters()
510 const char* p0 = mp_char;
511 for (; has_char(); next())
513 if (cur_char() ==
'<')
516 if (cur_char() ==
'&')
519 cell_buffer& buf = get_cell_buffer();
521 buf.append(p0, mp_char-p0);
522 characters_with_encoded_char(buf);
524 m_handler.characters(std::string_view{}, transient_stream());
526 m_handler.characters(std::string_view(buf.get(), buf.size()),
true);
533 std::string_view val(p0, mp_char-p0);
534 m_handler.characters(val, transient_stream());
538template<
typename _Handler,
typename _Config>
539void sax_parser<_Handler,_Config>::attribute()
541 sax::parser_attribute attr;
542 attribute_name(attr.ns, attr.name);
544#if ORCUS_DEBUG_SAX_PARSER
545 cout <<
"sax_parser::attribute: ns='" << attr.ns <<
"', name='" << attr.name <<
"'" << endl;
548 skip_space_and_control();
553 std::ostringstream os;
554 os <<
"Attribute must begin with 'name=..'. (ns='" << attr.ns <<
"', name='" << attr.name <<
"')";
555 throw sax::malformed_xml_error(os.str(), offset());
559 skip_space_and_control();
561 attr.transient = value(attr.value,
true);
566#if ORCUS_DEBUG_SAX_PARSER
567 cout <<
"sax_parser::attribute: value='" << attr.value <<
"'" << endl;
570 m_handler.attribute(attr);
Definition: parser_base.hpp:41
Definition: sax_parser_base.hpp:108
Definition: sax_parser.hpp:28
void end_declaration(std::string_view decl)
Definition: sax_parser.hpp:57
void doctype(const orcus::sax::doctype_declaration &param)
Definition: sax_parser.hpp:35
void attribute(const orcus::sax::parser_attribute &attr)
Definition: sax_parser.hpp:109
void characters(std::string_view val, bool transient)
Definition: sax_parser.hpp:96
void start_declaration(std::string_view decl)
Definition: sax_parser.hpp:47
void end_element(const orcus::sax::parser_element &elem)
Definition: sax_parser.hpp:77
void start_element(const orcus::sax::parser_element &elem)
Definition: sax_parser.hpp:67
Definition: sax_parser.hpp:121
Definition: sax_parser_base.hpp:45
Definition: sax_parser_base.hpp:100
Definition: sax_parser_base.hpp:85
Definition: sax_parser.hpp:18
static const uint8_t baseline_version
Definition: sax_parser.hpp:24