Orcus
sax_parser_base.hpp
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */
7
8#ifndef INCLUDED_ORCUS_SAX_PARSER_BASE_HPP
9#define INCLUDED_ORCUS_SAX_PARSER_BASE_HPP
10
11#include "env.hpp"
12#include "pstring.hpp"
13#include "cell_buffer.hpp"
14#include "parser_global.hpp"
15#include "parser_base.hpp"
16
17#include <cassert>
18#include <cstdlib>
19#include <exception>
20#include <sstream>
21#include <memory>
22
23#define ORCUS_DEBUG_SAX_PARSER 0
24
25#if ORCUS_DEBUG_SAX_PARSER
26#include <iostream>
27using std::cout;
28using std::endl;
29#endif
30
31namespace orcus { namespace sax {
32
33class ORCUS_PSR_DLLPUBLIC malformed_xml_error : public ::orcus::parse_error
34{
35public:
36 malformed_xml_error() = delete;
37 malformed_xml_error(const std::string& msg, std::ptrdiff_t offset);
38 virtual ~malformed_xml_error() throw();
39};
40
46{
47 enum class keyword_type { dtd_public, dtd_private };
48
49 keyword_type keyword;
50 pstring root_element;
51 pstring fpi;
52 pstring uri;
53};
54
66ORCUS_PSR_DLLPUBLIC char decode_xml_encoded_char(const char* p, size_t n);
67
79ORCUS_PSR_DLLPUBLIC std::string decode_xml_unicode_char(const char* p, size_t n);
80
86{
87 pstring ns; // element namespace (optional)
88 pstring name; // element name
89 std::ptrdiff_t begin_pos; // position of the opening brace '<'.
90 std::ptrdiff_t end_pos; // position of the char after the closing brace '>'.
91};
92
101{
102 pstring ns; // attribute namespace (optional)
103 pstring name; // attribute name
104 pstring value; // attribute value
105 bool transient; // whether or not the attribute value is on a temporary buffer.
106};
107
108class ORCUS_PSR_DLLPUBLIC parser_base : public ::orcus::parser_base
109{
110 struct impl;
111 std::unique_ptr<impl> mp_impl;
112
113 parser_base() = delete;
114 parser_base(const parser_base&) = delete;
115 parser_base& operator=(const parser_base&) = delete;
116protected:
117 size_t m_nest_level;
118 size_t m_buffer_pos;
119 bool m_root_elem_open:1;
120
121protected:
122 parser_base(const char* content, size_t size, bool transient_stream);
123 ~parser_base();
124
125 void next_check()
126 {
127 next();
128 if (!has_char())
129 throw malformed_xml_error("xml stream ended prematurely.", offset());
130 }
131
132 void nest_up() { ++m_nest_level; }
133 void nest_down()
134 {
135 if (m_nest_level == 0)
136 throw malformed_xml_error("incorrect nesting in xml stream", offset());
137
138 --m_nest_level;
139 }
140
141 void inc_buffer_pos();
142 void reset_buffer_pos() { m_buffer_pos = 0; }
143
144 void has_char_throw(const char* msg) const
145 {
146 if (!has_char())
147 throw malformed_xml_error(msg, offset());
148 }
149
157 inline size_t remains() const
158 {
159#if ORCUS_DEBUG_SAX_PARSER
160 if (mp_char >= mp_end)
161 throw malformed_xml_error("xml stream ended prematurely.", offset());
162#endif
163 return mp_end - mp_char;
164 }
165
166 char cur_char_checked() const
167 {
168 if (!has_char())
169 throw malformed_xml_error("xml stream ended prematurely.", offset());
170
171 return *mp_char;
172 }
173
174 char next_and_char()
175 {
176 next();
177#if ORCUS_DEBUG_SAX_PARSER
178 if (mp_char >= mp_end)
179 throw malformed_xml_error("xml stream ended prematurely.", offset());
180#endif
181 return *mp_char;
182 }
183
184 char next_char_checked()
185 {
186 next();
187 if (!has_char())
188 throw malformed_xml_error("xml stream ended prematurely.", offset());
189
190 return *mp_char;
191 }
192
193 cell_buffer& get_cell_buffer();
194
195 void comment();
196
200 void skip_bom();
201
202 void expects_next(const char* p, size_t n);
203
204 void parse_encoded_char(cell_buffer& buf);
205 void value_with_encoded_char(cell_buffer& buf, pstring& str);
206
215 bool value(pstring& str, bool decode);
216
217 void name(pstring& str);
218 void element_name(parser_element& elem, std::ptrdiff_t begin_pos);
219 void attribute_name(pstring& attr_ns, pstring& attr_name);
220 void characters_with_encoded_char(cell_buffer& buf);
221};
222
223}}
224
225#endif
226/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Definition: cell_buffer.hpp:22
Definition: parser_base.hpp:26
Definition: parser_base.hpp:40
Definition: pstring.hpp:28
Definition: sax_parser_base.hpp:34
Definition: sax_parser_base.hpp:109
bool value(pstring &str, bool decode)
size_t remains() const
Definition: sax_parser_base.hpp:157
Definition: sax_parser_base.hpp:46
Definition: sax_parser_base.hpp:101
Definition: sax_parser_base.hpp:86