Orcus
sax_ns_parser.hpp
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
6 */
7
8#ifndef INCLUDED_ORCUS_SAX_NS_PARSER_HPP
9#define INCLUDED_ORCUS_SAX_NS_PARSER_HPP
10
11#include "sax_parser.hpp"
12#include "xml_namespace.hpp"
13#include "global.hpp"
14
15#include <unordered_set>
16#include <vector>
17#include <memory>
18#include <algorithm>
19
20namespace orcus {
21
23{
24 xmlns_id_t ns; // element namespace
25 std::string_view ns_alias; // element namespace alias
26 std::string_view name; // element name
27 std::ptrdiff_t begin_pos; // position of the opening brace '<'.
28 std::ptrdiff_t end_pos; // position of the char after the closing brace '>'.
29};
30
32{
33 xmlns_id_t ns; // attribute namespace
34 std::string_view ns_alias; // attribute namespace alias
35 std::string_view name; // attribute name
36 std::string_view value; // attribute value
37 bool transient; // whether or not the attribute value is transient.
38};
39
40namespace __sax {
41
/**
 * Pair of a namespace string and a local name, used as the key type when
 * tracking which attribute names have already been seen.
 *
 * Both members are non-owning views; the referenced character data must
 * outlive the entity_name instance.
 */
struct entity_name
{
    std::string_view ns;
    std::string_view name;

    entity_name(std::string_view _ns, std::string_view _name) :
        ns(_ns), name(_name) {}

    /** Two names are equal only when both the namespace and the name match. */
    bool operator== (const entity_name& other) const
    {
        return ns == other.ns && name == other.name;
    }

    /** Hash functor suitable for use with std::unordered_set. */
    struct hash
    {
        std::size_t operator() (const entity_name& v) const
        {
            std::hash<std::string_view> hasher;
            const std::size_t ns_hash = hasher(v.ns);
            const std::size_t name_hash = hasher(v.name);

            // Order-sensitive combination: a plain sum would give (a, b) and
            // (b, a) the same hash value and force them into one bucket.
            return ns_hash ^ (name_hash + 0x9e3779b9u + (ns_hash << 6) + (ns_hash >> 2));
        }
    };
};
64
65typedef std::unordered_set<std::string_view> ns_keys_type;
66typedef std::unordered_set<entity_name, entity_name::hash> entity_names_type;
67
69{
70 xmlns_id_t ns;
71 std::string_view name;
72 ns_keys_type ns_keys;
73};
74
75typedef std::vector<std::unique_ptr<elem_scope>> elem_scopes_type;
76
78{
79 xmlns_context& m_cxt;
80public:
81 pop_ns_by_key(xmlns_context& cxt) : m_cxt(cxt) {}
82 void operator() (std::string_view key)
83 {
84 m_cxt.pop(key);
85 }
86};
87
88}
89
91{
92public:
93 void doctype(const orcus::sax::doctype_declaration& /*dtd*/) {}
94
95 void start_declaration(std::string_view /*decl*/) {}
96
97 void end_declaration(std::string_view /*decl*/) {}
98
99 void start_element(const orcus::sax_ns_parser_element& /*elem*/) {}
100
101 void end_element(const orcus::sax_ns_parser_element& /*elem*/) {}
102
103 void characters(std::string_view /*val*/, bool /*transient*/) {}
104
105 void attribute(std::string_view /*name*/, std::string_view /*val*/) {}
106
107 void attribute(const orcus::sax_ns_parser_attribute& /*attr*/) {}
108};
109
113template<typename _Handler>
115{
116public:
117 typedef _Handler handler_type;
118
119 sax_ns_parser(const char* content, const size_t size, xmlns_context& ns_cxt, handler_type& handler);
120 sax_ns_parser(const char* content, const size_t size, bool transient_stream,
121 xmlns_context& ns_cxt, handler_type& handler);
123
124 void parse();
125
126private:
131 class handler_wrapper
132 {
133 __sax::elem_scopes_type m_scopes;
134 __sax::ns_keys_type m_ns_keys;
135 __sax::entity_names_type m_attrs;
136
139
140 xmlns_context& m_ns_cxt;
141 handler_type& m_handler;
142
143 bool m_declaration;
144
145 public:
146 handler_wrapper(xmlns_context& ns_cxt, handler_type& handler) : m_ns_cxt(ns_cxt), m_handler(handler), m_declaration(false) {}
147
148 void doctype(const sax::doctype_declaration& dtd)
149 {
150 m_handler.doctype(dtd);
151 }
152
153 void start_declaration(std::string_view name)
154 {
155 m_declaration = true;
156 m_handler.start_declaration(name);
157 }
158
159 void end_declaration(std::string_view name)
160 {
161 m_declaration = false;
162 m_handler.end_declaration(name);
163 }
164
165 void start_element(const sax::parser_element& elem)
166 {
167 m_scopes.push_back(std::make_unique<__sax::elem_scope>());
168 __sax::elem_scope& scope = *m_scopes.back();
169 scope.ns = m_ns_cxt.get(elem.ns);
170 scope.name = elem.name;
171 scope.ns_keys.swap(m_ns_keys);
172
173 m_elem.ns = scope.ns;
174 m_elem.ns_alias = elem.ns;
175 m_elem.name = scope.name;
176 m_elem.begin_pos = elem.begin_pos;
177 m_elem.end_pos = elem.end_pos;
178 m_handler.start_element(m_elem);
179
180 m_attrs.clear();
181 }
182
183 void end_element(const sax::parser_element& elem)
184 {
185 __sax::elem_scope& scope = *m_scopes.back();
186 if (scope.ns != m_ns_cxt.get(elem.ns) || scope.name != elem.name)
187 throw sax::malformed_xml_error("mis-matching closing element.", -1);
188
189 m_elem.ns = scope.ns;
190 m_elem.ns_alias = elem.ns;
191 m_elem.name = scope.name;
192 m_elem.begin_pos = elem.begin_pos;
193 m_elem.end_pos = elem.end_pos;
194 m_handler.end_element(m_elem);
195
196 // Pop all namespaces declared in this scope.
197 std::for_each(scope.ns_keys.begin(), scope.ns_keys.end(), __sax::pop_ns_by_key(m_ns_cxt));
198
199 m_scopes.pop_back();
200 }
201
202 void characters(std::string_view val, bool transient)
203 {
204 m_handler.characters(val, transient);
205 }
206
207 void attribute(const sax::parser_attribute& attr)
208 {
209 if (m_declaration)
210 {
211 // XML declaration attribute. Pass it through to the handler without namespace.
212 m_handler.attribute(attr.name, attr.value);
213 return;
214 }
215
216 if (m_attrs.count(__sax::entity_name(attr.ns, attr.name)) > 0)
218 "You can't define two attributes of the same name in the same element.", -1);
219
220 m_attrs.insert(__sax::entity_name(attr.ns, attr.name));
221
222 if (attr.ns.empty() && attr.name == "xmlns")
223 {
224 // Default namespace
225 m_ns_cxt.push(std::string_view{}, attr.value);
226 m_ns_keys.insert(std::string_view{});
227 return;
228 }
229
230 if (attr.ns == "xmlns")
231 {
232 // Namespace alias
233 if (!attr.name.empty())
234 {
235 m_ns_cxt.push(attr.name, attr.value);
236 m_ns_keys.insert(attr.name);
237 }
238 return;
239 }
240
241 m_attr.ns = attr.ns.empty() ? XMLNS_UNKNOWN_ID : m_ns_cxt.get(attr.ns);
242 m_attr.ns_alias = attr.ns;
243 m_attr.name = attr.name;
244 m_attr.value = attr.value;
245 m_attr.transient = attr.transient;
246 m_handler.attribute(m_attr);
247 }
248 };
249
250private:
251 handler_wrapper m_wrapper;
253};
254
255template<typename _Handler>
257 const char* content, const size_t size, xmlns_context& ns_cxt, handler_type& handler) :
258 m_wrapper(ns_cxt, handler), m_parser(content, size, m_wrapper)
259{
260}
261
262template<typename _Handler>
263sax_ns_parser<_Handler>::sax_ns_parser(
264 const char* content, const size_t size, bool transient_stream, xmlns_context& ns_cxt, handler_type& handler) :
265 m_wrapper(ns_cxt, handler), m_parser(content, size, transient_stream, m_wrapper)
266{
267}
268
269template<typename _Handler>
270sax_ns_parser<_Handler>::~sax_ns_parser()
271{
272}
273
274template<typename _Handler>
275void sax_ns_parser<_Handler>::parse()
276{
277 m_parser.parse();
278}
279
280}
281
282#endif
283/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Definition: sax_ns_parser.hpp:78
Definition: sax_parser_base.hpp:33
Definition: sax_ns_parser.hpp:91
Definition: sax_ns_parser.hpp:115
Definition: sax_parser.hpp:121
Definition: xml_namespace.hpp:82
xmlns_id_t get(std::string_view key) const
Definition: sax_ns_parser.hpp:69
Definition: sax_ns_parser.hpp:56
Definition: sax_ns_parser.hpp:43
Definition: sax_parser_base.hpp:45
Definition: sax_parser_base.hpp:100
Definition: sax_parser_base.hpp:85
Definition: sax_ns_parser.hpp:32
Definition: sax_ns_parser.hpp:23