Orcus
sax_ns_parser.hpp
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
6 */
7
8#ifndef INCLUDED_ORCUS_SAX_NS_PARSER_HPP
9#define INCLUDED_ORCUS_SAX_NS_PARSER_HPP
10
11#include "sax_parser.hpp"
12#include "xml_namespace.hpp"
13#include "global.hpp"
14
15#include <unordered_set>
16#include <vector>
17#include <memory>
18#include <algorithm>
19
20namespace orcus {
21
23{
24 xmlns_id_t ns; // element namespace
25 pstring ns_alias; // element namespace alias
26 pstring name; // element name
27 std::ptrdiff_t begin_pos; // position of the opening brace '<'.
28 std::ptrdiff_t end_pos; // position of the char after the closing brace '>'.
29};
30
32{
33 xmlns_id_t ns; // attribute namespace
34 pstring ns_alias; // attribute namespace alias
35 pstring name; // attribute name
36 pstring value; // attribute value
37 bool transient; // whether or not the attribute value is transient.
38};
39
40namespace __sax {
41
43{
44 pstring ns;
45 pstring name;
46
47 entity_name(const pstring& _ns, const pstring& _name) :
48 ns(_ns), name(_name) {}
49
50 bool operator== (const entity_name& other) const
51 {
52 return other.ns == ns && other.name == name;
53 }
54
55 struct hash
56 {
57 size_t operator() (const entity_name& v) const
58 {
59 static pstring::hash hasher;
60 return hasher(v.ns) + hasher(v.name);
61 }
62 };
63};
64
65typedef std::unordered_set<pstring, pstring::hash> ns_keys_type;
66typedef std::unordered_set<entity_name, entity_name::hash> entity_names_type;
67
69{
70 xmlns_id_t ns;
71 pstring name;
72 ns_keys_type ns_keys;
73};
74
75typedef std::vector<std::unique_ptr<elem_scope>> elem_scopes_type;
76
77class pop_ns_by_key : std::unary_function<pstring, void>
78{
79 xmlns_context& m_cxt;
80public:
81 pop_ns_by_key(xmlns_context& cxt) : m_cxt(cxt) {}
82 void operator() (const pstring& key)
83 {
84 m_cxt.pop(key);
85 }
86};
87
88}
89
93template<typename _Handler>
95{
96public:
97 typedef _Handler handler_type;
98
99 sax_ns_parser(const char* content, const size_t size, xmlns_context& ns_cxt, handler_type& handler);
100 sax_ns_parser(const char* content, const size_t size, bool transient_stream,
101 xmlns_context& ns_cxt, handler_type& handler);
103
104 void parse();
105
106private:
111 class handler_wrapper
112 {
113 __sax::elem_scopes_type m_scopes;
114 __sax::ns_keys_type m_ns_keys;
115 __sax::entity_names_type m_attrs;
116
119
120 xmlns_context& m_ns_cxt;
121 handler_type& m_handler;
122
123 bool m_declaration;
124
125 public:
126 handler_wrapper(xmlns_context& ns_cxt, handler_type& handler) : m_ns_cxt(ns_cxt), m_handler(handler), m_declaration(false) {}
127
128 void doctype(const sax::doctype_declaration& dtd)
129 {
130 m_handler.doctype(dtd);
131 }
132
133 void start_declaration(const pstring& name)
134 {
135 m_declaration = true;
136 m_handler.start_declaration(name);
137 }
138
139 void end_declaration(const pstring& name)
140 {
141 m_declaration = false;
142 m_handler.end_declaration(name);
143 }
144
145 void start_element(const sax::parser_element& elem)
146 {
147 m_scopes.push_back(orcus::make_unique<__sax::elem_scope>());
148 __sax::elem_scope& scope = *m_scopes.back();
149 scope.ns = m_ns_cxt.get(elem.ns);
150 scope.name = elem.name;
151 scope.ns_keys.swap(m_ns_keys);
152
153 m_elem.ns = scope.ns;
154 m_elem.ns_alias = elem.ns;
155 m_elem.name = scope.name;
156 m_elem.begin_pos = elem.begin_pos;
157 m_elem.end_pos = elem.end_pos;
158 m_handler.start_element(m_elem);
159
160 m_attrs.clear();
161 }
162
163 void end_element(const sax::parser_element& elem)
164 {
165 __sax::elem_scope& scope = *m_scopes.back();
166 if (scope.ns != m_ns_cxt.get(elem.ns) || scope.name != elem.name)
167 throw sax::malformed_xml_error("mis-matching closing element.", -1);
168
169 m_elem.ns = scope.ns;
170 m_elem.ns_alias = elem.ns;
171 m_elem.name = scope.name;
172 m_elem.begin_pos = elem.begin_pos;
173 m_elem.end_pos = elem.end_pos;
174 m_handler.end_element(m_elem);
175
176 // Pop all namespaces declared in this scope.
177 std::for_each(scope.ns_keys.begin(), scope.ns_keys.end(), __sax::pop_ns_by_key(m_ns_cxt));
178
179 m_scopes.pop_back();
180 }
181
182 void characters(const pstring& val, bool transient)
183 {
184 m_handler.characters(val, transient);
185 }
186
187 void attribute(const sax::parser_attribute& attr)
188 {
189 if (m_declaration)
190 {
191 // XML declaration attribute. Pass it through to the handler without namespace.
192 m_handler.attribute(attr.name, attr.value);
193 return;
194 }
195
196 if (m_attrs.count(__sax::entity_name(attr.ns, attr.name)) > 0)
198 "You can't define two attributes of the same name in the same element.", -1);
199
200 m_attrs.insert(__sax::entity_name(attr.ns, attr.name));
201
202 if (attr.ns.empty() && attr.name == "xmlns")
203 {
204 // Default namespace
205 m_ns_cxt.push(pstring(), attr.value);
206 m_ns_keys.insert(pstring());
207 return;
208 }
209
210 if (attr.ns == "xmlns")
211 {
212 // Namespace alias
213 if (!attr.name.empty())
214 {
215 m_ns_cxt.push(attr.name, attr.value);
216 m_ns_keys.insert(attr.name);
217 }
218 return;
219 }
220
221 m_attr.ns = m_ns_cxt.get(attr.ns);
222 m_attr.ns_alias = attr.ns;
223 m_attr.name = attr.name;
224 m_attr.value = attr.value;
225 m_attr.transient = attr.transient;
226 m_handler.attribute(m_attr);
227 }
228 };
229
230private:
231 handler_wrapper m_wrapper;
233};
234
235template<typename _Handler>
237 const char* content, const size_t size, xmlns_context& ns_cxt, handler_type& handler) :
238 m_wrapper(ns_cxt, handler), m_parser(content, size, m_wrapper)
239{
240}
241
242template<typename _Handler>
243sax_ns_parser<_Handler>::sax_ns_parser(
244 const char* content, const size_t size, bool transient_stream, xmlns_context& ns_cxt, handler_type& handler) :
245 m_wrapper(ns_cxt, handler), m_parser(content, size, transient_stream, m_wrapper)
246{
247}
248
249template<typename _Handler>
250sax_ns_parser<_Handler>::~sax_ns_parser()
251{
252}
253
254template<typename _Handler>
255void sax_ns_parser<_Handler>::parse()
256{
257 m_parser.parse();
258}
259
260}
261
262#endif
263/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Definition: sax_ns_parser.hpp:78
Definition: pstring.hpp:28
Definition: sax_parser_base.hpp:34
Definition: sax_ns_parser.hpp:95
Definition: sax_parser.hpp:31
Definition: xml_namespace.hpp:83
xmlns_id_t get(const pstring &key) const
Definition: sax_ns_parser.hpp:69
Definition: sax_ns_parser.hpp:56
Definition: sax_ns_parser.hpp:43
Definition: pstring.hpp:83
Definition: sax_parser_base.hpp:46
Definition: sax_parser_base.hpp:101
Definition: sax_parser_base.hpp:86
Definition: sax_ns_parser.hpp:32
Definition: sax_ns_parser.hpp:23