Orcus
yaml_parser.hpp
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
6 */
7
8#ifndef INCLUDED_ORCUS_YAML_PARSER_HPP
9#define INCLUDED_ORCUS_YAML_PARSER_HPP
10
11#include "orcus/yaml_parser_base.hpp"
12#include "orcus/parser_global.hpp"
13
14namespace orcus {
15
16template<typename _Handler>
18{
19public:
20 typedef _Handler handler_type;
21
22 yaml_parser(const char* p, size_t n, handler_type& hdl);
23
24 void parse();
25
26private:
27 size_t end_scope();
28 void check_or_begin_document();
29 void check_or_begin_map();
30 void check_or_begin_sequence();
31 void parse_value(const char* p, size_t len);
32 void push_value(const char* p, size_t len);
33 void parse_line(const char* p, size_t len);
34 void parse_map_key(const char* p, size_t len);
35
36 void handler_begin_parse();
37 void handler_end_parse();
38 void handler_begin_document();
39 void handler_end_document();
40 void handler_begin_sequence();
41 void handler_end_sequence();
42 void handler_begin_map();
43 void handler_end_map();
44 void handler_begin_map_key();
45 void handler_end_map_key();
46 void handler_string(const char* p, size_t n);
47 void handler_number(double val);
48 void handler_boolean_true();
49 void handler_boolean_false();
50 void handler_null();
51
52private:
53 handler_type& m_handler;
54};
55
56template<typename _Handler>
58{
59 push_parse_token(yaml::detail::parse_token_t::begin_parse);
60 m_handler.begin_parse();
61}
62
63template<typename _Handler>
64void yaml_parser<_Handler>::handler_end_parse()
65{
66 push_parse_token(yaml::detail::parse_token_t::end_parse);
67 m_handler.end_parse();
68}
69
70template<typename _Handler>
71void yaml_parser<_Handler>::handler_begin_document()
72{
73 push_parse_token(yaml::detail::parse_token_t::begin_document);
74 m_handler.begin_document();
75}
76
77template<typename _Handler>
78void yaml_parser<_Handler>::handler_end_document()
79{
80 push_parse_token(yaml::detail::parse_token_t::end_document);
81 m_handler.end_document();
82}
83
84template<typename _Handler>
85void yaml_parser<_Handler>::handler_begin_sequence()
86{
87 push_parse_token(yaml::detail::parse_token_t::begin_sequence);
88 m_handler.begin_sequence();
89}
90
91template<typename _Handler>
92void yaml_parser<_Handler>::handler_end_sequence()
93{
94 push_parse_token(yaml::detail::parse_token_t::end_sequence);
95 m_handler.end_sequence();
96}
97
98template<typename _Handler>
99void yaml_parser<_Handler>::handler_begin_map()
100{
101 push_parse_token(yaml::detail::parse_token_t::begin_map);
102 m_handler.begin_map();
103}
104
105template<typename _Handler>
106void yaml_parser<_Handler>::handler_end_map()
107{
108 push_parse_token(yaml::detail::parse_token_t::end_map);
109 m_handler.end_map();
110}
111
112template<typename _Handler>
113void yaml_parser<_Handler>::handler_begin_map_key()
114{
115 push_parse_token(yaml::detail::parse_token_t::begin_map_key);
116 m_handler.begin_map_key();
117}
118
119template<typename _Handler>
120void yaml_parser<_Handler>::handler_end_map_key()
121{
122 push_parse_token(yaml::detail::parse_token_t::end_map_key);
123 m_handler.end_map_key();
124}
125
126template<typename _Handler>
127void yaml_parser<_Handler>::handler_string(const char* p, size_t n)
128{
129 push_parse_token(yaml::detail::parse_token_t::string);
130 m_handler.string(p, n);
131}
132
133template<typename _Handler>
134void yaml_parser<_Handler>::handler_number(double val)
135{
136 push_parse_token(yaml::detail::parse_token_t::number);
137 m_handler.number(val);
138}
139
140template<typename _Handler>
141void yaml_parser<_Handler>::handler_boolean_true()
142{
143 push_parse_token(yaml::detail::parse_token_t::boolean_true);
144 m_handler.boolean_true();
145}
146
147template<typename _Handler>
148void yaml_parser<_Handler>::handler_boolean_false()
149{
150 push_parse_token(yaml::detail::parse_token_t::boolean_false);
151 m_handler.boolean_false();
152}
153
154template<typename _Handler>
155void yaml_parser<_Handler>::handler_null()
156{
157 push_parse_token(yaml::detail::parse_token_t::null);
158 m_handler.null();
159}
160
161template<typename _Handler>
162yaml_parser<_Handler>::yaml_parser(const char* p, size_t n, handler_type& hdl) :
163 yaml::parser_base(p, n), m_handler(hdl) {}
164
165template<typename _Handler>
166void yaml_parser<_Handler>::parse()
167{
168 handler_begin_parse();
169
170 while (has_char())
171 {
172 reset_on_new_line();
173
174 size_t indent = parse_indent();
175 if (indent == parse_indent_end_of_stream)
176 break;
177
178 if (indent == parse_indent_blank_line)
179 continue;
180
181 size_t cur_scope = get_scope();
182
183 if (cur_scope <= indent)
184 {
185 if (in_literal_block())
186 {
187 handle_line_in_literal(indent);
188 continue;
189 }
190
191 if (has_line_buffer())
192 {
193 // This line is part of multi-line string. Push the line to the
194 // buffer as-is.
195 handle_line_in_multi_line_string();
196 continue;
197 }
198 }
199
200 if (cur_scope == scope_empty)
201 {
202 if (indent > 0)
203 throw yaml::parse_error(
204 "first node of the document should not be indented.", offset());
205
206 push_scope(indent);
207 }
208 else if (indent > cur_scope)
209 {
210 push_scope(indent);
211 }
212 else if (indent < cur_scope)
213 {
214 // Current indent is less than the current scope level.
215 do
216 {
217 cur_scope = end_scope();
218 if (cur_scope < indent)
219 throw yaml::parse_error("parse: invalid indent level.", offset());
220 }
221 while (indent < cur_scope);
222 }
223
224 // Parse the rest of the line.
225 pstring line = parse_to_end_of_line();
226 line = line.trim();
227
228 assert(!line.empty());
229 parse_line(line.get(), line.size());
230 }
231
232 // End all remaining scopes.
233 size_t cur_scope = get_scope();
234 while (cur_scope != scope_empty)
235 cur_scope = end_scope();
236
237 if (get_doc_hash())
238 handler_end_document();
239
240 handler_end_parse();
241}
242
243template<typename _Handler>
244size_t yaml_parser<_Handler>::end_scope()
245{
246 switch (get_scope_type())
247 {
248 case yaml::detail::scope_t::map:
249 {
250 if (get_last_parse_token() == yaml::detail::parse_token_t::end_map_key)
251 handler_null();
252
253 handler_end_map();
254 break;
255 }
256 case yaml::detail::scope_t::sequence:
257 {
258 if (get_last_parse_token() == yaml::detail::parse_token_t::begin_sequence_element)
259 handler_null();
260
261 handler_end_sequence();
262 break;
263 }
264 case yaml::detail::scope_t::multi_line_string:
265 {
266 pstring merged = merge_line_buffer();
267 handler_string(merged.get(), merged.size());
268 break;
269 }
270 default:
271 {
272 if (has_line_buffer())
273 {
274 assert(get_line_buffer_count() == 1);
275 pstring line = pop_line_front();
276 parse_value(line.get(), line.size());
277 }
278 }
279 }
280 return pop_scope();
281}
282
283template<typename _Handler>
284void yaml_parser<_Handler>::check_or_begin_document()
285{
286 if (!get_doc_hash())
287 {
288 set_doc_hash(mp_char);
289 handler_begin_document();
290 }
291}
292
293template<typename _Handler>
294void yaml_parser<_Handler>::check_or_begin_map()
295{
296 switch (get_scope_type())
297 {
298 case yaml::detail::scope_t::unset:
299 {
300 check_or_begin_document();
301 set_scope_type(yaml::detail::scope_t::map);
302 handler_begin_map();
303 break;
304 }
305 case yaml::detail::scope_t::map:
306 {
307 if (get_last_parse_token() == yaml::detail::parse_token_t::end_map_key)
308 handler_null();
309 break;
310 }
311 default:
312 ;
313 }
314}
315
316template<typename _Handler>
317void yaml_parser<_Handler>::check_or_begin_sequence()
318{
319 switch (get_scope_type())
320 {
321 case yaml::detail::scope_t::unset:
322 {
323 check_or_begin_document();
324 set_scope_type(yaml::detail::scope_t::sequence);
325 handler_begin_sequence();
326 break;
327 }
328 case yaml::detail::scope_t::sequence:
329 {
330 if (get_last_parse_token() == yaml::detail::parse_token_t::begin_sequence_element)
331 handler_null();
332 break;
333 }
334 default:
335 ;
336 }
337
338 push_parse_token(yaml::detail::parse_token_t::begin_sequence_element);
339}
340
341template<typename _Handler>
342void yaml_parser<_Handler>::parse_value(const char* p, size_t len)
343{
344 check_or_begin_document();
345
346 const char* p0 = p;
347 const char* p_end = p + len;
348 double val = parse_numeric(p, len);
349 if (p == p_end)
350 {
351 handler_number(val);
352 return;
353 }
354
355 yaml::detail::keyword_t kw = parse_keyword(p0, len);
356
357 if (kw != yaml::detail::keyword_t::unknown)
358 {
359 switch (kw)
360 {
361 case yaml::detail::keyword_t::null:
362 handler_null();
363 break;
364 case yaml::detail::keyword_t::boolean_true:
365 handler_boolean_true();
366 break;
367 case yaml::detail::keyword_t::boolean_false:
368 handler_boolean_false();
369 break;
370 default:
371 ;
372 }
373
374 return;
375 }
376
377 // Failed to parse it as a number or a keyword. It must be a string.
378 handler_string(p0, len);
379}
380
381template<typename _Handler>
382void yaml_parser<_Handler>::push_value(const char* p, size_t len)
383{
384 check_or_begin_document();
385
386 if (has_line_buffer() && get_scope_type() == yaml::detail::scope_t::unset)
387 set_scope_type(yaml::detail::scope_t::multi_line_string);
388
389 push_line_back(p, len);
390}
391
392template<typename _Handler>
393void yaml_parser<_Handler>::parse_line(const char* p, size_t len)
394{
395 const char* p_end = p + len;
396 const char* p0 = p; // Save the original head position.
397
398 if (*p == '-')
399 {
400 ++p;
401 if (p == p_end)
402 {
403 // List item start.
404 check_or_begin_sequence();
405 return;
406 }
407
408 switch (*p)
409 {
410 case '-':
411 {
412 // start of a document
413 ++p;
414 if (p == p_end)
415 throw yaml::parse_error("parse_line: line ended with '--'.", offset_last_char_of_line());
416
417 if (*p != '-')
418 yaml::parse_error::throw_with(
419 "parse_line: '-' expected but '", *p, "' found.",
420 offset_last_char_of_line() - std::ptrdiff_t(p_end-p));
421
422 ++p; // Skip the '-'.
423 set_doc_hash(p);
424 handler_begin_document();
425 clear_scopes();
426
427 if (p != p_end)
428 {
429 skip_blanks(p, p_end-p);
430
431 // Whatever comes after '---' is equivalent of first node.
432 assert(p != p_end);
433 push_scope(0);
434 parse_line(p, p_end-p);
435 }
436 return;
437 }
438 case ' ':
439 {
440 check_or_begin_sequence();
441
442 // list item start with inline first item content.
443 ++p;
444 if (p == p_end)
445 throw yaml::parse_error(
446 "parse_line: list item expected, but the line ended prematurely.",
447 offset_last_char_of_line() - std::ptrdiff_t(p_end-p));
448
449 skip_blanks(p, p_end-p);
450
451 size_t scope_width = get_scope() + (p-p0);
452 push_scope(scope_width);
453 parse_line(p, p_end-p);
454 return;
455 }
456 default:
457 // It is none of the above.
458 p = p0;
459 }
460
461 }
462
463 if (get_scope_type() == yaml::detail::scope_t::sequence)
464 yaml::parse_error::throw_with(
465 "'-' was expected for a sequence element, but '", *p, "' was found.",
466 offset_last_char_of_line()-len+1);
467
468 // If the line doesn't start with a "- ", it must be a dictionary key.
469 parse_map_key(p, len);
470}
471
472template<typename _Handler>
473void yaml_parser<_Handler>::parse_map_key(const char* p, size_t len)
474{
475 const char* p_end = p + len;
476 const char* p0 = p; // Save the original head position.
477
478 switch (*p)
479 {
480 case '"':
481 {
482 pstring quoted_str = parse_double_quoted_string_value(p, len);
483
484 if (p == p_end)
485 {
486 handler_string(quoted_str.get(), quoted_str.size());
487 return;
488 }
489
490 skip_blanks(p, p_end-p);
491
492 if (*p != ':')
493 throw yaml::parse_error(
494 "parse_map_key: ':' is expected after the quoted string key.",
495 offset() - std::ptrdiff_t(p_end-p+1));
496
497 check_or_begin_map();
498 handler_begin_map_key();
499 handler_string(quoted_str.get(), quoted_str.size());
500 handler_end_map_key();
501
502 ++p; // skip the ':'.
503 if (p == p_end)
504 return;
505
506 // Skip all white spaces.
507 skip_blanks(p, p_end-p);
508 }
509 break;
510 case '\'':
511 {
512 pstring quoted_str = parse_single_quoted_string_value(p, len);
513
514 if (p == p_end)
515 {
516 handler_string(quoted_str.get(), quoted_str.size());
517 return;
518 }
519
520 skip_blanks(p, p_end-p);
521
522 if (*p != ':')
523 throw yaml::parse_error(
524 "parse_map_key: ':' is expected after the quoted string key.",
525 offset() - std::ptrdiff_t(p_end-p+1));
526
527 check_or_begin_map();
528 handler_begin_map_key();
529 handler_string(quoted_str.get(), quoted_str.size());
530 handler_end_map_key();
531
532 ++p; // skip the ':'.
533 if (p == p_end)
534 return;
535
536 skip_blanks(p, p_end-p);
537 }
538 break;
539 default:
540 {
541 key_value kv = parse_key_value(p, p_end-p);
542
543 if (kv.key.empty())
544 {
545 // No map key found.
546 if (*p == '|')
547 {
548 start_literal_block();
549 return;
550 }
551
552 push_value(p, len);
553 return;
554 }
555
556 check_or_begin_map();
557 handler_begin_map_key();
558 parse_value(kv.key.get(), kv.key.size());
559 handler_end_map_key();
560
561 if (kv.value.empty())
562 return;
563
564 p = kv.value.get();
565 }
566 }
567
568 if (*p == '|')
569 {
570 start_literal_block();
571 return;
572 }
573
574 // inline map item.
575 if (*p == '-')
576 throw yaml::parse_error(
577 "parse_map_key: sequence entry is not allowed as an inline map item.",
578 offset() - std::ptrdiff_t(p_end-p+1));
579
580 size_t scope_width = get_scope() + (p-p0);
581 push_scope(scope_width);
582 parse_line(p, p_end-p);
583}
584
585}
586
587#endif
588
589/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Definition: yaml_parser_base.hpp:76
Definition: yaml_parser.hpp:18