Orcus
css_parser.hpp
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
6 */
7
8#ifndef INCLUDED_ORCUS_CSS_PARSER_HPP
9#define INCLUDED_ORCUS_CSS_PARSER_HPP
10
11#define ORCUS_DEBUG_CSS 0
12
13#include "orcus/parser_global.hpp"
14#include "orcus/css_parser_base.hpp"
15#include "orcus/global.hpp"
16
17#include <cassert>
18
19#if ORCUS_DEBUG_CSS
20#include <iostream>
21using std::cout;
22using std::endl;
23#endif
24
25namespace orcus {
26
27template<typename _Handler>
29{
30public:
31 typedef _Handler handler_type;
32
33 css_parser(const char* p, size_t n, handler_type& hdl);
34 void parse();
35
36private:
37 // Handlers - at the time a handler is called the current position is
38 // expected to point to the first unprocessed non-blank character, and
39 // each handler must set the current position to the next unprocessed
40 // non-blank character when it finishes.
41 void rule();
42 void at_rule_name();
43 void simple_selector_name();
44 void property_name();
45 void property();
46 void quoted_value(char c);
47 void value();
48 void function_value(const char* p, size_t len);
49 void function_rgb(bool alpha);
50 void function_hsl(bool alpha);
51 void function_url();
52 void name_sep();
53 void property_sep();
54 void block();
55
56 handler_type& m_handler;
57};
58
59template<typename _Handler>
60css_parser<_Handler>::css_parser(const char* p, size_t n, handler_type& hdl) :
61 css::parser_base(p, n), m_handler(hdl) {}
62
63template<typename _Handler>
64void css_parser<_Handler>::parse()
65{
66 shrink_stream();
67
68#if ORCUS_DEBUG_CSS
69 std::cout << "compressed: '";
70 const char* p = mp_char;
71 for (; p != mp_end; ++p)
72 std::cout << *p;
73 std::cout << "'" << std::endl;
74#endif
75 m_handler.begin_parse();
76 while (has_char())
77 rule();
78 m_handler.end_parse();
79}
80
81template<typename _Handler>
82void css_parser<_Handler>::rule()
83{
84 // <selector name> , ... , <selector name> <block>
85 while (has_char())
86 {
87 if (skip_comment())
88 continue;
89
90 char c = cur_char();
91 if (is_alpha(c))
92 {
93 simple_selector_name();
94 continue;
95 }
96
97 switch (c)
98 {
99 case '>':
100 set_combinator(c, css::combinator_t::direct_child);
101 break;
102 case '+':
103 set_combinator(c, css::combinator_t::next_sibling);
104 break;
105 case '.':
106 case '#':
107 case '@':
108 simple_selector_name();
109 break;
110 case ',':
111 name_sep();
112 break;
113 case '{':
114 reset_before_block();
115 block();
116 break;
117 default:
118 css::parse_error::throw_with("rule: failed to parse '", c, "'");
119 }
120 }
121}
122
123template<typename _Handler>
124void css_parser<_Handler>::at_rule_name()
125{
126 assert(has_char());
127 assert(cur_char() == '@');
128 next();
129 char c = cur_char();
130 if (!is_alpha(c))
131 throw css::parse_error("at_rule_name: first character of an at-rule name must be an alphabet.");
132
133 const char* p;
134 size_t len;
135 identifier(p, len);
136 skip_blanks();
137
138 m_handler.at_rule_name(p, len);
139#if ORCUS_DEBUG_CSS
140 std::string foo(p, len);
141 std::cout << "at-rule name: " << foo.c_str() << std::endl;
142#endif
143}
144
145template<typename _Handler>
146void css_parser<_Handler>::simple_selector_name()
147{
148 assert(has_char());
149 char c = cur_char();
150 if (c == '@')
151 {
152 // This is the name of an at-rule.
153 at_rule_name();
154 return;
155 }
156
157 if (m_simple_selector_count)
158 {
159#if ORCUS_DEBUG_CSS
160 cout << "combinator: " << m_combinator << endl;
161#endif
162 m_handler.combinator(m_combinator);
163 m_combinator = css::combinator_t::descendant;
164 }
165 assert(is_alpha(c) || c == '.' || c == '#');
166
167 const char* p = nullptr;
168 size_t n = 0;
169
170#if ORCUS_DEBUG_CSS
171 cout << "simple_selector_name: (" << m_simple_selector_count << ")";
172#endif
173
174 if (c != '.' && c != '#')
175 {
176 identifier(p, n);
177#if ORCUS_DEBUG_CSS
178 std::string s(p, n);
179 cout << " type=" << s;
180#endif
181 m_handler.simple_selector_type(p, n);
182 }
183
184 bool in_loop = true;
185 while (in_loop && has_char())
186 {
187 switch (cur_char())
188 {
189 case '.':
190 {
191 next();
192 identifier(p, n);
193 m_handler.simple_selector_class(p, n);
194#if ORCUS_DEBUG_CSS
195 std::string s(p, n);
196 std::cout << " class=" << s;
197#endif
198 }
199 break;
200 case '#':
201 {
202 next();
203 identifier(p, n);
204 m_handler.simple_selector_id(p, n);
205#if ORCUS_DEBUG_CSS
206 std::string s(p, n);
207 std::cout << " id=" << s;
208#endif
209 }
210 break;
211 case ':':
212 {
213 // This could be either a pseudo element or pseudo class.
214 next();
215 if (cur_char() == ':')
216 {
217 // pseudo element.
218 next();
219 identifier(p, n);
220 css::pseudo_element_t elem = css::to_pseudo_element(p, n);
221 if (!elem)
222 css::parse_error::throw_with(
223 "selector_name: unknown pseudo element '", p, n, "'");
224
225 m_handler.simple_selector_pseudo_element(elem);
226 }
227 else
228 {
229 // pseudo class (or pseudo element in the older version of CSS).
230 identifier(p, n);
231 css::pseudo_class_t pc = css::to_pseudo_class(p, n);
232 if (!pc)
233 css::parse_error::throw_with(
234 "selector_name: unknown pseudo class '", p, n, "'");
235
236 m_handler.simple_selector_pseudo_class(pc);
237 }
238 }
239 break;
240 default:
241 in_loop = false;
242 }
243 }
244
245 m_handler.end_simple_selector();
246 skip_comments_and_blanks();
247
248 ++m_simple_selector_count;
249
250#if ORCUS_DEBUG_CSS
251 std::cout << std::endl;
252#endif
253}
254
255template<typename _Handler>
256void css_parser<_Handler>::property_name()
257{
258 // <identifier>
259
260 assert(has_char());
261 char c = cur_char();
262 if (!is_alpha(c) && c != '.')
263 css::parse_error::throw_with(
264 "property_name: first character of a name must be an alphabet or a dot, but found '", c, "'");
265
266 const char* p;
267 size_t len;
268 identifier(p, len);
269 skip_comments_and_blanks();
270
271 m_handler.property_name(p, len);
272#if ORCUS_DEBUG_CSS
273 std::string foo(p, len);
274 std::cout << "property name: " << foo.c_str() << std::endl;
275#endif
276}
277
278template<typename _Handler>
279void css_parser<_Handler>::property()
280{
281 // <property name> : <value> , ... , <value>
282
283 m_handler.begin_property();
284 property_name();
285 if (cur_char() != ':')
286 throw css::parse_error("property: ':' expected.");
287 next();
288 skip_comments_and_blanks();
289
290 bool in_loop = true;
291 while (in_loop && has_char())
292 {
293 value();
294 char c = cur_char();
295 switch (c)
296 {
297 case ',':
298 {
299 // separated by commas.
300 next();
301 skip_comments_and_blanks();
302 }
303 break;
304 case ';':
305 case '}':
306 in_loop = false;
307 break;
308 default:
309 ;
310 }
311 }
312
313 skip_comments_and_blanks();
314 m_handler.end_property();
315}
316
317template<typename _Handler>
318void css_parser<_Handler>::quoted_value(char c)
319{
320 // Parse until the the end quote is reached.
321 const char* p = nullptr;
322 size_t len = 0;
323 literal(p, len, c);
324 next();
325 skip_blanks();
326
327 m_handler.value(p, len);
328#if ORCUS_DEBUG_CSS
329 std::string foo(p, len);
330 std::cout << "quoted value: " << foo.c_str() << std::endl;
331#endif
332}
333
334template<typename _Handler>
335void css_parser<_Handler>::value()
336{
337 assert(has_char());
338 char c = cur_char();
339 if (c == '"' || c == '\'')
340 {
341 quoted_value(c);
342 return;
343 }
344
345 if (!is_alpha(c) && !is_numeric(c) && !is_in(c, ORCUS_ASCII("-+.#")))
346 css::parse_error::throw_with("value:: illegal first character of a value '", c, "'");
347
348 const char* p = nullptr;
349 size_t len = 0;
350 identifier(p, len, ORCUS_ASCII(".%"));
351 if (cur_char() == '(')
352 {
353 function_value(p, len);
354 return;
355 }
356
357 m_handler.value(p, len);
358
359 skip_comments_and_blanks();
360
361#if ORCUS_DEBUG_CSS
362 std::string foo(p, len);
363 std::cout << "value: " << foo.c_str() << std::endl;
364#endif
365}
366
367template<typename _Handler>
368void css_parser<_Handler>::function_value(const char* p, size_t len)
369{
370 assert(cur_char() == '(');
371 css::property_function_t func = css::to_property_function(p, len);
372 if (func == css::property_function_t::unknown)
373 css::parse_error::throw_with("function_value: unknown function '", p, len, "'");
374
375 // Move to the first character of the first argument.
376 next();
377 skip_comments_and_blanks();
378
379 switch (func)
380 {
381 case css::property_function_t::rgb:
382 function_rgb(false);
383 break;
384 case css::property_function_t::rgba:
385 function_rgb(true);
386 break;
387 case css::property_function_t::hsl:
388 function_hsl(false);
389 break;
390 case css::property_function_t::hsla:
391 function_hsl(true);
392 break;
393 case css::property_function_t::url:
394 function_url();
395 break;
396 default:
397 css::parse_error::throw_with("function_value: unhandled function '", p, len, "'");
398 }
399
400 char c = cur_char();
401 if (c != ')')
402 css::parse_error::throw_with("function_value: ')' expected but '", c, "' found.");
403
404 next();
405 skip_comments_and_blanks();
406}
407
408template<typename _Handler>
409void css_parser<_Handler>::function_rgb(bool alpha)
410{
411 // rgb(num, num, num) rgba(num, num, num, float)
412
413 uint8_t vals[3];
414 uint8_t* p = vals;
415 const uint8_t* plast = p + 2;
416 char c = 0;
417
418 for (; ; ++p)
419 {
420 *p = parse_uint8();
421
422 skip_comments_and_blanks();
423
424 if (p == plast)
425 break;
426
427 c = cur_char();
428
429 if (c != ',')
430 css::parse_error::throw_with("function_rgb: ',' expected but '", c, "' found.");
431
432 next();
433 skip_comments_and_blanks();
434 }
435
436 if (alpha)
437 {
438 c = cur_char();
439 if (c != ',')
440 css::parse_error::throw_with("function_rgb: ',' expected but '", c, "' found.");
441
442 next();
443 skip_comments_and_blanks();
444
445 double alpha_val = parse_double_or_throw();
446
447 alpha_val = clip(alpha_val, 0.0, 1.0);
448 m_handler.rgba(vals[0], vals[1], vals[2], alpha_val);
449 }
450 else
451 m_handler.rgb(vals[0], vals[1], vals[2]);
452
453#if ORCUS_DEBUG_CSS
454 std::cout << "rgb";
455 if (alpha)
456 std::cout << 'a';
457 std::cout << '(';
458 p = vals;
459 const uint8_t* pend = plast + 1;
460 for (; p != pend; ++p)
461 std::cout << ' ' << (int)*p;
462 std::cout << " )" << std::endl;
463#endif
464}
465
466template<typename _Handler>
467void css_parser<_Handler>::function_hsl(bool alpha)
468{
469 // hsl(num, percent, percent) hsla(num, percent, percent, float)
470
471 double hue = parse_double_or_throw(); // casted to uint8_t eventually.
472 hue = clip(hue, 0.0, 360.0);
473 skip_comments_and_blanks();
474
475 char c = cur_char();
476 if (c != ',')
477 css::parse_error::throw_with("function_hsl: ',' expected but '", c, "' found.");
478
479 next();
480 skip_comments_and_blanks();
481
482 double sat = parse_percent();
483 sat = clip(sat, 0.0, 100.0);
484 skip_comments_and_blanks();
485
486 c = cur_char();
487 if (c != ',')
488 css::parse_error::throw_with("function_hsl: ',' expected but '", c, "' found.");
489
490 next();
491 skip_comments_and_blanks();
492
493 double light = parse_percent();
494 light = clip(light, 0.0, 100.0);
495 skip_comments_and_blanks();
496
497 if (!alpha)
498 {
499 m_handler.hsl(hue, sat, light);
500 return;
501 }
502
503 c = cur_char();
504 if (c != ',')
505 css::parse_error::throw_with("function_hsl: ',' expected but '", c, "' found.");
506
507 next();
508 skip_comments_and_blanks();
509
510 double alpha_val = parse_double_or_throw();
511 alpha_val = clip(alpha_val, 0.0, 1.0);
512 skip_comments_and_blanks();
513 m_handler.hsla(hue, sat, light, alpha_val);
514}
515
516template<typename _Handler>
517void css_parser<_Handler>::function_url()
518{
519 char c = cur_char();
520
521 if (c == '"' || c == '\'')
522 {
523 // Quoted URL value.
524 const char* p;
525 size_t len;
526 literal(p, len, c);
527 next();
528 skip_comments_and_blanks();
529 m_handler.url(p, len);
530#if ORCUS_DEBUG_CSS
531 std::cout << "url(" << std::string(p, len) << ")" << std::endl;
532#endif
533 return;
534 }
535
536 // Unquoted URL value.
537 const char* p;
538 size_t len;
539 skip_to_or_blank(p, len, ORCUS_ASCII(")"));
540 skip_comments_and_blanks();
541 m_handler.url(p, len);
542#if ORCUS_DEBUG_CSS
543 std::cout << "url(" << std::string(p, len) << ")" << std::endl;
544#endif
545}
546
547template<typename _Handler>
548void css_parser<_Handler>::name_sep()
549{
550 assert(cur_char() == ',');
551#if ORCUS_DEBUG_CSS
552 std::cout << "," << std::endl;
553#endif
554 next();
555 skip_blanks();
556 m_handler.end_selector();
557}
558
559template<typename _Handler>
560void css_parser<_Handler>::property_sep()
561{
562#if ORCUS_DEBUG_CSS
563 std::cout << ";" << std::endl;
564#endif
565 next();
566 skip_comments_and_blanks();
567}
568
569template<typename _Handler>
570void css_parser<_Handler>::block()
571{
572 // '{' <property> ';' ... ';' <property> ';'(optional) '}'
573
574 assert(cur_char() == '{');
575#if ORCUS_DEBUG_CSS
576 std::cout << "{" << std::endl;
577#endif
578 m_handler.end_selector();
579 m_handler.begin_block();
580
581 next();
582 skip_comments_and_blanks();
583
584 // parse properties.
585 while (has_char())
586 {
587 property();
588 if (cur_char() != ';')
589 break;
590 property_sep();
591 if (cur_char() == '}')
592 // ';' after the last property. This is optional but allowed.
593 break;
594 }
595
596 if (cur_char() != '}')
597 throw css::parse_error("block: '}' expected.");
598
599 m_handler.end_block();
600
601 next();
602 skip_comments_and_blanks();
603
604#if ORCUS_DEBUG_CSS
605 std::cout << "}" << std::endl;
606#endif
607}
608
609}
610
611#endif
612
613/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Definition: css_parser_base.hpp:31
Definition: css_parser.hpp:29
Definition: parser_base.hpp:40