Line data Source code
1 : //
2 : // Copyright (c) 2019 Vinnie Falco (vinnie.falco@gmail.com)
3 : // Copyright (c) 2020 Krystian Stasiowski (sdkrystian@gmail.com)
4 : //
5 : // Distributed under the Boost Software License, Version 1.0. (See accompanying
6 : // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 : //
8 : // Official repository: https://github.com/boostorg/json
9 : //
10 :
11 : #ifndef BOOST_JSON_BASIC_PARSER_HPP
12 : #define BOOST_JSON_BASIC_PARSER_HPP
13 :
14 : #include <boost/json/detail/config.hpp>
15 : #include <boost/json/detail/except.hpp>
16 : #include <boost/json/error.hpp>
17 : #include <boost/json/kind.hpp>
18 : #include <boost/json/parse_options.hpp>
19 : #include <boost/json/detail/stack.hpp>
20 : #include <boost/json/detail/stream.hpp>
21 : #include <boost/json/detail/utf8.hpp>
22 : #include <boost/json/detail/sbo_buffer.hpp>
23 :
24 : namespace boost {
25 : namespace json {
26 :
27 : /** An incremental SAX parser for serialized JSON.
28 :
29 : This implements a SAX-style parser, invoking a
30 : caller-supplied handler with each parsing event.
31 : To use, first declare a variable of type
32 : `basic_parser<T>` where `T` meets the handler
33 : requirements specified below. Then call
34 : @ref write_some one or more times with the input,
35 : setting `more = false` on the final buffer.
36 : The parsing events are realized through member
37 : function calls on the handler, which exists
38 : as a data member of the parser.
39 : \n
40 : The parser may dynamically allocate intermediate
41 : storage as needed to accommodate the nesting level
42 : of the input JSON. On subsequent invocations, the
43 : parser can cheaply re-use this memory, improving
44 : performance. This storage is freed when the
45 : parser is destroyed.
46 :
47 : @par Usage
48 :
49 : To get the declaration and function definitions
50 : for this class it is necessary to include this
51 : file instead:
52 : @code
53 : #include <boost/json/basic_parser_impl.hpp>
54 : @endcode
55 :
56 : Users who wish to parse JSON into the DOM container
57 : @ref value will not use this class directly; instead
58 : they will create an instance of @ref parser or
59 : @ref stream_parser and use that instead. Alternatively,
60 : they may call the function @ref parse. This class is
61 : designed for users who wish to perform custom actions
62 : instead of building a @ref value. For example, to
63 : produce a DOM from an external library.
64 : \n
65 : @note
66 :
67 : By default, only conforming JSON using UTF-8
68 : encoding is accepted. However, select non-compliant
69 : syntax can be allowed by construction using a
70 : @ref parse_options set to desired values.
71 :
72 : @par Handler
73 :
74 : The handler provided must be implemented as an
75 : object of class type which defines each of the
76 : required event member functions below. The event
77 : functions return a `bool` where `true` indicates
78 : success, and `false` indicates failure. If the
79 : member function returns `false`, it must set
80 : the error code to a suitable value. This error
81 : code will be returned by the write function to
82 : the caller.
83 : \n
84 : Handlers are required to declare the maximum
85 : limits on various elements. If these limits
86 : are exceeded during parsing, then parsing
87 : fails with an error.
88 : \n
89 : The following declaration meets the parser's
90 : handler requirements:
91 :
92 : @code
93 : struct handler
94 : {
95 : /// The maximum number of elements allowed in an array
96 : static constexpr std::size_t max_array_size = -1;
97 :
98 : /// The maximum number of elements allowed in an object
99 : static constexpr std::size_t max_object_size = -1;
100 :
101 : /// The maximum number of characters allowed in a string
102 : static constexpr std::size_t max_string_size = -1;
103 :
104 : /// The maximum number of characters allowed in a key
105 : static constexpr std::size_t max_key_size = -1;
106 :
107 : /// Called once when the JSON parsing begins.
108 : ///
109 : /// @return `true` on success.
110 : /// @param ec Set to the error, if any occurred.
111 : ///
112 : bool on_document_begin( error_code& ec );
113 :
114 : /// Called when the JSON parsing is done.
115 : ///
116 : /// @return `true` on success.
117 : /// @param ec Set to the error, if any occurred.
118 : ///
119 : bool on_document_end( error_code& ec );
120 :
121 : /// Called when the beginning of an array is encountered.
122 : ///
123 : /// @return `true` on success.
124 : /// @param ec Set to the error, if any occurred.
125 : ///
126 : bool on_array_begin( error_code& ec );
127 :
128 : /// Called when the end of the current array is encountered.
129 : ///
130 : /// @return `true` on success.
131 : /// @param n The number of elements in the array.
132 : /// @param ec Set to the error, if any occurred.
133 : ///
134 : bool on_array_end( std::size_t n, error_code& ec );
135 :
136 : /// Called when the beginning of an object is encountered.
137 : ///
138 : /// @return `true` on success.
139 : /// @param ec Set to the error, if any occurred.
140 : ///
141 : bool on_object_begin( error_code& ec );
142 :
143 : /// Called when the end of the current object is encountered.
144 : ///
145 : /// @return `true` on success.
146 : /// @param n The number of elements in the object.
147 : /// @param ec Set to the error, if any occurred.
148 : ///
149 : bool on_object_end( std::size_t n, error_code& ec );
150 :
151 : /// Called with characters corresponding to part of the current string.
152 : ///
153 : /// @return `true` on success.
154 : /// @param s The partial characters
155 : /// @param n The total size of the string thus far
156 : /// @param ec Set to the error, if any occurred.
157 : ///
158 : bool on_string_part( string_view s, std::size_t n, error_code& ec );
159 :
160 : /// Called with the last characters corresponding to the current string.
161 : ///
162 : /// @return `true` on success.
163 : /// @param s The remaining characters
164 : /// @param n The total size of the string
165 : /// @param ec Set to the error, if any occurred.
166 : ///
167 : bool on_string( string_view s, std::size_t n, error_code& ec );
168 :
169 : /// Called with characters corresponding to part of the current key.
170 : ///
171 : /// @return `true` on success.
172 : /// @param s The partial characters
173 : /// @param n The total size of the key thus far
174 : /// @param ec Set to the error, if any occurred.
175 : ///
176 : bool on_key_part( string_view s, std::size_t n, error_code& ec );
177 :
178 : /// Called with the last characters corresponding to the current key.
179 : ///
180 : /// @return `true` on success.
181 : /// @param s The remaining characters
182 : /// @param n The total size of the key
183 : /// @param ec Set to the error, if any occurred.
184 : ///
185 : bool on_key( string_view s, std::size_t n, error_code& ec );
186 :
187 : /// Called with the characters corresponding to part of the current number.
188 : ///
189 : /// @return `true` on success.
190 : /// @param s The partial characters
191 : /// @param ec Set to the error, if any occurred.
192 : ///
193 : bool on_number_part( string_view s, error_code& ec );
194 :
195 : /// Called when a signed integer is parsed.
196 : ///
197 : /// @return `true` on success.
198 : /// @param i The value
199 : /// @param s The remaining characters
200 : /// @param ec Set to the error, if any occurred.
201 : ///
202 : bool on_int64( int64_t i, string_view s, error_code& ec );
203 :
204 : /// Called when an unsigned integer is parsed.
205 : ///
206 : /// @return `true` on success.
207 : /// @param u The value
208 : /// @param s The remaining characters
209 : /// @param ec Set to the error, if any occurred.
210 : ///
211 : bool on_uint64( uint64_t u, string_view s, error_code& ec );
212 :
213 : /// Called when a double is parsed.
214 : ///
215 : /// @return `true` on success.
216 : /// @param d The value
217 : /// @param s The remaining characters
218 : /// @param ec Set to the error, if any occurred.
219 : ///
220 : bool on_double( double d, string_view s, error_code& ec );
221 :
222 : /// Called when a boolean is parsed.
223 : ///
224 : /// @return `true` on success.
225 : /// @param b The value
226 : ///
227 : /// @param ec Set to the error, if any occurred.
228 : ///
229 : bool on_bool( bool b, error_code& ec );
230 :
231 : /// Called when a null is parsed.
232 : ///
233 : /// @return `true` on success.
234 : /// @param ec Set to the error, if any occurred.
235 : ///
236 : bool on_null( error_code& ec );
237 :
238 : /// Called with characters corresponding to part of the current comment.
239 : ///
240 : /// @return `true` on success.
241 : /// @param s The partial characters.
242 : /// @param ec Set to the error, if any occurred.
243 : ///
244 : bool on_comment_part( string_view s, error_code& ec );
245 :
246 : /// Called with the last characters corresponding to the current comment.
247 : ///
248 : /// @return `true` on success.
249 : /// @param s The remaining characters
250 : /// @param ec Set to the error, if any occurred.
251 : ///
252 : bool on_comment( string_view s, error_code& ec );
253 : };
254 : @endcode
255 :
256 : @see
257 : @ref parse,
258 : @ref stream_parser,
259 : [Validating parser example](../../doc/html/json/examples.html#json.examples.validate).
260 :
261 : @headerfile <boost/json/basic_parser.hpp>
262 : */
263 : template<class Handler>
264 : class basic_parser
265 : {
266 : enum class state : char // parser states taken by the suspend*/maybe_suspend helpers below; underlying type is char
267 : {
268 : doc1, doc3, // NOTE(review): prefixes presumably name the construct being parsed (doc, com, lit, str, ...) - confirm in the impl header
269 : com1, com2, com3, com4,
270 : lit1,
271 : str1, str2, str3, str4,
272 : str5, str6, str7, str8,
273 : sur1, sur2, sur3,
274 : sur4, sur5, sur6,
275 : obj1, obj2, obj3, obj4,
276 : obj5, obj6, obj7, obj8,
277 : obj9, obj10, obj11,
278 : arr1, arr2, arr3,
279 : arr4, arr5, arr6,
280 : num1, num2, num3, num4,
281 : num5, num6, num7, num8,
282 : exp1, exp2, exp3,
283 : val1, val2, val3
284 : };
285 : // Number state: kept in num_ and accepted by the suspend()/maybe_suspend() overloads below.
286 : struct number // NOTE(review): field names suggest mantissa, bias, exponent, has-fraction, is-negative - confirm against parse_number
287 : {
288 : uint64_t mant;
289 : int bias;
290 : int exp;
291 : bool frac;
292 : bool neg;
293 : };
294 :
295 : template< bool StackEmpty_, char First_ >
296 : struct parse_number_helper;
297 :
298 : // optimization: must come first
299 : Handler h_; // the handler is stored by value; returned by handler()
300 :
301 : number num_;
302 : system::error_code ec_; // returned by last_error()
303 : detail::stack st_;
304 : detail::utf8_sequence seq_;
305 : unsigned u1_;
306 : unsigned u2_;
307 : bool more_; // false for final buffer
308 : bool done_ = false; // true on complete parse; returned by done()
309 : bool clean_ = true; // write_some exited cleanly
310 : const char* end_;
311 : detail::sbo_buffer<16 + 16 + 1 + 1> num_buf_;
312 : parse_options opt_; // must stay declared before depth_: its default initializer reads opt_.max_depth
313 : // how many levels deeper the parser can go
314 : std::size_t depth_ = opt_.max_depth;
315 : unsigned char cur_lit_ = 0;
316 : unsigned char lit_offset_ = 0;
317 :
318 : inline void reserve();
319 : inline const char* sentinel();
320 : inline bool incomplete(
321 : const detail::const_stream_wrapper& cs);
322 : // Intel compiler only: warning 2196 is disabled across the BOOST_NOINLINE inline declarations that follow.
323 : #ifdef __INTEL_COMPILER
324 : #pragma warning push
325 : #pragma warning disable 2196
326 : #endif
327 :
328 : BOOST_NOINLINE
329 : inline
330 : const char*
331 : suspend_or_fail(state st);
332 :
333 : BOOST_NOINLINE
334 : inline
335 : const char*
336 : suspend_or_fail(
337 : state st,
338 : std::size_t n);
339 :
340 : BOOST_NOINLINE
341 : inline
342 : const char*
343 : fail(const char* p) noexcept;
344 :
345 : BOOST_NOINLINE
346 : inline
347 : const char*
348 : fail(
349 : const char* p,
350 : error ev,
351 : source_location const* loc) noexcept;
352 :
353 : BOOST_NOINLINE
354 : inline
355 : const char*
356 : maybe_suspend(
357 : const char* p,
358 : state st);
359 :
360 : BOOST_NOINLINE
361 : inline
362 : const char*
363 : maybe_suspend(
364 : const char* p,
365 : state st,
366 : std::size_t n);
367 :
368 : BOOST_NOINLINE
369 : inline
370 : const char*
371 : maybe_suspend(
372 : const char* p,
373 : state st,
374 : const number& num);
375 :
376 : BOOST_NOINLINE
377 : inline
378 : const char*
379 : suspend(
380 : const char* p,
381 : state st);
382 :
383 : BOOST_NOINLINE
384 : inline
385 : const char*
386 : suspend(
387 : const char* p,
388 : state st,
389 : const number& num);
390 :
391 : #ifdef __INTEL_COMPILER
392 : #pragma warning pop
393 : #endif
394 : // Parse routines: options arrive as std::integral_constant tags or as plain bool; commented-out template parameters mark the bool ones.
395 : template<bool StackEmpty_/*, bool Terminal_*/>
396 : const char* parse_comment(const char* p,
397 : std::integral_constant<bool, StackEmpty_> stack_empty,
398 : /*std::integral_constant<bool, Terminal_>*/ bool terminal);
399 :
400 : template<bool StackEmpty_>
401 : const char* parse_document(const char* p,
402 : std::integral_constant<bool, StackEmpty_> stack_empty);
403 :
404 : template<bool StackEmpty_, bool AllowComments_/*,
405 : bool AllowTrailing_, bool AllowBadUTF8_*/>
406 : const char* parse_value(const char* p,
407 : std::integral_constant<bool, StackEmpty_> stack_empty,
408 : std::integral_constant<bool, AllowComments_> allow_comments,
409 : /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
410 : /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8,
411 : bool allow_bad_utf16);
412 :
413 : template<bool AllowComments_/*,
414 : bool AllowTrailing_, bool AllowBadUTF8_*/>
415 : const char* resume_value(const char* p,
416 : std::integral_constant<bool, AllowComments_> allow_comments,
417 : /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
418 : /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8,
419 : bool allow_bad_utf16);
420 :
421 : template<bool StackEmpty_, bool AllowComments_/*,
422 : bool AllowTrailing_, bool AllowBadUTF8_*/>
423 : const char* parse_object(const char* p,
424 : std::integral_constant<bool, StackEmpty_> stack_empty,
425 : std::integral_constant<bool, AllowComments_> allow_comments,
426 : /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
427 : /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8,
428 : bool allow_bad_utf16);
429 :
430 : template<bool StackEmpty_, bool AllowComments_/*,
431 : bool AllowTrailing_, bool AllowBadUTF8_*/>
432 : const char* parse_array(const char* p,
433 : std::integral_constant<bool, StackEmpty_> stack_empty,
434 : std::integral_constant<bool, AllowComments_> allow_comments,
435 : /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
436 : /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8,
437 : bool allow_bad_utf16);
438 :
439 : template<int Literal>
440 : const char* parse_literal(const char* p,
441 : std::integral_constant<int, Literal> literal);
442 :
443 : template<bool StackEmpty_, bool IsKey_>
444 : const char* parse_string(const char* p,
445 : std::integral_constant<bool, StackEmpty_> stack_empty,
446 : std::integral_constant<bool, IsKey_> is_key,
447 : bool allow_bad_utf8,
448 : bool allow_bad_utf16);
449 :
450 : template<bool StackEmpty_>
451 : const char* parse_escaped(
452 : const char* p,
453 : std::size_t& total,
454 : std::integral_constant<bool, StackEmpty_> stack_empty,
455 : bool is_key,
456 : bool allow_bad_utf16);
457 :
458 : template<bool StackEmpty_, char First_, number_precision Numbers_>
459 : const char* parse_number(const char* p,
460 : std::integral_constant<bool, StackEmpty_> stack_empty,
461 : std::integral_constant<char, First_> first,
462 : std::integral_constant<number_precision, Numbers_> numbers);
463 :
464 : // intentionally private; reports the levels already used (max_depth minus the remaining depth_)
465 : std::size_t
466 173058 : depth() const noexcept
467 : {
468 173058 : return opt_.max_depth - depth_;
469 : }
470 :
471 : public:
472 : /// Copy constructor (deleted)
473 : basic_parser(
474 : basic_parser const&) = delete;
475 :
476 : /// Copy assignment (deleted)
477 : basic_parser& operator=(
478 : basic_parser const&) = delete;
479 :
480 : /** Destructor.
481 :
482 : All dynamically allocated internal memory is freed.
483 :
484 : @par Effects
485 : @code
486 : this->handler().~Handler()
487 : @endcode
488 :
489 : @par Complexity
490 : Same as `~Handler()`.
491 :
492 : @par Exception Safety
493 : Same as `~Handler()`.
494 : */
495 2164579 : ~basic_parser() = default;
496 :
497 : /** Constructor.
498 :
499 : This function constructs the parser with
500 : the specified options, with any additional
501 : arguments forwarded to the handler's constructor.
502 :
503 : @par Complexity
504 : Same as `Handler( std::forward< Args >( args )... )`.
505 :
506 : @par Exception Safety
507 : Same as `Handler( std::forward< Args >( args )... )`.
508 :
509 : @param opt Configuration settings for the parser.
510 : If this structure is default constructed, the
511 : parser will accept only standard JSON.
512 :
513 : @param args Optional additional arguments
514 : forwarded to the handler's constructor.
515 : */
516 : template<class... Args>
517 : explicit
518 : basic_parser(
519 : parse_options const& opt,
520 : Args&&... args);
521 :
522 : /** Return a reference to the handler.
523 :
524 : This function provides access to the constructed
525 : instance of the handler owned by the parser.
526 :
527 : @par Complexity
528 : Constant.
529 :
530 : @par Exception Safety
531 : No-throw guarantee.
532 : */
533 : Handler&
534 6310634 : handler() noexcept
535 : {
536 6310634 : return h_;
537 : }
538 :
539 : /** Return a reference to the handler.
540 :
541 : This function provides access to the constructed
542 : instance of the handler owned by the parser.
543 :
544 : @par Complexity
545 : Constant.
546 :
547 : @par Exception Safety
548 : No-throw guarantee.
549 : */
550 : Handler const&
551 24 : handler() const noexcept
552 : {
553 24 : return h_;
554 : }
555 :
556 : /** Return the last error.
557 :
558 : This returns the last error code which
559 : was generated in the most recent call
560 : to @ref write_some.
561 :
562 : @par Complexity
563 : Constant.
564 :
565 : @par Exception Safety
566 : No-throw guarantee.
567 : */
568 : system::error_code
569 8 : last_error() const noexcept
570 : {
571 8 : return ec_;
572 : }
573 :
574 : /** Return true if a complete JSON has been parsed.
575 :
576 : This function returns `true` when all of these
577 : conditions are met:
578 :
579 : @li A complete serialized JSON has been
580 : presented to the parser, and
581 :
582 : @li No error or exception has occurred since the
583 : parser was constructed, or since the last call
584 : to @ref reset.
585 :
586 : @par Complexity
587 : Constant.
588 :
589 : @par Exception Safety
590 : No-throw guarantee.
591 : */
592 : bool
593 4078231 : done() const noexcept
594 : {
595 4078231 : return done_;
596 : }
597 :
598 : /** Reset the state, to parse a new document.
599 :
600 : This function discards the current parsing
601 : state, to prepare for parsing a new document.
602 : Dynamically allocated temporary memory used
603 : by the implementation is not deallocated.
604 :
605 : @par Complexity
606 : Constant.
607 :
608 : @par Exception Safety
609 : No-throw guarantee.
610 : */
611 : void
612 : reset() noexcept;
613 :
614 : /** Indicate a parsing failure.
615 :
616 : This changes the state of the parser to indicate
617 : that the parse has failed. A parser implementation
618 : can use this to fail the parser if needed due to
619 : external inputs.
620 :
621 : @note
622 :
623 : If `!ec`, the stored error code is unspecified.
624 :
625 : @par Complexity
626 : Constant.
627 :
628 : @par Exception Safety
629 : No-throw guarantee.
630 :
631 : @param ec The error code to set. If the code does
632 : not indicate failure, an implementation-defined
633 : error code that indicates failure will be stored
634 : instead.
635 : */
636 : void
637 : fail(system::error_code ec) noexcept;
638 :
639 : /** Parse some of an input string as JSON, incrementally.
640 :
641 : This function parses the JSON in the specified
642 : buffer, calling the handler to emit each SAX
643 : parsing event. The parse proceeds from the
644 : current state, which is at the beginning of a
645 : new JSON or in the middle of the current JSON
646 : if any characters were already parsed.
647 : \n
648 : The characters in the buffer are processed
649 : starting from the beginning, until one of the
650 : following conditions is met:
651 :
652 : @li All of the characters in the buffer
653 : have been parsed, or
654 :
655 : @li Some of the characters in the buffer
656 : have been parsed and the JSON is complete, or
657 :
658 : @li A parsing error occurs.
659 :
660 : The supplied buffer does not need to contain the
661 : entire JSON. Subsequent calls can provide more
662 : serialized data, allowing JSON to be processed
663 : incrementally. The end of the serialized JSON
664 : can be indicated by passing `more = false`.
665 :
666 : @par Complexity
667 : Linear in `size`.
668 :
669 : @par Exception Safety
670 : Basic guarantee.
671 : Calls to the handler may throw.
672 : Upon error or exception, subsequent calls will
673 : fail until @ref reset is called to parse a new JSON.
674 :
675 : @return The number of characters successfully
676 : parsed, which may be smaller than `size`.
677 :
678 : @param more `true` if there are possibly more
679 : buffers in the current JSON, otherwise `false`.
680 :
681 : @param data A pointer to a buffer of `size`
682 : characters to parse.
683 :
684 : @param size The number of characters pointed to
685 : by `data`.
686 :
687 : @param ec Set to the error, if any occurred.
688 : */
689 : /** @{ */
690 : std::size_t
691 : write_some(
692 : bool more,
693 : char const* data,
694 : std::size_t size,
695 : system::error_code& ec);
696 : // Overload in the same documentation group that reports the error through std::error_code.
697 : std::size_t
698 : write_some(
699 : bool more,
700 : char const* data,
701 : std::size_t size,
702 : std::error_code& ec);
703 : /** @} */
704 : };
705 :
706 : } // namespace json
707 : } // namespace boost
708 :
709 : #endif
|