1196 lines
35 KiB
C
1196 lines
35 KiB
C
/* JSON_parser.c */
|
|
|
|
/* 2007-08-24 */
|
|
|
|
/*
|
|
Copyright (c) 2005 JSON.org
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
of this software and associated documentation files (the "Software"), to deal
|
|
in the Software without restriction, including without limitation the rights
|
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
copies of the Software, and to permit persons to whom the Software is
|
|
furnished to do so, subject to the following conditions:
|
|
|
|
The above copyright notice and this permission notice shall be included in
|
|
all
|
|
copies or substantial portions of the Software.
|
|
|
|
The Software shall be used for Good, not Evil.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
SOFTWARE.
|
|
*/
|
|
|
|
/*
|
|
Callbacks, comments, Unicode handling by Jean Gressmann (jean@0x42.de),
|
|
2007-2008.
|
|
|
|
For the added features the license above applies also.
|
|
|
|
Changelog:
|
|
2008/07/19
|
|
- Removed some duplicate code & debugging variable
|
|
(Charles.Kerr@noaa.gov)
|
|
|
|
2008/05/28
|
|
- Made JSON_value structure ansi C compliant. This bug was report by
|
|
trisk@acm.jhu.edu
|
|
|
|
2008/05/20
|
|
- Fixed bug reported by Charles.Kerr@noaa.gov where the switching
|
|
from static to dynamic parse buffer did not copy the static parse
|
|
buffer's content.
|
|
*/
|
|
|
|
|
|
#include <assert.h>
|
|
#include <ctype.h>
|
|
#include <float.h>
|
|
#include <stddef.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include "JSON_parser.h"
|
|
#include "ConvertUTF.h"
|
|
|
|
#if _MSC_VER >= 1400 /* Visual Studio 2005 and up */
|
|
#pragma warning(disable:4996) /* unsecure sscanf */
|
|
#endif
|
|
|
|
|
|
#define true 1
|
|
#define false 0
|
|
#define __ -1 /* the universal error code */
|
|
|
|
/* values chosen so that the object size is approx equal to one page (4K) */
|
|
#ifndef JSON_PARSER_STACK_SIZE
|
|
#define JSON_PARSER_STACK_SIZE 128
|
|
#endif
|
|
|
|
#ifndef JSON_PARSER_PARSE_BUFFER_SIZE
|
|
#define JSON_PARSER_PARSE_BUFFER_SIZE 3500
|
|
#endif
|
|
|
|
|
|
typedef struct JSON_parser_struct
|
|
{
|
|
JSON_parser_callback callback;
|
|
void* ctx;
|
|
signed char state, before_comment_state, type, escaped,
|
|
comment, allow_comments, handle_floats_manually;
|
|
UTF16 utf16_decode_buffer[2];
|
|
long depth;
|
|
long top;
|
|
signed char* stack;
|
|
long stack_capacity;
|
|
signed char static_stack[JSON_PARSER_STACK_SIZE];
|
|
char* parse_buffer;
|
|
size_t parse_buffer_capacity;
|
|
size_t parse_buffer_count;
|
|
size_t comment_begin_offset;
|
|
char static_parse_buffer[
|
|
JSON_PARSER_PARSE_BUFFER_SIZE];
|
|
} * JSON_parser;
|
|
|
|
#define COUNTOF( x ) ( sizeof( x ) / sizeof( x[0] ) )
|
|
|
|
/*
|
|
Characters are mapped into these 31 character classes. This allows for
|
|
a significant reduction in the size of the state transition table.
|
|
*/
|
|
|
|
|
|
enum classes
|
|
{
|
|
C_SPACE, /* space */
|
|
C_WHITE, /* other whitespace */
|
|
C_LCURB, /* { */
|
|
C_RCURB, /* } */
|
|
C_LSQRB, /* [ */
|
|
C_RSQRB, /* ] */
|
|
C_COLON, /* : */
|
|
C_COMMA, /* , */
|
|
C_QUOTE, /* " */
|
|
C_BACKS, /* \ */
|
|
C_SLASH, /* / */
|
|
C_PLUS, /* + */
|
|
C_MINUS, /* - */
|
|
C_POINT, /* . */
|
|
C_ZERO, /* 0 */
|
|
C_DIGIT, /* 123456789 */
|
|
C_LOW_A, /* a */
|
|
C_LOW_B, /* b */
|
|
C_LOW_C, /* c */
|
|
C_LOW_D, /* d */
|
|
C_LOW_E, /* e */
|
|
C_LOW_F, /* f */
|
|
C_LOW_L, /* l */
|
|
C_LOW_N, /* n */
|
|
C_LOW_R, /* r */
|
|
C_LOW_S, /* s */
|
|
C_LOW_T, /* t */
|
|
C_LOW_U, /* u */
|
|
C_ABCDF, /* ABCDF */
|
|
C_E, /* E */
|
|
C_ETC, /* everything else */
|
|
C_STAR, /* * */
|
|
NR_CLASSES
|
|
};
|
|
|
|
static int ascii_class[128] = {
|
|
/*
|
|
This array maps the 128 ASCII characters into character classes.
|
|
The remaining Unicode characters should be mapped to C_ETC.
|
|
Non-whitespace control characters are errors.
|
|
*/
|
|
__, __, __, __, __, __,
|
|
__, __,
|
|
__, C_WHITE, C_WHITE, __, __, C_WHITE,
|
|
__, __,
|
|
__, __, __, __, __, __,
|
|
__, __,
|
|
__, __, __, __, __, __,
|
|
__, __,
|
|
|
|
C_SPACE, C_ETC, C_QUOTE, C_ETC, C_ETC, C_ETC,
|
|
C_ETC, C_ETC,
|
|
C_ETC, C_ETC, C_STAR, C_PLUS, C_COMMA, C_MINUS,
|
|
C_POINT, C_SLASH,
|
|
C_ZERO, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT,
|
|
C_DIGIT, C_DIGIT,
|
|
C_DIGIT, C_DIGIT, C_COLON, C_ETC, C_ETC, C_ETC,
|
|
C_ETC, C_ETC,
|
|
|
|
C_ETC, C_ABCDF, C_ABCDF, C_ABCDF, C_ABCDF, C_E,
|
|
C_ABCDF, C_ETC,
|
|
C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
|
|
C_ETC, C_ETC,
|
|
C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
|
|
C_ETC, C_ETC,
|
|
C_ETC, C_ETC, C_ETC, C_LSQRB, C_BACKS, C_RSQRB,
|
|
C_ETC, C_ETC,
|
|
|
|
C_ETC, C_LOW_A, C_LOW_B, C_LOW_C, C_LOW_D, C_LOW_E,
|
|
C_LOW_F, C_ETC,
|
|
C_ETC, C_ETC, C_ETC, C_ETC, C_LOW_L, C_ETC,
|
|
C_LOW_N, C_ETC,
|
|
C_ETC, C_ETC, C_LOW_R, C_LOW_S, C_LOW_T, C_LOW_U,
|
|
C_ETC, C_ETC,
|
|
C_ETC, C_ETC, C_ETC, C_LCURB, C_ETC, C_RCURB,
|
|
C_ETC, C_ETC
|
|
};
|
|
|
|
|
|
/*
|
|
The state codes.
|
|
*/
|
|
enum states
|
|
{
|
|
GO, /* start */
|
|
OK, /* ok */
|
|
OB, /* object */
|
|
KE, /* key */
|
|
CO, /* colon */
|
|
VA, /* value */
|
|
AR, /* array */
|
|
ST, /* string */
|
|
ES, /* escape */
|
|
U1, /* u1 */
|
|
U2, /* u2 */
|
|
U3, /* u3 */
|
|
U4, /* u4 */
|
|
MI, /* minus */
|
|
ZE, /* zero */
|
|
IN, /* integer */
|
|
FR, /* fraction */
|
|
E1, /* e */
|
|
E2, /* ex */
|
|
E3, /* exp */
|
|
T1, /* tr */
|
|
T2, /* tru */
|
|
T3, /* true */
|
|
F1, /* fa */
|
|
F2, /* fal */
|
|
F3, /* fals */
|
|
F4, /* false */
|
|
N1, /* nu */
|
|
N2, /* nul */
|
|
N3, /* null */
|
|
C1, /* / */
|
|
C2, /* / * */
|
|
C3, /* * */
|
|
FX, /* *.* *eE* */
|
|
D1, /* second UTF-16 character decoding started by \ */
|
|
D2, /* second UTF-16 character proceeded by u */
|
|
NR_STATES
|
|
};
|
|
|
|
enum actions
|
|
{
|
|
CB = -10, /* comment begin */
|
|
CE = -11, /* comment end */
|
|
FA = -12, /* false */
|
|
TR = -13, /* false */
|
|
NU = -14, /* null */
|
|
DE = -15, /* double detected by exponent e E */
|
|
DF = -16, /* double detected by fraction . */
|
|
SB = -17, /* string begin */
|
|
MX = -18, /* integer detected by minus */
|
|
ZX = -19, /* integer detected by zero */
|
|
IX = -20, /* integer detected by 1-9 */
|
|
EX = -21, /* next char is escaped */
|
|
UC = -22, /* Unicode character read */
|
|
};
|
|
|
|
|
|
static int state_transition_table[NR_STATES][NR_CLASSES] = {
|
|
/*
|
|
The state transition table takes the current state and the current symbol,
|
|
and returns either a new state or an action. An action is represented as a
|
|
negative number. A JSON text is accepted if at the end of the text the
|
|
state is OK and if the mode is MODE_DONE.
|
|
|
|
white 1-9
|
|
ABCDF etc
|
|
space | { } [ ] : , " \ / + - . 0 | a b c d e f
|
|
l n r s t u | E | * */
|
|
/*start GO*/ {GO, GO, -6, __, -5, __, __, __, __, __, CB, __, __, __, __,
|
|
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
|
|
__, __},
|
|
/*ok OK*/ {OK, OK, __, -8, __, -7, __, -3, __, __, CB, __, __, __, __,
|
|
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
|
|
__, __},
|
|
/*object OB*/ {OB, OB, __, -9, __, __, __, __, SB, __, CB, __, __, __, __,
|
|
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
|
|
__, __},
|
|
/*key KE*/ {KE, KE, __, __, __, __, __, __, SB, __, CB, __, __, __, __,
|
|
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
|
|
__, __},
|
|
/*colon CO*/ {CO, CO, __, __, __, __, -2, __, __, __, CB, __, __, __, __,
|
|
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
|
|
__, __},
|
|
/*value VA*/ {VA, VA, -6, __, -5, __, __, __, SB, __, CB, __, MX, __, ZX,
|
|
IX, __, __, __, __, __, FA, __, NU, __, __, TR, __, __, __,
|
|
__, __},
|
|
/*array AR*/ {AR, AR, -6, __, -5, -7, __, __, SB, __, CB, __, MX, __, ZX,
|
|
IX, __, __, __, __, __, FA, __, NU, __, __, TR, __, __, __,
|
|
__, __},
|
|
/*string ST*/ {ST, __, ST, ST, ST, ST, ST, ST, -4, EX, ST, ST, ST, ST, ST,
|
|
ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST,
|
|
ST, ST},
|
|
/*escape ES*/ {__, __, __, __, __, __, __, __, ST, ST, ST, __, __, __, __,
|
|
__, __, ST, __, __, __, ST, __, ST, ST, __, ST, U1, __, __,
|
|
__, __},
|
|
/*u1 U1*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, U2,
|
|
U2, U2, U2, U2, U2, U2, U2, __, __, __, __, __, __, U2, U2,
|
|
__, __},
|
|
/*u2 U2*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, U3,
|
|
U3, U3, U3, U3, U3, U3, U3, __, __, __, __, __, __, U3, U3,
|
|
__, __},
|
|
/*u3 U3*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, U4,
|
|
U4, U4, U4, U4, U4, U4, U4, __, __, __, __, __, __, U4, U4,
|
|
__, __},
|
|
/*u4 U4*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, UC,
|
|
UC, UC, UC, UC, UC, UC, UC, __, __, __, __, __, __, UC, UC,
|
|
__, __},
|
|
/*minus MI*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, ZE,
|
|
IN, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
|
|
__, __},
|
|
/*zero ZE*/ {OK, OK, __, -8, __, -7, __, -3, __, __, CB, __, __, DF, __,
|
|
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
|
|
__, __},
|
|
/*int IN*/ {OK, OK, __, -8, __, -7, __, -3, __, __, CB, __, __, DF, IN,
|
|
IN, __, __, __, __, DE, __, __, __, __, __, __, __, __, DE,
|
|
__, __},
|
|
/*frac FR*/ {OK, OK, __, -8, __, -7, __, -3, __, __, CB, __, __, __, FR,
|
|
FR, __, __, __, __, E1, __, __, __, __, __, __, __, __, E1,
|
|
__, __},
|
|
/*e E1*/ {__, __, __, __, __, __, __, __, __, __, __, E2, E2, __, E3,
|
|
E3, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
|
|
__, __},
|
|
/*ex E2*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, E3,
|
|
E3, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
|
|
__, __},
|
|
/*exp E3*/ {OK, OK, __, -8, __, -7, __, -3, __, __, __, __, __, __, E3,
|
|
E3, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
|
|
__, __},
|
|
/*tr T1*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
|
|
__, __, __, __, __, __, __, __, __, T2, __, __, __, __, __,
|
|
__, __},
|
|
/*tru T2*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
|
|
__, __, __, __, __, __, __, __, __, __, __, __, T3, __, __,
|
|
__, __},
|
|
/*true T3*/ {__, __, __, __, __, __, __, __, __, __, CB, __, __, __, __,
|
|
__, __, __, __, __, OK, __, __, __, __, __, __, __, __, __,
|
|
__, __},
|
|
/*fa F1*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
|
|
__, F2, __, __, __, __, __, __, __, __, __, __, __, __, __,
|
|
__, __},
|
|
/*fal F2*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
|
|
__, __, __, __, __, __, __, F3, __, __, __, __, __, __, __,
|
|
__, __},
|
|
/*fals F3*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
|
|
__, __, __, __, __, __, __, __, __, __, F4, __, __, __, __,
|
|
__, __},
|
|
/*false F4*/ {__, __, __, __, __, __, __, __, __, __, CB, __, __, __, __,
|
|
__, __, __, __, __, OK, __, __, __, __, __, __, __, __, __,
|
|
__, __},
|
|
/*nu N1*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
|
|
__, __, __, __, __, __, __, __, __, __, __, __, N2, __, __,
|
|
__, __},
|
|
/*nul N2*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
|
|
__, __, __, __, __, __, __, N3, __, __, __, __, __, __, __,
|
|
__, __},
|
|
/*null N3*/ {__, __, __, __, __, __, __, __, __, __, CB, __, __, __, __,
|
|
__, __, __, __, __, __, __, OK, __, __, __, __, __, __, __,
|
|
__, __},
|
|
/*/ C1*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
|
|
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
|
|
__, C2},
|
|
/*/* C2*/ {C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2,
|
|
C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2,
|
|
C2, C3},
|
|
/** C3*/ {C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, CE, C2, C2, C2, C2,
|
|
C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2,
|
|
C2, C3},
|
|
/*_. FX*/ {OK, OK, __, -8, __, -7, __, -3, __, __, __, __, __, __, FR,
|
|
FR, __, __, __, __, E1, __, __, __, __, __, __, __, __, E1,
|
|
__, __},
|
|
/*\ D1*/ {__, __, __, __, __, __, __, __, __, D2, __, __, __, __, __,
|
|
__, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
|
|
__, __},
|
|
/*\ D2*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
|
|
__, __, __, __, __, __, __, __, __, __, __, __, U1, __, __,
|
|
__, __},
|
|
};
|
|
|
|
|
|
/*
|
|
These modes can be pushed on the stack.
|
|
*/
|
|
enum modes
|
|
{
|
|
MODE_ARRAY = 1,
|
|
MODE_DONE = 2,
|
|
MODE_KEY = 3,
|
|
MODE_OBJECT = 4
|
|
};
|
|
|
|
static int
|
|
push( JSON_parser jc,
|
|
int mode )
|
|
{
|
|
/*
|
|
Push a mode onto the stack. Return false if there is overflow.
|
|
*/
|
|
jc->top += 1;
|
|
if( jc->depth < 0 )
|
|
{
|
|
if( jc->top >= jc->stack_capacity )
|
|
{
|
|
size_t bytes_to_allocate;
|
|
jc->stack_capacity *= 2;
|
|
bytes_to_allocate = jc->stack_capacity *
|
|
sizeof( jc->static_stack[0] );
|
|
if( jc->stack == &jc->static_stack[0] )
|
|
{
|
|
jc->stack = (signed char*)malloc( bytes_to_allocate );
|
|
memcpy( jc->stack, jc->static_stack,
|
|
sizeof( jc->static_stack ) );
|
|
}
|
|
else
|
|
{
|
|
jc->stack = (signed char*)realloc( jc->stack,
|
|
bytes_to_allocate );
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if( jc->top >= jc->depth )
|
|
{
|
|
return false;
|
|
}
|
|
}
|
|
|
|
jc->stack[jc->top] = mode;
|
|
return true;
|
|
}
|
|
|
|
static int
|
|
pop( JSON_parser jc,
|
|
int mode )
|
|
{
|
|
/*
|
|
Pop the stack, assuring that the current mode matches the expectation.
|
|
Return false if there is underflow or if the modes mismatch.
|
|
*/
|
|
if( jc->top < 0 || jc->stack[jc->top] != mode )
|
|
{
|
|
return false;
|
|
}
|
|
jc->top -= 1;
|
|
return true;
|
|
}
|
|
|
|
#define parse_buffer_clear( jc ) \
|
|
do { \
|
|
jc->parse_buffer_count = 0; \
|
|
jc->parse_buffer[0] = 0; \
|
|
} while( 0 )
|
|
|
|
#define parse_buffer_pop_back_char( jc ) \
|
|
do { \
|
|
assert( jc->parse_buffer_count >= 1 ); \
|
|
--jc->parse_buffer_count; \
|
|
jc->parse_buffer[jc->parse_buffer_count] = 0; \
|
|
} while( 0 )
|
|
|
|
void
|
|
delete_JSON_parser( JSON_parser jc )
|
|
{
|
|
if( jc )
|
|
{
|
|
if( jc->stack != &jc->static_stack[0] )
|
|
{
|
|
free( (void*)jc->stack );
|
|
}
|
|
if( jc->parse_buffer != &jc->static_parse_buffer[0] )
|
|
{
|
|
free( (void*)jc->parse_buffer );
|
|
}
|
|
free( (void*)jc );
|
|
}
|
|
}
|
|
|
|
JSON_parser
|
|
new_JSON_parser( JSON_config* config )
|
|
{
|
|
/*
|
|
new_JSON_parser starts the checking process by constructing a JSON_parser
|
|
object. It takes a depth parameter that restricts the level of maximum
|
|
nesting.
|
|
|
|
To continue the process, call JSON_parser_char for each character in the
|
|
JSON text, and then call JSON_parser_done to obtain the final result.
|
|
These functions are fully reentrant.
|
|
*/
|
|
|
|
int depth = 0;
|
|
JSON_config default_config;
|
|
|
|
JSON_parser jc = malloc( sizeof( struct JSON_parser_struct ) );
|
|
|
|
memset( jc, 0, sizeof( *jc ) );
|
|
|
|
|
|
/* initialize configuration */
|
|
init_JSON_config( &default_config );
|
|
|
|
/* set to default configuration if none was provided */
|
|
if( config == NULL )
|
|
{
|
|
config = &default_config;
|
|
}
|
|
|
|
depth = config->depth;
|
|
|
|
/* We need to be able to push at least one object */
|
|
if( depth == 0 )
|
|
{
|
|
depth = 1;
|
|
}
|
|
|
|
jc->state = GO;
|
|
jc->top = -1;
|
|
|
|
/* Do we want non-bound stack? */
|
|
if( depth > 0 )
|
|
{
|
|
jc->stack_capacity = depth;
|
|
jc->depth = depth;
|
|
if( depth <= (int)COUNTOF( jc->static_stack ) )
|
|
{
|
|
jc->stack = &jc->static_stack[0];
|
|
}
|
|
else
|
|
{
|
|
jc->stack =
|
|
(signed char*)malloc( jc->stack_capacity *
|
|
sizeof( jc->static_stack[0] ) );
|
|
}
|
|
}
|
|
else
|
|
{
|
|
jc->stack_capacity = COUNTOF( jc->static_stack );
|
|
jc->depth = -1;
|
|
jc->stack = &jc->static_stack[0];
|
|
}
|
|
|
|
/* set parser to start */
|
|
push( jc, MODE_DONE );
|
|
|
|
/* set up the parse buffer */
|
|
jc->parse_buffer = &jc->static_parse_buffer[0];
|
|
jc->parse_buffer_capacity = COUNTOF( jc->static_parse_buffer );
|
|
parse_buffer_clear( jc );
|
|
|
|
/* set up callback, comment & float handling */
|
|
jc->callback = config->callback;
|
|
jc->ctx = config->callback_ctx;
|
|
jc->allow_comments = config->allow_comments != 0;
|
|
jc->handle_floats_manually = config->handle_floats_manually != 0;
|
|
return jc;
|
|
}
|
|
|
|
static void
|
|
grow_parse_buffer( JSON_parser jc )
|
|
{
|
|
size_t bytes_to_allocate;
|
|
|
|
jc->parse_buffer_capacity *= 2;
|
|
bytes_to_allocate = jc->parse_buffer_capacity *
|
|
sizeof( jc->parse_buffer[0] );
|
|
if( jc->parse_buffer == &jc->static_parse_buffer[0] )
|
|
{
|
|
jc->parse_buffer = (char*)malloc( bytes_to_allocate );
|
|
memcpy( jc->parse_buffer, jc->static_parse_buffer,
|
|
jc->parse_buffer_count );
|
|
}
|
|
else
|
|
{
|
|
jc->parse_buffer = (char*)realloc( jc->parse_buffer,
|
|
bytes_to_allocate );
|
|
}
|
|
}
|
|
|
|
#define parse_buffer_push_back_char( jc, c ) \
|
|
do { \
|
|
if( jc->parse_buffer_count + 1 >=\
|
|
jc->parse_buffer_capacity ) grow_parse_buffer( jc );\
|
|
jc->parse_buffer[jc->parse_buffer_count++] = c; \
|
|
jc->parse_buffer[jc->parse_buffer_count] = 0; \
|
|
} while( 0 )
|
|
|
|
|
|
static int
|
|
parse_parse_buffer( JSON_parser jc )
|
|
{
|
|
if( jc->callback )
|
|
{
|
|
JSON_value value, *arg = NULL;
|
|
|
|
if( jc->type != JSON_T_NONE )
|
|
{
|
|
assert(
|
|
jc->type == JSON_T_NULL
|
|
|| jc->type == JSON_T_FALSE
|
|
|| jc->type == JSON_T_TRUE
|
|
|| jc->type == JSON_T_FLOAT
|
|
|| jc->type == JSON_T_INTEGER
|
|
|| jc->type == JSON_T_STRING );
|
|
|
|
switch( jc->type )
|
|
{
|
|
case JSON_T_FLOAT:
|
|
arg = &value;
|
|
if( jc->handle_floats_manually )
|
|
{
|
|
value.vu.str.value = jc->parse_buffer;
|
|
value.vu.str.length = jc->parse_buffer_count;
|
|
}
|
|
else
|
|
{
|
|
sscanf( jc->parse_buffer, "%Lf",
|
|
&value.vu.float_value );
|
|
}
|
|
break;
|
|
|
|
case JSON_T_INTEGER:
|
|
arg = &value;
|
|
sscanf( jc->parse_buffer,
|
|
JSON_PARSER_INTEGER_SSCANF_TOKEN,
|
|
&value.vu.integer_value );
|
|
break;
|
|
|
|
case JSON_T_STRING:
|
|
arg = &value;
|
|
value.vu.str.value = jc->parse_buffer;
|
|
value.vu.str.length = jc->parse_buffer_count;
|
|
break;
|
|
}
|
|
|
|
if( !( *jc->callback )( jc->ctx, jc->type, arg ) )
|
|
{
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
parse_buffer_clear( jc );
|
|
|
|
return true;
|
|
}
|
|
|
|
static int
|
|
decode_unicode_char( JSON_parser jc )
|
|
{
|
|
const unsigned chars = jc->utf16_decode_buffer[0] ? 2 : 1;
|
|
int i;
|
|
UTF16 * uc = chars ==
|
|
1 ? &jc->utf16_decode_buffer[0] : &jc->
|
|
utf16_decode_buffer[1];
|
|
UTF16 x;
|
|
char* p;
|
|
|
|
assert( jc->parse_buffer_count >= 6 );
|
|
|
|
p = &jc->parse_buffer[jc->parse_buffer_count - 4];
|
|
|
|
for( i = 0; i < 4; ++i, ++p )
|
|
{
|
|
x = *p;
|
|
|
|
if( x >= 'a' )
|
|
{
|
|
x -= ( 'a' - 10 );
|
|
}
|
|
else if( x >= 'A' )
|
|
{
|
|
x -= ( 'A' - 10 );
|
|
}
|
|
else
|
|
{
|
|
x &= ~( (UTF16) 0x30 );
|
|
}
|
|
|
|
assert( x < 16 );
|
|
|
|
*uc |= x << ( ( 3u - i ) << 2 );
|
|
}
|
|
|
|
/* clear UTF-16 char form buffer */
|
|
jc->parse_buffer_count -= 6;
|
|
jc->parse_buffer[jc->parse_buffer_count] = 0;
|
|
|
|
/* attempt decoding ... */
|
|
{
|
|
UTF8* dec_start =
|
|
(UTF8*)&jc->parse_buffer[jc->parse_buffer_count];
|
|
UTF8* dec_start_dup = dec_start;
|
|
UTF8* dec_end = dec_start + 6;
|
|
|
|
const UTF16* enc_start = &jc->utf16_decode_buffer[0];
|
|
const UTF16* enc_end = enc_start + chars;
|
|
|
|
const ConversionResult result = ConvertUTF16toUTF8(
|
|
&enc_start, enc_end, &dec_start, dec_end, strictConversion );
|
|
|
|
const size_t new_chars = dec_start - dec_start_dup;
|
|
|
|
/* was it a surrogate UTF-16 char? */
|
|
if( chars == 1 && result == sourceExhausted )
|
|
{
|
|
return true;
|
|
}
|
|
|
|
if( result != conversionOK )
|
|
{
|
|
return false;
|
|
}
|
|
|
|
/* NOTE: clear decode buffer to resume string reading,
|
|
otherwise we continue to read UTF-16 */
|
|
jc->utf16_decode_buffer[0] = 0;
|
|
|
|
assert( new_chars <= 6 );
|
|
|
|
jc->parse_buffer_count += new_chars;
|
|
jc->parse_buffer[jc->parse_buffer_count] = 0;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
int
|
|
JSON_parser_char( JSON_parser jc,
|
|
int next_char )
|
|
{
|
|
/*
|
|
After calling new_JSON_parser, call this function for each character (or
|
|
partial character) in your JSON text. It can accept UTF-8, UTF-16, or
|
|
UTF-32. It returns true if things are looking ok so far. If it rejects the
|
|
text, it returns false.
|
|
*/
|
|
int next_class, next_state;
|
|
|
|
/*
|
|
Determine the character's class.
|
|
*/
|
|
if( next_char < 0 )
|
|
{
|
|
return false;
|
|
}
|
|
if( next_char >= 128 )
|
|
{
|
|
next_class = C_ETC;
|
|
}
|
|
else
|
|
{
|
|
next_class = ascii_class[next_char];
|
|
if( next_class <= __ )
|
|
{
|
|
return false;
|
|
}
|
|
}
|
|
|
|
if( jc->escaped )
|
|
{
|
|
jc->escaped = 0;
|
|
/* remove the backslash */
|
|
parse_buffer_pop_back_char( jc );
|
|
switch( next_char )
|
|
{
|
|
case 'b':
|
|
parse_buffer_push_back_char( jc, '\b' );
|
|
break;
|
|
|
|
case 'f':
|
|
parse_buffer_push_back_char( jc, '\f' );
|
|
break;
|
|
|
|
case 'n':
|
|
parse_buffer_push_back_char( jc, '\n' );
|
|
break;
|
|
|
|
case 'r':
|
|
parse_buffer_push_back_char( jc, '\r' );
|
|
break;
|
|
|
|
case 't':
|
|
parse_buffer_push_back_char( jc, '\t' );
|
|
break;
|
|
|
|
case '"':
|
|
parse_buffer_push_back_char( jc, '"' );
|
|
break;
|
|
|
|
case '\\':
|
|
parse_buffer_push_back_char( jc, '\\' );
|
|
break;
|
|
|
|
case '/':
|
|
parse_buffer_push_back_char( jc, '/' );
|
|
break;
|
|
|
|
case 'u':
|
|
parse_buffer_push_back_char( jc, '\\' );
|
|
parse_buffer_push_back_char( jc, 'u' );
|
|
break;
|
|
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
else if( !jc->comment )
|
|
{
|
|
if( jc->type != JSON_T_NONE
|
|
|| !( next_class == C_SPACE || next_class == C_WHITE ) /*
|
|
non-white-space
|
|
*/ )
|
|
{
|
|
parse_buffer_push_back_char( jc, (char)next_char );
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
Get the next state from the state transition table.
|
|
*/
|
|
next_state = state_transition_table[jc->state][next_class];
|
|
if( next_state >= 0 )
|
|
{
|
|
/*
|
|
Change the state.
|
|
*/
|
|
jc->state = next_state;
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
Or perform one of the actions.
|
|
*/
|
|
switch( next_state )
|
|
{
|
|
/* Unicode character */
|
|
case UC:
|
|
if( !decode_unicode_char( jc ) )
|
|
{
|
|
return false;
|
|
}
|
|
/* check if we need to read a second UTF-16 char */
|
|
if( jc->utf16_decode_buffer[0] )
|
|
{
|
|
jc->state = D1;
|
|
}
|
|
else
|
|
{
|
|
jc->state = ST;
|
|
}
|
|
break;
|
|
|
|
/* escaped char */
|
|
case EX:
|
|
jc->escaped = 1;
|
|
jc->state = ES;
|
|
break;
|
|
|
|
/* integer detected by minus */
|
|
case MX:
|
|
jc->type = JSON_T_INTEGER;
|
|
jc->state = MI;
|
|
break;
|
|
|
|
/* integer detected by zero */
|
|
case ZX:
|
|
jc->type = JSON_T_INTEGER;
|
|
jc->state = ZE;
|
|
break;
|
|
|
|
/* integer detected by 1-9 */
|
|
case IX:
|
|
jc->type = JSON_T_INTEGER;
|
|
jc->state = IN;
|
|
break;
|
|
|
|
/* floating point number detected by exponent*/
|
|
case DE:
|
|
assert( jc->type != JSON_T_FALSE );
|
|
assert( jc->type != JSON_T_TRUE );
|
|
assert( jc->type != JSON_T_NULL );
|
|
assert( jc->type != JSON_T_STRING );
|
|
jc->type = JSON_T_FLOAT;
|
|
jc->state = E1;
|
|
break;
|
|
|
|
/* floating point number detected by fraction */
|
|
case DF:
|
|
assert( jc->type != JSON_T_FALSE );
|
|
assert( jc->type != JSON_T_TRUE );
|
|
assert( jc->type != JSON_T_NULL );
|
|
assert( jc->type != JSON_T_STRING );
|
|
jc->type = JSON_T_FLOAT;
|
|
jc->state = FX;
|
|
break;
|
|
|
|
/* string begin " */
|
|
case SB:
|
|
parse_buffer_clear( jc );
|
|
assert( jc->type == JSON_T_NONE );
|
|
jc->type = JSON_T_STRING;
|
|
jc->state = ST;
|
|
break;
|
|
|
|
/* n */
|
|
case NU:
|
|
assert( jc->type == JSON_T_NONE );
|
|
jc->type = JSON_T_NULL;
|
|
jc->state = N1;
|
|
break;
|
|
|
|
/* f */
|
|
case FA:
|
|
assert( jc->type == JSON_T_NONE );
|
|
jc->type = JSON_T_FALSE;
|
|
jc->state = F1;
|
|
break;
|
|
|
|
/* t */
|
|
case TR:
|
|
assert( jc->type == JSON_T_NONE );
|
|
jc->type = JSON_T_TRUE;
|
|
jc->state = T1;
|
|
break;
|
|
|
|
/* closing comment */
|
|
case CE:
|
|
jc->comment = 0;
|
|
assert( jc->parse_buffer_count == 0 );
|
|
assert( jc->type == JSON_T_NONE );
|
|
jc->state = jc->before_comment_state;
|
|
break;
|
|
|
|
/* opening comment */
|
|
case CB:
|
|
if( !jc->allow_comments )
|
|
{
|
|
return false;
|
|
}
|
|
parse_buffer_pop_back_char( jc );
|
|
if( !parse_parse_buffer( jc ) )
|
|
{
|
|
return false;
|
|
}
|
|
assert( jc->parse_buffer_count == 0 );
|
|
assert( jc->type != JSON_T_STRING );
|
|
switch( jc->stack[jc->top] )
|
|
{
|
|
case MODE_ARRAY:
|
|
case MODE_OBJECT:
|
|
switch( jc->state )
|
|
{
|
|
case VA:
|
|
case AR:
|
|
jc->before_comment_state = jc->state;
|
|
break;
|
|
|
|
default:
|
|
jc->before_comment_state = OK;
|
|
break;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
jc->before_comment_state = jc->state;
|
|
break;
|
|
}
|
|
jc->type = JSON_T_NONE;
|
|
jc->state = C1;
|
|
jc->comment = 1;
|
|
break;
|
|
|
|
/* empty } */
|
|
case - 9:
|
|
parse_buffer_clear( jc );
|
|
if( jc->callback
|
|
&& !( *jc->callback )( jc->ctx, JSON_T_OBJECT_END, NULL ) )
|
|
{
|
|
return false;
|
|
}
|
|
if( !pop( jc, MODE_KEY ) )
|
|
{
|
|
return false;
|
|
}
|
|
jc->state = OK;
|
|
break;
|
|
|
|
/* } */ case - 8:
|
|
parse_buffer_pop_back_char( jc );
|
|
if( !parse_parse_buffer( jc ) )
|
|
{
|
|
return false;
|
|
}
|
|
if( jc->callback
|
|
&& !( *jc->callback )( jc->ctx, JSON_T_OBJECT_END, NULL ) )
|
|
{
|
|
return false;
|
|
}
|
|
if( !pop( jc, MODE_OBJECT ) )
|
|
{
|
|
return false;
|
|
}
|
|
jc->type = JSON_T_NONE;
|
|
jc->state = OK;
|
|
break;
|
|
|
|
/* ] */ case - 7:
|
|
parse_buffer_pop_back_char( jc );
|
|
if( !parse_parse_buffer( jc ) )
|
|
{
|
|
return false;
|
|
}
|
|
if( jc->callback
|
|
&& !( *jc->callback )( jc->ctx, JSON_T_ARRAY_END, NULL ) )
|
|
{
|
|
return false;
|
|
}
|
|
if( !pop( jc, MODE_ARRAY ) )
|
|
{
|
|
return false;
|
|
}
|
|
|
|
jc->type = JSON_T_NONE;
|
|
jc->state = OK;
|
|
break;
|
|
|
|
/* { */ case - 6:
|
|
parse_buffer_pop_back_char( jc );
|
|
if( jc->callback
|
|
&& !( *jc->callback )( jc->ctx, JSON_T_OBJECT_BEGIN, NULL ) )
|
|
{
|
|
return false;
|
|
}
|
|
if( !push( jc, MODE_KEY ) )
|
|
{
|
|
return false;
|
|
}
|
|
assert( jc->type == JSON_T_NONE );
|
|
jc->state = OB;
|
|
break;
|
|
|
|
/* [ */ case - 5:
|
|
parse_buffer_pop_back_char( jc );
|
|
if( jc->callback
|
|
&& !( *jc->callback )( jc->ctx, JSON_T_ARRAY_BEGIN, NULL ) )
|
|
{
|
|
return false;
|
|
}
|
|
if( !push( jc, MODE_ARRAY ) )
|
|
{
|
|
return false;
|
|
}
|
|
assert( jc->type == JSON_T_NONE );
|
|
jc->state = AR;
|
|
break;
|
|
|
|
/* string end " */ case - 4:
|
|
parse_buffer_pop_back_char( jc );
|
|
switch( jc->stack[jc->top] )
|
|
{
|
|
case MODE_KEY:
|
|
assert( jc->type == JSON_T_STRING );
|
|
jc->type = JSON_T_NONE;
|
|
jc->state = CO;
|
|
|
|
if( jc->callback )
|
|
{
|
|
JSON_value value;
|
|
value.vu.str.value = jc->parse_buffer;
|
|
value.vu.str.length = jc->parse_buffer_count;
|
|
if( !( *jc->callback )( jc->ctx, JSON_T_KEY,
|
|
&value ) )
|
|
{
|
|
return false;
|
|
}
|
|
}
|
|
parse_buffer_clear( jc );
|
|
break;
|
|
|
|
case MODE_ARRAY:
|
|
case MODE_OBJECT:
|
|
assert( jc->type == JSON_T_STRING );
|
|
if( !parse_parse_buffer( jc ) )
|
|
{
|
|
return false;
|
|
}
|
|
jc->type = JSON_T_NONE;
|
|
jc->state = OK;
|
|
break;
|
|
|
|
default:
|
|
return false;
|
|
}
|
|
break;
|
|
|
|
/* , */ case - 3:
|
|
parse_buffer_pop_back_char( jc );
|
|
if( !parse_parse_buffer( jc ) )
|
|
{
|
|
return false;
|
|
}
|
|
switch( jc->stack[jc->top] )
|
|
{
|
|
case MODE_OBJECT:
|
|
/*
|
|
A comma causes a flip from object mode to key mode.
|
|
*/
|
|
if( !pop( jc, MODE_OBJECT ) || !push( jc, MODE_KEY ) )
|
|
{
|
|
return false;
|
|
}
|
|
assert( jc->type != JSON_T_STRING );
|
|
jc->type = JSON_T_NONE;
|
|
jc->state = KE;
|
|
break;
|
|
|
|
case MODE_ARRAY:
|
|
assert( jc->type != JSON_T_STRING );
|
|
jc->type = JSON_T_NONE;
|
|
jc->state = VA;
|
|
break;
|
|
|
|
default:
|
|
return false;
|
|
}
|
|
break;
|
|
|
|
/* : */ case - 2:
|
|
/*
|
|
A colon causes a flip from key mode to object mode.
|
|
*/
|
|
parse_buffer_pop_back_char( jc );
|
|
if( !pop( jc, MODE_KEY ) || !push( jc, MODE_OBJECT ) )
|
|
{
|
|
return false;
|
|
}
|
|
assert( jc->type == JSON_T_NONE );
|
|
jc->state = VA;
|
|
break;
|
|
|
|
/*
|
|
Bad action.
|
|
*/
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
int
|
|
JSON_parser_done( JSON_parser jc )
|
|
{
|
|
const int result = jc->state == OK && pop( jc, MODE_DONE );
|
|
|
|
return result;
|
|
}
|
|
|
|
int
|
|
JSON_parser_is_legal_white_space_string( const char* s )
|
|
{
|
|
int c, char_class;
|
|
|
|
if( s == NULL )
|
|
{
|
|
return false;
|
|
}
|
|
|
|
for( ; *s; ++s )
|
|
{
|
|
c = *s;
|
|
|
|
if( c < 0 || c >= 128 )
|
|
{
|
|
return false;
|
|
}
|
|
|
|
char_class = ascii_class[c];
|
|
|
|
if( char_class != C_SPACE && char_class != C_WHITE )
|
|
{
|
|
return false;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
void
|
|
init_JSON_config( JSON_config* config )
|
|
{
|
|
if( config )
|
|
{
|
|
memset( config, 0, sizeof( *config ) );
|
|
|
|
config->depth = JSON_PARSER_STACK_SIZE - 1;
|
|
}
|
|
}
|
|
|