transmission/libtransmission/JSON_parser.c

1196 lines
35 KiB
C
Raw Normal View History

/* JSON_parser.c */
2008-05-11 22:42:53 +00:00
/* 2007-08-24 */
/*
Copyright (c) 2005 JSON.org
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all
copies or substantial portions of the Software.
The Software shall be used for Good, not Evil.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
2008-05-11 22:42:53 +00:00
/*
Callbacks, comments, Unicode handling by Jean Gressmann (jean@0x42.de),
2007-2008.
2008-05-11 22:42:53 +00:00
For the added features the license above applies also.
Changelog:
2008/07/19
- Removed some duplicate code & debugging variable
(Charles.Kerr@noaa.gov)
2008/05/28
- Made JSON_value structure ansi C compliant. This bug was reported by
trisk@acm.jhu.edu
2008-05-11 22:42:53 +00:00
2008/05/20
- Fixed bug reported by Charles.Kerr@noaa.gov where the switching
from static to dynamic parse buffer did not copy the static parse
buffer's content.
*/
2008-05-11 22:42:53 +00:00
#include <assert.h>
#include <ctype.h>
#include <float.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "JSON_parser.h"
2008-05-11 22:42:53 +00:00
#include "ConvertUTF.h"
#if _MSC_VER >= 1400 /* Visual Studio 2005 and up */
#pragma warning(disable:4996) /* unsecure sscanf */
2008-05-11 22:42:53 +00:00
#endif
#define true 1
#define false 0
#define __ -1 /* the universal error code */
/* values chosen so that the object size is approx equal to one page (4K) */
#ifndef JSON_PARSER_STACK_SIZE
#define JSON_PARSER_STACK_SIZE 128
2008-05-11 22:42:53 +00:00
#endif
#ifndef JSON_PARSER_PARSE_BUFFER_SIZE
#define JSON_PARSER_PARSE_BUFFER_SIZE 3500
2008-05-11 22:42:53 +00:00
#endif
/*
Complete state of one parser instance. JSON_parser is a pointer to this
structure; instances are created by new_JSON_parser and released by
delete_JSON_parser.
*/
typedef struct JSON_parser_struct
{
/* user callback invoked for parsed items, and the context pointer that is
   passed to it as its first argument */
JSON_parser_callback callback;
void* ctx;
/* state:                  current state (enum states)
   before_comment_state:   state restored when a comment is closed
   type:                   JSON_T_* of the value being accumulated, or
                           JSON_T_NONE
   escaped:                nonzero right after a backslash inside a string
   comment:                nonzero while inside a comment
   allow_comments,
   handle_floats_manually: copied from the JSON_config */
signed char state, before_comment_state, type, escaped,
comment, allow_comments, handle_floats_manually;
/* UTF-16 code units decoded from \uXXXX escapes; element 0 stays nonzero
   while the second unit of a surrogate pair is still expected */
UTF16 utf16_decode_buffer[2];
/* maximum nesting depth, or -1 when the stack may grow without bound */
long depth;
/* index of the top element of the mode stack, -1 when it is empty */
long top;
/* mode stack: points at static_stack or at a heap allocation */
signed char* stack;
long stack_capacity;
signed char static_stack[JSON_PARSER_STACK_SIZE];
/* NUL-terminated text of the value being accumulated: points at
   static_parse_buffer or at a heap allocation */
char* parse_buffer;
size_t parse_buffer_capacity;
size_t parse_buffer_count;
/* NOTE(review): not referenced by any code visible in this file */
size_t comment_begin_offset;
char static_parse_buffer[
JSON_PARSER_PARSE_BUFFER_SIZE];
} * JSON_parser;
2008-05-11 22:42:53 +00:00
/* Number of elements in a true array (not a pointer). The argument is
   parenthesized so that any array-valued expression can be passed. */
#define COUNTOF( x ) ( sizeof( x ) / sizeof( ( x )[0] ) )
2008-05-11 22:42:53 +00:00
/*
Characters are mapped into these 32 character classes. This allows for
a significant reduction in the size of the state transition table.
*/
2008-05-11 22:42:53 +00:00
/*
Character classes. Each value is used as a column index into
state_transition_table; NR_CLASSES is the number of classes.
*/
enum classes
{
2008-05-11 22:42:53 +00:00
C_SPACE, /* space */
C_WHITE, /* other whitespace */
C_LCURB, /* { */
C_RCURB, /* } */
C_LSQRB, /* [ */
C_RSQRB, /* ] */
C_COLON, /* : */
C_COMMA, /* , */
C_QUOTE, /* " */
C_BACKS, /* \ */
C_SLASH, /* / */
C_PLUS, /* + */
C_MINUS, /* - */
C_POINT, /* . */
C_ZERO, /* 0 */
2008-05-11 22:42:53 +00:00
C_DIGIT, /* 123456789 */
C_LOW_A, /* a */
C_LOW_B, /* b */
C_LOW_C, /* c */
C_LOW_D, /* d */
C_LOW_E, /* e */
C_LOW_F, /* f */
C_LOW_L, /* l */
C_LOW_N, /* n */
C_LOW_R, /* r */
C_LOW_S, /* s */
C_LOW_T, /* t */
C_LOW_U, /* u */
C_ABCDF, /* ABCDF */
C_E, /* E */
C_ETC, /* everything else */
C_STAR, /* * */
2008-05-11 22:42:53 +00:00
NR_CLASSES
};
/*
    This array maps the 128 ASCII characters into character classes.
    The remaining Unicode characters should be mapped to C_ETC.
    Non-whitespace control characters are errors (class __).

    The table is only ever read, so it is const-qualified.
*/
static const int ascii_class[128] = {
    /* 0x00 */ __,      __,      __,      __,      __,      __,      __,      __,
    /* 0x08 */ __,      C_WHITE, C_WHITE, __,      __,      C_WHITE, __,      __,
    /* 0x10 */ __,      __,      __,      __,      __,      __,      __,      __,
    /* 0x18 */ __,      __,      __,      __,      __,      __,      __,      __,

    /* 0x20 */ C_SPACE, C_ETC,   C_QUOTE, C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
    /* 0x28 */ C_ETC,   C_ETC,   C_STAR,  C_PLUS,  C_COMMA, C_MINUS, C_POINT, C_SLASH,
    /* 0x30 */ C_ZERO,  C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT,
    /* 0x38 */ C_DIGIT, C_DIGIT, C_COLON, C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,

    /* 0x40 */ C_ETC,   C_ABCDF, C_ABCDF, C_ABCDF, C_ABCDF, C_E,     C_ABCDF, C_ETC,
    /* 0x48 */ C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
    /* 0x50 */ C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_ETC,
    /* 0x58 */ C_ETC,   C_ETC,   C_ETC,   C_LSQRB, C_BACKS, C_RSQRB, C_ETC,   C_ETC,

    /* 0x60 */ C_ETC,   C_LOW_A, C_LOW_B, C_LOW_C, C_LOW_D, C_LOW_E, C_LOW_F, C_ETC,
    /* 0x68 */ C_ETC,   C_ETC,   C_ETC,   C_ETC,   C_LOW_L, C_ETC,   C_LOW_N, C_ETC,
    /* 0x70 */ C_ETC,   C_ETC,   C_LOW_R, C_LOW_S, C_LOW_T, C_LOW_U, C_ETC,   C_ETC,
    /* 0x78 */ C_ETC,   C_ETC,   C_ETC,   C_LCURB, C_ETC,   C_RCURB, C_ETC,   C_ETC
};
/*
The state codes.
*/
/*
Parser states. Each value is used as a row index into
state_transition_table; NR_STATES is the number of states.
*/
enum states
{
2008-05-11 22:42:53 +00:00
GO, /* start */
OK, /* ok */
OB, /* object */
KE, /* key */
CO, /* colon */
VA, /* value */
AR, /* array */
ST, /* string */
ES, /* escape */
U1, /* u1 */
U2, /* u2 */
U3, /* u3 */
U4, /* u4 */
MI, /* minus */
ZE, /* zero */
IN, /* integer */
FR, /* fraction */
E1, /* e */
E2, /* ex */
E3, /* exp */
T1, /* tr */
T2, /* tru */
T3, /* true */
F1, /* fa */
F2, /* fal */
F3, /* fals */
F4, /* false */
N1, /* nu */
N2, /* nul */
N3, /* null */
C1, /* / */
C2, /* / * */
C3, /* * */
FX, /* *.* *eE* */
D1, /* first UTF-16 unit read; expecting the \ of the second escape */
D2, /* \ of the second UTF-16 escape read; expecting u */
NR_STATES
};
/*
Named actions stored in state_transition_table. Actions are negative so
that they can be told apart from states; the table additionally uses the
literal values -2 .. -9 for structural characters, which are handled in
JSON_parser_char.
*/
enum actions
{
CB = -10, /* comment begin */
CE = -11, /* comment end */
FA = -12, /* false */
TR = -13, /* true */
NU = -14, /* null */
DE = -15, /* double detected by exponent e E */
DF = -16, /* double detected by fraction . */
SB = -17, /* string begin */
MX = -18, /* integer detected by minus */
ZX = -19, /* integer detected by zero */
IX = -20, /* integer detected by 1-9 */
EX = -21, /* next char is escaped */
UC = -22, /* Unicode character read */
};
/*
    The state transition table takes the current state and the current symbol,
    and returns either a new state or an action. An action is represented as a
    negative number. A JSON text is accepted if at the end of the text the
    state is OK and if the mode is MODE_DONE.

    Rows are indexed by enum states, columns by enum classes, i.e. in order:

        space white {  }  [  ]  :  ,  "  bslash  /  +  -  .  0
        1-9   a  b  c  d  e  f  l  n  r  s  t  u  ABCDF  E
        etc   *

    Literal actions (handled in JSON_parser_char):
        -9 empty }   -8 }   -7 ]   -6 {   -5 [
        -4 string end "   -3 ,   -2 :

    The table is only ever read, so it is const-qualified.
*/
static const int state_transition_table[NR_STATES][NR_CLASSES] = {
    /*start  GO*/ {GO, GO, -6, __, -5, __, __, __, __, __, CB, __, __, __, __,
                   __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
                   __, __},
    /*ok     OK*/ {OK, OK, __, -8, __, -7, __, -3, __, __, CB, __, __, __, __,
                   __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
                   __, __},
    /*object OB*/ {OB, OB, __, -9, __, __, __, __, SB, __, CB, __, __, __, __,
                   __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
                   __, __},
    /*key    KE*/ {KE, KE, __, __, __, __, __, __, SB, __, CB, __, __, __, __,
                   __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
                   __, __},
    /*colon  CO*/ {CO, CO, __, __, __, __, -2, __, __, __, CB, __, __, __, __,
                   __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
                   __, __},
    /*value  VA*/ {VA, VA, -6, __, -5, __, __, __, SB, __, CB, __, MX, __, ZX,
                   IX, __, __, __, __, __, FA, __, NU, __, __, TR, __, __, __,
                   __, __},
    /*array  AR*/ {AR, AR, -6, __, -5, -7, __, __, SB, __, CB, __, MX, __, ZX,
                   IX, __, __, __, __, __, FA, __, NU, __, __, TR, __, __, __,
                   __, __},
    /*string ST*/ {ST, __, ST, ST, ST, ST, ST, ST, -4, EX, ST, ST, ST, ST, ST,
                   ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST, ST,
                   ST, ST},
    /*escape ES*/ {__, __, __, __, __, __, __, __, ST, ST, ST, __, __, __, __,
                   __, __, ST, __, __, __, ST, __, ST, ST, __, ST, U1, __, __,
                   __, __},
    /*u1     U1*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, U2,
                   U2, U2, U2, U2, U2, U2, U2, __, __, __, __, __, __, U2, U2,
                   __, __},
    /*u2     U2*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, U3,
                   U3, U3, U3, U3, U3, U3, U3, __, __, __, __, __, __, U3, U3,
                   __, __},
    /*u3     U3*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, U4,
                   U4, U4, U4, U4, U4, U4, U4, __, __, __, __, __, __, U4, U4,
                   __, __},
    /*u4     U4*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, UC,
                   UC, UC, UC, UC, UC, UC, UC, __, __, __, __, __, __, UC, UC,
                   __, __},
    /*minus  MI*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, ZE,
                   IN, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
                   __, __},
    /*zero   ZE*/ {OK, OK, __, -8, __, -7, __, -3, __, __, CB, __, __, DF, __,
                   __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
                   __, __},
    /*int    IN*/ {OK, OK, __, -8, __, -7, __, -3, __, __, CB, __, __, DF, IN,
                   IN, __, __, __, __, DE, __, __, __, __, __, __, __, __, DE,
                   __, __},
    /*frac   FR*/ {OK, OK, __, -8, __, -7, __, -3, __, __, CB, __, __, __, FR,
                   FR, __, __, __, __, E1, __, __, __, __, __, __, __, __, E1,
                   __, __},
    /*e      E1*/ {__, __, __, __, __, __, __, __, __, __, __, E2, E2, __, E3,
                   E3, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
                   __, __},
    /*ex     E2*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, E3,
                   E3, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
                   __, __},
    /*exp    E3*/ {OK, OK, __, -8, __, -7, __, -3, __, __, __, __, __, __, E3,
                   E3, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
                   __, __},
    /*tr     T1*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
                   __, __, __, __, __, __, __, __, __, T2, __, __, __, __, __,
                   __, __},
    /*tru    T2*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
                   __, __, __, __, __, __, __, __, __, __, __, __, T3, __, __,
                   __, __},
    /*true   T3*/ {__, __, __, __, __, __, __, __, __, __, CB, __, __, __, __,
                   __, __, __, __, __, OK, __, __, __, __, __, __, __, __, __,
                   __, __},
    /*fa     F1*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
                   __, F2, __, __, __, __, __, __, __, __, __, __, __, __, __,
                   __, __},
    /*fal    F2*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
                   __, __, __, __, __, __, __, F3, __, __, __, __, __, __, __,
                   __, __},
    /*fals   F3*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
                   __, __, __, __, __, __, __, __, __, __, F4, __, __, __, __,
                   __, __},
    /*false  F4*/ {__, __, __, __, __, __, __, __, __, __, CB, __, __, __, __,
                   __, __, __, __, __, OK, __, __, __, __, __, __, __, __, __,
                   __, __},
    /*nu     N1*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
                   __, __, __, __, __, __, __, __, __, __, __, __, N2, __, __,
                   __, __},
    /*nul    N2*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
                   __, __, __, __, __, __, __, N3, __, __, __, __, __, __, __,
                   __, __},
    /*null   N3*/ {__, __, __, __, __, __, __, __, __, __, CB, __, __, __, __,
                   __, __, __, __, __, __, __, OK, __, __, __, __, __, __, __,
                   __, __},
    /*slash        C1*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
                   __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
                   __, C2},
    /*slash-star   C2*/ {C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2,
                   C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2,
                   C2, C3},
    /*star         C3*/ {C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, CE, C2, C2, C2, C2,
                   C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2,
                   C2, C3},
    /*_.     FX*/ {OK, OK, __, -8, __, -7, __, -3, __, __, __, __, __, __, FR,
                   FR, __, __, __, __, E1, __, __, __, __, __, __, __, __, E1,
                   __, __},
    /*2nd bslash D1*/ {__, __, __, __, __, __, __, __, __, D2, __, __, __, __, __,
                   __, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
                   __, __},
    /*2nd u      D2*/ {__, __, __, __, __, __, __, __, __, __, __, __, __, __, __,
                   __, __, __, __, __, __, __, __, __, __, __, __, U1, __, __,
                   __, __},
};
/*
These modes can be pushed on the stack.
*/
/*
Values stored on the parser's mode stack (see push and pop). MODE_DONE is
pushed once by new_JSON_parser and popped by JSON_parser_done.
*/
enum modes
{
MODE_ARRAY = 1,
MODE_DONE = 2,
MODE_KEY = 3,
2008-05-11 22:42:53 +00:00
MODE_OBJECT = 4
};
/*
    Push a mode onto the stack.

    jc    the parser
    mode  one of enum modes

    Returns true on success. Returns false if the configured nesting depth
    would be exceeded, or, for an unbounded parser (depth < 0), if the stack
    could not be enlarged. On failure the parser's stack, capacity and top
    are left exactly as they were, so the old stack is neither leaked nor
    indexed out of range afterwards.
*/
static int
push( JSON_parser jc,
      int         mode )
{
    const long new_top = jc->top + 1;

    if( jc->depth < 0 )
    {
        /* unbounded nesting: enlarge the stack on demand */
        if( new_top >= jc->stack_capacity )
        {
            const long   new_capacity = jc->stack_capacity * 2;
            const size_t bytes_to_allocate =
                (size_t)new_capacity * sizeof( jc->static_stack[0] );
            signed char* grown;

            if( jc->stack == &jc->static_stack[0] )
            {
                /* first growth: move from the embedded array to the heap */
                grown = (signed char*)malloc( bytes_to_allocate );
                if( grown == NULL )
                {
                    return false;
                }
                memcpy( grown, jc->static_stack,
                        sizeof( jc->static_stack ) );
            }
            else
            {
                /* keep the old pointer until realloc is known to succeed */
                grown = (signed char*)realloc( jc->stack,
                                               bytes_to_allocate );
                if( grown == NULL )
                {
                    return false;
                }
            }

            jc->stack = grown;
            jc->stack_capacity = new_capacity;
        }
    }
    else if( new_top >= jc->depth )
    {
        /* bounded nesting: refuse to go deeper than configured */
        return false;
    }

    jc->top = new_top;
    jc->stack[new_top] = (signed char)mode;
    return true;
}
/*
    Pop the mode stack if, and only if, its top element equals `mode`.

    Returns true when an element was removed; returns false (and changes
    nothing) when the stack is empty or the top element is a different mode.
*/
static int
pop( JSON_parser jc,
     int         mode )
{
    const int matches = ( jc->top >= 0 ) && ( jc->stack[jc->top] == mode );

    if( matches )
    {
        jc->top -= 1;
    }
    return matches ? true : false;
}
/*
    Empty the parse buffer, leaving it a valid zero-length C string.
    `jc` may be any pointer-valued expression; it is evaluated twice.
*/
#define parse_buffer_clear( jc ) \
    do { \
        ( jc )->parse_buffer_count = 0; \
        ( jc )->parse_buffer[0] = 0; \
    } while( 0 )

/*
    Drop the last character of the parse buffer and re-terminate it.
    The buffer must not be empty. `jc` may be any pointer-valued
    expression; it is evaluated more than once.
*/
#define parse_buffer_pop_back_char( jc ) \
    do { \
        assert( ( jc )->parse_buffer_count >= 1 ); \
        --( jc )->parse_buffer_count; \
        ( jc )->parse_buffer[( jc )->parse_buffer_count] = 0; \
    } while( 0 )
/*
    Release a parser created by new_JSON_parser, including the mode stack
    and the parse buffer when they have been moved to the heap.
    Passing NULL is a no-op.
*/
void
delete_JSON_parser( JSON_parser jc )
{
    int stack_on_heap;
    int buffer_on_heap;

    if( !jc )
    {
        return;
    }

    /* the embedded arrays live inside *jc and must not be freed separately */
    stack_on_heap = jc->stack != &jc->static_stack[0];
    buffer_on_heap = jc->parse_buffer != &jc->static_parse_buffer[0];

    if( stack_on_heap )
    {
        free( (void*)jc->stack );
    }
    if( buffer_on_heap )
    {
        free( (void*)jc->parse_buffer );
    }
    free( (void*)jc );
}
/*
    new_JSON_parser starts the checking process by constructing a JSON_parser
    object.

    config  parser configuration, or NULL to use the defaults produced by
            init_JSON_config. config->depth restricts the maximum nesting
            level: a positive value is the limit, 0 is treated as 1, and a
            negative value requests a stack that grows without bound.

    Returns the new parser, or NULL if memory could not be allocated.
    Release the parser with delete_JSON_parser.

    To continue the process, call JSON_parser_char for each character in the
    JSON text, and then call JSON_parser_done to obtain the final result.
*/
JSON_parser
new_JSON_parser( JSON_config* config )
{
    JSON_config default_config;
    JSON_parser jc;
    int         depth;

    /* set to default configuration if none was provided */
    if( config == NULL )
    {
        init_JSON_config( &default_config );
        config = &default_config;
    }

    /* zero-initialized, so every flag and the UTF-16 buffer start at 0 */
    jc = (JSON_parser)calloc( 1, sizeof( *jc ) );
    if( jc == NULL )
    {
        return NULL;
    }

    depth = config->depth;

    /* We need to be able to push at least one object */
    if( depth == 0 )
    {
        depth = 1;
    }

    jc->state = GO;
    jc->top = -1;

    /* Do we want non-bound stack? */
    if( depth > 0 )
    {
        jc->stack_capacity = depth;
        jc->depth = depth;
        if( depth <= (int)COUNTOF( jc->static_stack ) )
        {
            jc->stack = &jc->static_stack[0];
        }
        else
        {
            jc->stack =
                (signed char*)malloc( (size_t)depth *
                                      sizeof( jc->static_stack[0] ) );
            if( jc->stack == NULL )
            {
                free( (void*)jc );
                return NULL;
            }
        }
    }
    else
    {
        jc->stack_capacity = COUNTOF( jc->static_stack );
        jc->depth = -1;
        jc->stack = &jc->static_stack[0];
    }

    /* set parser to start; cannot fail because the stack is empty and
       holds at least one element */
    push( jc, MODE_DONE );

    /* set up the parse buffer */
    jc->parse_buffer = &jc->static_parse_buffer[0];
    jc->parse_buffer_capacity = COUNTOF( jc->static_parse_buffer );
    parse_buffer_clear( jc );

    /* set up callback, comment & float handling */
    jc->callback = config->callback;
    jc->ctx = config->callback_ctx;
    jc->allow_comments = config->allow_comments != 0;
    jc->handle_floats_manually = config->handle_floats_manually != 0;

    return jc;
}
/*
    Double the capacity of the parse buffer, moving it from the embedded
    static array to the heap on first use.

    Returns true on success. Returns false if the new size cannot be
    represented or the allocation fails; in that case the existing buffer,
    its contents and its capacity are left untouched.
*/
static int
grow_parse_buffer( JSON_parser jc )
{
    const size_t new_capacity = jc->parse_buffer_capacity * 2;
    const size_t bytes_to_allocate = new_capacity *
                                     sizeof( jc->parse_buffer[0] );
    char*        grown;

    /* size_t arithmetic wraps; a result that did not grow is unusable */
    if( new_capacity <= jc->parse_buffer_capacity )
    {
        return false;
    }

    if( jc->parse_buffer == &jc->static_parse_buffer[0] )
    {
        grown = (char*)malloc( bytes_to_allocate );
        if( grown == NULL )
        {
            return false;
        }
        /* the terminator is rewritten by the caller after appending */
        memcpy( grown, jc->static_parse_buffer, jc->parse_buffer_count );
    }
    else
    {
        /* keep the old pointer until realloc is known to succeed */
        grown = (char*)realloc( jc->parse_buffer, bytes_to_allocate );
        if( grown == NULL )
        {
            return false;
        }
    }

    jc->parse_buffer = grown;
    jc->parse_buffer_capacity = new_capacity;
    return true;
}

/*
    Append character `c` to the parse buffer and keep it NUL-terminated,
    growing the buffer first when there is no room for both.

    NOTE: if the buffer cannot be grown this macro executes `return false;`
    in the enclosing function, so it may only be used inside functions that
    return int and report failure as false (as JSON_parser_char does).
*/
#define parse_buffer_push_back_char( jc, c ) \
    do { \
        if( ( jc )->parse_buffer_count + 1 >= \
            ( jc )->parse_buffer_capacity \
            && !grow_parse_buffer( jc ) ) \
        { \
            return false; \
        } \
        ( jc )->parse_buffer[( jc )->parse_buffer_count++] = ( c ); \
        ( jc )->parse_buffer[( jc )->parse_buffer_count] = 0; \
    } while( 0 )
2008-05-11 22:42:53 +00:00
/*
    Hand the value accumulated in the parse buffer to the user callback
    and then empty the buffer.

    Nothing is reported when no callback is installed or when no value is
    pending (type is JSON_T_NONE). Floats are passed either as text
    (handle_floats_manually) or converted with sscanf; integers are
    converted with sscanf; strings are passed as pointer + length; null,
    true and false are passed with a NULL value pointer.

    Returns false, without clearing the buffer, if the callback returns
    zero; returns true otherwise.
*/
static int
parse_parse_buffer( JSON_parser jc )
{
    if( jc->callback && jc->type != JSON_T_NONE )
    {
        JSON_value  payload;
        JSON_value* payload_ptr = NULL;

        assert(
            jc->type == JSON_T_NULL
            || jc->type == JSON_T_FALSE
            || jc->type == JSON_T_TRUE
            || jc->type == JSON_T_FLOAT
            || jc->type == JSON_T_INTEGER
            || jc->type == JSON_T_STRING );

        if( jc->type == JSON_T_FLOAT )
        {
            payload_ptr = &payload;
            if( jc->handle_floats_manually )
            {
                payload.vu.str.value = jc->parse_buffer;
                payload.vu.str.length = jc->parse_buffer_count;
            }
            else
            {
                sscanf( jc->parse_buffer, "%Lf",
                        &payload.vu.float_value );
            }
        }
        else if( jc->type == JSON_T_INTEGER )
        {
            payload_ptr = &payload;
            sscanf( jc->parse_buffer,
                    JSON_PARSER_INTEGER_SSCANF_TOKEN,
                    &payload.vu.integer_value );
        }
        else if( jc->type == JSON_T_STRING )
        {
            payload_ptr = &payload;
            payload.vu.str.value = jc->parse_buffer;
            payload.vu.str.length = jc->parse_buffer_count;
        }

        if( !( *jc->callback )( jc->ctx, jc->type, payload_ptr ) )
        {
            return false;
        }
    }

    parse_buffer_clear( jc );
    return true;
}
/*
    Decode the \uXXXX escape whose six characters ('\', 'u' and four hex
    digits) are the last six characters of the parse buffer, and replace
    them with the UTF-8 encoding of the character.

    A character outside the BMP arrives as two consecutive escapes (a
    surrogate pair). After the first one, utf16_decode_buffer[0] is left
    nonzero and nothing is appended; the caller then reads the second
    escape and calls this function again, which decodes both units
    together and resets utf16_decode_buffer[0] to 0.

    Returns true on success (including "first half of a surrogate pair
    stored"), false if the UTF-16 input is not valid.
*/
static int
decode_unicode_char( JSON_parser jc )
{
    const unsigned chars = jc->utf16_decode_buffer[0] ? 2 : 1;
    UTF16*         uc = &jc->utf16_decode_buffer[chars - 1];
    char*          p;
    int            i;

    assert( jc->parse_buffer_count >= 6 );

    /* The hex digits are OR-ed into *uc below, so the slot must start at
       zero. Slot 0 is zero by construction when chars == 1, but slot 1
       still holds the low surrogate of the previous pair and would
       otherwise corrupt every surrogate pair after the first. */
    *uc = 0;

    p = &jc->parse_buffer[jc->parse_buffer_count - 4];
    for( i = 0; i < 4; ++i, ++p )
    {
        /* the state machine only lets 0-9, a-f and A-F get here */
        UTF16 x = *p;

        if( x >= 'a' )
        {
            x -= ( 'a' - 10 );
        }
        else if( x >= 'A' )
        {
            x -= ( 'A' - 10 );
        }
        else
        {
            x &= ~( (UTF16) 0x30 );
        }
        assert( x < 16 );

        /* most significant nibble first */
        *uc |= x << ( ( 3u - i ) << 2 );
    }

    /* clear UTF-16 char from buffer */
    jc->parse_buffer_count -= 6;
    jc->parse_buffer[jc->parse_buffer_count] = 0;

    /* attempt decoding ... */
    {
        UTF8* dec_start =
            (UTF8*)&jc->parse_buffer[jc->parse_buffer_count];
        UTF8* dec_start_dup = dec_start;
        /* the six bytes just removed are available as output space */
        UTF8* dec_end = dec_start + 6;
        const UTF16* enc_start = &jc->utf16_decode_buffer[0];
        const UTF16* enc_end = enc_start + chars;

        const ConversionResult result = ConvertUTF16toUTF8(
            &enc_start, enc_end, &dec_start, dec_end, strictConversion );
        const size_t new_chars = dec_start - dec_start_dup;

        /* was it a surrogate UTF-16 char? */
        if( chars == 1 && result == sourceExhausted )
        {
            return true;
        }
        if( result != conversionOK )
        {
            return false;
        }

        /* NOTE: clear decode buffer to resume string reading,
           otherwise we continue to read UTF-16 */
        jc->utf16_decode_buffer[0] = 0;
        assert( new_chars <= 6 );

        jc->parse_buffer_count += new_chars;
        jc->parse_buffer[jc->parse_buffer_count] = 0;
    }

    return true;
}
/*
Feed one character to the parser.

jc         parser created by new_JSON_parser
next_char  the character; negative values are rejected, values >= 128 are
           classified as C_ETC, and ASCII values are classified through
           ascii_class

Returns true if the text is acceptable so far. Returns false if the
character is not allowed in the current state, if a push or pop on the mode
stack fails, if a \u escape cannot be decoded, or if the user callback
returns zero.
*/
int
JSON_parser_char( JSON_parser jc,
int next_char )
2008-05-11 22:42:53 +00:00
{
/*
After calling new_JSON_parser, call this function for each character (or
2008-05-11 22:42:53 +00:00
partial character) in your JSON text. It can accept UTF-8, UTF-16, or
UTF-32. It returns true if things are looking ok so far. If it rejects the
text, it returns false.
*/
2008-05-11 22:42:53 +00:00
int next_class, next_state;
2008-05-11 22:42:53 +00:00
/*
Determine the character's class.
*/
if( next_char < 0 )
{
2008-05-11 22:42:53 +00:00
return false;
}
if( next_char >= 128 )
{
2008-05-11 22:42:53 +00:00
next_class = C_ETC;
}
else
{
2008-05-11 22:42:53 +00:00
next_class = ascii_class[next_char];
if( next_class <= __ )
{
2008-05-11 22:42:53 +00:00
return false;
}
}
/*
Maintain the parse buffer. The character following a backslash replaces
the backslash with the character it stands for; a \u escape is kept in
the buffer as the two characters \ and u so that decode_unicode_char can
later find the complete six-character escape.
*/
if( jc->escaped )
{
2008-05-11 22:42:53 +00:00
jc->escaped = 0;
/* remove the backslash */
parse_buffer_pop_back_char( jc );
switch( next_char )
{
case 'b':
parse_buffer_push_back_char( jc, '\b' );
break;
case 'f':
parse_buffer_push_back_char( jc, '\f' );
break;
case 'n':
parse_buffer_push_back_char( jc, '\n' );
break;
case 'r':
parse_buffer_push_back_char( jc, '\r' );
break;
case 't':
parse_buffer_push_back_char( jc, '\t' );
break;
case '"':
parse_buffer_push_back_char( jc, '"' );
break;
case '\\':
parse_buffer_push_back_char( jc, '\\' );
break;
case '/':
parse_buffer_push_back_char( jc, '/' );
break;
case 'u':
parse_buffer_push_back_char( jc, '\\' );
parse_buffer_push_back_char( jc, 'u' );
break;
default:
return false;
2008-05-11 22:42:53 +00:00
}
}
else if( !jc->comment )
{
/* outside comments every character is recorded, except whitespace seen
   while no value is being accumulated; the structural-character actions
   below pop their own character off again */
if( jc->type != JSON_T_NONE
|| !( next_class == C_SPACE || next_class == C_WHITE ) /*
non-white-space
*/ )
{
parse_buffer_push_back_char( jc, (char)next_char );
2008-05-11 22:42:53 +00:00
}
}
2008-05-11 22:42:53 +00:00
/*
Get the next state from the state transition table.
*/
2008-05-11 22:42:53 +00:00
next_state = state_transition_table[jc->state][next_class];
if( next_state >= 0 )
{
2008-05-11 22:42:53 +00:00
/*
Change the state.
*/
2008-05-11 22:42:53 +00:00
jc->state = next_state;
}
else
{
2008-05-11 22:42:53 +00:00
/*
Or perform one of the actions.
*/
switch( next_state )
{
/* Unicode character */
case UC:
if( !decode_unicode_char( jc ) )
{
return false;
}
/* check if we need to read a second UTF-16 char */
if( jc->utf16_decode_buffer[0] )
{
jc->state = D1;
}
else
{
jc->state = ST;
}
break;
2008-05-11 22:42:53 +00:00
/* escaped char */
case EX:
jc->escaped = 1;
jc->state = ES;
break;
2008-05-11 22:42:53 +00:00
/* integer detected by minus */
case MX:
jc->type = JSON_T_INTEGER;
jc->state = MI;
break;
/* integer detected by zero */
case ZX:
jc->type = JSON_T_INTEGER;
jc->state = ZE;
break;
/* integer detected by 1-9 */
case IX:
jc->type = JSON_T_INTEGER;
jc->state = IN;
break;
2008-05-11 22:42:53 +00:00
/* floating point number detected by exponent*/
case DE:
assert( jc->type != JSON_T_FALSE );
assert( jc->type != JSON_T_TRUE );
assert( jc->type != JSON_T_NULL );
assert( jc->type != JSON_T_STRING );
jc->type = JSON_T_FLOAT;
jc->state = E1;
break;
2008-05-11 22:42:53 +00:00
/* floating point number detected by fraction */
case DF:
assert( jc->type != JSON_T_FALSE );
assert( jc->type != JSON_T_TRUE );
assert( jc->type != JSON_T_NULL );
assert( jc->type != JSON_T_STRING );
jc->type = JSON_T_FLOAT;
jc->state = FX;
break;
2008-05-11 22:42:53 +00:00
/* string begin " */
case SB:
/* discards the opening quote that was just recorded */
parse_buffer_clear( jc );
assert( jc->type == JSON_T_NONE );
jc->type = JSON_T_STRING;
jc->state = ST;
break;
2008-05-11 22:42:53 +00:00
/* n */
case NU:
assert( jc->type == JSON_T_NONE );
jc->type = JSON_T_NULL;
jc->state = N1;
break;
2008-05-11 22:42:53 +00:00
/* f */
case FA:
assert( jc->type == JSON_T_NONE );
jc->type = JSON_T_FALSE;
jc->state = F1;
break;
2008-05-11 22:42:53 +00:00
/* t */
case TR:
assert( jc->type == JSON_T_NONE );
jc->type = JSON_T_TRUE;
jc->state = T1;
break;
2008-05-11 22:42:53 +00:00
/* closing comment */
case CE:
jc->comment = 0;
assert( jc->parse_buffer_count == 0 );
assert( jc->type == JSON_T_NONE );
jc->state = jc->before_comment_state;
break;
2008-05-11 22:42:53 +00:00
/* opening comment */
case CB:
if( !jc->allow_comments )
{
return false;
2008-05-11 22:42:53 +00:00
}
/* drop the '/' and report any value that the comment terminates */
parse_buffer_pop_back_char( jc );
if( !parse_parse_buffer( jc ) )
{
return false;
}
assert( jc->parse_buffer_count == 0 );
assert( jc->type != JSON_T_STRING );
/* choose the state to resume once the comment is closed (see CE) */
switch( jc->stack[jc->top] )
{
case MODE_ARRAY:
case MODE_OBJECT:
switch( jc->state )
{
case VA:
case AR:
jc->before_comment_state = jc->state;
break;
default:
jc->before_comment_state = OK;
break;
}
break;
default:
jc->before_comment_state = jc->state;
break;
}
jc->type = JSON_T_NONE;
jc->state = C1;
jc->comment = 1;
2008-05-11 22:42:53 +00:00
break;
2008-05-11 22:42:53 +00:00
/* empty } */
case - 9:
parse_buffer_clear( jc );
if( jc->callback
&& !( *jc->callback )( jc->ctx, JSON_T_OBJECT_END, NULL ) )
{
return false;
}
if( !pop( jc, MODE_KEY ) )
{
return false;
}
jc->state = OK;
break;
2008-05-11 22:42:53 +00:00
/* } */ case - 8:
/* drop the '}' and report the value it terminates, if any */
parse_buffer_pop_back_char( jc );
if( !parse_parse_buffer( jc ) )
{
return false;
}
if( jc->callback
&& !( *jc->callback )( jc->ctx, JSON_T_OBJECT_END, NULL ) )
{
return false;
}
if( !pop( jc, MODE_OBJECT ) )
{
return false;
}
jc->type = JSON_T_NONE;
jc->state = OK;
break;
2008-05-11 22:42:53 +00:00
/* ] */ case - 7:
/* drop the ']' and report the value it terminates, if any */
parse_buffer_pop_back_char( jc );
if( !parse_parse_buffer( jc ) )
{
return false;
}
if( jc->callback
&& !( *jc->callback )( jc->ctx, JSON_T_ARRAY_END, NULL ) )
{
return false;
}
if( !pop( jc, MODE_ARRAY ) )
{
return false;
}
2008-05-11 22:42:53 +00:00
jc->type = JSON_T_NONE;
jc->state = OK;
break;
/* { */ case - 6:
parse_buffer_pop_back_char( jc );
if( jc->callback
&& !( *jc->callback )( jc->ctx, JSON_T_OBJECT_BEGIN, NULL ) )
{
return false;
}
/* a new object starts out expecting a key */
if( !push( jc, MODE_KEY ) )
{
return false;
2008-05-11 22:42:53 +00:00
}
assert( jc->type == JSON_T_NONE );
jc->state = OB;
2008-05-11 22:42:53 +00:00
break;
/* [ */ case - 5:
parse_buffer_pop_back_char( jc );
if( jc->callback
&& !( *jc->callback )( jc->ctx, JSON_T_ARRAY_BEGIN, NULL ) )
{
2008-05-11 22:42:53 +00:00
return false;
}
if( !push( jc, MODE_ARRAY ) )
{
return false;
}
assert( jc->type == JSON_T_NONE );
jc->state = AR;
2008-05-11 22:42:53 +00:00
break;
/* string end " */ case - 4:
/* drop the closing quote; what remains is the string's content */
parse_buffer_pop_back_char( jc );
switch( jc->stack[jc->top] )
{
case MODE_KEY:
/* the string is an object key: report it as JSON_T_KEY */
assert( jc->type == JSON_T_STRING );
jc->type = JSON_T_NONE;
jc->state = CO;
if( jc->callback )
{
JSON_value value;
value.vu.str.value = jc->parse_buffer;
value.vu.str.length = jc->parse_buffer_count;
if( !( *jc->callback )( jc->ctx, JSON_T_KEY,
&value ) )
{
return false;
}
}
parse_buffer_clear( jc );
break;
case MODE_ARRAY:
case MODE_OBJECT:
/* the string is a value: report it as JSON_T_STRING */
assert( jc->type == JSON_T_STRING );
if( !parse_parse_buffer( jc ) )
{
return false;
}
jc->type = JSON_T_NONE;
jc->state = OK;
break;
default:
return false;
}
break;
/* , */ case - 3:
/* drop the ',' and report the value it terminates, if any */
parse_buffer_pop_back_char( jc );
if( !parse_parse_buffer( jc ) )
{
return false;
}
switch( jc->stack[jc->top] )
{
case MODE_OBJECT:
2008-05-11 22:42:53 +00:00
/*
A comma causes a flip from object mode to key mode.
*/
if( !pop( jc, MODE_OBJECT ) || !push( jc, MODE_KEY ) )
{
return false;
}
assert( jc->type != JSON_T_STRING );
jc->type = JSON_T_NONE;
jc->state = KE;
break;
case MODE_ARRAY:
assert( jc->type != JSON_T_STRING );
jc->type = JSON_T_NONE;
jc->state = VA;
break;
default:
return false;
2008-05-11 22:42:53 +00:00
}
break;
/* : */ case - 2:
2008-05-11 22:42:53 +00:00
/*
A colon causes a flip from key mode to object mode.
*/
parse_buffer_pop_back_char( jc );
if( !pop( jc, MODE_KEY ) || !push( jc, MODE_OBJECT ) )
{
return false;
}
assert( jc->type == JSON_T_NONE );
jc->state = VA;
break;
/*
Bad action.
*/
default:
2008-05-11 22:42:53 +00:00
return false;
}
}
return true;
}
/*
    Call once after the last character has been passed to JSON_parser_char.

    Returns true if the text was a complete JSON document: the parser must
    be in state OK and the only element left on the mode stack must be the
    MODE_DONE marker, which is popped. Returns false otherwise.
*/
int
JSON_parser_done( JSON_parser jc )
{
    if( jc->state != OK )
    {
        /* the stack is left untouched in this case */
        return false;
    }
    return pop( jc, MODE_DONE );
}
int
JSON_parser_is_legal_white_space_string( const char* s )
2008-05-11 22:42:53 +00:00
{
int c, char_class;
if( s == NULL )
{
2008-05-11 22:42:53 +00:00
return false;
}
for( ; *s; ++s )
{
2008-05-11 22:42:53 +00:00
c = *s;
if( c < 0 || c >= 128 )
{
2008-05-11 22:42:53 +00:00
return false;
}
2008-05-11 22:42:53 +00:00
char_class = ascii_class[c];
if( char_class != C_SPACE && char_class != C_WHITE )
{
2008-05-11 22:42:53 +00:00
return false;
}
}
2008-05-11 22:42:53 +00:00
return true;
}
/*
    Fill a JSON_config with the defaults: every field zeroed, and the
    nesting depth set to JSON_PARSER_STACK_SIZE - 1.
    Passing NULL is a no-op.
*/
void
init_JSON_config( JSON_config* config )
{
    if( config == NULL )
    {
        return;
    }

    memset( config, 0, sizeof( *config ) );
    config->depth = JSON_PARSER_STACK_SIZE - 1;
}