diff --git a/libtransmission/jsonsl.c b/libtransmission/jsonsl.c index 9cebd8c44..a257d2323 100644 --- a/libtransmission/jsonsl.c +++ b/libtransmission/jsonsl.c @@ -1,31 +1,12 @@ -/* - * jsonsl - * https://github.com/mnunberg/jsonsl +/* https://github.com/mnunberg/jsonsl */ + +/* Copyright (C) 2012-2015 Mark Nunberg. * - * Copyright (c) 2012 M. Nunberg, mnunberg@haskalah.org - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * See included LICENSE file for license details. */ #include "jsonsl.h" -#include +#include #include #ifdef JSONSL_USE_METRICS @@ -41,6 +22,7 @@ X(STRUCTURAL_TOKEN) \ X(SPECIAL_SWITCHFIRST) \ X(STRINGY_CATCH) \ + X(NUMBER_FASTPATH) \ X(ESCAPES) \ X(TOTAL) \ @@ -110,71 +92,47 @@ case '8': \ case '9': \ case '0': - - -/** - * This table (predeclared) contains characters which are recognized - * non-string values. 
- */ -static jsonsl_special_t *Special_table; -#define extract_special(c) \ - Special_table[(unsigned int)(c & 0xff)] - -/** - * This table (predeclared) contains the tokens and other characters - * which signal the termination of the non-string values. - */ -static int *Special_Endings; -#define is_special_end(c) \ - Special_Endings[(unsigned int)c & 0xff] - -/** - * This table contains entries for the allowed whitespace - * as per RFC 4627 - */ -static int *Allowed_Whitespace; -#define is_allowed_whitespace(c) \ - (c == ' ' || Allowed_Whitespace[(unsigned int)c & 0xff]) - - -/** - * This table contains allowed two-character escapes - * as per the RFC - */ -static int *Allowed_Escapes; -#define is_allowed_escape(c) \ - Allowed_Escapes[(unsigned int)c & 0xff] +static unsigned extract_special(unsigned); +static int is_special_end(unsigned); +static int is_allowed_whitespace(unsigned); +static int is_allowed_escape(unsigned); +static int is_simple_char(unsigned); +static char get_escape_equiv(unsigned); JSONSL_API jsonsl_t jsonsl_new(int nlevels) { - struct jsonsl_st *jsn = + unsigned int ii; + struct jsonsl_st * jsn; + + if (nlevels < 2) { + return NULL; + } + + jsn = (struct jsonsl_st *) calloc(1, sizeof (*jsn) + ( (nlevels-1) * sizeof (struct jsonsl_state_st) ) ); - jsn->levels_max = nlevels; - jsn->max_callback_level = -1; + jsn->levels_max = (unsigned int) nlevels; + jsn->max_callback_level = UINT_MAX; jsonsl_reset(jsn); + for (ii = 0; ii < jsn->levels_max; ii++) { + jsn->stack[ii].level = ii; + } return jsn; } JSONSL_API void jsonsl_reset(jsonsl_t jsn) { - unsigned int ii; jsn->tok_last = 0; jsn->can_insert = 1; jsn->pos = 0; jsn->level = 0; + jsn->stopfl = 0; jsn->in_escape = 0; jsn->expecting = 0; - - memset(jsn->stack, 0, (jsn->levels_max * sizeof (struct jsonsl_state_st))); - - for (ii = 0; ii < jsn->levels_max; ii++) { - jsn->stack[ii].level = ii; - } } JSONSL_API @@ -185,6 +143,80 @@ void jsonsl_destroy(jsonsl_t jsn) } } + +#define FASTPARSE_EXHAUSTED 1 
+#define FASTPARSE_BREAK 0 + +/* + * This function is meant to accelerate string parsing, reducing the main loop's + * check if we are indeed a string. + * + * @param jsn the parser + * @param[in,out] bytes_p A pointer to the current buffer (i.e. current position) + * @param[in,out] nbytes_p A pointer to the current size of the buffer + * @return true if all bytes have been exhausted (and thus the main loop can + * return), false if a special character was examined which requires greater + * examination. + */ +static int +jsonsl__str_fastparse(jsonsl_t jsn, + const jsonsl_uchar_t **bytes_p, size_t *nbytes_p) +{ + const jsonsl_uchar_t *bytes = *bytes_p; + const jsonsl_uchar_t *end; + for (end = bytes + *nbytes_p; bytes != end; bytes++) { + if ( +#ifdef JSONSL_USE_WCHAR + *bytes >= 0x100 || +#endif /* JSONSL_USE_WCHAR */ + (is_simple_char(*bytes))) { + INCR_METRIC(TOTAL); + INCR_METRIC(STRINGY_INSIGNIFICANT); + } else { + /* Once we're done here, re-calculate the position variables */ + jsn->pos += (bytes - *bytes_p); + *nbytes_p -= (bytes - *bytes_p); + *bytes_p = bytes; + return FASTPARSE_BREAK; + } + } + + /* Once we're done here, re-calculate the position variables */ + jsn->pos += (bytes - *bytes_p); + return FASTPARSE_EXHAUSTED; +} + +/* Functions exactly like str_fastparse, except it also accepts a 'state' + * argument, since the number's value is updated in the state. 
*/ +static int +jsonsl__num_fastparse(jsonsl_t jsn, + const jsonsl_uchar_t **bytes_p, size_t *nbytes_p, + struct jsonsl_state_st *state) +{ + int exhausted = 1; + size_t nbytes = *nbytes_p; + const jsonsl_uchar_t *bytes = *bytes_p; + + for (; nbytes; nbytes--, bytes++) { + jsonsl_uchar_t c = *bytes; + if (isdigit(c)) { + INCR_METRIC(TOTAL); + INCR_METRIC(NUMBER_FASTPATH); + state->nelem = (state->nelem * 10) + (c - 0x30); + } else { + exhausted = 0; + break; + } + } + jsn->pos += (*nbytes_p - nbytes); + if (exhausted) { + return FASTPARSE_EXHAUSTED; + } + *nbytes_p = nbytes; + *bytes_p = bytes; + return FASTPARSE_BREAK; +} + JSONSL_API void jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes) @@ -241,6 +273,7 @@ jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes) } else if (jsn->action_callback) { \ jsn->action_callback(jsn, JSONSL_ACTION_##action, state, (jsonsl_char_t*)c); \ } \ + if (jsn->stopfl) { return; } \ } /** @@ -256,69 +289,143 @@ jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes) INVOKE_ERROR(SPECIAL_EXPECTED); \ } +#define VERIFY_SPECIAL_CI(lit) \ + if (tolower(CUR_CHAR) != (lit)[jsn->pos - state->pos_begin]) { \ + INVOKE_ERROR(SPECIAL_EXPECTED); \ + } + +#define STATE_SPECIAL_LENGTH \ + (state)->nescapes + +#define IS_NORMAL_NUMBER \ + ((state)->special_flags == JSONSL_SPECIALf_UNSIGNED || \ + (state)->special_flags == JSONSL_SPECIALf_SIGNED) + +#define STATE_NUM_LAST jsn->tok_last + +#define CONTINUE_NEXT_CHAR() continue + const jsonsl_uchar_t *c = (jsonsl_uchar_t*)bytes; size_t levels_max = jsn->levels_max; struct jsonsl_state_st *state = jsn->stack + jsn->level; - static int chrt_string_nopass[0x100] = { JSONSL_CHARTABLE_string_nopass }; jsn->base = bytes; for (; nbytes; nbytes--, jsn->pos++, c++) { - register jsonsl_type_t state_type; + unsigned state_type; INCR_METRIC(TOTAL); - /* Special escape handling for some stuff */ - if (jsn->in_escape) { - jsn->in_escape = 0; - if 
(!is_allowed_escape(CUR_CHAR)) { - INVOKE_ERROR(ESCAPE_INVALID); - } else if (CUR_CHAR == 'u') { - DO_CALLBACK(UESCAPE, UESCAPE); - if (jsn->return_UESCAPE) { - return; - } - } - goto GT_NEXT; - } + GT_AGAIN: - /** - * Several fast-tracks for common cases: - */ state_type = state->type; + /* Most common type is typically a string: */ if (state_type & JSONSL_Tf_STRINGY) { - /* check if our character cannot ever change our current string state - * or throw an error - */ - if ( -#ifdef JSONSL_USE_WCHAR - CUR_CHAR >= 0x100 || -#endif /* JSONSL_USE_WCHAR */ - (!chrt_string_nopass[CUR_CHAR & 0xff])) { - INCR_METRIC(STRINGY_INSIGNIFICANT); - goto GT_NEXT; - } else if (CUR_CHAR == '"') { - goto GT_QUOTE; - } else if (CUR_CHAR == '\\') { - goto GT_ESCAPE; + /* Special escape handling for some stuff */ + if (jsn->in_escape) { + jsn->in_escape = 0; + if (!is_allowed_escape(CUR_CHAR)) { + INVOKE_ERROR(ESCAPE_INVALID); + } else if (CUR_CHAR == 'u') { + DO_CALLBACK(UESCAPE, UESCAPE); + if (jsn->return_UESCAPE) { + return; + } + } + CONTINUE_NEXT_CHAR(); + } + + if (jsonsl__str_fastparse(jsn, &c, &nbytes) == + FASTPARSE_EXHAUSTED) { + /* No need to readjust variables as we've exhausted the iterator */ + return; } else { - INVOKE_ERROR(WEIRD_WHITESPACE); + if (CUR_CHAR == '"') { + goto GT_QUOTE; + } else if (CUR_CHAR == '\\') { + goto GT_ESCAPE; + } else { + INVOKE_ERROR(WEIRD_WHITESPACE); + } } INCR_METRIC(STRINGY_SLOWPATH); } else if (state_type == JSONSL_T_SPECIAL) { - if (state->special_flags & JSONSL_SPECIALf_NUMERIC) { + /* Fast track for signed/unsigned */ + if (IS_NORMAL_NUMBER) { + if (jsonsl__num_fastparse(jsn, &c, &nbytes, state) == + FASTPARSE_EXHAUSTED) { + return; + } else { + goto GT_SPECIAL_NUMERIC; + } + } else if (state->special_flags == JSONSL_SPECIALf_DASH) { +#ifdef JSONSL_PARSE_NAN + if (CUR_CHAR == 'I' || CUR_CHAR == 'i') { + /* parsing -Infinity? 
*/ + state->special_flags = JSONSL_SPECIALf_NEG_INF; + CONTINUE_NEXT_CHAR(); + } +#endif + + if (!isdigit(CUR_CHAR)) { + INVOKE_ERROR(INVALID_NUMBER); + } + + if (CUR_CHAR == '0') { + state->special_flags = JSONSL_SPECIALf_ZERO|JSONSL_SPECIALf_SIGNED; + } else if (isdigit(CUR_CHAR)) { + state->special_flags = JSONSL_SPECIALf_SIGNED; + state->nelem = CUR_CHAR - 0x30; + } else { + INVOKE_ERROR(INVALID_NUMBER); + } + CONTINUE_NEXT_CHAR(); + + } else if (state->special_flags == JSONSL_SPECIALf_ZERO) { + if (isdigit(CUR_CHAR)) { + /* Following a zero! */ + INVOKE_ERROR(INVALID_NUMBER); + } + /* Unset the 'zero' flag: */ + if (state->special_flags & JSONSL_SPECIALf_SIGNED) { + state->special_flags = JSONSL_SPECIALf_SIGNED; + } else { + state->special_flags = JSONSL_SPECIALf_UNSIGNED; + } + goto GT_SPECIAL_NUMERIC; + } + + if ((state->special_flags & JSONSL_SPECIALf_NUMERIC) && + !(state->special_flags & JSONSL_SPECIALf_INF)) { + GT_SPECIAL_NUMERIC: switch (CUR_CHAR) { CASE_DIGITS - state->nelem = (state->nelem*10) + (CUR_CHAR-0x30); - goto GT_NEXT; + STATE_NUM_LAST = '1'; + CONTINUE_NEXT_CHAR(); + + case '.': + if (state->special_flags & JSONSL_SPECIALf_FLOAT) { + INVOKE_ERROR(INVALID_NUMBER); + } + state->special_flags |= JSONSL_SPECIALf_FLOAT; + STATE_NUM_LAST = '.'; + CONTINUE_NEXT_CHAR(); case 'e': case 'E': + if (state->special_flags & JSONSL_SPECIALf_EXPONENT) { + INVOKE_ERROR(INVALID_NUMBER); + } + state->special_flags |= JSONSL_SPECIALf_EXPONENT; + STATE_NUM_LAST = 'e'; + CONTINUE_NEXT_CHAR(); + case '-': case '+': - state->special_flags |= JSONSL_SPECIALf_EXPONENT; - goto GT_NEXT; - case '.': - state->special_flags |= JSONSL_SPECIALf_FLOAT; - goto GT_NEXT; + if (STATE_NUM_LAST != 'e') { + INVOKE_ERROR(INVALID_NUMBER); + } + STATE_NUM_LAST = '-'; + CONTINUE_NEXT_CHAR(); + default: if (is_special_end(CUR_CHAR)) { goto GT_SPECIAL_POP; @@ -329,6 +436,8 @@ jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes) } /* else if (!NUMERIC) */ if 
(!is_special_end(CUR_CHAR)) { + STATE_SPECIAL_LENGTH++; + /* Verify TRUE, FALSE, NULL */ if (state->special_flags == JSONSL_SPECIALf_TRUE) { VERIFY_SPECIAL("true"); @@ -336,16 +445,68 @@ jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes) VERIFY_SPECIAL("false"); } else if (state->special_flags == JSONSL_SPECIALf_NULL) { VERIFY_SPECIAL("null"); +#ifdef JSONSL_PARSE_NAN + } else if (state->special_flags == JSONSL_SPECIALf_POS_INF) { + VERIFY_SPECIAL_CI("infinity"); + } else if (state->special_flags == JSONSL_SPECIALf_NEG_INF) { + VERIFY_SPECIAL_CI("-infinity"); + } else if (state->special_flags == JSONSL_SPECIALf_NAN) { + VERIFY_SPECIAL_CI("nan"); + } else if (state->special_flags & JSONSL_SPECIALf_NULL || + state->special_flags & JSONSL_SPECIALf_NAN) { + /* previous char was "n", are we parsing null or nan? */ + if (CUR_CHAR != 'u') { + state->special_flags &= ~JSONSL_SPECIALf_NULL; + } + + if (tolower(CUR_CHAR) != 'a') { + state->special_flags &= ~JSONSL_SPECIALf_NAN; + } +#endif } INCR_METRIC(SPECIAL_FASTPATH); - goto GT_NEXT; + CONTINUE_NEXT_CHAR(); } GT_SPECIAL_POP: + jsn->can_insert = 0; + if (IS_NORMAL_NUMBER) { + /* Nothing */ + } else if (state->special_flags == JSONSL_SPECIALf_ZERO || + state->special_flags == (JSONSL_SPECIALf_ZERO|JSONSL_SPECIALf_SIGNED)) { + /* 0 is unsigned! */ + state->special_flags = JSONSL_SPECIALf_UNSIGNED; + } else if (state->special_flags == JSONSL_SPECIALf_DASH) { + /* Still in dash! 
*/ + INVOKE_ERROR(INVALID_NUMBER); + } else if (state->special_flags & JSONSL_SPECIALf_INF) { + if (STATE_SPECIAL_LENGTH != 8) { + INVOKE_ERROR(SPECIAL_INCOMPLETE); + } + state->nelem = 1; + } else if (state->special_flags & JSONSL_SPECIALf_NUMERIC) { + /* Check that we're not at the end of a token */ + if (STATE_NUM_LAST != '1') { + INVOKE_ERROR(INVALID_NUMBER); + } + } else if (state->special_flags == JSONSL_SPECIALf_TRUE) { + if (STATE_SPECIAL_LENGTH != 4) { + INVOKE_ERROR(SPECIAL_INCOMPLETE); + } + state->nelem = 1; + } else if (state->special_flags == JSONSL_SPECIALf_FALSE) { + if (STATE_SPECIAL_LENGTH != 5) { + INVOKE_ERROR(SPECIAL_INCOMPLETE); + } + } else if (state->special_flags == JSONSL_SPECIALf_NULL) { + if (STATE_SPECIAL_LENGTH != 4) { + INVOKE_ERROR(SPECIAL_INCOMPLETE); + } + } SPECIAL_POP; jsn->expecting = ','; if (is_allowed_whitespace(CUR_CHAR)) { - goto GT_NEXT; + CONTINUE_NEXT_CHAR(); } /** * This works because we have a non-whitespace token @@ -360,7 +521,7 @@ jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes) /* So we're not special. Harmless insignificant whitespace * passthrough */ - goto GT_NEXT; + CONTINUE_NEXT_CHAR(); } else if (extract_special(CUR_CHAR)) { /* not a string, whitespace, or structural token. 
must be special */ goto GT_SPECIAL_BEGIN; @@ -376,10 +537,10 @@ jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes) /* the end of a string or hash key */ case JSONSL_T_STRING: CALLBACK_AND_POP(STRING); - goto GT_NEXT; + CONTINUE_NEXT_CHAR(); case JSONSL_T_HKEY: CALLBACK_AND_POP(HKEY); - goto GT_NEXT; + CONTINUE_NEXT_CHAR(); case JSONSL_T_OBJECT: state->nelem++; @@ -407,7 +568,7 @@ jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes) state->type = JSONSL_T_HKEY; DO_CALLBACK(HKEY, PUSH); } - goto GT_NEXT; + CONTINUE_NEXT_CHAR(); case JSONSL_T_LIST: state->nelem++; @@ -416,7 +577,7 @@ jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes) jsn->expecting = ','; jsn->tok_last = 0; DO_CALLBACK(STRING, PUSH); - goto GT_NEXT; + CONTINUE_NEXT_CHAR(); case JSONSL_T_SPECIAL: INVOKE_ERROR(STRAY_TOKEN); @@ -435,7 +596,7 @@ jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes) } state->nescapes++; jsn->in_escape = 1; - goto GT_NEXT; + CONTINUE_NEXT_CHAR(); } /* " or \ */ GT_STRUCTURAL_TOKEN: @@ -448,7 +609,7 @@ jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes) jsn->tok_last = ':'; jsn->can_insert = 1; jsn->expecting = '"'; - goto GT_NEXT; + CONTINUE_NEXT_CHAR(); case ',': INCR_METRIC(STRUCTURAL_TOKEN); @@ -474,7 +635,7 @@ jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes) jsn->tok_last = ','; jsn->expecting = '"'; - goto GT_NEXT; + CONTINUE_NEXT_CHAR(); /* new list or object */ /* hashes are more common */ @@ -503,7 +664,7 @@ jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes) DO_CALLBACK(LIST, PUSH); } jsn->tok_last = 0; - goto GT_NEXT; + CONTINUE_NEXT_CHAR(); /* closing of list or object */ case '}': @@ -525,12 +686,14 @@ jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes) } else { if (state->type != '{') { INVOKE_ERROR(BRACKET_MISMATCH); + } else if (state->nelem && state->nelem % 2 != 0) { + INVOKE_ERROR(VALUE_EXPECTED); } DO_CALLBACK(OBJECT, 
POP); } state = jsn->stack + jsn->level; state->pos_cur = jsn->pos; - goto GT_NEXT; + CONTINUE_NEXT_CHAR(); default: GT_SPECIAL_BEGIN: @@ -563,24 +726,28 @@ jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes) STACK_PUSH; state->type = JSONSL_T_SPECIAL; state->special_flags = special_flags; + STATE_SPECIAL_LENGTH = 1; + if (special_flags == JSONSL_SPECIALf_UNSIGNED) { state->nelem = CUR_CHAR - 0x30; + STATE_NUM_LAST = '1'; } else { + STATE_NUM_LAST = '-'; state->nelem = 0; } DO_CALLBACK(SPECIAL, PUSH); } - goto GT_NEXT; + CONTINUE_NEXT_CHAR(); } - - GT_NEXT: - continue; } } JSONSL_API const char* jsonsl_strerror(jsonsl_error_t err) { + if (err == JSONSL_ERROR_SUCCESS) { + return "SUCCESS"; + } #define X(t) \ if (err == JSONSL_ERROR_##t) \ return #t; @@ -704,20 +871,21 @@ JSONSL_API jsonsl_jpr_t jsonsl_jpr_new(const char *path, jsonsl_error_t *errp) { - char *my_copy; + char *my_copy = NULL; int count, curidx; - struct jsonsl_jpr_st *ret; - struct jsonsl_jpr_component_st *components; + struct jsonsl_jpr_st *ret = NULL; + struct jsonsl_jpr_component_st *components = NULL; size_t origlen; jsonsl_error_t errstacked; +#define JPR_BAIL(err) *errp = err; goto GT_ERROR; + if (errp == NULL) { errp = &errstacked; } if (path == NULL || *path != '/') { - *errp = JSONSL_ERROR_JPR_NOROOT; - return NULL; + JPR_BAIL(JSONSL_ERROR_JPR_NOROOT); } count = 1; @@ -728,8 +896,7 @@ jsonsl_jpr_new(const char *path, jsonsl_error_t *errp) if (*c == '/') { count++; if (*(c+1) == '/') { - *errp = JSONSL_ERROR_JPR_DUPSLASH; - return NULL; + JPR_BAIL(JSONSL_ERROR_JPR_DUPSLASH); } } } @@ -738,8 +905,17 @@ jsonsl_jpr_new(const char *path, jsonsl_error_t *errp) count++; } - components = malloc(sizeof(*components) * count); - my_copy = malloc(strlen(path) + 1); + components = (struct jsonsl_jpr_component_st *) + malloc(sizeof(*components) * count); + if (!components) { + JPR_BAIL(JSONSL_ERROR_ENOMEM); + } + + my_copy = (char *)malloc(strlen(path) + 1); + if (!my_copy) { + 
JPR_BAIL(JSONSL_ERROR_ENOMEM); + } + strcpy(my_copy, path); components[0].ptype = JSONSL_PATH_ROOT; @@ -748,7 +924,7 @@ jsonsl_jpr_new(const char *path, jsonsl_error_t *errp) char *cur = my_copy; int pathret = JSONSL_PATH_STRING; curidx = 1; - while (pathret > 0 && curidx < count) { + while (curidx < count) { pathret = populate_component(cur, components + curidx, &cur, errp); if (pathret > 0) { curidx++; @@ -758,9 +934,7 @@ jsonsl_jpr_new(const char *path, jsonsl_error_t *errp) } if (pathret == JSONSL_PATH_INVALID) { - free(components); - free(my_copy); - return NULL; + JPR_BAIL(JSONSL_ERROR_JPR_BADPATH); } } else { curidx = 1; @@ -768,15 +942,31 @@ jsonsl_jpr_new(const char *path, jsonsl_error_t *errp) path--; /*revert path to leading '/' */ origlen = strlen(path) + 1; - ret = malloc(sizeof(*ret)); + ret = (struct jsonsl_jpr_st *)malloc(sizeof(*ret)); + if (!ret) { + JPR_BAIL(JSONSL_ERROR_ENOMEM); + } + ret->orig = (char *)malloc(origlen); + if (!ret->orig) { + JPR_BAIL(JSONSL_ERROR_ENOMEM); + } ret->components = components; ret->ncomponents = curidx; ret->basestr = my_copy; - ret->orig = malloc(origlen); ret->norig = origlen-1; strcpy(ret->orig, path); return ret; + + GT_ERROR: + free(my_copy); + free(components); + if (ret) { + free(ret->orig); + } + free(ret); + return NULL; +#undef JPR_BAIL } void jsonsl_jpr_destroy(jsonsl_jpr_t jpr) @@ -787,10 +977,82 @@ void jsonsl_jpr_destroy(jsonsl_jpr_t jpr) free(jpr); } +/** + * Call when there is a possibility of a match, either as a final match or + * as a path within a match + * @param jpr The JPR path + * @param component Component corresponding to the current element + * @param prlevel The level of the *parent* + * @param chtype The type of the child + * @return Match status + */ +static jsonsl_jpr_match_t +jsonsl__match_continue(jsonsl_jpr_t jpr, + const struct jsonsl_jpr_component_st *component, + unsigned prlevel, unsigned chtype) +{ + const struct jsonsl_jpr_component_st *next_comp = component + 1; + if (prlevel 
== jpr->ncomponents - 1) { + /* This is the match. Check the expected type of the match against + * the child */ + if (jpr->match_type == 0 || jpr->match_type == chtype) { + return JSONSL_MATCH_COMPLETE; + } else { + return JSONSL_MATCH_TYPE_MISMATCH; + } + } + if (chtype == JSONSL_T_LIST) { + if (next_comp->ptype == JSONSL_PATH_NUMERIC) { + return JSONSL_MATCH_POSSIBLE; + } else { + return JSONSL_MATCH_TYPE_MISMATCH; + } + } else if (chtype == JSONSL_T_OBJECT) { + if (next_comp->ptype == JSONSL_PATH_NUMERIC) { + return JSONSL_MATCH_TYPE_MISMATCH; + } else { + return JSONSL_MATCH_POSSIBLE; + } + } else { + return JSONSL_MATCH_TYPE_MISMATCH; + } +} + +JSONSL_API +jsonsl_jpr_match_t +jsonsl_path_match(jsonsl_jpr_t jpr, + const struct jsonsl_state_st *parent, + const struct jsonsl_state_st *child, + const char *key, size_t nkey) +{ + const struct jsonsl_jpr_component_st *comp; + if (!parent) { + /* No parent. Return immediately since it's always a match */ + return jsonsl__match_continue(jpr, jpr->components, 0, child->type); + } + + comp = jpr->components + parent->level; + + /* note that we don't need to verify the type of the match, this is + * always done through the previous call to jsonsl__match_continue. 
+ * If we are in a POSSIBLE tree then we can be certain the types (at + * least at this level) are correct */ + if (parent->type == JSONSL_T_OBJECT) { + if (comp->len != nkey || strncmp(key, comp->pstr, nkey) != 0) { + return JSONSL_MATCH_NOMATCH; + } + } else { + if (comp->idx != parent->nelem - 1) { + return JSONSL_MATCH_NOMATCH; + } + } + return jsonsl__match_continue(jpr, comp, parent->level, child->type); +} + JSONSL_API jsonsl_jpr_match_t jsonsl_jpr_match(jsonsl_jpr_t jpr, - jsonsl_type_t parent_type, + unsigned int parent_type, unsigned int parent_level, const char *key, size_t nkey) @@ -822,18 +1084,29 @@ jsonsl_jpr_match(jsonsl_jpr_t jpr, } } - /* Check numeric array index */ - if (p_component->ptype == JSONSL_PATH_NUMERIC - && parent_type == JSONSL_T_LIST) { - if (p_component->idx != nkey) { - return JSONSL_MATCH_NOMATCH; - } else { - if (parent_level == jpr->ncomponents-1) { - return JSONSL_MATCH_COMPLETE; + /* Check numeric array index. This gets its special block so we can avoid + * string comparisons */ + if (p_component->ptype == JSONSL_PATH_NUMERIC) { + if (parent_type == JSONSL_T_LIST) { + if (p_component->idx != nkey) { + /* Wrong index */ + return JSONSL_MATCH_NOMATCH; } else { - return JSONSL_MATCH_POSSIBLE; + if (parent_level == jpr->ncomponents-1) { + /* This is the last element of the path */ + return JSONSL_MATCH_COMPLETE; + } else { + /* Intermediate element */ + return JSONSL_MATCH_POSSIBLE; + } } + } else if (p_component->is_arridx) { + /* Numeric and an array index (set explicitly by user). 
But not + * a list for a parent */ + return JSONSL_MATCH_TYPE_MISMATCH; } + } else if (parent_type == JSONSL_T_LIST) { + return JSONSL_MATCH_TYPE_MISMATCH; } /* Check lengths */ @@ -843,9 +1116,7 @@ jsonsl_jpr_match(jsonsl_jpr_t jpr, /* Check string comparison */ cmpret = strncmp(p_component->pstr, key, nkey); - if (cmpret != 0) { - return JSONSL_MATCH_NOMATCH; - } else { + if (cmpret == 0) { if (parent_level == jpr->ncomponents-1) { return JSONSL_MATCH_COMPLETE; } else { @@ -853,8 +1124,6 @@ jsonsl_jpr_match(jsonsl_jpr_t jpr, } } - /* Never reached, but make the compiler happy */ - abort(); return JSONSL_MATCH_NOMATCH; } @@ -867,9 +1136,9 @@ void jsonsl_jpr_match_state_init(jsonsl_t jsn, if (njprs == 0) { return; } - jsn->jprs = malloc(sizeof(jsonsl_jpr_t) * njprs); + jsn->jprs = (jsonsl_jpr_t *)malloc(sizeof(jsonsl_jpr_t) * njprs); jsn->jpr_count = njprs; - jsn->jpr_root = calloc(1, sizeof(size_t) * njprs * jsn->levels_max); + jsn->jpr_root = (size_t*)calloc(1, sizeof(size_t) * njprs * jsn->levels_max); memcpy(jsn->jprs, jprs, sizeof(jsonsl_jpr_t) * njprs); /* Set the initial jump table values */ @@ -984,11 +1253,68 @@ const char *jsonsl_strmatchtype(jsonsl_jpr_match_t match) #endif /* JSONSL_WITH_JPR */ -/** - * Maps literal escape sequences with special meaning to their - * actual control codes (e.g.\n => 0x20) - */ -static unsigned char *Escape_Maps; +static char * +jsonsl__writeutf8(uint32_t pt, char *out) +{ + #define ADD_OUTPUT(c) *out = (char)(c); out++; + + if (pt < 0x80) { + ADD_OUTPUT(pt); + } else if (pt < 0x800) { + ADD_OUTPUT((pt >> 6) | 0xC0); + ADD_OUTPUT((pt & 0x3F) | 0x80); + } else if (pt < 0x10000) { + ADD_OUTPUT((pt >> 12) | 0xE0); + ADD_OUTPUT(((pt >> 6) & 0x3F) | 0x80); + ADD_OUTPUT((pt & 0x3F) | 0x80); + } else { + ADD_OUTPUT((pt >> 18) | 0xF0); + ADD_OUTPUT(((pt >> 12) & 0x3F) | 0x80); + ADD_OUTPUT(((pt >> 6) & 0x3F) | 0x80); + ADD_OUTPUT((pt & 0x3F) | 0x80); + } + return out; + #undef ADD_OUTPUT +} + +/* Thanks snej 
(https://github.com/mnunberg/jsonsl/issues/9) */ +static int +jsonsl__digit2int(char ch) { + int d = ch - '0'; + if ((unsigned) d < 10) { + return d; + } + d = ch - 'a'; + if ((unsigned) d < 6) { + return d + 10; + } + d = ch - 'A'; + if ((unsigned) d < 6) { + return d + 10; + } + return -1; +} + +/* Assume 's' is at least 4 bytes long */ +static int +jsonsl__get_uescape_16(const char *s) +{ + int ret = 0; + int cur; + + #define GET_DIGIT(off) \ + cur = jsonsl__digit2int(s[off]); \ + if (cur == -1) { return -1; } \ + ret |= (cur << (12 - (off * 4))); + + GET_DIGIT(0); + GET_DIGIT(1); + GET_DIGIT(2); + GET_DIGIT(3); + #undef GET_DIGIT + return ret; +} + /** * Utility function to convert escape sequences */ @@ -997,33 +1323,29 @@ size_t jsonsl_util_unescape_ex(const char *in, char *out, size_t len, const int toEscape[128], - jsonsl_special_t *oflags, + unsigned *oflags, jsonsl_error_t *err, const char **errat) { const unsigned char *c = (const unsigned char*)in; - int in_escape = 0; - size_t origlen = len; - /* difference between the length of the input buffer and the output buffer */ - size_t ndiff = 0; - if (oflags) { - *oflags = 0; + char *begin_p = out; + unsigned oflags_s; + uint16_t last_codepoint = 0; + + if (!oflags) { + oflags = &oflags_s; } -#define UNESCAPE_BAIL(e,offset) \ - *err = JSONSL_ERROR_##e; \ - if (errat) { \ - *errat = (const char*)(c+ (ptrdiff_t)(offset)); \ - } \ - return 0; + *oflags = 0; + + #define UNESCAPE_BAIL(e,offset) \ + *err = JSONSL_ERROR_##e; \ + if (errat) { \ + *errat = (const char*)(c+ (ptrdiff_t)(offset)); \ + } \ + return 0; for (; len; len--, c++, out++) { - unsigned int uesc_val[2]; - if (in_escape) { - /* inside a previously ignored escape. 
Ignore */ - in_escape = 0; - goto GT_ASSIGN; - } - + int uescval; if (*c != '\\') { /* Not an escape, so we don't care about this */ goto GT_ASSIGN; @@ -1035,12 +1357,11 @@ size_t jsonsl_util_unescape_ex(const char *in, if (!is_allowed_escape(c[1])) { UNESCAPE_BAIL(ESCAPE_INVALID, 1) } - if ((toEscape[(unsigned char)c[1] & 0x7f] == 0 && + if ((toEscape && toEscape[(unsigned char)c[1] & 0x7f] == 0 && c[1] != '\\' && c[1] != '"')) { - /* if we don't want to unescape this string, just continue with - * the escape flag set - */ - in_escape = 1; + /* if we don't want to unescape this string, write the escape sequence to the output */ + *out++ = *c++; + --len; goto GT_ASSIGN; } @@ -1049,15 +1370,15 @@ size_t jsonsl_util_unescape_ex(const char *in, * TODO: should the maps actually reflect the desired * replacement character in toEscape? */ - if (Escape_Maps[c[1]]) { + char esctmp = get_escape_equiv(c[1]); + if (esctmp) { /* Check if there is a corresponding replacement */ - *out = Escape_Maps[c[1]]; + *out = esctmp; } else { /* Just gobble up the 'reverse-solidus' */ *out = c[1]; } len--; - ndiff++; c++; /* do not assign, just continue */ continue; @@ -1065,49 +1386,59 @@ size_t jsonsl_util_unescape_ex(const char *in, /* next == 'u' */ if (len < 6) { - /* Need at least six characters: - * { [0] = '\\', [1] = 'u', [2] = 'f', [3] = 'f', [4] = 'f', [5] = 'f' } - */ - UNESCAPE_BAIL(UESCAPE_TOOSHORT, -1); + /* Need at least six characters.. */ + UNESCAPE_BAIL(UESCAPE_TOOSHORT, 2); } - if (sscanf((const char*)(c+2), "%02x%02x", uesc_val, uesc_val+1) != 2) { - /* We treat the sequence as two octets */ - UNESCAPE_BAIL(UESCAPE_TOOSHORT, -1); + uescval = jsonsl__get_uescape_16((const char *)c + 2); + if (uescval == -1) { + UNESCAPE_BAIL(PERCENT_BADHEX, -1); } - /* By now, we gobble up all the six bytes (current implied + 5 next - * characters), and have at least four missing bytes from the output - * buffer. 
- */ - len -= 5; - c += 5; + if (last_codepoint) { + uint16_t w1 = last_codepoint, w2 = (uint16_t)uescval; + uint32_t cp; - ndiff += 4; - if (uesc_val[0] == 0) { - /* only one byte is extracted from the two - * possible octets. Increment the diff counter by one. - */ - *out = uesc_val[1]; - if (oflags && *(unsigned char*)out > 0x7f) { - *oflags |= JSONSL_SPECIALf_NONASCII; + if (uescval < 0xDC00 || uescval > 0xDFFF) { + UNESCAPE_BAIL(INVALID_CODEPOINT, -1); } - ndiff++; + + cp = (w1 & 0x3FF) << 10; + cp |= (w2 & 0x3FF); + cp += 0x10000; + + out = jsonsl__writeutf8(cp, out) - 1; + last_codepoint = 0; + + } else if (uescval < 0xD800 || uescval > 0xDFFF) { + *oflags |= JSONSL_SPECIALf_NONASCII; + out = jsonsl__writeutf8(uescval, out) - 1; + + } else if (uescval < 0xDC00) { + *oflags |= JSONSL_SPECIALf_NONASCII; + last_codepoint = (uint16_t)uescval; + out--; } else { - *(out++) = uesc_val[0]; - *out = uesc_val[1]; - if (oflags && (uesc_val[0] > 0x7f || uesc_val[1] > 0x7f)) { - *oflags |= JSONSL_SPECIALf_NONASCII; - } + UNESCAPE_BAIL(INVALID_CODEPOINT, 2); } + + /* Post uescape cleanup */ + len -= 5; /* Gobble up 5 chars after 'u' */ + c += 5; continue; /* Only reached by previous branches */ GT_ASSIGN: *out = *c; } + + if (last_codepoint) { + *err = JSONSL_ERROR_INVALID_CODEPOINT; + return 0; + } + *err = JSONSL_ERROR_SUCCESS; - return origlen - ndiff; + return out - begin_p; } /** @@ -1119,41 +1450,45 @@ size_t jsonsl_util_unescape_ex(const char *in, * This table contains the beginnings of non-string * allowable (bareword) values. 
*/ -static jsonsl_special_t _special_table[0x100] = { +static unsigned short Special_Table[0x100] = { /* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x1f */ /* 0x20 */ 0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x2c */ - /* 0x2d */ JSONSL_SPECIALf_SIGNED /* - */, /* 0x2d */ + /* 0x2d */ JSONSL_SPECIALf_DASH /* <-> */, /* 0x2d */ /* 0x2e */ 0,0, /* 0x2f */ - /* 0x30 */ JSONSL_SPECIALf_UNSIGNED /* 0 */, /* 0x30 */ - /* 0x31 */ JSONSL_SPECIALf_UNSIGNED /* 1 */, /* 0x31 */ - /* 0x32 */ JSONSL_SPECIALf_UNSIGNED /* 2 */, /* 0x32 */ - /* 0x33 */ JSONSL_SPECIALf_UNSIGNED /* 3 */, /* 0x33 */ - /* 0x34 */ JSONSL_SPECIALf_UNSIGNED /* 4 */, /* 0x34 */ - /* 0x35 */ JSONSL_SPECIALf_UNSIGNED /* 5 */, /* 0x35 */ - /* 0x36 */ JSONSL_SPECIALf_UNSIGNED /* 6 */, /* 0x36 */ - /* 0x37 */ JSONSL_SPECIALf_UNSIGNED /* 7 */, /* 0x37 */ - /* 0x38 */ JSONSL_SPECIALf_UNSIGNED /* 8 */, /* 0x38 */ - /* 0x39 */ JSONSL_SPECIALf_UNSIGNED /* 9 */, /* 0x39 */ - /* 0x3a */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x59 */ - /* 0x5a */ 0,0,0,0,0,0,0,0,0,0,0,0, /* 0x65 */ - /* 0x66 */ JSONSL_SPECIALf_FALSE /* f */, /* 0x66 */ - /* 0x67 */ 0,0,0,0,0,0,0, /* 0x6d */ - /* 0x6e */ JSONSL_SPECIALf_NULL /* n */, /* 0x6e */ + /* 0x30 */ JSONSL_SPECIALf_ZERO /* <0> */, /* 0x30 */ + /* 0x31 */ JSONSL_SPECIALf_UNSIGNED /* <1> */, /* 0x31 */ + /* 0x32 */ JSONSL_SPECIALf_UNSIGNED /* <2> */, /* 0x32 */ + /* 0x33 */ JSONSL_SPECIALf_UNSIGNED /* <3> */, /* 0x33 */ + /* 0x34 */ JSONSL_SPECIALf_UNSIGNED /* <4> */, /* 0x34 */ + /* 0x35 */ JSONSL_SPECIALf_UNSIGNED /* <5> */, /* 0x35 */ + /* 0x36 */ JSONSL_SPECIALf_UNSIGNED /* <6> */, /* 0x36 */ + /* 0x37 */ JSONSL_SPECIALf_UNSIGNED /* <7> */, /* 0x37 */ + /* 0x38 */ JSONSL_SPECIALf_UNSIGNED /* <8> */, /* 0x38 */ + /* 0x39 */ JSONSL_SPECIALf_UNSIGNED /* <9> */, /* 0x39 */ + /* 0x3a */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x48 */ + /* 0x49 */ JSONSL__INF_PROXY /* */, /* 0x49 */ + /* 0x4a */ 0,0,0,0, /* 0x4d */ + /* 0x4e */ 
JSONSL__NAN_PROXY /* */, /* 0x4e */ + /* 0x4f */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x65 */ + /* 0x66 */ JSONSL_SPECIALf_FALSE /* */, /* 0x66 */ + /* 0x67 */ 0,0, /* 0x68 */ + /* 0x69 */ JSONSL__INF_PROXY /* */, /* 0x69 */ + /* 0x6a */ 0,0,0,0, /* 0x6d */ + /* 0x6e */ JSONSL_SPECIALf_NULL|JSONSL__NAN_PROXY /* */, /* 0x6e */ /* 0x6f */ 0,0,0,0,0, /* 0x73 */ - /* 0x74 */ JSONSL_SPECIALf_TRUE /* t */, /* 0x74 */ + /* 0x74 */ JSONSL_SPECIALf_TRUE /* */, /* 0x74 */ /* 0x75 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x94 */ /* 0x95 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb4 */ /* 0xb5 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xd4 */ /* 0xd5 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xf4 */ - /* 0xf5 */ 0,0,0,0,0,0,0,0,0,0 /* 0xfe */ + /* 0xf5 */ 0,0,0,0,0,0,0,0,0,0, /* 0xfe */ }; -static jsonsl_special_t *Special_table = _special_table; /** * Contains characters which signal the termination of any of the 'special' bareword * values. */ -static int _special_endings[0x100] = { +static int Special_Endings[0x100] = { /* 0x00 */ 0,0,0,0,0,0,0,0,0, /* 0x08 */ /* 0x09 */ 1 /* */, /* 0x09 */ /* 0x0a */ 1 /* */, /* 0x0a */ @@ -1181,12 +1516,11 @@ static int _special_endings[0x100] = { /* 0xde */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xfd */ /* 0xfe */ 0 /* 0xfe */ }; -static int *Special_Endings = _special_endings; /** - * Contains allowable whitespace. 
+ * This table contains entries for the allowed whitespace as per RFC 4627 */ -static int _allowed_whitespace[0x100] = { +static int Allowed_Whitespace[0x100] = { /* 0x00 */ 0,0,0,0,0,0,0,0,0, /* 0x08 */ /* 0x09 */ 1 /* */, /* 0x09 */ /* 0x0a */ 1 /* */, /* 0x0a */ @@ -1202,12 +1536,45 @@ static int _allowed_whitespace[0x100] = { /* 0xc1 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xe0 */ /* 0xe1 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 /* 0xfe */ }; -static int *Allowed_Whitespace = _allowed_whitespace; + +static const int String_No_Passthrough[0x100] = { + /* 0x00 */ 1 /* */, /* 0x00 */ + /* 0x01 */ 1 /* */, /* 0x01 */ + /* 0x02 */ 1 /* */, /* 0x02 */ + /* 0x03 */ 1 /* */, /* 0x03 */ + /* 0x04 */ 1 /* */, /* 0x04 */ + /* 0x05 */ 1 /* */, /* 0x05 */ + /* 0x06 */ 1 /* */, /* 0x06 */ + /* 0x07 */ 1 /* */, /* 0x07 */ + /* 0x08 */ 1 /* */, /* 0x08 */ + /* 0x09 */ 1 /* */, /* 0x09 */ + /* 0x0a */ 1 /* */, /* 0x0a */ + /* 0x0b */ 1 /* */, /* 0x0b */ + /* 0x0c */ 1 /* */, /* 0x0c */ + /* 0x0d */ 1 /* */, /* 0x0d */ + /* 0x0e */ 1 /* */, /* 0x0e */ + /* 0x0f */ 1 /* */, /* 0x0f */ + /* 0x10 */ 1 /* */, /* 0x10 */ + /* 0x11 */ 1 /* */, /* 0x11 */ + /* 0x12 */ 1 /* */, /* 0x12 */ + /* 0x13 */ 1 /* */, /* 0x13 */ + /* 0x14 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x21 */ + /* 0x22 */ 1 /* <"> */, /* 0x22 */ + /* 0x23 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x42 */ + /* 0x43 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x5b */ + /* 0x5c */ 1 /* <\> */, /* 0x5c */ + /* 0x5d */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x7c */ + /* 0x7d */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x9c */ + /* 0x9d */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xbc */ + /* 0xbd */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xdc */ + /* 0xdd */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 
0xfc */ + /* 0xfd */ 0,0, /* 0xfe */ +}; /** * Allowable two-character 'common' escapes: */ -static int _allowed_escapes[0x100] = { +static int Allowed_Escapes[0x100] = { /* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x1f */ /* 0x20 */ 0,0, /* 0x21 */ /* 0x22 */ 1 /* <"> */, /* 0x22 */ @@ -1234,10 +1601,10 @@ static int _allowed_escapes[0x100] = { /* 0xf6 */ 0,0,0,0,0,0,0,0,0, /* 0xfe */ }; -static int *Allowed_Escapes = _allowed_escapes; - - -static unsigned char _escape_maps[0x100] = { +/** + * This table contains the _values_ for a given (single) escaped character. + */ +static unsigned char Escape_Equivs[0x100] = { /* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x1f */ /* 0x20 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x3f */ /* 0x40 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x5f */ @@ -1258,6 +1625,44 @@ static unsigned char _escape_maps[0x100] = { /* 0xf5 */ 0,0,0,0,0,0,0,0,0,0 /* 0xfe */ }; -static unsigned char *Escape_Maps = _escape_maps; - +/* Definitions of above-declared static functions */ +static char get_escape_equiv(unsigned c) { + return Escape_Equivs[c & 0xff]; +} +static unsigned extract_special(unsigned c) { + return Special_Table[c & 0xff]; +} +static int is_special_end(unsigned c) { + return Special_Endings[c & 0xff]; +} +static int is_allowed_whitespace(unsigned c) { + return c == ' ' || Allowed_Whitespace[c & 0xff]; +} +static int is_allowed_escape(unsigned c) { + return Allowed_Escapes[c & 0xff]; +} +static int is_simple_char(unsigned c) { + return !String_No_Passthrough[c & 0xff]; +} +/* Clean up all our macros! 
*/ +#undef INCR_METRIC +#undef INCR_GENERIC +#undef INCR_STRINGY_CATCH +#undef CASE_DIGITS +#undef INVOKE_ERROR +#undef STACK_PUSH +#undef STACK_POP_NOPOS +#undef STACK_POP +#undef CALLBACK_AND_POP_NOPOS +#undef CALLBACK_AND_POP +#undef SPECIAL_POP +#undef CUR_CHAR +#undef DO_CALLBACK +#undef ENSURE_HVAL +#undef VERIFY_SPECIAL +#undef STATE_SPECIAL_LENGTH +#undef IS_NORMAL_NUMBER +#undef STATE_NUM_LAST +#undef FASTPARSE_EXHAUSTED +#undef FASTPARSE_BREAK diff --git a/libtransmission/jsonsl.h b/libtransmission/jsonsl.h index 7aabbeff9..aedec3f20 100644 --- a/libtransmission/jsonsl.h +++ b/libtransmission/jsonsl.h @@ -1,28 +1,4 @@ -/* - * jsonsl - * https://github.com/mnunberg/jsonsl - * - * Copyright (c) 2012 M. Nunberg, mnunberg@haskalah.org - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE - * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ +/* https://github.com/mnunberg/jsonsl */ /** * JSON Simple/Stacked/Stateful Lexer. 
@@ -30,6 +6,9 @@ * - Maintains state * - Callback oriented * - Lightweight and fast. One source file and one header file + * + * Copyright (C) 2012-2015 Mark Nunberg + * See included LICENSE file for license details. */ #ifndef JSONSL_H_ @@ -54,8 +33,16 @@ typedef char jsonsl_char_t; typedef unsigned char jsonsl_uchar_t; #endif /* JSONSL_USE_WCHAR */ +#ifdef JSONSL_PARSE_NAN +#define JSONSL__NAN_PROXY JSONSL_SPECIALf_NAN +#define JSONSL__INF_PROXY JSONSL_SPECIALf_INF +#else +#define JSONSL__NAN_PROXY 0 +#define JSONSL__INF_PROXY 0 +#endif + /* Stolen from http-parser.h, and possibly others */ -#if defined(_WIN32) && !defined(__MINGW32__) +#if defined(_WIN32) && !defined(__MINGW32__) && (!defined(_MSC_VER) || _MSC_VER<1600) typedef __int8 int8_t; typedef unsigned __int8 uint8_t; typedef __int16 int16_t; @@ -74,8 +61,6 @@ typedef int ssize_t; #if (!defined(JSONSL_STATE_GENERIC)) && (!defined(JSONSL_STATE_USER_FIELDS)) -#warning "JSONSL_STATE_USER_FIELDS not defined. Define this for extra structure fields" -#warning "or define JSONSL_STATE_GENERIC" #define JSONSL_STATE_GENERIC #endif /* !defined JSONSL_STATE_GENERIC */ @@ -83,9 +68,33 @@ typedef int ssize_t; #define JSONSL_STATE_USER_FIELDS #endif /* JSONSL_STATE_GENERIC */ +/* Additional fields for component object */ +#ifndef JSONSL_JPR_COMPONENT_USER_FIELDS +#define JSONSL_JPR_COMPONENT_USER_FIELDS +#endif + #ifndef JSONSL_API +/** + * We require a /DJSONSL_DLL so that users already using this as a static + * or embedded library don't get confused + */ +#if defined(_WIN32) && defined(JSONSL_DLL) +#define JSONSL_API __declspec(dllexport) +#else #define JSONSL_API -#endif /* JSONSL_API */ +#endif /* _WIN32 */ + +#endif /* !JSONSL_API */ + +#ifndef JSONSL_INLINE +#if defined(_MSC_VER) + #define JSONSL_INLINE __inline + #elif defined(__GNUC__) + #define JSONSL_INLINE __inline__ + #else + #define JSONSL_INLINE inline + #endif /* _MSC_VER or __GNUC__ */ +#endif /* JSONSL_INLINE */ #define JSONSL_MAX_LEVELS 512 @@ -144,18 
+153,36 @@ typedef enum { X(NULL, 1<<4) \ X(FLOAT, 1<<5) \ X(EXPONENT, 1<<6) \ - X(NONASCII, 1<<7) + X(NONASCII, 1<<7) \ + X(NAN, 1<<8) \ + X(INF, 1<<9) typedef enum { #define X(o,b) \ JSONSL_SPECIALf_##o = b, JSONSL_XSPECIAL #undef X /* Handy flags for checking */ - JSONSL_SPECIALf_UNKNOWN = 1 << 8, - JSONSL_SPECIALf_NUMERIC = (JSONSL_SPECIALf_SIGNED|JSONSL_SPECIALf_UNSIGNED), + + JSONSL_SPECIALf_UNKNOWN = 1 << 10, + + /** @private Private */ + JSONSL_SPECIALf_ZERO = 1 << 11 | JSONSL_SPECIALf_UNSIGNED, + /** @private */ + JSONSL_SPECIALf_DASH = 1 << 12, + /** @private */ + JSONSL_SPECIALf_POS_INF = (JSONSL_SPECIALf_INF), + JSONSL_SPECIALf_NEG_INF = (JSONSL_SPECIALf_INF|JSONSL_SPECIALf_SIGNED), + + /** Type is numeric */ + JSONSL_SPECIALf_NUMERIC = (JSONSL_SPECIALf_SIGNED| JSONSL_SPECIALf_UNSIGNED), + + /** Type is a boolean */ JSONSL_SPECIALf_BOOLEAN = (JSONSL_SPECIALf_TRUE|JSONSL_SPECIALf_FALSE), - /* For non-simple numeric types */ - JSONSL_SPECIALf_NUMNOINT = (JSONSL_SPECIALf_FLOAT|JSONSL_SPECIALf_EXPONENT) + + /** Type is an "extended", not integral type (but numeric) */ + JSONSL_SPECIALf_NUMNOINT = + (JSONSL_SPECIALf_FLOAT|JSONSL_SPECIALf_EXPONENT|JSONSL_SPECIALf_NAN + |JSONSL_SPECIALf_INF) } jsonsl_special_t; @@ -182,11 +209,12 @@ typedef enum { * Various errors which may be thrown while parsing JSON */ #define JSONSL_XERR \ - X(SUCCESS) \ /* Trailing garbage characters */ \ X(GARBAGE_TRAILING) \ /* We were expecting a 'special' (numeric, true, false, null) */ \ X(SPECIAL_EXPECTED) \ +/* The 'special' value was incomplete */ \ + X(SPECIAL_INCOMPLETE) \ /* Found a stray token */ \ X(STRAY_TOKEN) \ /* We were expecting a token before this one */ \ @@ -217,6 +245,8 @@ typedef enum { X(TRAILING_COMMA) \ /* An invalid number was passed in a numeric field */ \ X(INVALID_NUMBER) \ +/* Value is missing for object */ \ + X(VALUE_EXPECTED) \ /* The following are for JPR Stuff */ \ \ /* Found a literal '%' but it was only followed by a single valid hex digit */ \ @@ 
-226,9 +256,14 @@ typedef enum { /* Duplicate slash */ \ X(JPR_DUPSLASH) \ /* No leading root */ \ - X(JPR_NOROOT) + X(JPR_NOROOT) \ +/* Allocation failure */ \ + X(ENOMEM) \ +/* Invalid unicode codepoint detected (in case of escapes) */ \ + X(INVALID_CODEPOINT) typedef enum { + JSONSL_ERROR_SUCCESS = 0, #define X(e) \ JSONSL_ERROR_##e, JSONSL_XERR @@ -251,30 +286,28 @@ struct jsonsl_state_st { /** * The JSON object type */ - jsonsl_type_t type; + unsigned type; /** If this element is special, then its extended type is here */ - jsonsl_special_t special_flags; + unsigned special_flags; /** - * Position offset variables. These are relative to jsn->pos. - * pos_begin is the position at which this state was first pushed - * to the stack. pos_cur is the position at which return last controlled - * to this state (i.e. an immediate child state was popped from it). - */ - - /** - * The position at which this state was first PUSHed + * The position (in terms of number of bytes since the first call to + * jsonsl_feed()) at which the state was first pushed. This includes + * opening tokens, if applicable. + * + * @note For strings (i.e. type & JSONSL_Tf_STRINGY is nonzero) this will + * be the position of the first quote. + * + * @see jsonsl_st::pos which contains the _current_ position and can be + * used during a POP callback to get the length of the element. */ size_t pos_begin; - /** - * The position at which any immediate child was last POPped. - * Note that this field is only set when the item is popped. - */ + /**FIXME: This is redundant as the same information can be derived from + * jsonsl_st::pos at pop-time */ size_t pos_cur; - /** * Level of recursion into nesting. This is mainly a convenience * variable, as this can technically be deduced from the lexer's @@ -309,6 +342,9 @@ struct jsonsl_state_st { /** * Counter which is incremented each time an escape ('\') is encountered. 
+ * This is used internally for non-string types and should only be + * inspected by the user if the state actually represents a string + * type. */ unsigned int nescapes; @@ -334,6 +370,26 @@ struct jsonsl_state_st { #endif /* JSONSL_STATE_USER_FIELDS */ }; +/**Gets the number of elements in the list. + * @param st The state. Must be of type JSONSL_T_LIST + * @return number of elements in the list + */ +#define JSONSL_LIST_SIZE(st) ((st)->nelem) + +/**Gets the number of key-value pairs in an object + * @param st The state. Must be of type JSONSL_T_OBJECT + * @return the number of key-value pairs in the object + */ +#define JSONSL_OBJECT_SIZE(st) ((st)->nelem / 2) + +/**Gets the numeric value. + * @param st The state. Must be of type JSONSL_T_SPECIAL and + * special_flags must have the JSONSL_SPECIALf_NUMERIC flag + * set. + * @return the numeric value of the state. + */ +#define JSONSL_NUMERIC_VALUE(st) ((st)->nelem) + /* * So now we need some special structure for keeping the * JPR info in sync. Preferrably all in a single block @@ -410,6 +466,9 @@ struct jsonsl_st { /** This is the current level of the stack */ unsigned int level; + /** Flag set to indicate we should stop processing */ + unsigned int stopfl; + /** * This is the current position, relative to the beginning * of the stream. @@ -428,7 +487,12 @@ struct jsonsl_st { /** Default callback for any action, if neither PUSH or POP callbacks are defined */ jsonsl_stack_callback action_callback; - /** Do not invoke callbacks for objects deeper than this level */ + /** + * Do not invoke callbacks for objects deeper than this level. + * NOTE: This field establishes the lower bound for ignored callbacks, + * and is thus misnamed. `min_ignore_level` would actually make more + * sense, but we don't want to break API. + */ unsigned int max_callback_level; /** The error callback. Invoked when an error happens. 
Should not be NULL */ @@ -550,23 +614,54 @@ void jsonsl_destroy(jsonsl_t jsn); * @param jsn the lexer * @param cur the current nest, which should be a struct jsonsl_nest_st */ -#define jsonsl_last_state(jsn, cur) \ - (cur->level > 1 ) \ - ? (jsn->stack + (cur->level-1)) \ - : NULL +static JSONSL_INLINE +struct jsonsl_state_st *jsonsl_last_state(const jsonsl_t jsn, + const struct jsonsl_state_st *state) +{ + /* Don't complain about overriding array bounds */ + if (state->level > 1) { + return jsn->stack + state->level - 1; + } else { + return NULL; + } +} +/** + * Gets the state of the last fully consumed child of this parent. This is + * only valid in the parent's POP callback. + * + * @param the lexer + * @return A pointer to the child. + */ +static JSONSL_INLINE +struct jsonsl_state_st *jsonsl_last_child(const jsonsl_t jsn, + const struct jsonsl_state_st *parent) +{ + return jsn->stack + (parent->level + 1); +} + +/**Call to instruct the parser to stop parsing and return. This is valid + * only from within a callback */ +static JSONSL_INLINE +void jsonsl_stop(jsonsl_t jsn) +{ + jsn->stopfl = 1; +} /** * This enables receiving callbacks on all events. Doesn't do * anything special but helps avoid some boilerplate. * This does not touch the UESCAPE callbacks or flags. 
*/ -#define jsonsl_enable_all_callbacks(jsn) \ - jsn->call_HKEY = 1; \ - jsn->call_STRING = 1; \ - jsn->call_OBJECT = 1; \ - jsn->call_SPECIAL = 1; \ +static JSONSL_INLINE +void jsonsl_enable_all_callbacks(jsonsl_t jsn) +{ + jsn->call_HKEY = 1; + jsn->call_STRING = 1; + jsn->call_OBJECT = 1; + jsn->call_SPECIAL = 1; jsn->call_LIST = 1; +} /** * A macro which returns true if the current state object can @@ -639,7 +734,8 @@ void jsonsl_dump_global_metrics(void); #define JSONSL_XMATCH \ X(COMPLETE,1) \ X(POSSIBLE,0) \ - X(NOMATCH,-1) + X(NOMATCH,-1) \ + X(TYPE_MISMATCH, -2) typedef enum { @@ -663,11 +759,22 @@ typedef enum { } jsonsl_jpr_type_t; struct jsonsl_jpr_component_st { + /** The string the component points to */ char *pstr; /** if this is a numeric type, the number is 'cached' here */ unsigned long idx; + /** The length of the string */ size_t len; + /** The type of component (NUMERIC or STRING) */ jsonsl_jpr_type_t ptype; + + /** Set this to true to enforce type checking between dict keys and array + * indices. jsonsl_jpr_match() will return TYPE_MISMATCH if it detects + * that an array index is actually a child of a dictionary. */ + short is_arridx; + + /* Extra fields (for more advanced searches. Default is empty) */ + JSONSL_JPR_COMPONENT_USER_FIELDS }; struct jsonsl_jpr_st { @@ -675,6 +782,10 @@ struct jsonsl_jpr_st { struct jsonsl_jpr_component_st *components; size_t ncomponents; + /**Type of the match to be expected. If nonzero, will be compared against + * the actual type */ + unsigned match_type; + /** Base of allocated string for components */ char *basestr; @@ -683,8 +794,6 @@ struct jsonsl_jpr_st { size_t norig; }; - - /** * Create a new JPR object. * @@ -723,10 +832,42 @@ void jsonsl_jpr_destroy(jsonsl_jpr_t jpr); */ JSONSL_API jsonsl_jpr_match_t jsonsl_jpr_match(jsonsl_jpr_t jpr, - jsonsl_type_t parent_type, + unsigned int parent_type, unsigned int parent_level, const char *key, size_t nkey); +/** + * Alternate matching algorithm. 
This matching algorithm does not use + * JSONPointer but relies on a more structured searching mechanism. It + * assumes that there is a clear distinction between array indices and + * object keys. In this case, the jsonsl_jpr_component_st::ptype should + * be set to @ref JSONSL_PATH_NUMERIC for an array index (the + * jsonsl_jpr_component_st::is_arridx field will be removed in a future + * version). + * + * @param jpr The path + * @param parent The parent structure. Can be NULL if this is the root object + * @param child The child structure. Should not be NULL + * @param key Object key, if an object + * @param nkey Length of object key + * @return Status constant if successful + * + * @note + * For successful matching, both the key and the path itself should be normalized + * to contain 'proper' utf8 sequences rather than utf16 '\uXXXX' escapes. This + * should currently be done in the application. Another version of this function + * may use a temporary buffer in such circumstances (allocated by the application). + * + * Since this function also checks the state of the child, it should only + * be called on PUSH callbacks, and not POP callbacks + */ +JSONSL_API +jsonsl_jpr_match_t +jsonsl_path_match(jsonsl_jpr_t jpr, + const struct jsonsl_state_st *parent, + const struct jsonsl_state_st *child, + const char *key, size_t nkey); + /** * Associate a set of JPR objects with a lexer instance. @@ -804,6 +945,13 @@ const char *jsonsl_strmatchtype(jsonsl_jpr_match_t match); * to escape a '/' - however this may also be desired behavior. the JSON * spec is not clear on this, and therefore jsonsl leaves it up to you. * + * Additionally, sometimes you may wish to _normalize_ JSON. This is specifically + * true when dealing with 'u-escapes' which can be expressed perfectly fine + * as utf8. One use case for normalization is JPR string comparison, in which + * case two effectively equivalent strings may not match because one is using + * u-escapes and the other proper utf8.
To normalize u-escapes only, pass in + * an empty `toEscape` table, enabling only the `u` index. + * * @param in The input string. * @param out An allocated output (should be the same size as in) * @param len the size of the buffer @@ -820,13 +968,26 @@ const char *jsonsl_strmatchtype(jsonsl_jpr_match_t match); * encountered. * * @return The effective size of the output buffer. + * + * @note + * This function now encodes the UTF8 equivalents of utf16 escapes (i.e. + * 'u-escapes'). Previously this would encode the escapes as utf16 literals, + * which while still correct in some sense was confusing for many (especially + * considering that the inputs were variations of char). + * + * @note + * The output buffer will never be larger than the input buffer, since + * standard escape sequences (e.g. '\t') occupy two bytes in the source + * but only one byte (when unescaped) in the output. Likewise u-escapes + * (e.g. \uXXXX) will occupy six bytes in the source, but at the most + * three bytes (as UTF-8) when unescaped. */ JSONSL_API size_t jsonsl_util_unescape_ex(const char *in, char *out, size_t len, const int toEscape[128], - jsonsl_special_t *oflags, + unsigned *oflags, jsonsl_error_t *err, const char **errat); @@ -838,44 +999,6 @@ size_t jsonsl_util_unescape_ex(const char *in, #endif /* JSONSL_NO_JPR */ -/** - * HERE BE CHARACTER TABLES!
- */ -#define JSONSL_CHARTABLE_string_nopass \ -/* 0x00 */ 1 /* */, /* 0x00 */ \ -/* 0x01 */ 1 /* */, /* 0x01 */ \ -/* 0x02 */ 1 /* */, /* 0x02 */ \ -/* 0x03 */ 1 /* */, /* 0x03 */ \ -/* 0x04 */ 1 /* */, /* 0x04 */ \ -/* 0x05 */ 1 /* */, /* 0x05 */ \ -/* 0x06 */ 1 /* */, /* 0x06 */ \ -/* 0x07 */ 1 /* */, /* 0x07 */ \ -/* 0x08 */ 1 /* */, /* 0x08 */ \ -/* 0x09 */ 1 /* */, /* 0x09 */ \ -/* 0x0a */ 1 /* */, /* 0x0a */ \ -/* 0x0b */ 1 /* */, /* 0x0b */ \ -/* 0x0c */ 1 /* */, /* 0x0c */ \ -/* 0x0d */ 1 /* */, /* 0x0d */ \ -/* 0x0e */ 1 /* */, /* 0x0e */ \ -/* 0x0f */ 1 /* */, /* 0x0f */ \ -/* 0x10 */ 1 /* */, /* 0x10 */ \ -/* 0x11 */ 1 /* */, /* 0x11 */ \ -/* 0x12 */ 1 /* */, /* 0x12 */ \ -/* 0x13 */ 1 /* */, /* 0x13 */ \ -/* 0x14 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x21 */ \ -/* 0x22 */ 1 /* <"> */, /* 0x22 */ \ -/* 0x23 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x42 */ \ -/* 0x43 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x5b */ \ -/* 0x5c */ 1 /* <\> */, /* 0x5c */ \ -/* 0x5d */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x7c */ \ -/* 0x7d */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x9c */ \ -/* 0x9d */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xbc */ \ -/* 0xbd */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xdc */ \ -/* 0xdd */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xfc */ \ -/* 0xfd */ 0,0 /* 0xfe */ \ - - - #ifdef __cplusplus } #endif /* __cplusplus */