diff --git a/TODO b/TODO index be9282d..df68a6e 100644 --- a/TODO +++ b/TODO @@ -1,4 +1,2 @@ Implement chunked reading -Block comments. ie /**/ -Dynamic resizing of parse state stack Runtime setting for comma/comment support diff --git a/src/input.c b/src/input.c index 743b527..c892957 100644 --- a/src/input.c +++ b/src/input.c @@ -25,8 +25,9 @@ #include "memory.h" #include //strtod, strtol #include //isalpha, etc +#include //memcpy -#define MAX_DEPTH 16 +#define RJP_INITIAL_PARSE_DEPTH 16 #define rjp_lex_accept 1 typedef enum RJP_lex_node{ @@ -60,10 +61,13 @@ typedef enum RJP_lex_node{ rjp_lex_fnum_e = 44, rjp_lex_sci_num = 45, rjp_lex_slash = 46, - rjp_lex_comment = 47, + rjp_lex_line_comment = 47, rjp_lex_signed_number = 49, rjp_lex_sci_num_signed = 51, rjp_lex_newlines = 53, + rjp_lex_block_comment = 55, + rjp_lex_block_comment_end1 = 56, + rjp_lex_block_comment_end2 = 57, rjp_lex_invalid = 1000, rjp_lex_unrecognized_word = 1002 }RJP_lex_node; @@ -105,9 +109,13 @@ typedef struct RJP_lex_state{ RJP_index length; RJP_index offset; }RJP_lex_state; +typedef struct RJP_yacc_stack{ + RJP_yacc_target* stack; + RJP_index position; + RJP_index size; +}RJP_yacc_stack; typedef struct RJP_yacc_state{ - RJP_yacc_target* target_stack; - RJP_yacc_target* target; + RJP_yacc_stack target_stack; RJP_value* root; RJP_value* curr; RJP_value* lastadded; @@ -115,6 +123,37 @@ typedef struct RJP_yacc_state{ int row, column; }RJP_yacc_state; +static void irjp_init_yacc_stack(RJP_yacc_stack* s){ + s->size = RJP_INITIAL_PARSE_DEPTH; + s->stack = rjp_alloc(sizeof(RJP_yacc_target)*s->size); + s->position = 0; + s->stack[0] = rjp_yacc_end; +} +static void irjp_delete_yacc_stack(RJP_yacc_stack* s){ + rjp_free(s->stack); +} +static void irjp_resize_yacc_stack(RJP_yacc_stack* s, RJP_index newsize){ + RJP_yacc_target* newstack = rjp_alloc(sizeof(RJP_yacc_target) * newsize); + memcpy(newstack, s->stack, s->size*sizeof(RJP_yacc_target)); + rjp_free(s->stack); + s->stack = newstack; + s->size = newsize; +} +static void irjp_yacc_stack_push(RJP_yacc_stack* s, RJP_yacc_target target){ + if((s->position+1) == s->size) + irjp_resize_yacc_stack(s, s->size*2); + s->stack[++s->position] = target; +} +static RJP_yacc_target irjp_yacc_stack_pop(RJP_yacc_stack* s){ + return s->stack[s->position--]; +} +static RJP_yacc_target irjp_yacc_stack_current(RJP_yacc_stack* s){ + return s->stack[s->position]; +} +static void irjp_yacc_stack_set(RJP_yacc_stack* s, RJP_yacc_target target){ + s->stack[s->position] = target; +} + static RJP_lex_category irjp_lex_accept(RJP_lex_category val, RJP_lex_state* state){ state->node = rjp_lex_start; return val; @@ -246,6 +285,8 @@ static RJP_lex_category irjp_lex(RJP_lex_state* state){ case rjp_lex_quote: if(ch == '\\') state->node = rjp_lex_escaped; + else if(ch == '\n' || ch == '\r') + state->node = rjp_lex_invalid; else if(ch == '"') state->node = rjp_lex_string; break; @@ -259,16 +300,30 @@ static RJP_lex_category irjp_lex(RJP_lex_state* state){ //comments case rjp_lex_slash: if(ch == '/') - state->node = rjp_lex_comment; + state->node = rjp_lex_line_comment; + else if(ch == '*') + state->node = rjp_lex_block_comment; else state->node = rjp_lex_invalid; break; - case rjp_lex_comment: + case rjp_lex_line_comment: if(ch == '\n' || ch == '\r') state->node = rjp_lex_newlines; else if(ch == 0) return irjp_lex_accept(rjp_lexcat_space, state); break; + case rjp_lex_block_comment: + if(ch == '*') + state->node = rjp_lex_block_comment_end1; + break; + case rjp_lex_block_comment_end1: + if(ch == '/') + state->node = rjp_lex_block_comment_end2; + else + state->node = rjp_lex_block_comment; + break; + case rjp_lex_block_comment_end2: + return irjp_lex_accept(rjp_lexcat_space, state); #endif //true @@ -403,14 +458,12 @@ static int irjp_init_value(RJP_value* newval, RJP_lex_category cat, RJP_yacc_sta break; case rjp_lexcat_obrace: newval->type = rjp_json_object; - ++(state->target); - *(state->target) = rjp_yacc_first_mem_key; + irjp_yacc_stack_push(&state->target_stack, rjp_yacc_first_mem_key); state->curr = state->lastadded; break; case rjp_lexcat_obracket: newval->type = rjp_json_array; - ++(state->target); - *(state->target) = rjp_yacc_arr_first_value; + irjp_yacc_stack_push(&state->target_stack, rjp_yacc_arr_first_value); state->curr = state->lastadded; break; default: @@ -436,16 +489,17 @@ static RJP_value* irjp_add_value_to_object(RJP_yacc_state* state, const char* ke #define irjp_parse_error(str) do{ \ DIAG_PRINT(stderr, "%s: %d:%d\n", str, state.column, state.row); \ - rjp_free(state.target_stack); \ + irjp_delete_yacc_stack(&state.target_stack); \ rjp_free_value(state.root); \ return NULL; \ }while(0) + RJP_value* rjp_parse(const char* str){ RJP_yacc_state state = {0}; - state.target_stack = rjp_alloc(sizeof(RJP_yacc_target)*RJP_MAX_PARSE_DEPTH); - state.target = state.target_stack; - *(state.target) = rjp_yacc_end; + state.column = 1; + state.row = 1; + irjp_init_yacc_stack(&state.target_stack); state.lexstate.str = str; state.root = state.curr = state.lastadded = rjp_calloc(1, sizeof(RJP_value)); @@ -455,7 +509,7 @@ RJP_value* rjp_parse(const char* str){ if(cat == rjp_lexcat_newline){ ++state.column; - state.row = 0; + state.row = 1; }else if(cat == rjp_lexcat_space){ state.row += state.lexstate.length; }else{ @@ -475,21 +529,21 @@ RJP_value* rjp_parse(const char* str){ if(cat == rjp_lexcat_space) continue; if(cat == rjp_lexcat_newline){ - state.row = 0; + state.row = 1; ++state.column; continue; } - switch(*state.target){ + switch(irjp_yacc_stack_current(&state.target_stack)){ #ifdef RJP_ENABLE_TRAILING_COMMA case rjp_yacc_first_mem_key: case rjp_yacc_mem_key: if(cat == rjp_lexcat_cbrace){ - --state.target; + irjp_yacc_stack_pop(&state.target_stack); state.curr = state.curr->parent; }else if(cat == rjp_lexcat_string){ - *state.target = rjp_yacc_key_colon; + irjp_yacc_stack_set(&state.target_stack, rjp_yacc_key_colon); if(!irjp_add_value_to_object(&state, state.lexstate.str+state.lexstate.offset, state.lexstate.length)){ irjp_parse_error("Expected member key"); } @@ -500,10 +554,10 @@ RJP_value* rjp_parse(const char* str){ case rjp_yacc_arr_first_value: case rjp_yacc_arr_value: if(cat == rjp_lexcat_cbracket){ - --state.target; + irjp_yacc_stack_pop(&state.target_stack); state->curr = state.curr->parent; }else{ - *state.target = rjp_yacc_arr_comma; + irjp_yacc_stack_set(&state.target_stack, rjp_yacc_arr_comma); if(!irjp_add_value_to_array(cat, &state)) irjp_parse_error("Expected value"); } @@ -511,13 +565,13 @@ RJP_value* rjp_parse(const char* str){ #else case rjp_yacc_first_mem_key: if(cat == rjp_lexcat_cbrace){ - --state.target; + irjp_yacc_stack_pop(&state.target_stack); state.curr = state.curr->parent; }else{ //fallthrough case rjp_yacc_mem_key: if(cat == rjp_lexcat_string){ - *state.target = rjp_yacc_key_colon; + irjp_yacc_stack_set(&state.target_stack, rjp_yacc_key_colon); if(!irjp_add_value_to_object(&state, state.lexstate.str+state.lexstate.offset, state.lexstate.length)){ irjp_parse_error("Expected member key"); } @@ -528,12 +582,12 @@ RJP_value* rjp_parse(const char* str){ break; case rjp_yacc_arr_first_value: if(cat == rjp_lexcat_cbracket){ - --state.target; + irjp_yacc_stack_pop(&state.target_stack); state.curr = state.curr->parent; }else{ //fallthrough case rjp_yacc_arr_value: - *state.target = rjp_yacc_arr_comma; + irjp_yacc_stack_set(&state.target_stack, rjp_yacc_arr_comma); if(!irjp_add_value_to_array(cat, &state)) irjp_parse_error("Expected value"); } @@ -543,19 +597,19 @@ RJP_value* rjp_parse(const char* str){ case rjp_yacc_key_colon: if(cat != rjp_lexcat_colon) irjp_parse_error("Expected member key"); - *state.target = rjp_yacc_obj_value; + irjp_yacc_stack_set(&state.target_stack, rjp_yacc_obj_value); break; case rjp_yacc_obj_value: - *state.target = rjp_yacc_obj_comma; + irjp_yacc_stack_set(&state.target_stack, rjp_yacc_obj_comma); if(irjp_init_value(state.lastadded, cat, &state)){ irjp_parse_error("Expected value"); } break; case rjp_yacc_obj_comma: if(cat == rjp_lexcat_comma){ - *state.target = rjp_yacc_mem_key; + irjp_yacc_stack_set(&state.target_stack, rjp_yacc_mem_key); }else if(cat == rjp_lexcat_cbrace){ - --state.target; + irjp_yacc_stack_pop(&state.target_stack); state.curr = state.curr->parent; }else{ irjp_parse_error("Expected comma"); @@ -564,9 +618,9 @@ RJP_value* rjp_parse(const char* str){ case rjp_yacc_arr_comma: if(cat == rjp_lexcat_comma){ - *state.target = rjp_yacc_arr_value; + irjp_yacc_stack_set(&state.target_stack, rjp_yacc_arr_value); }else if(cat == rjp_lexcat_cbracket){ - --state.target; + irjp_yacc_stack_pop(&state.target_stack); state.curr = state.curr->parent; }else{ irjp_parse_error("Expected comma"); @@ -578,9 +632,9 @@ RJP_value* rjp_parse(const char* str){ irjp_parse_error("Excess data after end of JSON"); }; } - if(state.target != state.target_stack) + if(state.target_stack.position != 0) irjp_parse_error("Missing closing brace"); - rjp_free(state.target_stack); + irjp_delete_yacc_stack(&state.target_stack); return state.root; } RJP_value* rjp_parse_chunked(const char* str, RJP_value* prev_chunk){ diff --git a/src/test/test.c b/src/test/test.c index eb4d4eb..6d28722 100644 --- a/src/test/test.c +++ b/src/test/test.c @@ -49,6 +49,7 @@ int main(){ "+5.5e-6", " {}", "\n{}\n", + " { \"key\" \t:\n\n\n5 \n\t\n } ", " {\t }\n", "5.5 ", "{\"key\":5}", @@ -72,6 +73,12 @@ int main(){ #endif #ifdef RJP_ENABLE_COMMENTS "//comment\n{}", + "{\"key\"://comment\n5}", + "{\"key\"//comment\n:5}", + "{//\"key\":5\n}", + "5 //comment*/", + "{/*\"key\":5*/\"key\":5}", + "[5, /*comment*/6]", #endif }; const int should_pass_cnt = sizeof(should_pass_strings)/sizeof(should_pass_strings[0]); @@ -85,6 +92,7 @@ int main(){ "6.6e+", "{6}", "[\"key\":5]", + "\"string\n\"", "[3 4]", "\"\\uD83D\\uDE1\"", "\"\\uD83D\\uDE1Q\"", @@ -97,14 +105,27 @@ int main(){ "[,]", "{,}", "[1, 2],", + "{\"key\nkey\":5}", + "{\"key\":\"key\n\"}", #ifndef RJP_ENABLE_TRAILING_COMMA "[6,7,]", "{\"1\":1,\"2\":2,}", "[6,]", "{\"1\":1,}", #endif + "{//comment\"key\":\n5}", + "{/*\"key\":*/5}", + "[5, /*6*/, 7]", + "{/*comment}", + "{//comment}", #ifndef RJP_ENABLE_COMMENTS "//comment\n{}", + "{\"key\"://comment\n5}", + "{\"key\"//comment\n:5}", + "{//\"key\":5\n}", + "5 //comment*/", + "{/*\"key\":5*/\"key\":5}", + "[5, /*comment*/6]", #endif }; const int should_fail_cnt = sizeof(should_fail_strings)/sizeof(should_fail_strings[0]); @@ -115,15 +136,21 @@ int main(){ fprintf(stderr, "Running %d tests that should pass...\n", should_pass_cnt); for(unsigned i = 0;i < sizeof(should_pass_strings)/sizeof(should_pass_strings[0]);++i){ fprintf(stderr, "%8d) ", i+1); - if(!test(should_pass_strings[i])) + if(!test(should_pass_strings[i])){ ++passed; + }else{ + fprintf(stderr, "%13s%s\n", "", should_pass_strings[i]); + } } fprintf(stderr, "\n"); printf("Running %d tests that should fail...\n", should_fail_cnt); for(unsigned i = 0;i < sizeof(should_fail_strings)/sizeof(should_fail_strings[0]);++i){ fprintf(stderr, "%8d) ", i+1); - if(test(should_fail_strings[i])) + if(test(should_fail_strings[i])){ ++passed; + }else{ + fprintf(stderr, "%13s%s\n", "", should_fail_strings[i]); + } } fprintf(stderr, "\nResults: %d/%d tests passed\n", passed, total_tests); }