Add block comments and make RJP_yacc_target stack dynamically resize
This commit is contained in:
parent
d6a5d3009b
commit
bfd1777fef
2
TODO
2
TODO
@ -1,4 +1,2 @@
|
||||
Implement chunked reading
|
||||
Block comments. ie /**/
|
||||
Dynamic resizing of parse state stack
|
||||
Runtime setting for comma/comment support
|
||||
|
||||
120
src/input.c
120
src/input.c
@ -25,8 +25,9 @@
|
||||
#include "memory.h"
|
||||
#include <stdlib.h> //strtod, strtol
|
||||
#include <ctype.h> //isalpha, etc
|
||||
#include <string.h> //memcpy
|
||||
|
||||
#define MAX_DEPTH 16
|
||||
#define RJP_INITIAL_PARSE_DEPTH 16
|
||||
|
||||
#define rjp_lex_accept 1
|
||||
typedef enum RJP_lex_node{
|
||||
@ -60,10 +61,13 @@ typedef enum RJP_lex_node{
|
||||
rjp_lex_fnum_e = 44,
|
||||
rjp_lex_sci_num = 45,
|
||||
rjp_lex_slash = 46,
|
||||
rjp_lex_comment = 47,
|
||||
rjp_lex_line_comment = 47,
|
||||
rjp_lex_signed_number = 49,
|
||||
rjp_lex_sci_num_signed = 51,
|
||||
rjp_lex_newlines = 53,
|
||||
rjp_lex_block_comment = 55,
|
||||
rjp_lex_block_comment_end1 = 56,
|
||||
rjp_lex_block_comment_end2 = 57,
|
||||
rjp_lex_invalid = 1000,
|
||||
rjp_lex_unrecognized_word = 1002
|
||||
}RJP_lex_node;
|
||||
@ -105,9 +109,13 @@ typedef struct RJP_lex_state{
|
||||
RJP_index length;
|
||||
RJP_index offset;
|
||||
}RJP_lex_state;
|
||||
typedef struct RJP_yacc_stack{
|
||||
RJP_yacc_target* stack;
|
||||
RJP_index position;
|
||||
RJP_index size;
|
||||
}RJP_yacc_stack;
|
||||
typedef struct RJP_yacc_state{
|
||||
RJP_yacc_target* target_stack;
|
||||
RJP_yacc_target* target;
|
||||
RJP_yacc_stack target_stack;
|
||||
RJP_value* root;
|
||||
RJP_value* curr;
|
||||
RJP_value* lastadded;
|
||||
@ -115,6 +123,37 @@ typedef struct RJP_yacc_state{
|
||||
int row, column;
|
||||
}RJP_yacc_state;
|
||||
|
||||
//Set up an empty target stack.
//Allocates RJP_INITIAL_PARSE_DEPTH slots and stores rjp_yacc_end in slot 0,
//which becomes the current entry (position 0).
static void irjp_init_yacc_stack(RJP_yacc_stack* s){
	s->size = RJP_INITIAL_PARSE_DEPTH;

	RJP_yacc_target* slots = rjp_alloc(sizeof(RJP_yacc_target)*s->size);
	slots[0] = rjp_yacc_end;

	s->stack = slots;
	s->position = 0;
}
|
||||
//Release the buffer owned by the stack.
//Only the buffer is freed; the RJP_yacc_stack object itself and its
//position/size fields are left untouched.
static void irjp_delete_yacc_stack(RJP_yacc_stack* s){
	RJP_yacc_target* buffer = s->stack;
	rjp_free(buffer);
}
|
||||
//Replace the stack's buffer with one that holds 'newsize' entries.
//Existing entries are copied into the new buffer, the old buffer is freed and
//s->size is updated. s->position is not modified.
//
//Only min(old size, newsize) entries are copied so that a request smaller
//than the current capacity cannot write past the end of the new buffer.
//
//NOTE(review): rjp_alloc's failure behavior is not visible from here; if it
//can return NULL, this function (and its callers) need a way to report it.
static void irjp_resize_yacc_stack(RJP_yacc_stack* s, RJP_index newsize){
	//never copy more entries than the destination can hold
	RJP_index keep = (newsize < s->size) ? newsize : s->size;

	RJP_yacc_target* newstack = rjp_alloc(sizeof(RJP_yacc_target) * newsize);
	memcpy(newstack, s->stack, keep*sizeof(RJP_yacc_target));
	rjp_free(s->stack);

	s->stack = newstack;
	s->size = newsize;
}
|
||||
//Push 'target' and make it the current entry.
//When the next slot would be one past the end of the buffer, the capacity is
//doubled first.
static void irjp_yacc_stack_push(RJP_yacc_stack* s, RJP_yacc_target target){
	if(s->size == (s->position+1)){
		irjp_resize_yacc_stack(s, s->size*2);
	}

	s->position += 1;
	s->stack[s->position] = target;
}
|
||||
//Remove the current entry and return it.
//No check is made for popping past the bottom of the stack.
static RJP_yacc_target irjp_yacc_stack_pop(RJP_yacc_stack* s){
	RJP_yacc_target top = s->stack[s->position];
	s->position -= 1;
	return top;
}
|
||||
//Return the current (top) entry without modifying the stack.
static RJP_yacc_target irjp_yacc_stack_current(RJP_yacc_stack* s){
	const RJP_yacc_target* top = s->stack + s->position;
	return *top;
}
|
||||
//Overwrite the current (top) entry with 'target'; the stack depth is unchanged.
static void irjp_yacc_stack_set(RJP_yacc_stack* s, RJP_yacc_target target){
	RJP_yacc_target* top = s->stack + s->position;
	*top = target;
}
|
||||
|
||||
static RJP_lex_category irjp_lex_accept(RJP_lex_category val, RJP_lex_state* state){
|
||||
state->node = rjp_lex_start;
|
||||
return val;
|
||||
@ -246,6 +285,8 @@ static RJP_lex_category irjp_lex(RJP_lex_state* state){
|
||||
case rjp_lex_quote:
|
||||
if(ch == '\\')
|
||||
state->node = rjp_lex_escaped;
|
||||
else if(ch == '\n' || ch == '\r')
|
||||
state->node = rjp_lex_invalid;
|
||||
else if(ch == '"')
|
||||
state->node = rjp_lex_string;
|
||||
break;
|
||||
@ -259,16 +300,30 @@ static RJP_lex_category irjp_lex(RJP_lex_state* state){
|
||||
//comments
|
||||
case rjp_lex_slash:
|
||||
if(ch == '/')
|
||||
state->node = rjp_lex_comment;
|
||||
state->node = rjp_lex_line_comment;
|
||||
else if(ch == '*')
|
||||
state->node = rjp_lex_block_comment;
|
||||
else
|
||||
state->node = rjp_lex_invalid;
|
||||
break;
|
||||
case rjp_lex_comment:
|
||||
case rjp_lex_line_comment:
|
||||
if(ch == '\n' || ch == '\r')
|
||||
state->node = rjp_lex_newlines;
|
||||
else if(ch == 0)
|
||||
return irjp_lex_accept(rjp_lexcat_space, state);
|
||||
break;
|
||||
case rjp_lex_block_comment:
|
||||
if(ch == '*')
|
||||
state->node = rjp_lex_block_comment_end1;
|
||||
break;
|
||||
case rjp_lex_block_comment_end1:
|
||||
if(ch == '/')
|
||||
state->node = rjp_lex_block_comment_end2;
|
||||
else
|
||||
state->node = rjp_lex_block_comment;
|
||||
break;
|
||||
case rjp_lex_block_comment_end2:
|
||||
return irjp_lex_accept(rjp_lexcat_space, state);
|
||||
#endif
|
||||
|
||||
//true
|
||||
@ -403,14 +458,12 @@ static int irjp_init_value(RJP_value* newval, RJP_lex_category cat, RJP_yacc_sta
|
||||
break;
|
||||
case rjp_lexcat_obrace:
|
||||
newval->type = rjp_json_object;
|
||||
++(state->target);
|
||||
*(state->target) = rjp_yacc_first_mem_key;
|
||||
irjp_yacc_stack_push(&state->target_stack, rjp_yacc_first_mem_key);
|
||||
state->curr = state->lastadded;
|
||||
break;
|
||||
case rjp_lexcat_obracket:
|
||||
newval->type = rjp_json_array;
|
||||
++(state->target);
|
||||
*(state->target) = rjp_yacc_arr_first_value;
|
||||
irjp_yacc_stack_push(&state->target_stack, rjp_yacc_arr_first_value);
|
||||
state->curr = state->lastadded;
|
||||
break;
|
||||
default:
|
||||
@ -436,16 +489,17 @@ static RJP_value* irjp_add_value_to_object(RJP_yacc_state* state, const char* ke
|
||||
|
||||
#define irjp_parse_error(str) do{ \
|
||||
DIAG_PRINT(stderr, "%s: %d:%d\n", str, state.column, state.row); \
|
||||
rjp_free(state.target_stack); \
|
||||
irjp_delete_yacc_stack(&state.target_stack); \
|
||||
rjp_free_value(state.root); \
|
||||
return NULL; \
|
||||
}while(0)
|
||||
|
||||
|
||||
RJP_value* rjp_parse(const char* str){
|
||||
RJP_yacc_state state = {0};
|
||||
state.target_stack = rjp_alloc(sizeof(RJP_yacc_target)*RJP_MAX_PARSE_DEPTH);
|
||||
state.target = state.target_stack;
|
||||
*(state.target) = rjp_yacc_end;
|
||||
state.column = 1;
|
||||
state.row = 1;
|
||||
irjp_init_yacc_stack(&state.target_stack);
|
||||
state.lexstate.str = str;
|
||||
state.root = state.curr = state.lastadded = rjp_calloc(1, sizeof(RJP_value));
|
||||
|
||||
@ -455,7 +509,7 @@ RJP_value* rjp_parse(const char* str){
|
||||
|
||||
if(cat == rjp_lexcat_newline){
|
||||
++state.column;
|
||||
state.row = 0;
|
||||
state.row = 1;
|
||||
}else if(cat == rjp_lexcat_space){
|
||||
state.row += state.lexstate.length;
|
||||
}else{
|
||||
@ -475,21 +529,21 @@ RJP_value* rjp_parse(const char* str){
|
||||
if(cat == rjp_lexcat_space)
|
||||
continue;
|
||||
if(cat == rjp_lexcat_newline){
|
||||
state.row = 0;
|
||||
state.row = 1;
|
||||
++state.column;
|
||||
continue;
|
||||
}
|
||||
|
||||
switch(*state.target){
|
||||
switch(irjp_yacc_stack_current(&state.target_stack)){
|
||||
|
||||
#ifdef RJP_ENABLE_TRAILING_COMMA
|
||||
case rjp_yacc_first_mem_key:
|
||||
case rjp_yacc_mem_key:
|
||||
if(cat == rjp_lexcat_cbrace){
|
||||
--state.target;
|
||||
irjp_yacc_stack_pop(&state.target_stack);
|
||||
state.curr = state.curr->parent;
|
||||
}else if(cat == rjp_lexcat_string){
|
||||
*state.target = rjp_yacc_key_colon;
|
||||
irjp_yacc_stack_set(&state.target_stack, rjp_yacc_key_colon);
|
||||
if(!irjp_add_value_to_object(&state, state.lexstate.str+state.lexstate.offset, state.lexstate.length)){
|
||||
irjp_parse_error("Expected member key");
|
||||
}
|
||||
@ -500,10 +554,10 @@ RJP_value* rjp_parse(const char* str){
|
||||
case rjp_yacc_arr_first_value:
|
||||
case rjp_yacc_arr_value:
|
||||
if(cat == rjp_lexcat_cbracket){
|
||||
--state.target;
|
||||
irjp_yacc_stack_pop(&state.target_stack);
|
||||
state->curr = state.curr->parent;
|
||||
}else{
|
||||
*state.target = rjp_yacc_arr_comma;
|
||||
irjp_yacc_stack_set(&state.target_stack, rjp_yacc_arr_comma);
|
||||
if(!irjp_add_value_to_array(cat, &state))
|
||||
irjp_parse_error("Expected value");
|
||||
}
|
||||
@ -511,13 +565,13 @@ RJP_value* rjp_parse(const char* str){
|
||||
#else
|
||||
case rjp_yacc_first_mem_key:
|
||||
if(cat == rjp_lexcat_cbrace){
|
||||
--state.target;
|
||||
irjp_yacc_stack_pop(&state.target_stack);
|
||||
state.curr = state.curr->parent;
|
||||
}else{
|
||||
//fallthrough
|
||||
case rjp_yacc_mem_key:
|
||||
if(cat == rjp_lexcat_string){
|
||||
*state.target = rjp_yacc_key_colon;
|
||||
irjp_yacc_stack_set(&state.target_stack, rjp_yacc_key_colon);
|
||||
if(!irjp_add_value_to_object(&state, state.lexstate.str+state.lexstate.offset, state.lexstate.length)){
|
||||
irjp_parse_error("Expected member key");
|
||||
}
|
||||
@ -528,12 +582,12 @@ RJP_value* rjp_parse(const char* str){
|
||||
break;
|
||||
case rjp_yacc_arr_first_value:
|
||||
if(cat == rjp_lexcat_cbracket){
|
||||
--state.target;
|
||||
irjp_yacc_stack_pop(&state.target_stack);
|
||||
state.curr = state.curr->parent;
|
||||
}else{
|
||||
//fallthrough
|
||||
case rjp_yacc_arr_value:
|
||||
*state.target = rjp_yacc_arr_comma;
|
||||
irjp_yacc_stack_set(&state.target_stack, rjp_yacc_arr_comma);
|
||||
if(!irjp_add_value_to_array(cat, &state))
|
||||
irjp_parse_error("Expected value");
|
||||
}
|
||||
@ -543,19 +597,19 @@ RJP_value* rjp_parse(const char* str){
|
||||
case rjp_yacc_key_colon:
|
||||
if(cat != rjp_lexcat_colon)
|
||||
irjp_parse_error("Expected member key");
|
||||
*state.target = rjp_yacc_obj_value;
|
||||
irjp_yacc_stack_set(&state.target_stack, rjp_yacc_obj_value);
|
||||
break;
|
||||
case rjp_yacc_obj_value:
|
||||
*state.target = rjp_yacc_obj_comma;
|
||||
irjp_yacc_stack_set(&state.target_stack, rjp_yacc_obj_comma);
|
||||
if(irjp_init_value(state.lastadded, cat, &state)){
|
||||
irjp_parse_error("Expected value");
|
||||
}
|
||||
break;
|
||||
case rjp_yacc_obj_comma:
|
||||
if(cat == rjp_lexcat_comma){
|
||||
*state.target = rjp_yacc_mem_key;
|
||||
irjp_yacc_stack_set(&state.target_stack, rjp_yacc_mem_key);
|
||||
}else if(cat == rjp_lexcat_cbrace){
|
||||
--state.target;
|
||||
irjp_yacc_stack_pop(&state.target_stack);
|
||||
state.curr = state.curr->parent;
|
||||
}else{
|
||||
irjp_parse_error("Expected comma");
|
||||
@ -564,9 +618,9 @@ RJP_value* rjp_parse(const char* str){
|
||||
|
||||
case rjp_yacc_arr_comma:
|
||||
if(cat == rjp_lexcat_comma){
|
||||
*state.target = rjp_yacc_arr_value;
|
||||
irjp_yacc_stack_set(&state.target_stack, rjp_yacc_arr_value);
|
||||
}else if(cat == rjp_lexcat_cbracket){
|
||||
--state.target;
|
||||
irjp_yacc_stack_pop(&state.target_stack);
|
||||
state.curr = state.curr->parent;
|
||||
}else{
|
||||
irjp_parse_error("Expected comma");
|
||||
@ -578,9 +632,9 @@ RJP_value* rjp_parse(const char* str){
|
||||
irjp_parse_error("Excess data after end of JSON");
|
||||
};
|
||||
}
|
||||
if(state.target != state.target_stack)
|
||||
if(state.target_stack.position != 0)
|
||||
irjp_parse_error("Missing closing brace");
|
||||
rjp_free(state.target_stack);
|
||||
irjp_delete_yacc_stack(&state.target_stack);
|
||||
return state.root;
|
||||
}
|
||||
RJP_value* rjp_parse_chunked(const char* str, RJP_value* prev_chunk){
|
||||
|
||||
@ -49,6 +49,7 @@ int main(){
|
||||
"+5.5e-6",
|
||||
" {}",
|
||||
"\n{}\n",
|
||||
" { \"key\" \t:\n\n\n5 \n\t\n } ",
|
||||
" {\t }\n",
|
||||
"5.5 ",
|
||||
"{\"key\":5}",
|
||||
@ -72,6 +73,12 @@ int main(){
|
||||
#endif
|
||||
#ifdef RJP_ENABLE_COMMENTS
|
||||
"//comment\n{}",
|
||||
"{\"key\"://comment\n5}",
|
||||
"{\"key\"//comment\n:5}",
|
||||
"{//\"key\":5\n}",
|
||||
"5 //comment*/",
|
||||
"{/*\"key\":5*/\"key\":5}",
|
||||
"[5, /*comment*/6]",
|
||||
#endif
|
||||
};
|
||||
const int should_pass_cnt = sizeof(should_pass_strings)/sizeof(should_pass_strings[0]);
|
||||
@ -85,6 +92,7 @@ int main(){
|
||||
"6.6e+",
|
||||
"{6}",
|
||||
"[\"key\":5]",
|
||||
"\"string\n\"",
|
||||
"[3 4]",
|
||||
"\"\\uD83D\\uDE1\"",
|
||||
"\"\\uD83D\\uDE1Q\"",
|
||||
@ -97,14 +105,27 @@ int main(){
|
||||
"[,]",
|
||||
"{,}",
|
||||
"[1, 2],",
|
||||
"{\"key\nkey\":5}",
|
||||
"{\"key\":\"key\n\"}",
|
||||
#ifndef RJP_ENABLE_TRAILING_COMMA
|
||||
"[6,7,]",
|
||||
"{\"1\":1,\"2\":2,}",
|
||||
"[6,]",
|
||||
"{\"1\":1,}",
|
||||
#endif
|
||||
"{//comment\"key\":\n5}",
|
||||
"{/*\"key\":*/5}",
|
||||
"[5, /*6*/, 7]",
|
||||
"{/*comment}",
|
||||
"{//comment}",
|
||||
#ifndef RJP_ENABLE_COMMENTS
|
||||
"//comment\n{}",
|
||||
"{\"key\"://comment\n5}",
|
||||
"{\"key\"//comment\n:5}",
|
||||
"{//\"key\":5\n}",
|
||||
"5 //comment*/",
|
||||
"{/*\"key\":5*/\"key\":5}",
|
||||
"[5, /*comment*/6]",
|
||||
#endif
|
||||
};
|
||||
const int should_fail_cnt = sizeof(should_fail_strings)/sizeof(should_fail_strings[0]);
|
||||
@ -115,15 +136,21 @@ int main(){
|
||||
fprintf(stderr, "Running %d tests that should pass...\n", should_pass_cnt);
|
||||
for(unsigned i = 0;i < sizeof(should_pass_strings)/sizeof(should_pass_strings[0]);++i){
|
||||
fprintf(stderr, "%8d) ", i+1);
|
||||
if(!test(should_pass_strings[i]))
|
||||
if(!test(should_pass_strings[i])){
|
||||
++passed;
|
||||
}else{
|
||||
fprintf(stderr, "%13s%s\n", "", should_pass_strings[i]);
|
||||
}
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
printf("Running %d tests that should fail...\n", should_fail_cnt);
|
||||
for(unsigned i = 0;i < sizeof(should_fail_strings)/sizeof(should_fail_strings[0]);++i){
|
||||
fprintf(stderr, "%8d) ", i+1);
|
||||
if(test(should_fail_strings[i]))
|
||||
if(test(should_fail_strings[i])){
|
||||
++passed;
|
||||
}else{
|
||||
fprintf(stderr, "%13s%s\n", "", should_fail_strings[i]);
|
||||
}
|
||||
}
|
||||
fprintf(stderr, "\nResults: %d/%d tests passed\n", passed, total_tests);
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user