Add block comments and make RJP_yacc_target stack dynamically resize

This commit is contained in:
rexy712 2020-03-10 12:56:17 -07:00
parent d6a5d3009b
commit bfd1777fef
3 changed files with 116 additions and 37 deletions

2
TODO
View File

@ -1,4 +1,2 @@
Implement chunked reading
Block comments. ie /**/
Dynamic resizing of parse state stack
Runtime setting for comma/comment support

View File

@ -25,8 +25,9 @@
#include "memory.h"
#include <stdlib.h> //strtod, strtol
#include <ctype.h> //isalpha, etc
#include <string.h> //memcpy
#define MAX_DEPTH 16
#define RJP_INITIAL_PARSE_DEPTH 16
#define rjp_lex_accept 1
typedef enum RJP_lex_node{
@ -60,10 +61,13 @@ typedef enum RJP_lex_node{
rjp_lex_fnum_e = 44,
rjp_lex_sci_num = 45,
rjp_lex_slash = 46,
rjp_lex_comment = 47,
rjp_lex_line_comment = 47,
rjp_lex_signed_number = 49,
rjp_lex_sci_num_signed = 51,
rjp_lex_newlines = 53,
rjp_lex_block_comment = 55,
rjp_lex_block_comment_end1 = 56,
rjp_lex_block_comment_end2 = 57,
rjp_lex_invalid = 1000,
rjp_lex_unrecognized_word = 1002
}RJP_lex_node;
@ -105,9 +109,13 @@ typedef struct RJP_lex_state{
RJP_index length;
RJP_index offset;
}RJP_lex_state;
//Growable stack of parse targets; embedded in RJP_yacc_state as target_stack.
typedef struct RJP_yacc_stack{
//Buffer holding the stack elements, obtained from rjp_alloc
RJP_yacc_target* stack;
//Index of the current (top) element inside `stack`
RJP_index position;
//Number of elements `stack` currently has room for
RJP_index size;
}RJP_yacc_stack;
typedef struct RJP_yacc_state{
RJP_yacc_target* target_stack;
RJP_yacc_target* target;
RJP_yacc_stack target_stack;
RJP_value* root;
RJP_value* curr;
RJP_value* lastadded;
@ -115,6 +123,37 @@ typedef struct RJP_yacc_state{
int row, column;
}RJP_yacc_state;
//Set up an empty target stack with room for RJP_INITIAL_PARSE_DEPTH entries.
//Slot 0 is seeded with rjp_yacc_end, so a freshly initialized stack has
//position 0 and its current element is rjp_yacc_end.
//NOTE(review): the result of rjp_alloc is used without a NULL check; confirm
//whether rjp_alloc can return NULL on failure.
static void irjp_init_yacc_stack(RJP_yacc_stack* s){
	RJP_yacc_target* buffer = rjp_alloc(sizeof(RJP_yacc_target) * RJP_INITIAL_PARSE_DEPTH);
	buffer[0] = rjp_yacc_end;

	s->stack = buffer;
	s->size = RJP_INITIAL_PARSE_DEPTH;
	s->position = 0;
}
//Release the buffer owned by the stack and reset its bookkeeping.
//The struct itself is not freed; only s->stack is handed to rjp_free.
//The fields are cleared afterwards so that the struct never keeps a dangling
//pointer or a stale capacity once its storage is gone.
static void irjp_delete_yacc_stack(RJP_yacc_stack* s){
	rjp_free(s->stack);
	s->stack = NULL;
	s->size = 0;
	s->position = 0;
}
//Replace the stack's buffer with a newly allocated one of `newsize` elements.
//Existing elements are carried over, the old buffer is released with rjp_free
//and s->size is updated. s->position is left untouched, so a caller that
//shrinks the stack must keep newsize greater than s->position.
//NOTE(review): the result of rjp_alloc is used without a NULL check; confirm
//whether rjp_alloc can return NULL on failure.
static void irjp_resize_yacc_stack(RJP_yacc_stack* s, RJP_index newsize){
	RJP_yacc_target* newstack = rjp_alloc(sizeof(RJP_yacc_target) * newsize);
	//Copy only as many elements as both buffers can hold. Copying s->size
	//elements unconditionally would write past the end of newstack whenever
	//newsize is smaller than the current size.
	RJP_index keep = (newsize < s->size) ? newsize : s->size;
	memcpy(newstack, s->stack, keep*sizeof(RJP_yacc_target));
	rjp_free(s->stack);
	s->stack = newstack;
	s->size = newsize;
}
//Push `target` so that it becomes the current element of the stack.
//When the slot after the current position equals the capacity, the buffer is
//doubled through irjp_resize_yacc_stack before the new element is stored.
static void irjp_yacc_stack_push(RJP_yacc_stack* s, RJP_yacc_target target){
	if(s->size == (s->position+1)){
		irjp_resize_yacc_stack(s, 2*s->size);
	}
	++s->position;
	s->stack[s->position] = target;
}
//Remove the current element from the stack and return it.
//No underflow check is performed; the position is decremented unconditionally.
static RJP_yacc_target irjp_yacc_stack_pop(RJP_yacc_stack* s){
	const RJP_yacc_target top = s->stack[s->position];
	--s->position;
	return top;
}
//Return the element at the current position without modifying the stack.
static RJP_yacc_target irjp_yacc_stack_current(RJP_yacc_stack* s){
	const RJP_yacc_target* top = s->stack + s->position;
	return *top;
}
//Overwrite the element at the current position with `target`.
//The position and capacity of the stack are unchanged.
static void irjp_yacc_stack_set(RJP_yacc_stack* s, RJP_yacc_target target){
	RJP_yacc_target* top = s->stack + s->position;
	*top = target;
}
static RJP_lex_category irjp_lex_accept(RJP_lex_category val, RJP_lex_state* state){
state->node = rjp_lex_start;
return val;
@ -246,6 +285,8 @@ static RJP_lex_category irjp_lex(RJP_lex_state* state){
case rjp_lex_quote:
if(ch == '\\')
state->node = rjp_lex_escaped;
else if(ch == '\n' || ch == '\r')
state->node = rjp_lex_invalid;
else if(ch == '"')
state->node = rjp_lex_string;
break;
@ -259,16 +300,30 @@ static RJP_lex_category irjp_lex(RJP_lex_state* state){
//comments
case rjp_lex_slash:
if(ch == '/')
state->node = rjp_lex_comment;
state->node = rjp_lex_line_comment;
else if(ch == '*')
state->node = rjp_lex_block_comment;
else
state->node = rjp_lex_invalid;
break;
case rjp_lex_comment:
case rjp_lex_line_comment:
if(ch == '\n' || ch == '\r')
state->node = rjp_lex_newlines;
else if(ch == 0)
return irjp_lex_accept(rjp_lexcat_space, state);
break;
case rjp_lex_block_comment:
if(ch == '*')
state->node = rjp_lex_block_comment_end1;
break;
case rjp_lex_block_comment_end1:
if(ch == '/')
state->node = rjp_lex_block_comment_end2;
else
state->node = rjp_lex_block_comment;
break;
case rjp_lex_block_comment_end2:
return irjp_lex_accept(rjp_lexcat_space, state);
#endif
//true
@ -403,14 +458,12 @@ static int irjp_init_value(RJP_value* newval, RJP_lex_category cat, RJP_yacc_sta
break;
case rjp_lexcat_obrace:
newval->type = rjp_json_object;
++(state->target);
*(state->target) = rjp_yacc_first_mem_key;
irjp_yacc_stack_push(&state->target_stack, rjp_yacc_first_mem_key);
state->curr = state->lastadded;
break;
case rjp_lexcat_obracket:
newval->type = rjp_json_array;
++(state->target);
*(state->target) = rjp_yacc_arr_first_value;
irjp_yacc_stack_push(&state->target_stack, rjp_yacc_arr_first_value);
state->curr = state->lastadded;
break;
default:
@ -436,16 +489,17 @@ static RJP_value* irjp_add_value_to_object(RJP_yacc_state* state, const char* ke
#define irjp_parse_error(str) do{ \
DIAG_PRINT(stderr, "%s: %d:%d\n", str, state.column, state.row); \
rjp_free(state.target_stack); \
irjp_delete_yacc_stack(&state.target_stack); \
rjp_free_value(state.root); \
return NULL; \
}while(0)
RJP_value* rjp_parse(const char* str){
RJP_yacc_state state = {0};
state.target_stack = rjp_alloc(sizeof(RJP_yacc_target)*RJP_MAX_PARSE_DEPTH);
state.target = state.target_stack;
*(state.target) = rjp_yacc_end;
state.column = 1;
state.row = 1;
irjp_init_yacc_stack(&state.target_stack);
state.lexstate.str = str;
state.root = state.curr = state.lastadded = rjp_calloc(1, sizeof(RJP_value));
@ -455,7 +509,7 @@ RJP_value* rjp_parse(const char* str){
if(cat == rjp_lexcat_newline){
++state.column;
state.row = 0;
state.row = 1;
}else if(cat == rjp_lexcat_space){
state.row += state.lexstate.length;
}else{
@ -475,21 +529,21 @@ RJP_value* rjp_parse(const char* str){
if(cat == rjp_lexcat_space)
continue;
if(cat == rjp_lexcat_newline){
state.row = 0;
state.row = 1;
++state.column;
continue;
}
switch(*state.target){
switch(irjp_yacc_stack_current(&state.target_stack)){
#ifdef RJP_ENABLE_TRAILING_COMMA
case rjp_yacc_first_mem_key:
case rjp_yacc_mem_key:
if(cat == rjp_lexcat_cbrace){
--state.target;
irjp_yacc_stack_pop(&state.target_stack);
state.curr = state.curr->parent;
}else if(cat == rjp_lexcat_string){
*state.target = rjp_yacc_key_colon;
irjp_yacc_stack_set(&state.target_stack, rjp_yacc_key_colon);
if(!irjp_add_value_to_object(&state, state.lexstate.str+state.lexstate.offset, state.lexstate.length)){
irjp_parse_error("Expected member key");
}
@ -500,10 +554,10 @@ RJP_value* rjp_parse(const char* str){
case rjp_yacc_arr_first_value:
case rjp_yacc_arr_value:
if(cat == rjp_lexcat_cbracket){
--state.target;
irjp_yacc_stack_pop(&state.target_stack);
state->curr = state.curr->parent;
}else{
*state.target = rjp_yacc_arr_comma;
irjp_yacc_stack_set(&state.target_stack, rjp_yacc_arr_comma);
if(!irjp_add_value_to_array(cat, &state))
irjp_parse_error("Expected value");
}
@ -511,13 +565,13 @@ RJP_value* rjp_parse(const char* str){
#else
case rjp_yacc_first_mem_key:
if(cat == rjp_lexcat_cbrace){
--state.target;
irjp_yacc_stack_pop(&state.target_stack);
state.curr = state.curr->parent;
}else{
//fallthrough
case rjp_yacc_mem_key:
if(cat == rjp_lexcat_string){
*state.target = rjp_yacc_key_colon;
irjp_yacc_stack_set(&state.target_stack, rjp_yacc_key_colon);
if(!irjp_add_value_to_object(&state, state.lexstate.str+state.lexstate.offset, state.lexstate.length)){
irjp_parse_error("Expected member key");
}
@ -528,12 +582,12 @@ RJP_value* rjp_parse(const char* str){
break;
case rjp_yacc_arr_first_value:
if(cat == rjp_lexcat_cbracket){
--state.target;
irjp_yacc_stack_pop(&state.target_stack);
state.curr = state.curr->parent;
}else{
//fallthrough
case rjp_yacc_arr_value:
*state.target = rjp_yacc_arr_comma;
irjp_yacc_stack_set(&state.target_stack, rjp_yacc_arr_comma);
if(!irjp_add_value_to_array(cat, &state))
irjp_parse_error("Expected value");
}
@ -543,19 +597,19 @@ RJP_value* rjp_parse(const char* str){
case rjp_yacc_key_colon:
if(cat != rjp_lexcat_colon)
irjp_parse_error("Expected member key");
*state.target = rjp_yacc_obj_value;
irjp_yacc_stack_set(&state.target_stack, rjp_yacc_obj_value);
break;
case rjp_yacc_obj_value:
*state.target = rjp_yacc_obj_comma;
irjp_yacc_stack_set(&state.target_stack, rjp_yacc_obj_comma);
if(irjp_init_value(state.lastadded, cat, &state)){
irjp_parse_error("Expected value");
}
break;
case rjp_yacc_obj_comma:
if(cat == rjp_lexcat_comma){
*state.target = rjp_yacc_mem_key;
irjp_yacc_stack_set(&state.target_stack, rjp_yacc_mem_key);
}else if(cat == rjp_lexcat_cbrace){
--state.target;
irjp_yacc_stack_pop(&state.target_stack);
state.curr = state.curr->parent;
}else{
irjp_parse_error("Expected comma");
@ -564,9 +618,9 @@ RJP_value* rjp_parse(const char* str){
case rjp_yacc_arr_comma:
if(cat == rjp_lexcat_comma){
*state.target = rjp_yacc_arr_value;
irjp_yacc_stack_set(&state.target_stack, rjp_yacc_arr_value);
}else if(cat == rjp_lexcat_cbracket){
--state.target;
irjp_yacc_stack_pop(&state.target_stack);
state.curr = state.curr->parent;
}else{
irjp_parse_error("Expected comma");
@ -578,9 +632,9 @@ RJP_value* rjp_parse(const char* str){
irjp_parse_error("Excess data after end of JSON");
};
}
if(state.target != state.target_stack)
if(state.target_stack.position != 0)
irjp_parse_error("Missing closing brace");
rjp_free(state.target_stack);
irjp_delete_yacc_stack(&state.target_stack);
return state.root;
}
RJP_value* rjp_parse_chunked(const char* str, RJP_value* prev_chunk){

View File

@ -49,6 +49,7 @@ int main(){
"+5.5e-6",
" {}",
"\n{}\n",
" { \"key\" \t:\n\n\n5 \n\t\n } ",
" {\t }\n",
"5.5 ",
"{\"key\":5}",
@ -72,6 +73,12 @@ int main(){
#endif
#ifdef RJP_ENABLE_COMMENTS
"//comment\n{}",
"{\"key\"://comment\n5}",
"{\"key\"//comment\n:5}",
"{//\"key\":5\n}",
"5 //comment*/",
"{/*\"key\":5*/\"key\":5}",
"[5, /*comment*/6]",
#endif
};
const int should_pass_cnt = sizeof(should_pass_strings)/sizeof(should_pass_strings[0]);
@ -85,6 +92,7 @@ int main(){
"6.6e+",
"{6}",
"[\"key\":5]",
"\"string\n\"",
"[3 4]",
"\"\\uD83D\\uDE1\"",
"\"\\uD83D\\uDE1Q\"",
@ -97,14 +105,27 @@ int main(){
"[,]",
"{,}",
"[1, 2],",
"{\"key\nkey\":5}",
"{\"key\":\"key\n\"}",
#ifndef RJP_ENABLE_TRAILING_COMMA
"[6,7,]",
"{\"1\":1,\"2\":2,}",
"[6,]",
"{\"1\":1,}",
#endif
"{//comment\"key\":\n5}",
"{/*\"key\":*/5}",
"[5, /*6*/, 7]",
"{/*comment}",
"{//comment}",
#ifndef RJP_ENABLE_COMMENTS
"//comment\n{}",
"{\"key\"://comment\n5}",
"{\"key\"//comment\n:5}",
"{//\"key\":5\n}",
"5 //comment*/",
"{/*\"key\":5*/\"key\":5}",
"[5, /*comment*/6]",
#endif
};
const int should_fail_cnt = sizeof(should_fail_strings)/sizeof(should_fail_strings[0]);
@ -115,15 +136,21 @@ int main(){
fprintf(stderr, "Running %d tests that should pass...\n", should_pass_cnt);
for(unsigned i = 0;i < sizeof(should_pass_strings)/sizeof(should_pass_strings[0]);++i){
fprintf(stderr, "%8d) ", i+1);
if(!test(should_pass_strings[i]))
if(!test(should_pass_strings[i])){
++passed;
}else{
fprintf(stderr, "%13s%s\n", "", should_pass_strings[i]);
}
}
fprintf(stderr, "\n");
printf("Running %d tests that should fail...\n", should_fail_cnt);
for(unsigned i = 0;i < sizeof(should_fail_strings)/sizeof(should_fail_strings[0]);++i){
fprintf(stderr, "%8d) ", i+1);
if(test(should_fail_strings[i]))
if(test(should_fail_strings[i])){
++passed;
}else{
fprintf(stderr, "%13s%s\n", "", should_fail_strings[i]);
}
}
fprintf(stderr, "\nResults: %d/%d tests passed\n", passed, total_tests);
}