From 32db7680d789fabac747b17f11cdfadac342e72e Mon Sep 17 00:00:00 2001 From: rexy712 Date: Sun, 22 Mar 2020 16:07:52 -0700 Subject: [PATCH] Cleanup and comment a bit more of the source --- include/rjp.h | 6 ++++- include/rjp_lex.h | 19 ++++++++------- src/rjp_lex.c | 25 +++++++++++++++---- src/rjp_parse.c | 61 ++++++++++++++++++++++++++--------------------- 4 files changed, 69 insertions(+), 42 deletions(-) diff --git a/include/rjp.h b/include/rjp.h index ba2dd7f..73447b0 100644 --- a/include/rjp.h +++ b/include/rjp.h @@ -31,6 +31,8 @@ extern "C"{ # else # define DEPRECATED(str) # endif +# else +# define DEPRECATED(str) # endif #else # if __STDC_VERSION__ > 201710L @@ -63,11 +65,12 @@ typedef enum RJP_format_flag{ RJP_FORMAT_PRETTY = 1 }RJP_format_flag; +//used with rjp_parse typedef enum RJP_parse_flag{ RJP_PARSE_NONE = 0, RJP_PARSE_ALLOW_COMMENTS = 1, RJP_PARSE_ALLOW_TRAILING_COMMA = 2, - RJP_PARSE_ALL_EXT = 3 + RJP_PARSE_ALL_EXT = RJP_PARSE_ALLOW_COMMENTS | RJP_PARSE_ALLOW_TRAILING_COMMA }RJP_parse_flag; //type of data @@ -130,6 +133,7 @@ RJP_string rjp_escape(const char* src); /***************** GENERIC OPERATIONS *******************/ //Convert C string consisting of json data into RJP's format RJP_value* rjp_parse(const char* str, int flags); +//Read json data in using a user supplied callback and convert it to RJP's format RJP_value* rjp_parse_cback(int flags, RJP_parse_callback* cbacks); char* rjp_to_json(const RJP_value* root, int pretty); diff --git a/include/rjp_lex.h b/include/rjp_lex.h index 92ee81e..7baa4d5 100644 --- a/include/rjp_lex.h +++ b/include/rjp_lex.h @@ -25,6 +25,7 @@ #define RJP_LEX_CBACK_STR_SIZE 64 #define rjp_lex_accept 1 +//DFA states. odd numbers are accepting states typedef enum RJP_lex_category{ rjp_lex_start = 0, rjp_lex_obracket = 3, @@ -69,15 +70,15 @@ typedef enum RJP_lex_category{ }RJP_lex_category; typedef struct RJP_lex_state{ - char* str; - char* buff; - RJP_index strl; - RJP_index buffl; - RJP_index buffcap; - RJP_index buffpos; - RJP_lex_category node; - RJP_index length; - RJP_index offset; + char* str; //must hold value parser will use to create tokens. eg contents of strings + char* buff; //holds temporary data in callback based lexer + RJP_index strcap; //capacity of str. used in callback lexer + RJP_index buffl; //length of buff currently in use. used in callback lexer + RJP_index buffcap; //capacity of buff. used in callback lexer + RJP_index buffpos; //current position in buff being read. used in callback lexer + RJP_lex_category node; //tracks current dfa state + RJP_index length; //length of current token which parser will utilize + RJP_index offset; //offset in the str buffer that the parser should start from. must be 0 in callback lexer }RJP_lex_state; void irjp_init_lex_cback_state(RJP_lex_state* state); diff --git a/src/rjp_lex.c b/src/rjp_lex.c index 1357a15..e2e91d0 100644 --- a/src/rjp_lex.c +++ b/src/rjp_lex.c @@ -24,7 +24,7 @@ void irjp_init_lex_cback_state(RJP_lex_state* state){ state->str = rjp_alloc(RJP_LEX_CBACK_STR_SIZE+1); state->str[RJP_LEX_CBACK_STR_SIZE] = 0; - state->strl = RJP_LEX_CBACK_STR_SIZE; + state->strcap = RJP_LEX_CBACK_STR_SIZE; state->buff = rjp_alloc(RJP_LEX_CBACK_BUFFER_SIZE); state->buffcap = RJP_LEX_CBACK_BUFFER_SIZE; } @@ -292,6 +292,9 @@ static RJP_lex_category irjp_lex_char(char ch, RJP_lex_category node){ }; return node; } +//straight forward lex. All json data in single string +//use state->str as constant string. index into it using state->offset and +//state->length to acquire tokens in the parser RJP_lex_category irjp_lex(RJP_lex_state* state){ state->offset += state->length; state->length = 0; @@ -308,26 +311,36 @@ RJP_lex_category irjp_lex(RJP_lex_state* state){ static void irjp_lex_resize_strbuf(RJP_lex_state* state, int newsize){ char* newbuf = rjp_alloc(newsize+1); - memcpy(newbuf, state->str, state->strl); + memcpy(newbuf, state->str, state->strcap); newbuf[newsize] = 0; rjp_free(state->str); state->str = newbuf; - state->strl = newsize; + state->strcap = newsize; } +//user callback based lexer. Not all json data is available at one time. +//Tokens need saved in a secondary buffer for the parser to have access. +//state->str is where the secondary buffer is located and state->length +//is used to track its size. state->offset MUST be 0 for parser to get proper +//token values RJP_lex_category irjp_lex_cback(RJP_lex_state* state, RJP_parse_callback* cbacks){ state->length = 0; + //pick up from previous invocation RJP_index chars_read = state->buffl; if(chars_read == 0){ state->buffpos = 0; chars_read = cbacks->read(state->buff, state->buffcap, cbacks->data); state->buffl = chars_read; } + + //loop until callback returns 0 new chars while(chars_read > 0){ + //loop over all characters in current buffer for(RJP_index i = 0;(i+state->buffpos) < chars_read;++i,++state->length){ - if(state->length == state->strl) - irjp_lex_resize_strbuf(state, state->strl*2); + if(state->length == state->strcap) //need more space to store lex token + irjp_lex_resize_strbuf(state, state->strcap*2); RJP_lex_category cat = irjp_lex_char(state->buff[state->buffpos+i], state->node); if(cat == rjp_lex_invalid){ + //save necessary state and return previous state state->buffpos = i + state->buffpos; state->str[state->length] = 0; return irjp_lex_accept(state->node, state); @@ -335,10 +348,12 @@ RJP_lex_category irjp_lex_cback(RJP_lex_state* state, RJP_parse_callback* cbacks state->str[state->length] = state->buff[state->buffpos+i]; state->node = cat; } + //read new values into buffer, reset buffer related state chars_read = cbacks->read(state->buff, state->buffcap, cbacks->data); state->buffpos = 0; state->buffl = chars_read; } + //lexing cannot continue due to lack of input ++state->buffpos; state->str[state->length] = 0; RJP_lex_category cat = state->node; diff --git a/src/rjp_parse.c b/src/rjp_parse.c index 01a9c85..9c95ac8 100644 --- a/src/rjp_parse.c +++ b/src/rjp_parse.c @@ -29,6 +29,9 @@ #define RJP_INITIAL_PARSE_DEPTH 16 +#define RJP_PARSE_STATUS_ERR 1 +#define RJP_PARSE_STATUS_SUC 2 + typedef enum RJP_parse_target{ rjp_parse_end, @@ -173,13 +176,10 @@ static void irjp_delete_parse_state(RJP_parse_state* state){ state->root = NULL; } -#define RJP_PARSE_STATUS_ERR 1 -#define RJP_PARSE_STATUS_SUC 2 - -#define irjp_parse_error(str) do{ \ - DIAG_PRINT(stderr, "%s: %d:%d\n", str, state->column, state->row); \ - return RJP_PARSE_STATUS_ERR; \ - }while(0) +static inline int irjp_parse_error(RJP_parse_state* state, const char* str){ + DIAG_PRINT(stderr, "%s: %d:%d\n", str, state->column, state->row); + return RJP_PARSE_STATUS_ERR; +} static int irjp_parse_handle_lexcat(RJP_lex_category cat, RJP_parse_state* state){ if(cat == rjp_lex_line_comment || cat == rjp_lex_block_comment) @@ -193,14 +193,14 @@ static int irjp_parse_handle_lexcat(RJP_lex_category cat, RJP_parse_state* state return RJP_PARSE_STATUS_SUC; } if(cat == rjp_lex_invalid) - irjp_parse_error("Invalid token"); + return irjp_parse_error(state, "Invalid token"); switch(irjp_parse_stack_current(&state->target_stack)){ case rjp_parse_start: irjp_parse_stack_set(&state->target_stack, rjp_parse_end); if(irjp_init_value(state->root, cat, state)){ - irjp_parse_error("Expected value"); + return irjp_parse_error(state, "Expected value"); } break; case rjp_parse_first_mem_key: @@ -213,10 +213,10 @@ static int irjp_parse_handle_lexcat(RJP_lex_category cat, RJP_parse_state* state if(cat == rjp_lex_string){ irjp_parse_stack_set(&state->target_stack, rjp_parse_key_colon); if(!irjp_add_value_to_object(state, state->lexstate.str+state->lexstate.offset, state->lexstate.length)){ - irjp_parse_error("Expected member key"); + return irjp_parse_error(state, "Expected member key"); } }else{ - irjp_parse_error("Expected member key"); + return irjp_parse_error(state, "Expected member key"); } } break; @@ -229,19 +229,19 @@ static int irjp_parse_handle_lexcat(RJP_lex_category cat, RJP_parse_state* state case rjp_parse_arr_value: irjp_parse_stack_set(&state->target_stack, rjp_parse_arr_comma); if(!irjp_add_value_to_array(cat, state)) - irjp_parse_error("Expected value"); + return irjp_parse_error(state, "Expected value"); } break; case rjp_parse_key_colon: if(cat != rjp_lex_colon) - irjp_parse_error("Expected member key"); + return irjp_parse_error(state, "Expected member key"); irjp_parse_stack_set(&state->target_stack, rjp_parse_obj_value); break; case rjp_parse_obj_value: irjp_parse_stack_set(&state->target_stack, rjp_parse_obj_comma); if(irjp_init_value(state->lastadded, cat, state)){ - irjp_parse_error("Expected value"); + return irjp_parse_error(state, "Expected value"); } break; case rjp_parse_obj_comma: @@ -251,7 +251,7 @@ static int irjp_parse_handle_lexcat(RJP_lex_category cat, RJP_parse_state* state irjp_parse_stack_pop(&state->target_stack); state->curr = state->curr->parent; }else{ - irjp_parse_error("Expected comma"); + return irjp_parse_error(state, "Expected comma"); } break; @@ -262,39 +262,44 @@ static int irjp_parse_handle_lexcat(RJP_lex_category cat, RJP_parse_state* state irjp_parse_stack_pop(&state->target_stack); state->curr = state->curr->parent; }else{ - irjp_parse_error("Expected comma"); + return irjp_parse_error(state, "Expected comma"); } break; case rjp_parse_end: if(state->lexstate.str[state->lexstate.offset] != 0) - irjp_parse_error("Excess data after end of JSON"); + return irjp_parse_error(state, "Excess data after end of JSON"); }; return RJP_PARSE_STATUS_SUC; } +//Handle the final token returned by the lexer. rjp_lex_end is a nonaccepting state to break the +//parse loop. it is a successful state though as it just indicates end of input. +static int irjp_handle_final_parse_token(RJP_parse_state* state, RJP_lex_category cat){ + if(state->target_stack.position != 0) + return irjp_parse_error(state, "Missing closing brace"); + if(cat == rjp_lex_end) + return RJP_PARSE_STATUS_SUC; + return irjp_parse_error(state, "Invalid Token"); +} + +//Basic parse loop static int irjp_parse(RJP_parse_state* state){ RJP_lex_category cat; for(cat = irjp_lex(&state->lexstate);cat & rjp_lex_accept;cat = irjp_lex(&state->lexstate),state->row += state->lexstate.length){ if(irjp_parse_handle_lexcat(cat, state) != RJP_PARSE_STATUS_SUC) return RJP_PARSE_STATUS_ERR; } - if(state->target_stack.position != 0) - irjp_parse_error("Missing closing brace"); - if(cat == rjp_lex_end) - return RJP_PARSE_STATUS_SUC; - irjp_parse_error("Invalid Token"); + return irjp_handle_final_parse_token(state, cat); } + +//Callback parse loop static int irjp_parse_cback(RJP_parse_state* state, RJP_parse_callback* cback){ RJP_lex_category cat; for(cat = irjp_lex_cback(&state->lexstate, cback);cat & rjp_lex_accept;cat = irjp_lex_cback(&state->lexstate, cback),state->row += state->lexstate.length){ if(irjp_parse_handle_lexcat(cat, state) != RJP_PARSE_STATUS_SUC) return RJP_PARSE_STATUS_ERR; } - if(state->target_stack.position != 0) - irjp_parse_error("Missing closing brace"); - if(cat == rjp_lex_end) - return RJP_PARSE_STATUS_SUC; - irjp_parse_error("Invalid Token"); + return irjp_handle_final_parse_token(state, cat); } RJP_value* rjp_parse(const char* str, int flags){ RJP_parse_state state = {.allow_comments = (flags & RJP_PARSE_ALLOW_COMMENTS), @@ -311,6 +316,8 @@ RJP_value* rjp_parse(const char* str, int flags){ } } +//Callback based parse. Runs identical to normal parsing except sets up callback +//lex state and calls callback lex function RJP_value* rjp_parse_cback(int flags, RJP_parse_callback* cback){ RJP_parse_state state = {.allow_comments = (flags & RJP_PARSE_ALLOW_COMMENTS), .allow_trail_comma = (flags & RJP_PARSE_ALLOW_TRAILING_COMMA)