Clean up and comment a bit more of the source

This commit is contained in:
rexy712 2020-03-22 16:07:52 -07:00
parent d6213cfe1b
commit 32db7680d7
4 changed files with 69 additions and 42 deletions

View File

@ -31,6 +31,8 @@ extern "C"{
# else
# define DEPRECATED(str)
# endif
# else
# define DEPRECATED(str)
# endif
#else
# if __STDC_VERSION__ > 201710L
@ -63,11 +65,12 @@ typedef enum RJP_format_flag{
RJP_FORMAT_PRETTY = 1
}RJP_format_flag;
//used with rjp_parse
//Bit flags selecting which extensions to the JSON grammar the parser accepts.
//Each option is a distinct bit so options can be OR-ed together into the
//int flags argument of rjp_parse / rjp_parse_cback, which test them with &.
typedef enum RJP_parse_flag{
//no extensions enabled
RJP_PARSE_NONE = 0,
//tested by the parse entry points to set the parse state's allow_comments
RJP_PARSE_ALLOW_COMMENTS = 1,
//tested by the parse entry points to set the parse state's allow_trail_comma
RJP_PARSE_ALLOW_TRAILING_COMMA = 2,
//every extension enabled; numerically 1 | 2 == 3
RJP_PARSE_ALL_EXT = 3
RJP_PARSE_ALL_EXT = RJP_PARSE_ALLOW_COMMENTS | RJP_PARSE_ALLOW_TRAILING_COMMA
}RJP_parse_flag;
//type of data
@ -130,6 +133,7 @@ RJP_string rjp_escape(const char* src);
/***************** GENERIC OPERATIONS *******************/
//Convert C string consisting of json data into RJP's format
RJP_value* rjp_parse(const char* str, int flags);
//Read json data in using a user supplied callback and convert it to RJP's format
RJP_value* rjp_parse_cback(int flags, RJP_parse_callback* cbacks);
char* rjp_to_json(const RJP_value* root, int pretty);

View File

@ -25,6 +25,7 @@
#define RJP_LEX_CBACK_STR_SIZE 64
#define rjp_lex_accept 1
//DFA states. odd numbers are accepting states
typedef enum RJP_lex_category{
rjp_lex_start = 0,
rjp_lex_obracket = 3,
@ -69,15 +70,15 @@ typedef enum RJP_lex_category{
}RJP_lex_category;
//State carried between successive lexer calls (irjp_lex / irjp_lex_cback).
//The parser obtains the text of the current token from str + offset with
//the given length; the buff* fields are only meaningful to the callback lexer.
typedef struct RJP_lex_state{
char* str;
char* buff;
RJP_index strl;
RJP_index buffl;
RJP_index buffcap;
RJP_index buffpos;
RJP_lex_category node;
RJP_index length;
RJP_index offset;
char* str; //must hold value parser will use to create tokens. eg contents of strings
char* buff; //holds temporary data in callback based lexer
RJP_index strcap; //capacity of str. used in callback lexer
RJP_index buffl; //length of buff currently in use. used in callback lexer
RJP_index buffcap; //capacity of buff. used in callback lexer
RJP_index buffpos; //current position in buff being read. used in callback lexer
RJP_lex_category node; //tracks current dfa state
RJP_index length; //length of current token which parser will utilize
RJP_index offset; //offset in the str buffer that the parser should start from. must be 0 in callback lexer
}RJP_lex_state;
void irjp_init_lex_cback_state(RJP_lex_state* state);

View File

@ -24,7 +24,7 @@
//Allocate the two buffers used by the callback based lexer and record their
//capacities: str (token storage) and buff (raw input read from the callback).
//Only the fields assigned below are touched.
//NOTE(review): presumably the caller zero-initializes the remaining fields
//(buffl, buffpos, length, offset, node) before lexing starts - confirm.
//NOTE(review): the rjp_alloc results are used without a NULL check - confirm
//that rjp_alloc cannot return NULL or that failure is handled elsewhere.
void irjp_init_lex_cback_state(RJP_lex_state* state){
//one extra byte beyond the usable capacity holds a terminating 0
state->str = rjp_alloc(RJP_LEX_CBACK_STR_SIZE+1);
state->str[RJP_LEX_CBACK_STR_SIZE] = 0;
//recorded size excludes the terminator byte
state->strl = RJP_LEX_CBACK_STR_SIZE;
state->strcap = RJP_LEX_CBACK_STR_SIZE;
//input buffer handed to the user's read callback
state->buff = rjp_alloc(RJP_LEX_CBACK_BUFFER_SIZE);
state->buffcap = RJP_LEX_CBACK_BUFFER_SIZE;
}
@ -292,6 +292,9 @@ static RJP_lex_category irjp_lex_char(char ch, RJP_lex_category node){
};
return node;
}
//Straightforward lexer: all json data is available in a single string.
//state->str is treated as a constant string; the parser acquires tokens by
//indexing into it with state->offset and state->length.
RJP_lex_category irjp_lex(RJP_lex_state* state){
state->offset += state->length;
state->length = 0;
@ -308,26 +311,36 @@ RJP_lex_category irjp_lex(RJP_lex_state* state){
//Replace state->str with a freshly allocated buffer of newsize usable bytes
//(plus one byte for a terminating 0), carrying over the old contents.
//The old buffer is freed and the recorded size of str is set to newsize.
//NOTE(review): the copy length is the old recorded size, so newsize is assumed
//to be at least that large; the visible caller always passes double the
//current size.
//NOTE(review): the rjp_alloc result is used without a NULL check - confirm.
static void irjp_lex_resize_strbuf(RJP_lex_state* state, int newsize){
char* newbuf = rjp_alloc(newsize+1);
//preserve the token characters gathered so far
memcpy(newbuf, state->str, state->strl);
memcpy(newbuf, state->str, state->strcap);
//terminator lives in the extra byte past the usable capacity
newbuf[newsize] = 0;
rjp_free(state->str);
state->str = newbuf;
state->strl = newsize;
state->strcap = newsize;
}
//User callback based lexer. Not all json data is available at one time, so
//tokens need to be saved in a secondary buffer for the parser to access.
//state->str is where the secondary buffer is located and state->length
//is used to track its size. state->offset MUST be 0 for the parser to get
//proper token values.
RJP_lex_category irjp_lex_cback(RJP_lex_state* state, RJP_parse_callback* cbacks){
state->length = 0;
//pick up from previous invocation
RJP_index chars_read = state->buffl;
if(chars_read == 0){
state->buffpos = 0;
chars_read = cbacks->read(state->buff, state->buffcap, cbacks->data);
state->buffl = chars_read;
}
//loop until callback returns 0 new chars
while(chars_read > 0){
//loop over all characters in current buffer
for(RJP_index i = 0;(i+state->buffpos) < chars_read;++i,++state->length){
if(state->length == state->strl)
irjp_lex_resize_strbuf(state, state->strl*2);
if(state->length == state->strcap) //need more space to store lex token
irjp_lex_resize_strbuf(state, state->strcap*2);
RJP_lex_category cat = irjp_lex_char(state->buff[state->buffpos+i], state->node);
if(cat == rjp_lex_invalid){
//save necessary state and return previous state
state->buffpos = i + state->buffpos;
state->str[state->length] = 0;
return irjp_lex_accept(state->node, state);
@ -335,10 +348,12 @@ RJP_lex_category irjp_lex_cback(RJP_lex_state* state, RJP_parse_callback* cbacks
state->str[state->length] = state->buff[state->buffpos+i];
state->node = cat;
}
//read new values into buffer, reset buffer related state
chars_read = cbacks->read(state->buff, state->buffcap, cbacks->data);
state->buffpos = 0;
state->buffl = chars_read;
}
//lexing cannot continue due to lack of input
++state->buffpos;
state->str[state->length] = 0;
RJP_lex_category cat = state->node;

View File

@ -29,6 +29,9 @@
#define RJP_INITIAL_PARSE_DEPTH 16
#define RJP_PARSE_STATUS_ERR 1
#define RJP_PARSE_STATUS_SUC 2
typedef enum RJP_parse_target{
rjp_parse_end,
@ -173,13 +176,10 @@ static void irjp_delete_parse_state(RJP_parse_state* state){
state->root = NULL;
}
#define RJP_PARSE_STATUS_ERR 1
#define RJP_PARSE_STATUS_SUC 2
#define irjp_parse_error(str) do{ \
DIAG_PRINT(stderr, "%s: %d:%d\n", str, state->column, state->row); \
return RJP_PARSE_STATUS_ERR; \
}while(0)
//Emit a diagnostic for a failed parse and hand back the error status.
//The message is printed through DIAG_PRINT to stderr as "<str>: <column>:<row>"
//using the position counters stored in the parse state.
//Always returns RJP_PARSE_STATUS_ERR so callers can write
//`return irjp_parse_error(state, "...");`.
static inline int irjp_parse_error(RJP_parse_state* state, const char* str){
	const int status = RJP_PARSE_STATUS_ERR;

	DIAG_PRINT(stderr, "%s: %d:%d\n", str, state->column, state->row);
	return status;
}
static int irjp_parse_handle_lexcat(RJP_lex_category cat, RJP_parse_state* state){
if(cat == rjp_lex_line_comment || cat == rjp_lex_block_comment)
@ -193,14 +193,14 @@ static int irjp_parse_handle_lexcat(RJP_lex_category cat, RJP_parse_state* state
return RJP_PARSE_STATUS_SUC;
}
if(cat == rjp_lex_invalid)
irjp_parse_error("Invalid token");
return irjp_parse_error(state, "Invalid token");
switch(irjp_parse_stack_current(&state->target_stack)){
case rjp_parse_start:
irjp_parse_stack_set(&state->target_stack, rjp_parse_end);
if(irjp_init_value(state->root, cat, state)){
irjp_parse_error("Expected value");
return irjp_parse_error(state, "Expected value");
}
break;
case rjp_parse_first_mem_key:
@ -213,10 +213,10 @@ static int irjp_parse_handle_lexcat(RJP_lex_category cat, RJP_parse_state* state
if(cat == rjp_lex_string){
irjp_parse_stack_set(&state->target_stack, rjp_parse_key_colon);
if(!irjp_add_value_to_object(state, state->lexstate.str+state->lexstate.offset, state->lexstate.length)){
irjp_parse_error("Expected member key");
return irjp_parse_error(state, "Expected member key");
}
}else{
irjp_parse_error("Expected member key");
return irjp_parse_error(state, "Expected member key");
}
}
break;
@ -229,19 +229,19 @@ static int irjp_parse_handle_lexcat(RJP_lex_category cat, RJP_parse_state* state
case rjp_parse_arr_value:
irjp_parse_stack_set(&state->target_stack, rjp_parse_arr_comma);
if(!irjp_add_value_to_array(cat, state))
irjp_parse_error("Expected value");
return irjp_parse_error(state, "Expected value");
}
break;
case rjp_parse_key_colon:
if(cat != rjp_lex_colon)
irjp_parse_error("Expected member key");
return irjp_parse_error(state, "Expected member key");
irjp_parse_stack_set(&state->target_stack, rjp_parse_obj_value);
break;
case rjp_parse_obj_value:
irjp_parse_stack_set(&state->target_stack, rjp_parse_obj_comma);
if(irjp_init_value(state->lastadded, cat, state)){
irjp_parse_error("Expected value");
return irjp_parse_error(state, "Expected value");
}
break;
case rjp_parse_obj_comma:
@ -251,7 +251,7 @@ static int irjp_parse_handle_lexcat(RJP_lex_category cat, RJP_parse_state* state
irjp_parse_stack_pop(&state->target_stack);
state->curr = state->curr->parent;
}else{
irjp_parse_error("Expected comma");
return irjp_parse_error(state, "Expected comma");
}
break;
@ -262,39 +262,44 @@ static int irjp_parse_handle_lexcat(RJP_lex_category cat, RJP_parse_state* state
irjp_parse_stack_pop(&state->target_stack);
state->curr = state->curr->parent;
}else{
irjp_parse_error("Expected comma");
return irjp_parse_error(state, "Expected comma");
}
break;
case rjp_parse_end:
if(state->lexstate.str[state->lexstate.offset] != 0)
irjp_parse_error("Excess data after end of JSON");
return irjp_parse_error(state, "Excess data after end of JSON");
};
return RJP_PARSE_STATUS_SUC;
}
//Evaluate the lexer category that terminated the parse loop.
//rjp_lex_end is a non-accepting category, so it breaks the loop, but it only
//signals that the input ran out and is therefore a successful outcome.
//Checks are made in this order:
//  1. target stack not back at position 0 -> error "Missing closing brace"
//  2. category is rjp_lex_end             -> RJP_PARSE_STATUS_SUC
//  3. any other category                  -> error "Invalid Token"
static int irjp_handle_final_parse_token(RJP_parse_state* state, RJP_lex_category cat){
	const int unclosed = (state->target_stack.position != 0);

	if(unclosed){
		return irjp_parse_error(state, "Missing closing brace");
	}
	return (cat == rjp_lex_end) ? RJP_PARSE_STATUS_SUC
	                            : irjp_parse_error(state, "Invalid Token");
}
//Basic parse loop over a complete in-memory string.
//Repeatedly pulls a token category from irjp_lex and feeds it to
//irjp_parse_handle_lexcat for as long as the category has the rjp_lex_accept
//bit set. Returns RJP_PARSE_STATUS_ERR as soon as a token is rejected;
//otherwise the category that stopped the loop decides the final status.
static int irjp_parse(RJP_parse_state* state){
RJP_lex_category cat;
//after each handled token, state->row is advanced by the length of the token just lexed
for(cat = irjp_lex(&state->lexstate);cat & rjp_lex_accept;cat = irjp_lex(&state->lexstate),state->row += state->lexstate.length){
if(irjp_parse_handle_lexcat(cat, state) != RJP_PARSE_STATUS_SUC)
return RJP_PARSE_STATUS_ERR;
}
//loop ended on a non-accepting category: a non-empty target stack or any
//category other than rjp_lex_end is an error
if(state->target_stack.position != 0)
irjp_parse_error("Missing closing brace");
if(cat == rjp_lex_end)
return RJP_PARSE_STATUS_SUC;
irjp_parse_error("Invalid Token");
return irjp_handle_final_parse_token(state, cat);
}
//Callback parse loop: same structure as the string based loop, but tokens
//come from irjp_lex_cback, which reads its input through the supplied
//callback struct. Each category with the rjp_lex_accept bit set is passed to
//irjp_parse_handle_lexcat. Returns RJP_PARSE_STATUS_ERR as soon as a token is
//rejected; otherwise the category that stopped the loop decides the status.
static int irjp_parse_cback(RJP_parse_state* state, RJP_parse_callback* cback){
RJP_lex_category cat;
//after each handled token, state->row is advanced by the length of the token just lexed
for(cat = irjp_lex_cback(&state->lexstate, cback);cat & rjp_lex_accept;cat = irjp_lex_cback(&state->lexstate, cback),state->row += state->lexstate.length){
if(irjp_parse_handle_lexcat(cat, state) != RJP_PARSE_STATUS_SUC)
return RJP_PARSE_STATUS_ERR;
}
//loop ended on a non-accepting category: a non-empty target stack or any
//category other than rjp_lex_end is an error
if(state->target_stack.position != 0)
irjp_parse_error("Missing closing brace");
if(cat == rjp_lex_end)
return RJP_PARSE_STATUS_SUC;
irjp_parse_error("Invalid Token");
return irjp_handle_final_parse_token(state, cat);
}
RJP_value* rjp_parse(const char* str, int flags){
RJP_parse_state state = {.allow_comments = (flags & RJP_PARSE_ALLOW_COMMENTS),
@ -311,6 +316,8 @@ RJP_value* rjp_parse(const char* str, int flags){
}
}
//Callback based parse. Runs identically to normal parsing except that it sets
//up the callback lex state and calls the callback lex function.
RJP_value* rjp_parse_cback(int flags, RJP_parse_callback* cback){
RJP_parse_state state = {.allow_comments = (flags & RJP_PARSE_ALLOW_COMMENTS),
.allow_trail_comma = (flags & RJP_PARSE_ALLOW_TRAILING_COMMA)