Clean up and comment a bit more of the source
This commit is contained in:
parent
d6213cfe1b
commit
32db7680d7
@ -31,6 +31,8 @@ extern "C"{
|
||||
# else
|
||||
# define DEPRECATED(str)
|
||||
# endif
|
||||
# else
|
||||
# define DEPRECATED(str)
|
||||
# endif
|
||||
#else
|
||||
# if __STDC_VERSION__ > 201710L
|
||||
@ -63,11 +65,12 @@ typedef enum RJP_format_flag{
|
||||
RJP_FORMAT_PRETTY = 1
|
||||
}RJP_format_flag;
|
||||
|
||||
//Option flags for the `flags` argument of rjp_parse and rjp_parse_cback.
//The two extension flags use distinct bits so they can be combined with bitwise OR.
|
||||
typedef enum RJP_parse_flag{
|
||||
RJP_PARSE_NONE = 0, //no extension bits set
|
||||
RJP_PARSE_ALLOW_COMMENTS = 1, //tested with `flags & RJP_PARSE_ALLOW_COMMENTS` to set allow_comments
|
||||
RJP_PARSE_ALLOW_TRAILING_COMMA = 2, //tested with `flags & RJP_PARSE_ALLOW_TRAILING_COMMA` to set allow_trail_comma
|
||||
RJP_PARSE_ALL_EXT = 3
|
||||
//both extension bits; spelled as an OR of the named flags so it tracks them (same value, 3)
RJP_PARSE_ALL_EXT = RJP_PARSE_ALLOW_COMMENTS | RJP_PARSE_ALLOW_TRAILING_COMMA
|
||||
}RJP_parse_flag;
|
||||
|
||||
//type of data
|
||||
@ -130,6 +133,7 @@ RJP_string rjp_escape(const char* src);
|
||||
/***************** GENERIC OPERATIONS *******************/
|
||||
//Convert C string consisting of json data into RJP's format
|
||||
RJP_value* rjp_parse(const char* str, int flags);
|
||||
//Read json data in using a user supplied callback and convert it to RJP's format
|
||||
RJP_value* rjp_parse_cback(int flags, RJP_parse_callback* cbacks);
|
||||
char* rjp_to_json(const RJP_value* root, int pretty);
|
||||
|
||||
|
||||
@ -25,6 +25,7 @@
|
||||
#define RJP_LEX_CBACK_STR_SIZE 64
|
||||
|
||||
#define rjp_lex_accept 1
|
||||
//DFA states. odd numbers are accepting states
|
||||
typedef enum RJP_lex_category{
|
||||
rjp_lex_start = 0,
|
||||
rjp_lex_obracket = 3,
|
||||
@ -69,15 +70,15 @@ typedef enum RJP_lex_category{
|
||||
}RJP_lex_category;
|
||||
|
||||
//Lexer state used by both the single-string lexer (irjp_lex) and the
//callback based lexer (irjp_lex_cback). Several members are only meaningful
//for the callback lexer; those are marked below.
typedef struct RJP_lex_state{
|
||||
char* str;
|
||||
char* buff;
|
||||
RJP_index strl;
|
||||
RJP_index buffl;
|
||||
RJP_index buffcap;
|
||||
RJP_index buffpos;
|
||||
RJP_lex_category node;
|
||||
RJP_index length;
|
||||
RJP_index offset;
|
||||
char* str; //must hold the text the parser will use to create tokens, e.g. the contents of strings
|
||||
char* buff; //holds temporary data read through the user callback in the callback based lexer
|
||||
RJP_index strcap; //capacity of str. used in callback lexer
|
||||
RJP_index buffl; //length of buff currently in use. used in callback lexer
|
||||
RJP_index buffcap; //capacity of buff. used in callback lexer
|
||||
RJP_index buffpos; //current position in buff being read. used in callback lexer
|
||||
RJP_lex_category node; //tracks the current DFA state
|
||||
RJP_index length; //length of the current token which the parser will utilize
|
||||
RJP_index offset; //offset in the str buffer that the parser should start from. must be 0 in callback lexer
|
||||
}RJP_lex_state;
|
||||
|
||||
void irjp_init_lex_cback_state(RJP_lex_state* state);
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
//Set up the buffers a lex state needs for callback based lexing.
//Allocates the token buffer (RJP_LEX_CBACK_STR_SIZE bytes plus one terminator byte)
//and the read buffer (RJP_LEX_CBACK_BUFFER_SIZE bytes) and records their capacities.
//No other member of *state is written here.
//NOTE(review): the results of rjp_alloc are used without a NULL check - confirm rjp_alloc cannot fail
void irjp_init_lex_cback_state(RJP_lex_state* state){
|
||||
state->str = rjp_alloc(RJP_LEX_CBACK_STR_SIZE+1);
|
||||
//the extra byte past the capacity always holds a terminating 0
state->str[RJP_LEX_CBACK_STR_SIZE] = 0;
|
||||
state->strl = RJP_LEX_CBACK_STR_SIZE;
|
||||
state->strcap = RJP_LEX_CBACK_STR_SIZE;
|
||||
state->buff = rjp_alloc(RJP_LEX_CBACK_BUFFER_SIZE);
|
||||
state->buffcap = RJP_LEX_CBACK_BUFFER_SIZE;
|
||||
}
|
||||
@ -292,6 +292,9 @@ static RJP_lex_category irjp_lex_char(char ch, RJP_lex_category node){
|
||||
};
|
||||
return node;
|
||||
}
|
||||
//straightforward lex. All json data is in a single string
|
||||
//use state->str as constant string. index into it using state->offset and
|
||||
//state->length to acquire tokens in the parser
|
||||
RJP_lex_category irjp_lex(RJP_lex_state* state){
|
||||
state->offset += state->length;
|
||||
state->length = 0;
|
||||
@ -308,26 +311,36 @@ RJP_lex_category irjp_lex(RJP_lex_state* state){
|
||||
|
||||
//Replace state->str with a newly allocated buffer of newsize+1 bytes.
//The bytes of the old buffer (up to its recorded capacity) are copied over, the
//last byte of the new buffer is set to 0, the old buffer is freed and the
//recorded capacity becomes newsize.
//NOTE(review): the copy length is the OLD capacity, so newsize is assumed to be at
//least that large; the caller in irjp_lex_cback passes twice the current capacity.
//NOTE(review): the result of rjp_alloc is used without a NULL check - confirm rjp_alloc cannot fail
static void irjp_lex_resize_strbuf(RJP_lex_state* state, int newsize){
|
||||
char* newbuf = rjp_alloc(newsize+1);
|
||||
memcpy(newbuf, state->str, state->strl);
|
||||
memcpy(newbuf, state->str, state->strcap);
|
||||
//terminator lives in the extra byte past the usable capacity
newbuf[newsize] = 0;
|
||||
rjp_free(state->str);
|
||||
state->str = newbuf;
|
||||
state->strl = newsize;
|
||||
state->strcap = newsize;
|
||||
}
|
||||
//user callback based lexer. Not all json data is available at one time.
|
||||
//Tokens need to be saved in a secondary buffer for the parser to have access.
|
||||
//state->str is where the secondary buffer is located and state->length
|
||||
//is used to track its size. state->offset MUST be 0 for parser to get proper
|
||||
//token values
|
||||
RJP_lex_category irjp_lex_cback(RJP_lex_state* state, RJP_parse_callback* cbacks){
|
||||
state->length = 0;
|
||||
//pick up from previous invocation
|
||||
RJP_index chars_read = state->buffl;
|
||||
if(chars_read == 0){
|
||||
state->buffpos = 0;
|
||||
chars_read = cbacks->read(state->buff, state->buffcap, cbacks->data);
|
||||
state->buffl = chars_read;
|
||||
}
|
||||
|
||||
//loop until callback returns 0 new chars
|
||||
while(chars_read > 0){
|
||||
//loop over all characters in current buffer
|
||||
for(RJP_index i = 0;(i+state->buffpos) < chars_read;++i,++state->length){
|
||||
if(state->length == state->strl)
|
||||
irjp_lex_resize_strbuf(state, state->strl*2);
|
||||
if(state->length == state->strcap) //need more space to store lex token
|
||||
irjp_lex_resize_strbuf(state, state->strcap*2);
|
||||
RJP_lex_category cat = irjp_lex_char(state->buff[state->buffpos+i], state->node);
|
||||
if(cat == rjp_lex_invalid){
|
||||
//save necessary state and return previous state
|
||||
state->buffpos = i + state->buffpos;
|
||||
state->str[state->length] = 0;
|
||||
return irjp_lex_accept(state->node, state);
|
||||
@ -335,10 +348,12 @@ RJP_lex_category irjp_lex_cback(RJP_lex_state* state, RJP_parse_callback* cbacks
|
||||
state->str[state->length] = state->buff[state->buffpos+i];
|
||||
state->node = cat;
|
||||
}
|
||||
//read new values into buffer, reset buffer related state
|
||||
chars_read = cbacks->read(state->buff, state->buffcap, cbacks->data);
|
||||
state->buffpos = 0;
|
||||
state->buffl = chars_read;
|
||||
}
|
||||
//lexing cannot continue due to lack of input
|
||||
++state->buffpos;
|
||||
state->str[state->length] = 0;
|
||||
RJP_lex_category cat = state->node;
|
||||
|
||||
@ -29,6 +29,9 @@
|
||||
|
||||
|
||||
#define RJP_INITIAL_PARSE_DEPTH 16
|
||||
#define RJP_PARSE_STATUS_ERR 1
|
||||
#define RJP_PARSE_STATUS_SUC 2
|
||||
|
||||
|
||||
typedef enum RJP_parse_target{
|
||||
rjp_parse_end,
|
||||
@ -173,13 +176,10 @@ static void irjp_delete_parse_state(RJP_parse_state* state){
|
||||
state->root = NULL;
|
||||
}
|
||||
|
||||
#define RJP_PARSE_STATUS_ERR 1
|
||||
#define RJP_PARSE_STATUS_SUC 2
|
||||
|
||||
#define irjp_parse_error(str) do{ \
|
||||
DIAG_PRINT(stderr, "%s: %d:%d\n", str, state->column, state->row); \
|
||||
return RJP_PARSE_STATUS_ERR; \
|
||||
}while(0)
|
||||
//Report a parse error and hand back the error status.
//Prints str followed by state->column and state->row through DIAG_PRINT, then
//always returns RJP_PARSE_STATUS_ERR so call sites can write
//`return irjp_parse_error(state, "...");`.
//NOTE(review): column is printed before row - confirm that is the intended order
static inline int irjp_parse_error(RJP_parse_state* state, const char* str){
|
||||
DIAG_PRINT(stderr, "%s: %d:%d\n", str, state->column, state->row);
|
||||
return RJP_PARSE_STATUS_ERR;
|
||||
}
|
||||
|
||||
static int irjp_parse_handle_lexcat(RJP_lex_category cat, RJP_parse_state* state){
|
||||
if(cat == rjp_lex_line_comment || cat == rjp_lex_block_comment)
|
||||
@ -193,14 +193,14 @@ static int irjp_parse_handle_lexcat(RJP_lex_category cat, RJP_parse_state* state
|
||||
return RJP_PARSE_STATUS_SUC;
|
||||
}
|
||||
if(cat == rjp_lex_invalid)
|
||||
irjp_parse_error("Invalid token");
|
||||
return irjp_parse_error(state, "Invalid token");
|
||||
|
||||
switch(irjp_parse_stack_current(&state->target_stack)){
|
||||
|
||||
case rjp_parse_start:
|
||||
irjp_parse_stack_set(&state->target_stack, rjp_parse_end);
|
||||
if(irjp_init_value(state->root, cat, state)){
|
||||
irjp_parse_error("Expected value");
|
||||
return irjp_parse_error(state, "Expected value");
|
||||
}
|
||||
break;
|
||||
case rjp_parse_first_mem_key:
|
||||
@ -213,10 +213,10 @@ static int irjp_parse_handle_lexcat(RJP_lex_category cat, RJP_parse_state* state
|
||||
if(cat == rjp_lex_string){
|
||||
irjp_parse_stack_set(&state->target_stack, rjp_parse_key_colon);
|
||||
if(!irjp_add_value_to_object(state, state->lexstate.str+state->lexstate.offset, state->lexstate.length)){
|
||||
irjp_parse_error("Expected member key");
|
||||
return irjp_parse_error(state, "Expected member key");
|
||||
}
|
||||
}else{
|
||||
irjp_parse_error("Expected member key");
|
||||
return irjp_parse_error(state, "Expected member key");
|
||||
}
|
||||
}
|
||||
break;
|
||||
@ -229,19 +229,19 @@ static int irjp_parse_handle_lexcat(RJP_lex_category cat, RJP_parse_state* state
|
||||
case rjp_parse_arr_value:
|
||||
irjp_parse_stack_set(&state->target_stack, rjp_parse_arr_comma);
|
||||
if(!irjp_add_value_to_array(cat, state))
|
||||
irjp_parse_error("Expected value");
|
||||
return irjp_parse_error(state, "Expected value");
|
||||
}
|
||||
break;
|
||||
|
||||
case rjp_parse_key_colon:
|
||||
if(cat != rjp_lex_colon)
|
||||
irjp_parse_error("Expected member key");
|
||||
return irjp_parse_error(state, "Expected member key");
|
||||
irjp_parse_stack_set(&state->target_stack, rjp_parse_obj_value);
|
||||
break;
|
||||
case rjp_parse_obj_value:
|
||||
irjp_parse_stack_set(&state->target_stack, rjp_parse_obj_comma);
|
||||
if(irjp_init_value(state->lastadded, cat, state)){
|
||||
irjp_parse_error("Expected value");
|
||||
return irjp_parse_error(state, "Expected value");
|
||||
}
|
||||
break;
|
||||
case rjp_parse_obj_comma:
|
||||
@ -251,7 +251,7 @@ static int irjp_parse_handle_lexcat(RJP_lex_category cat, RJP_parse_state* state
|
||||
irjp_parse_stack_pop(&state->target_stack);
|
||||
state->curr = state->curr->parent;
|
||||
}else{
|
||||
irjp_parse_error("Expected comma");
|
||||
return irjp_parse_error(state, "Expected comma");
|
||||
}
|
||||
break;
|
||||
|
||||
@ -262,39 +262,44 @@ static int irjp_parse_handle_lexcat(RJP_lex_category cat, RJP_parse_state* state
|
||||
irjp_parse_stack_pop(&state->target_stack);
|
||||
state->curr = state->curr->parent;
|
||||
}else{
|
||||
irjp_parse_error("Expected comma");
|
||||
return irjp_parse_error(state, "Expected comma");
|
||||
}
|
||||
break;
|
||||
|
||||
case rjp_parse_end:
|
||||
if(state->lexstate.str[state->lexstate.offset] != 0)
|
||||
irjp_parse_error("Excess data after end of JSON");
|
||||
return irjp_parse_error(state, "Excess data after end of JSON");
|
||||
};
|
||||
return RJP_PARSE_STATUS_SUC;
|
||||
}
|
||||
//Handle the final token returned by the lexer, i.e. the non-accepting category that
|
||||
//ended the parse loop. rjp_lex_end is non-accepting so that it breaks the loop, but it
//only indicates end of input and is therefore a success.
//Returns RJP_PARSE_STATUS_SUC when the target stack is back at position 0 and cat is
//rjp_lex_end; otherwise reports the problem via irjp_parse_error and returns its status.
|
||||
static int irjp_handle_final_parse_token(RJP_parse_state* state, RJP_lex_category cat){
|
||||
//a non-zero stack position means a nested target is still open
if(state->target_stack.position != 0)
|
||||
return irjp_parse_error(state, "Missing closing brace");
|
||||
if(cat == rjp_lex_end)
|
||||
return RJP_PARSE_STATUS_SUC;
|
||||
//any other non-accepting category is a lexing failure
return irjp_parse_error(state, "Invalid Token");
|
||||
}
|
||||
|
||||
//Basic parse loop. Tokens come from irjp_lex.
//Each category is passed to irjp_parse_handle_lexcat for as long as it has the
//rjp_lex_accept bit set. Returns RJP_PARSE_STATUS_ERR as soon as a token is rejected;
//otherwise the category that ended the loop decides the result.
|
||||
static int irjp_parse(RJP_parse_state* state){
|
||||
RJP_lex_category cat;
|
||||
//after every lex call except the first, state->row is advanced by the length of the token just lexed
for(cat = irjp_lex(&state->lexstate);cat & rjp_lex_accept;cat = irjp_lex(&state->lexstate),state->row += state->lexstate.length){
|
||||
if(irjp_parse_handle_lexcat(cat, state) != RJP_PARSE_STATUS_SUC)
|
||||
return RJP_PARSE_STATUS_ERR;
|
||||
}
|
||||
if(state->target_stack.position != 0)
|
||||
irjp_parse_error("Missing closing brace");
|
||||
if(cat == rjp_lex_end)
|
||||
return RJP_PARSE_STATUS_SUC;
|
||||
irjp_parse_error("Invalid Token");
|
||||
//loop ended on a non-accepting category: irjp_handle_final_parse_token classifies it
return irjp_handle_final_parse_token(state, cat);
|
||||
}
|
||||
|
||||
//Callback parse loop. Same shape as irjp_parse, but tokens come from irjp_lex_cback,
//which is given the user supplied callbacks in cback.
//Returns RJP_PARSE_STATUS_ERR as soon as irjp_parse_handle_lexcat rejects a token;
//otherwise the category that ended the loop decides the result.
|
||||
static int irjp_parse_cback(RJP_parse_state* state, RJP_parse_callback* cback){
|
||||
RJP_lex_category cat;
|
||||
//after every lex call except the first, state->row is advanced by the length of the token just lexed
for(cat = irjp_lex_cback(&state->lexstate, cback);cat & rjp_lex_accept;cat = irjp_lex_cback(&state->lexstate, cback),state->row += state->lexstate.length){
|
||||
if(irjp_parse_handle_lexcat(cat, state) != RJP_PARSE_STATUS_SUC)
|
||||
return RJP_PARSE_STATUS_ERR;
|
||||
}
|
||||
if(state->target_stack.position != 0)
|
||||
irjp_parse_error("Missing closing brace");
|
||||
if(cat == rjp_lex_end)
|
||||
return RJP_PARSE_STATUS_SUC;
|
||||
irjp_parse_error("Invalid Token");
|
||||
//loop ended on a non-accepting category: irjp_handle_final_parse_token classifies it
return irjp_handle_final_parse_token(state, cat);
|
||||
}
|
||||
RJP_value* rjp_parse(const char* str, int flags){
|
||||
RJP_parse_state state = {.allow_comments = (flags & RJP_PARSE_ALLOW_COMMENTS),
|
||||
@ -311,6 +316,8 @@ RJP_value* rjp_parse(const char* str, int flags){
|
||||
}
|
||||
}
|
||||
|
||||
//Callback based parse. Runs identical to normal parsing except sets up callback
|
||||
//lex state and calls callback lex function
|
||||
RJP_value* rjp_parse_cback(int flags, RJP_parse_callback* cback){
|
||||
RJP_parse_state state = {.allow_comments = (flags & RJP_PARSE_ALLOW_COMMENTS),
|
||||
.allow_trail_comma = (flags & RJP_PARSE_ALLOW_TRAILING_COMMA)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user