diff --git a/TODO b/TODO
new file mode 100644
index 0000000..8c9f83d
--- /dev/null
+++ b/TODO
@@ -0,0 +1,3 @@
+Change string handling to work with chunked reading
+Change numeral handling to work with chunked reading
+handle scientific notation
diff --git a/include/rjp.h b/include/rjp.h
index 1943b72..58a115a 100644
--- a/include/rjp.h
+++ b/include/rjp.h
@@ -84,6 +84,8 @@ typedef struct RJP_search_res{
 
 //Convert C string consisting of json data into RJP's format
 RJP_value* rjp_parse(const char* str);
+//Chunked variant of rjp_parse; currently only a first chunk (prev_chunk == NULL) is parsed
+RJP_value* rjp_parse_chunked(const char* str, RJP_value* prev_chunk);
 
 //Initialize a root RJP_value to NULL
 RJP_value* rjp_init_json(void);
diff --git a/src/input.c b/src/input.c
index 2ebd604..5e50ec7 100644
--- a/src/input.c
+++ b/src/input.c
@@ -24,14 +24,17 @@
 #include "memory.h"
 #include <stdlib.h> //strtod, strtol
 #include <stdio.h> //fprintf, stderr
+#include <string.h> //memset
 
 //types of searches in the text
 typedef enum json_search_target{
-	json_key,
-	json_colon,
-	json_comma,
-	json_value,
-	json_none
+	json_target_key,
+	json_target_colon,
+	json_target_comma,
+	json_target_value,
+	json_target_string,
+	json_target_numeral,
+	json_target_none
 }json_search_target;
 
 static RJP_value* _rjp__add_value(RJP_value* curr, RJP_value new_val){
@@ -49,234 +52,354 @@ static RJP_value* _rjp__add_value(RJP_value* curr, RJP_value new_val){
 	curr->object.last->value = new_val;
 	return &curr->object.last->value;
 }
-#define syntax_error(msg, row, column)\
-	do{DIAG_PRINT(stderr, "Syntax error! %s (%i:%i)\n", msg, row, column);rjp_free_value(root);return NULL;}while(0)
-
 #define MAX_DEPTH 16
-RJP_value* rjp_parse(const char* str){
-	RJP_value* root = 0;
-	RJP_value* curr = 0;
-	int row = 1, column = 0;
-	int in_line_comment = 0;
-	int in_block_comment = 0;
-	//keep track of where we are in a given subobject
-	int state_stack[MAX_DEPTH] = {0},*top = state_stack;
+//Bookkeeping for a string value; nothing in this change reads or writes it yet
+typedef struct RJP_string_state{
+	int escaped;
+	int in_utf_sequence;
+	char* buffer; //store partial string here only when chunked reading and chunk ends mid string
+}RJP_string_state;
+
+//Bookkeeping for a numeral; nothing in this change reads or writes it yet
+typedef struct RJP_numeral_state{
+	int numlen;
+	char* buffer; //store partial number string here only when chunked reading and chunk ends mid number
+}RJP_numeral_state;
+
+//Everything rjp_parse tracks while walking the input, shared with the _rjp__handle_* helpers
+typedef struct RJP_parse_state{
+	RJP_value* root; //top level value, NULL until one has been read
+	RJP_value* curr; //value that new members/elements are added to
+	union{
+		RJP_string_state str_state;
+		RJP_numeral_state num_state;
+	};
+	int row, column; //position reported in syntax errors
+	int in_line_comment;
+	int in_block_comment;
+	int target_stack[MAX_DEPTH]; //json_search_target wanted at each object nesting level
+	int* target; //entry of target_stack for the current level
+}RJP_parse_state;
+
+//Reset state to the beginning of a document
+void _rjp__init_parse_state(RJP_parse_state* state){
+	state->root = NULL;
+	state->curr = NULL;
+	state->row = 1; //rows are reported 1-based, as they were before the state struct existed
+	state->column = 0;
+	state->in_line_comment = 0;
+	state->in_block_comment = 0;
+	memset(state->target_stack, 0, MAX_DEPTH*sizeof(int));
+	state->target = state->target_stack;
+}
+
+//Report msg with the current row:column through DIAG_PRINT and free whatever has been parsed so far
+static void syntax_error(const char* msg, RJP_parse_state* state){
+	DIAG_PRINT(stderr, "Syntax error! %s (%i:%i)\n", msg, state->row, state->column);
+	rjp_free_value(state->root);
+	state->root = NULL; //do not leave pointers to the freed tree behind
+	state->curr = NULL;
+}
+
+//Return number of characters handled while processing comment
+//(0 when str is neither inside a comment nor starting one)
+int _rjp__handle_comment(const char* str, RJP_parse_state* state){
+	char c = *str;
+	if(state->in_line_comment){
+		if(c == '\n')
+			state->in_line_comment = 0;
+		return 1;
+	}else if(state->in_block_comment){
+		if(c == '*' && *(str+1) == '/'){
+			state->in_block_comment = 0;
+			return 2;
+		}
+		return 1;
+	}else if(c == '/' && *(str+1) == '*'){ //start of block comment
+		state->in_block_comment = 1;
+		return 2;
+	}else if(c == '/' && *(str+1) == '/'){ //start of line comment
+		state->in_line_comment = 1;
+		return 2;
+	}
+	return 0;
+}
+//Handle one character while a key is expected; returns characters consumed or -1 on error
+int _rjp__handle_key(const char* str, RJP_parse_state* state){
+	char c = *str;
+	//start of key
+	if(c == '"'){
+		if(state->curr == NULL){
+			syntax_error("Key found outside of object definition!", state);
+			return -1;
+		}
+
+		int keylen;
+		int inclen;
+		char* new_string = _rjp__parse_string(state->root, str+1, &inclen, &keylen, &state->row, &state->column);
+		if(!new_string){
+			if(!keylen)
+				syntax_error("Cannot have empty key name!", state);
+			return -1;
+		}
+		_rjp__add_member_no_alloc(&(state->curr->object), new_string, keylen);
+		*state->target = json_target_colon;
+		return inclen+2;
+	//end of this object (object is empty)
+	}else if(c == '}'){
+		state->curr = state->curr->parent;
+		if(state->target != state->target_stack)
+			--state->target;
+		return 1;
+
+	//unrecognized character
+	}else if(!_rjp__is_whitespace(c)){
+		syntax_error("Unexpected character, expected '\"'!", state);
+		return -1;
+	}
+	return 1;
+}
+
+//Handle one character while the ':' after a key is expected; returns characters consumed or -1 on error
+int _rjp__handle_colon(const char* str, RJP_parse_state* state){
+	char c = *str;
+	//colon after a key
+	if(c == ':'){
+		*state->target = json_target_value;
+	//unrecognized character
+	}else if(!_rjp__is_whitespace(c)){
+		syntax_error( "Unexpected character, expected ':'!", state);
+		return -1;
+	}
+	return 1;
+}
+//Handle one character while ',' or the end of the current object/array is expected; returns characters consumed or -1 on error
+int _rjp__handle_comma(const char* str, RJP_parse_state* state){
+	char c = *str;
+	//comma separating keys in an object or values in an array
+	if(c == ','){
+		*state->target = (state->curr->type == json_array ? json_target_value : json_target_key);
+
+	//end of object
+	}else if(c == '}'){
+		if(state->curr->type == json_array){
+			syntax_error("Unexpected end of object within array!", state);
+			return -1;
+		}
+		state->curr = state->curr->parent;
+		if(state->target != state->target_stack)
+			--state->target;
+	//end of array
+	}else if(c == ']' && state->curr->type == json_array){
+		state->curr = state->curr->parent;
+	//unrecognized character
+	}else if(!_rjp__is_whitespace(c)){
+		syntax_error("Unexpected character, expected ','!", state);
+		return -1;
+	}
+	return 1;
+}
+
+//Handle the start of a value; returns characters consumed or -1 on error
+int _rjp__handle_value(const char* str, RJP_parse_state* state){
+	//object
+	char c = *str;
+	if(c == '{'){
+		if(!state->root){
+			state->root = _rjp__add_value(NULL, rjp_object());
+			state->curr = state->root;
+			*state->target = json_target_key;
+		}else{
+			if(state->target - state->target_stack >= MAX_DEPTH-1){ //target_stack has no slot left for another level
+				syntax_error("Object nesting is too deep!", state);
+				return -1;
+			}
+			state->curr = _rjp__add_value(state->curr, rjp_object());
+			*state->target = json_target_comma;
+			++state->target;
+			*state->target = json_target_key;
+		}
+		return 1;
+	}
+	else if(c == '['){
+		if(!state->root){
+			state->root = _rjp__add_value(NULL, rjp_array());
+			state->curr = state->root;
+
+		}else{
+			state->curr = _rjp__add_value(state->curr, rjp_array());
+		}
+		return 1;
+	}
+	else if(c == ']' && state->curr && state->curr->type == json_array){ //empty array
+		*state->target = json_target_comma;
+		state->curr = state->curr->parent;
+		return 1;
+	}
+	//strings
+	else if(c == '"'){
+		int vallen, inclen;
+		char* new_string = _rjp__parse_string(state->root, str+1, &inclen, &vallen, &state->row, &state->column);
+		if(!new_string){
+			if(vallen == 0){
+				new_string = rjp_calloc(1, 1);
+			}else{
+				return -1;
+			}
+		}
+		if(!state->curr){ //string is the root value: keep it, as is done for the other scalars
+			*state->target = json_target_none;
+			state->root = state->curr = _rjp__add_value(NULL, rjp_string(new_string, vallen));
+		}else{
+			*state->target = json_target_comma;
+			_rjp__add_value(state->curr, rjp_string(new_string, vallen));
+		}
+		return inclen+2;
+	}
+	//numbers
+	else if((c >= '0' && c <= '9') || c == '-'){
+		if(!state->curr)
+			*state->target = json_target_none;
+		else
+			*state->target = json_target_comma;
+		int numlen;
+		int floating = 0; //is an int or a double
+		for(numlen = 1;*(str+numlen) >= '0' && *(str+numlen) <= '9';++numlen);
+		if(*(str+numlen) == '.'){ //if we have a decimal, make it a double and continue parsing as a number
+			int i = ++numlen;
+			for(;*(str+numlen) >= '0' && *(str+numlen) <= '9';++numlen);
+			if(i == numlen){ //no number after decimal
+				syntax_error("Missing numerals after decimal place!", state);
+				return -1;
+			}
+			floating = 1;
+		}
+		if(*(str+numlen) == '\0' && state->curr){ //hit EOF early
+			syntax_error("Unexpected EOF before end of object!", state);
+			return -1;
+		}
+		if(c == '-' && numlen == 1){ //only have a '-' with no numbers
+			syntax_error("Missing numerals after '-' sign!", state);
+			return -1;
+		}
+		if(floating){
+			if(!state->root){
+				state->root = state->curr = _rjp__add_value(NULL, rjp_dfloat(strtod(str, NULL)));
+			}else{
+				_rjp__add_value(state->curr, rjp_dfloat(strtod(str, NULL)));
+			}
+		}else{
+			if(!state->root){
+				state->root = state->curr = _rjp__add_value(NULL, rjp_integer(strtol(str, NULL, 10)));
+			}else{
+				_rjp__add_value(state->curr, rjp_integer(strtol(str, NULL, 10)));
+			}
+		}
+		state->column += numlen;
+		return numlen;
+	}
+	//booleans and null
+	else if(!strncmp(str, "true", 4)){
+		if(!state->curr){
+			*state->target = json_target_none;
+			state->root = state->curr = _rjp__add_value(state->curr, rjp_boolean(1));
+		}else{
+			*state->target = json_target_comma;
+			_rjp__add_value(state->curr, rjp_boolean(1));
+		}
+		state->column += 3;
+		return 4;
+	}else if(!strncmp(str, "false", 5)){
+		if(!state->curr){
+			*state->target = json_target_none;
+			state->root = state->curr = _rjp__add_value(state->curr, rjp_boolean(0));
+		}else{
+			*state->target = json_target_comma;
+			_rjp__add_value(state->curr, rjp_boolean(0));
+		}
+		state->column += 4;
+		return 5;
+	}else if(!strncmp(str, "null", 4)){
+		if(!state->curr){
+			*state->target = json_target_none;
+			state->root = state->curr = _rjp__add_value(state->curr, rjp_null());
+		}else{
+			*state->target = json_target_comma;
+			_rjp__add_value(state->curr, rjp_null());
+		}
+		state->column += 3;
+		return 4;
+	}
+	//unrecognized character
+	else if(!_rjp__is_whitespace(c)){
+		syntax_error("Unexpected character!", state);
+		return -1;
+	}
+	return 1;
+}
+
+//Parse a complete json document; returns the root value, or NULL on error or when str holds no value
+RJP_value* rjp_parse(const char* str){
+	RJP_parse_state state;
+	_rjp__init_parse_state(&state);
 
 	//initially search for the root object
-	*top = json_value;
+	*state.target = json_target_value;
 
-	for(;*str != '\0';++str){
+	int inc = 0;
+	for(;*str != '\0';str += inc){
 		char c = *str;
 
 		//keep track of position in input file
 		if(c == '\n'){
-			++row;
-			column = 0;
+			++state.row;
+			state.column = 0;
 		}else{
-			++column;
+			++state.column;
 		}
 
-		//Handle comments
-		if(in_line_comment){
-			if(c == '\n')
-				in_line_comment = 0;
-		}
-		else if(in_block_comment){
-			if(c == '*' && *(str+1) == '/'){
-				in_block_comment = 0;
-				++str;
-			}
-		}
-		else if(c == '/' && *(str+1) == '/'){
-			in_line_comment = 1;
-			++str;
-		}
-		else if(c == '/' && *(str+1) == '*'){
-			in_block_comment = 1;
-			++str;
+		if((inc = _rjp__handle_comment(str, &state))){
+			continue;
 		}
 
-		else if(*top == json_key){
-			//start of key
-			if(c == '"'){
-				if(curr == NULL)
-					syntax_error("Key found outside of object definition!", row, column);
-
-				int keylen;
-				int inclen;
-				char* new_string = _rjp__parse_string(root, ++str, &inclen, &keylen, &row, &column);
-				if(!new_string){
-					if(!keylen)
-						syntax_error("Cannot have empty key name!", row, column);
-					return NULL;
-				}
-				_rjp__add_member_no_alloc(&curr->object, new_string, keylen);
-				str += inclen;
-				*top = json_colon;
-			//end of this object (object is empty)
-			}else if(c == '}'){
-				curr = curr->parent;
-				if(top != state_stack)
-					--top;
-
-			//unrecognized character
-			}else if(!_rjp__is_whitespace(c)){
-				syntax_error("Unexpected character, expected '\"'!", row, column);
+		if(state.root && !state.curr){ //root container already closed: only whitespace may follow
+			*state.target = json_target_none;
+		}
+		switch(*state.target){
+		case json_target_key:
+			inc = _rjp__handle_key(str, &state);
+			break;
+		case json_target_colon:
+			inc = _rjp__handle_colon(str, &state);
+			break;
+		case json_target_comma:
+			inc = _rjp__handle_comma(str, &state);
+			break;
+		case json_target_value:
+			inc = _rjp__handle_value(str, &state);
+			break;
+		case json_target_none:
+			if(!_rjp__is_whitespace(*str)){
+				syntax_error("Unexpected character!", &state);
+				return NULL;
 			}
-		}
-		else if(*top == json_colon){
-			//colon after a key
-			if(c == ':'){
-				*top = json_value;
-			//unrecognized character
-			}else if(!_rjp__is_whitespace(c)){
-				syntax_error( "Unexpected character, expected ':'!", row, column);
-			}
-		}
-		else if(*top == json_comma){
-			//comma separating keys in an object or values in an array
-			if(c == ','){
-				*top = (curr->type == json_array ? json_value : json_key);
-
-			//end of object
-			}else if(c == '}'){
-				if(curr->type == json_array){
-					syntax_error("Unexpected end of object within array!", row, column);
-				}
-				curr = curr->parent;
-				if(top != state_stack)
-					--top;
-			//end of array
-			}else if(c == ']' && curr->type == json_array){
-				curr = curr->parent;
-			//unrecognized character
-			}else if(!_rjp__is_whitespace(c)){
-				syntax_error("Unexpected character, expected ','!", row, column);
-			}
-		}
-		else if(*top == json_value){
-			//object
-			if(c == '{'){
-				if(!root){
-					root = _rjp__add_value(NULL, rjp_object());
-					curr = root;
-					*top = json_key;
-				}else{
-					curr = _rjp__add_value(curr, rjp_object());
-					*top = json_comma;
-					++top;
-					*top = json_key;
-				}
-			}
-			else if(c == '['){
-				if(!root){
-					root = _rjp__add_value(NULL, rjp_array());
-					curr = root;
-
-				}else{
-					curr = _rjp__add_value(curr, rjp_array());
-				}
-			}
-			else if(c == ']' && curr->type == json_array){ //empty array
-				*top = json_comma;
-				curr = curr->parent;
-			}
-			//strings
-			else if(c == '"'){
-				int vallen, inclen;
-				++str;
-				char* new_string = _rjp__parse_string(root, str, &inclen, &vallen, &row, &column);
-				if(!new_string){
-					if(vallen == 0){
-						new_string = rjp_calloc(1, 1);
-					}else{
-						return NULL;
-					}
-				}
-				_rjp__add_value(curr, rjp_string(new_string, vallen));
-				str += inclen;
-				*top = json_comma;
-			}
-			//numbers
-			else if((c >= '0' && c <= '9') || c == '-'){
-				if(!curr)
-					*top = json_none;
-				else
-					*top = json_comma;
-				int numlen;
-				int floating = 0; //is an int or a double
-				for(numlen = 1;*(str+numlen) >= '0' && *(str+numlen) <= '9';++numlen);
-				if(*(str+numlen) == '.'){ //if we have a decimal, make it a double and continue parsing as a number
-					int i = ++numlen;
-					for(;*(str+numlen) >= '0' && *(str+numlen) <= '9';++numlen);
-					if(i == numlen){ //no number after decimal
-						syntax_error("Missing numerals after decimal place!", row, column);
-					}
-					floating = 1;
-				}
-				if(*(str+numlen) == '\0' && curr){ //hit EOF early
-					syntax_error("Unexpected EOF before end of object!", row, column);
-				}
-				if(c == '-' && numlen == 1){ //only have a '-' with no numbers
-					syntax_error("Missing numerals ofter '-' sign!", row, column);
-				}
-				if(floating){
-					if(!root){
-						root = curr = _rjp__add_value(NULL, rjp_dfloat(strtod(str, NULL)));
-					}else{
-						_rjp__add_value(curr, rjp_dfloat(strtod(str, NULL)));
-					}
-				}else{
-					if(!root){
-						root = curr = _rjp__add_value(NULL, rjp_integer(strtol(str, NULL, 10)));
-					}else{
-						_rjp__add_value(curr, rjp_integer(strtol(str, NULL, 10)));
-					}
-				}
-				str += (numlen-1);
-				column += numlen;
-			}
-			//booleans and null
-			else if(!strncmp(str, "true", 4)){
-				if(!curr){
-					*top = json_none;
-					root = curr = _rjp__add_value(curr, rjp_boolean(1));
-				}else{
-					*top = json_comma;
-					_rjp__add_value(curr, rjp_boolean(1));
-				}
-				str += 3;column += 3;
-			}else if(!strncmp(str, "false", 5)){
-				if(!curr){
-					*top = json_none;
-					root = curr = _rjp__add_value(curr, rjp_boolean(0));
-				}else{
-					*top = json_comma;
-					_rjp__add_value(curr, rjp_boolean(0));
-				}
-				str += 4;column += 4;
-			}else if(!strncmp(str, "null", 4)){
-				if(!curr){
-					*top = json_none;
-					root = curr = _rjp__add_value(curr, rjp_null());
-				}else{
-					*top = json_comma;
-					_rjp__add_value(curr, rjp_null());
-				}
-				str += 3;column += 3;
-			}
-			//unrecognized character
-			else if(!_rjp__is_whitespace(c)){
-				syntax_error("Unexpected character!", row, column);
-			}
-		}else if(*top == json_none && !_rjp__is_whitespace(c)){
-			syntax_error("Unexpected character!", row, column);
-		}
+			inc = 1;
+			break;
+		default:
+			inc = 1;
+			break;
+		};
+		if(inc < 0){ //a handler hit an error: stop instead of stepping str backwards
+			return NULL;
+		}
 	}
 
-	return root;
+	return state.root;
+}
+//Chunked entry point; only a first chunk (prev_chunk == NULL) is handled so far, anything else yields NULL
+RJP_value* rjp_parse_chunked(const char* str, RJP_value* prev_chunk){
+	if(!prev_chunk){
+		return rjp_parse(str);
+	}
+	return NULL;
 }
-
-#undef syntax_error
-
-