Working on enabling chunked reading

This commit is contained in:
Rexy712 2019-06-10 17:51:45 -07:00
parent 4577836f8e
commit dc0c003785
3 changed files with 316 additions and 219 deletions

3
TODO Normal file
View File

@ -0,0 +1,3 @@
Change string handling to work with chunked reading
Change numeral handling to work with chunked reading
handle scientific notation

View File

@ -84,6 +84,7 @@ typedef struct RJP_search_res{
//Convert C string consisting of json data into RJP's format
RJP_value* rjp_parse(const char* str);
RJP_value* rjp_parse_chunked(const char* str, RJP_value* prev_chunk);
//Initialize a root RJP_value to NULL
RJP_value* rjp_init_json(void);

View File

@ -24,14 +24,17 @@
#include "memory.h"
#include <stdlib.h> //strtod, strtol
#include <stdio.h> //fprintf, stderr
#include <string.h> //memset
//types of searches in the text
typedef enum json_search_target{
json_key,
json_colon,
json_comma,
json_value,
json_none
json_target_key,
json_target_colon,
json_target_comma,
json_target_value,
json_target_string,
json_target_numeral,
json_target_none
}json_search_target;
static RJP_value* _rjp__add_value(RJP_value* curr, RJP_value new_val){
@ -49,234 +52,324 @@ static RJP_value* _rjp__add_value(RJP_value* curr, RJP_value new_val){
curr->object.last->value = new_val;
return &curr->object.last->value;
}
#define syntax_error(msg, row, column)\
do{DIAG_PRINT(stderr, "Syntax error! %s (%i:%i)\n", msg, row, column);rjp_free_value(root);return NULL;}while(0)
#define MAX_DEPTH 16
RJP_value* rjp_parse(const char* str){
RJP_value* root = 0;
RJP_value* curr = 0;
int row = 1, column = 0;
int in_line_comment = 0;
int in_block_comment = 0;
//keep track of where we are in a given subobject
int state_stack[MAX_DEPTH] = {0},*top = state_stack;
//Bookkeeping for a string token that is still being read.
//NOTE(review): none of the code visible here reads or writes these fields yet;
//the per-field notes on escaped/in_utf_sequence are inferred from the names only.
typedef struct RJP_string_state{
int escaped; //presumably non-zero when the previous character was a backslash - TODO confirm
int in_utf_sequence; //presumably non-zero while inside a \u escape sequence - TODO confirm
char* buffer; //store partial string here only when chunked reading and chunk ends mid string
}RJP_string_state;
//Bookkeeping for a numeral token that is still being read.
//NOTE(review): none of the code visible here reads or writes these fields yet.
typedef struct RJP_numeral_state{
int numlen; //presumably the number of numeral characters collected so far - TODO confirm
char* buffer; //store partial number string here only when chunked reading and chunk ends mid number
}RJP_numeral_state;
//Parser state shared between rjp_parse and the _rjp__handle_* helpers.
typedef struct RJP_parse_state{
RJP_value* root; //top level value of the document, NULL until the first value is created
RJP_value* curr; //container (or root scalar) that new members/values are added to
//Only one kind of token can be in progress at a time, so the two states share storage
union{
RJP_string_state str_state;
RJP_numeral_state num_state;
};
int row, column; //position in the input, printed in syntax error messages
int in_line_comment; //non-zero while skipping a line comment
int in_block_comment; //non-zero while skipping a block comment
int target_stack[MAX_DEPTH]; //what the parser expects next (a json_search_target), one slot per nested object
int* target; //slot of target_stack that is currently in use
}RJP_parse_state;
//Reset a parse state so that parsing can begin at the start of a document.
//state must point to writable storage. Whatever it held before is discarded
//without being freed.
void _rjp__init_parse_state(RJP_parse_state* state){
	//Clear the whole structure first so the string/numeral union and the
	//target stack never hold indeterminate values
	memset(state, 0, sizeof(*state));
	state->root = NULL;
	state->curr = NULL;
	//Rows are reported 1 based; the column is advanced before each character
	//is examined so it starts from 0
	state->row = 1;
	state->column = 0;
	state->in_line_comment = 0;
	state->in_block_comment = 0;
	state->target = state->target_stack;
}
//Report a syntax error at the position recorded in state and release the
//partially built value tree.
//msg: description of the problem, printed before the row:column pair.
//The tree pointers are cleared after the free so that the released memory can
//neither be handed back to the caller nor freed a second time through state.
static void syntax_error(const char* msg, RJP_parse_state* state){
	DIAG_PRINT(stderr, "Syntax error! %s (%i:%i)\n", msg, state->row, state->column);
	rjp_free_value(state->root);
	state->root = NULL;
	state->curr = NULL;
}
//Skip comment text at the current position.
//str: current position in the input, must not point at the terminator.
//state: in_line_comment / in_block_comment are updated as comments open and close.
//Return number of characters handled while processing comment: 0 when str is
//neither inside nor at the start of a comment, otherwise 1 or 2.
int _rjp__handle_comment(const char* str, RJP_parse_state* state){
	char c = *str;
	if(state->in_line_comment){
		//a line comment ends at the newline, which is consumed with it
		if(c == '\n')
			state->in_line_comment = 0;
		return 1;
	}
	if(state->in_block_comment){
		if(c == '*' && *(str+1) == '/'){
			state->in_block_comment = 0;
			return 2;
		}
		return 1;
	}
	if(c != '/')
		return 0;
	//c is not the terminator, so looking one character ahead stays inside the string
	switch(*(str+1)){
	case '*': //start of a block comment
		state->in_block_comment = 1;
		return 2;
	case '/': //start of a line comment
		state->in_line_comment = 1;
		return 2;
	default: //a lone '/' is left for the caller to reject
		return 0;
	}
}
//Process one token while the parser expects a member key.
//str: current position in the input.
//state: curr receives the new member; the current target becomes
//       json_target_colon once a key has been read.
//Returns the number of characters consumed, or -1 on failure (a syntax error
//has been reported for every failure except a non-empty string that
//_rjp__parse_string itself rejected).
int _rjp__handle_key(const char* str, RJP_parse_state* state){
	char c = *str;
	//start of key
	if(c == '"'){
		if(state->curr == NULL){
			syntax_error("Key found outside of object definition!", state);
			return -1;
		}
		int keylen;
		int inclen;
		char* new_string = _rjp__parse_string(state->root, str+1, &inclen, &keylen, &state->row, &state->column);
		if(!new_string){
			if(!keylen)
				syntax_error("Cannot have empty key name!", state);
			return -1;
		}
		_rjp__add_member_no_alloc(&(state->curr->object), new_string, keylen);
		*state->target = json_target_colon;
		//opening quote + string body + closing quote
		return inclen+2;
	//end of this object (object is empty)
	}else if(c == '}'){
		//An empty root object leaves this target active with no open object,
		//so a further '}' has nothing to close
		if(state->curr == NULL){
			syntax_error("Unexpected end of object outside of object definition!", state);
			return -1;
		}
		state->curr = state->curr->parent;
		if(state->target != state->target_stack)
			--state->target;
		return 1;
	//unrecognized character
	}else if(!_rjp__is_whitespace(c)){
		syntax_error("Unexpected character, expected '\"'!", state);
		return -1;
	}
	return 1;
}
//Process one character while the parser expects the ':' that follows a key.
//A colon switches the current target to json_target_value, whitespace is
//skipped and anything else is reported as a syntax error.
//Returns 1 (one character consumed) or -1 after a syntax error.
int _rjp__handle_colon(const char* str, RJP_parse_state* state){
	const char ch = *str;
	if(ch == ':'){
		*state->target = json_target_value;
		return 1;
	}
	if(_rjp__is_whitespace(ch))
		return 1;
	syntax_error( "Unexpected character, expected ':'!", state);
	return -1;
}
//Process one character while the parser expects a separator or the end of
//the current container.
//str: current position in the input.
//state: curr moves to its parent when a container closes; the current target
//       is updated when a ',' is read or an object closes.
//Returns 1 (one character consumed) or -1 after a syntax error.
int _rjp__handle_comma(const char* str, RJP_parse_state* state){
	char c = *str;
	//Once the root container has been closed there is no current value left,
	//so only trailing whitespace is acceptable
	if(state->curr == NULL){
		if(_rjp__is_whitespace(c))
			return 1;
		syntax_error("Unexpected character after end of root value!", state);
		return -1;
	}
	//comma separating keys in an object or values in an array
	if(c == ','){
		*state->target = (state->curr->type == json_array ? json_target_value : json_target_key);
	//end of object
	}else if(c == '}'){
		if(state->curr->type == json_array){
			syntax_error("Unexpected end of object within array!", state);
			return -1;
		}
		state->curr = state->curr->parent;
		//the root object uses the bottom slot, which is never popped
		if(state->target != state->target_stack)
			--state->target;
	//end of array
	}else if(c == ']' && state->curr->type == json_array){
		state->curr = state->curr->parent;
	//unrecognized character
	}else if(!_rjp__is_whitespace(c)){
		syntax_error("Unexpected character, expected ','!", state);
		return -1;
	}
	return 1;
}
int _rjp__handle_value(const char* str, RJP_parse_state* state){
//object
char c = *str;
if(c == '{'){
if(!state->root){
state->root = _rjp__add_value(NULL, rjp_object());
state->curr = state->root;
*state->target = json_target_key;
}else{
state->curr = _rjp__add_value(state->curr, rjp_object());
*state->target = json_target_comma;
++state->target;
*state->target = json_target_key;
}
return 1;
}
else if(c == '['){
if(!state->root){
state->root = _rjp__add_value(NULL, rjp_array());
state->curr = state->root;
}else{
state->curr = _rjp__add_value(state->curr, rjp_array());
}
return 1;
}
else if(c == ']' && state->curr->type == json_array){ //empty array
*state->target = json_target_comma;
state->curr = state->curr->parent;
return 1;
}
//strings
else if(c == '"'){
int vallen, inclen;
char* new_string = _rjp__parse_string(state->root, str+1, &inclen, &vallen, &state->row, &state->column);
if(!new_string){
if(vallen == 0){
new_string = rjp_calloc(1, 1);
}else{
return -1;
}
}
_rjp__add_value(state->curr, rjp_string(new_string, vallen));
*state->target = json_target_comma;
return inclen+2;
}
//numbers
else if((c >= '0' && c <= '9') || c == '-'){
if(!state->curr)
*state->target = json_target_none;
else
*state->target = json_target_comma;
int numlen;
int floating = 0; //is an int or a double
for(numlen = 1;*(str+numlen) >= '0' && *(str+numlen) <= '9';++numlen);
if(*(str+numlen) == '.'){ //if we have a decimal, make it a double and continue parsing as a number
int i = ++numlen;
for(;*(str+numlen) >= '0' && *(str+numlen) <= '9';++numlen);
if(i == numlen){ //no number after decimal
syntax_error("Missing numerals after decimal place!", state);
return -1;
}
floating = 1;
}
if(*(str+numlen) == '\0' && state->curr){ //hit EOF early
syntax_error("Unexpected EOF before end of object!", state);
return -1;
}
if(c == '-' && numlen == 1){ //only have a '-' with no numbers
syntax_error("Missing numerals after '-' sign!", state);
return -1;
}
if(floating){
if(!state->root){
state->root = state->curr = _rjp__add_value(NULL, rjp_dfloat(strtod(str, NULL)));
}else{
_rjp__add_value(state->curr, rjp_dfloat(strtod(str, NULL)));
}
}else{
if(!state->root){
state->root = state->curr = _rjp__add_value(NULL, rjp_integer(strtol(str, NULL, 10)));
}else{
_rjp__add_value(state->curr, rjp_integer(strtol(str, NULL, 10)));
}
}
state->column += numlen;
return numlen;
}
//booleans and null
else if(!strncmp(str, "true", 4)){
if(!state->curr){
*state->target = json_target_none;
state->root = state->curr = _rjp__add_value(state->curr, rjp_boolean(1));
}else{
*state->target = json_target_comma;
_rjp__add_value(state->curr, rjp_boolean(1));
}
state->column += 3;
return 4;
}else if(!strncmp(str, "false", 5)){
if(!state->curr){
*state->target = json_target_none;
state->root = state->curr = _rjp__add_value(state->curr, rjp_boolean(0));
}else{
*state->target = json_target_comma;
_rjp__add_value(state->curr, rjp_boolean(0));
}
state->column += 4;
return 5;
}else if(!strncmp(str, "null", 4)){
if(!state->curr){
*state->target = json_target_none;
state->root = state->curr = _rjp__add_value(state->curr, rjp_null());
}else{
*state->target = json_target_comma;
_rjp__add_value(state->curr, rjp_null());
}
state->column += 3;
return 4;
}
//unrecognized character
else if(!_rjp__is_whitespace(c)){
syntax_error("Unexpected character!", state);
return -1;
}
return 1;
}
RJP_value* rjp_parse(const char* str){
RJP_parse_state state;
_rjp__init_parse_state(&state);
//initially search for the root object
*top = json_value;
*state.target = json_target_value;
for(;*str != '\0';++str){
int inc = 0;
for(;*str != '\0';str += inc){
char c = *str;
//keep track of position in input file
if(c == '\n'){
++row;
column = 0;
++state.row;
state.column = 0;
}else{
++column;
++state.column;
}
//Handle comments
if(in_line_comment){
if(c == '\n')
in_line_comment = 0;
}
else if(in_block_comment){
if(c == '*' && *(str+1) == '/'){
in_block_comment = 0;
++str;
}
}
else if(c == '/' && *(str+1) == '/'){
in_line_comment = 1;
++str;
}
else if(c == '/' && *(str+1) == '*'){
in_block_comment = 1;
++str;
if((inc = _rjp__handle_comment(str, &state))){
continue;
}
else if(*top == json_key){
//start of key
if(c == '"'){
if(curr == NULL)
syntax_error("Key found outside of object definition!", row, column);
int keylen;
int inclen;
char* new_string = _rjp__parse_string(root, ++str, &inclen, &keylen, &row, &column);
if(!new_string){
if(!keylen)
syntax_error("Cannot have empty key name!", row, column);
return NULL;
}
_rjp__add_member_no_alloc(&curr->object, new_string, keylen);
str += inclen;
*top = json_colon;
//end of this object (object is empty)
}else if(c == '}'){
curr = curr->parent;
if(top != state_stack)
--top;
//unrecognized character
}else if(!_rjp__is_whitespace(c)){
syntax_error("Unexpected character, expected '\"'!", row, column);
switch(*state.target){
case json_target_key:
inc = _rjp__handle_key(str, &state);
break;
case json_target_colon:
inc = _rjp__handle_colon(str, &state);
break;
case json_target_comma:
inc = _rjp__handle_comma(str, &state);
break;
case json_target_value:
inc = _rjp__handle_value(str, &state);
break;
case json_target_none:
if(!_rjp__is_whitespace(*str)){
syntax_error("Unexpected character!", &state);
return NULL;
}
}
else if(*top == json_colon){
//colon after a key
if(c == ':'){
*top = json_value;
//unrecognized character
}else if(!_rjp__is_whitespace(c)){
syntax_error( "Unexpected character, expected ':'!", row, column);
}
}
else if(*top == json_comma){
//comma separating keys in an object or values in an array
if(c == ','){
*top = (curr->type == json_array ? json_value : json_key);
//end of object
}else if(c == '}'){
if(curr->type == json_array){
syntax_error("Unexpected end of object within array!", row, column);
}
curr = curr->parent;
if(top != state_stack)
--top;
//end of array
}else if(c == ']' && curr->type == json_array){
curr = curr->parent;
//unrecognized character
}else if(!_rjp__is_whitespace(c)){
syntax_error("Unexpected character, expected ','!", row, column);
}
}
else if(*top == json_value){
//object
if(c == '{'){
if(!root){
root = _rjp__add_value(NULL, rjp_object());
curr = root;
*top = json_key;
}else{
curr = _rjp__add_value(curr, rjp_object());
*top = json_comma;
++top;
*top = json_key;
}
}
else if(c == '['){
if(!root){
root = _rjp__add_value(NULL, rjp_array());
curr = root;
}else{
curr = _rjp__add_value(curr, rjp_array());
}
}
else if(c == ']' && curr->type == json_array){ //empty array
*top = json_comma;
curr = curr->parent;
}
//strings
else if(c == '"'){
int vallen, inclen;
++str;
char* new_string = _rjp__parse_string(root, str, &inclen, &vallen, &row, &column);
if(!new_string){
if(vallen == 0){
new_string = rjp_calloc(1, 1);
}else{
return NULL;
}
}
_rjp__add_value(curr, rjp_string(new_string, vallen));
str += inclen;
*top = json_comma;
}
//numbers
else if((c >= '0' && c <= '9') || c == '-'){
if(!curr)
*top = json_none;
else
*top = json_comma;
int numlen;
int floating = 0; //is an int or a double
for(numlen = 1;*(str+numlen) >= '0' && *(str+numlen) <= '9';++numlen);
if(*(str+numlen) == '.'){ //if we have a decimal, make it a double and continue parsing as a number
int i = ++numlen;
for(;*(str+numlen) >= '0' && *(str+numlen) <= '9';++numlen);
if(i == numlen){ //no number after decimal
syntax_error("Missing numerals after decimal place!", row, column);
}
floating = 1;
}
if(*(str+numlen) == '\0' && curr){ //hit EOF early
syntax_error("Unexpected EOF before end of object!", row, column);
}
if(c == '-' && numlen == 1){ //only have a '-' with no numbers
syntax_error("Missing numerals ofter '-' sign!", row, column);
}
if(floating){
if(!root){
root = curr = _rjp__add_value(NULL, rjp_dfloat(strtod(str, NULL)));
}else{
_rjp__add_value(curr, rjp_dfloat(strtod(str, NULL)));
}
}else{
if(!root){
root = curr = _rjp__add_value(NULL, rjp_integer(strtol(str, NULL, 10)));
}else{
_rjp__add_value(curr, rjp_integer(strtol(str, NULL, 10)));
}
}
str += (numlen-1);
column += numlen;
}
//booleans and null
else if(!strncmp(str, "true", 4)){
if(!curr){
*top = json_none;
root = curr = _rjp__add_value(curr, rjp_boolean(1));
}else{
*top = json_comma;
_rjp__add_value(curr, rjp_boolean(1));
}
str += 3;column += 3;
}else if(!strncmp(str, "false", 5)){
if(!curr){
*top = json_none;
root = curr = _rjp__add_value(curr, rjp_boolean(0));
}else{
*top = json_comma;
_rjp__add_value(curr, rjp_boolean(0));
}
str += 4;column += 4;
}else if(!strncmp(str, "null", 4)){
if(!curr){
*top = json_none;
root = curr = _rjp__add_value(curr, rjp_null());
}else{
*top = json_comma;
_rjp__add_value(curr, rjp_null());
}
str += 3;column += 3;
}
//unrecognized character
else if(!_rjp__is_whitespace(c)){
syntax_error("Unexpected character!", row, column);
}
}else if(*top == json_none && !_rjp__is_whitespace(c)){
syntax_error("Unexpected character!", row, column);
}
inc = 1;
break;
default:
inc = 1;
break;
};
}
return root;
return state.root;
}
//Parse one chunk of json text.
//Without a previous chunk the text is handed to rjp_parse as a complete
//document. Continuing from a previous chunk is not implemented yet and
//yields NULL.
RJP_value* rjp_parse_chunked(const char* str, RJP_value* prev_chunk){
	return prev_chunk ? NULL : rjp_parse(str);
}
#undef syntax_error