Working on enabling chunked reading
This commit is contained in:
parent
4577836f8e
commit
dc0c003785
3
TODO
Normal file
3
TODO
Normal file
@ -0,0 +1,3 @@
|
||||
Change string handling to work with chunked reading
|
||||
Change numeral handling to work with chunked reading
|
||||
Handle scientific notation
|
||||
@ -84,6 +84,7 @@ typedef struct RJP_search_res{
|
||||
|
||||
//Convert C string consisting of json data into RJP's format
|
||||
RJP_value* rjp_parse(const char* str);
|
||||
RJP_value* rjp_parse_chunked(const char* str, RJP_value* prev_chunk);
|
||||
|
||||
//Initialize a root RJP_value to NULL
|
||||
RJP_value* rjp_init_json(void);
|
||||
|
||||
531
src/input.c
531
src/input.c
@ -24,14 +24,17 @@
|
||||
#include "memory.h"
|
||||
#include <stdlib.h> //strtod, strtol
|
||||
#include <stdio.h> //fprintf, stderr
|
||||
#include <string.h> //memset
|
||||
|
||||
//types of searches in the text
|
||||
typedef enum json_search_target{
|
||||
json_key,
|
||||
json_colon,
|
||||
json_comma,
|
||||
json_value,
|
||||
json_none
|
||||
json_target_key,
|
||||
json_target_colon,
|
||||
json_target_comma,
|
||||
json_target_value,
|
||||
json_target_string,
|
||||
json_target_numeral,
|
||||
json_target_none
|
||||
}json_search_target;
|
||||
|
||||
static RJP_value* _rjp__add_value(RJP_value* curr, RJP_value new_val){
|
||||
@ -49,234 +52,324 @@ static RJP_value* _rjp__add_value(RJP_value* curr, RJP_value new_val){
|
||||
curr->object.last->value = new_val;
|
||||
return &curr->object.last->value;
|
||||
}
|
||||
#define syntax_error(msg, row, column)\
|
||||
do{DIAG_PRINT(stderr, "Syntax error! %s (%i:%i)\n", msg, row, column);rjp_free_value(root);return NULL;}while(0)
|
||||
|
||||
#define MAX_DEPTH 16
|
||||
RJP_value* rjp_parse(const char* str){
|
||||
RJP_value* root = 0;
|
||||
RJP_value* curr = 0;
|
||||
int row = 1, column = 0;
|
||||
int in_line_comment = 0;
|
||||
int in_block_comment = 0;
|
||||
|
||||
//keep track of where we are in a given subobject
|
||||
int state_stack[MAX_DEPTH] = {0},*top = state_stack;
|
||||
//State kept while a string is being read.
//NOTE(review): escaped/in_utf_sequence are not used by any code visible here;
//their names suggest backslash and \u sequence tracking - confirm against the string parser.
typedef struct RJP_string_state{
	int   escaped;
	int   in_utf_sequence;
	//store partial string here only when chunked reading and chunk ends mid string
	char* buffer;
}RJP_string_state;
|
||||
|
||||
//State kept while a number is being read.
typedef struct RJP_numeral_state{
	int   numlen;
	//store partial number string here only when chunked reading and chunk ends mid number
	char* buffer;
}RJP_numeral_state;
|
||||
|
||||
typedef struct RJP_parse_state{
|
||||
RJP_value* root;
|
||||
RJP_value* curr;
|
||||
union{
|
||||
RJP_string_state str_state;
|
||||
RJP_numeral_state num_state;
|
||||
};
|
||||
int row, column;
|
||||
int in_line_comment;
|
||||
int in_block_comment;
|
||||
int target_stack[MAX_DEPTH];
|
||||
int* target;
|
||||
}RJP_parse_state;
|
||||
|
||||
//Reset a parse state so that it is ready to read the first character of a document.
//Every member is zeroed (including the string/numeral union), the position is set to
//the start of the first line and the target pointer is aimed at the bottom of the
//target stack.
void _rjp__init_parse_state(RJP_parse_state* state){
	memset(state, 0, sizeof(*state));
	state->root = NULL;
	state->curr = NULL;
	//rows are 1 based so that the first line of input is reported as line 1
	state->row = 1;
	state->column = 0;
	state->in_line_comment = 0;
	state->in_block_comment = 0;
	state->target = state->target_stack;
}
|
||||
|
||||
//Report a syntax error at the current row/column and discard everything parsed so far.
//msg:   description of the problem, printed through DIAG_PRINT
//state: parse state; its value tree is freed and its root/curr pointers are cleared
//       so that nothing can return or dereference the freed tree afterwards
static void syntax_error(const char* msg, RJP_parse_state* state){
	DIAG_PRINT(stderr, "Syntax error! %s (%i:%i)\n", msg, state->row, state->column);
	rjp_free_value(state->root);
	state->root = NULL;
	state->curr = NULL;
}
|
||||
|
||||
//Return number of characters handled while processing comment
|
||||
int _rjp__handle_comment(const char* str, RJP_parse_state* state){
|
||||
char c = *str;
|
||||
if(state->in_line_comment){
|
||||
if(c == '\n')
|
||||
state->in_line_comment = 0;
|
||||
return 1;
|
||||
}else if(state->in_block_comment){
|
||||
if(c == '*' && *(str+1) == '/'){
|
||||
state->in_block_comment = 0;
|
||||
return 2;
|
||||
}
|
||||
return 1;
|
||||
}else if(c == '/' && *(str+1) == '/'){
|
||||
state->in_block_comment = 1;
|
||||
return 2;
|
||||
}else if(c == '/' && *(str+1) == '/'){
|
||||
state->in_line_comment = 1;
|
||||
return 2;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
//Handle one position of input while a member key is expected.
//Returns the number of characters consumed, or -1 after a syntax error.
int _rjp__handle_key(const char* str, RJP_parse_state* state){
	const char first = str[0];

	//end of this object (object is empty)
	if(first == '}'){
		state->curr = state->curr->parent;
		if(state->target != state->target_stack)
			--state->target;
		return 1;
	}

	//anything that is not the opening quote of a key
	if(first != '"'){
		if(_rjp__is_whitespace(first))
			return 1;
		syntax_error("Unexpected character, expected '\"'!", state);
		return -1;
	}

	//start of key
	if(state->curr == NULL){
		syntax_error("Key found outside of object definition!", state);
		return -1;
	}

	int keylen;
	int inclen;
	char* key = _rjp__parse_string(state->root, str+1, &inclen, &keylen, &state->row, &state->column);
	if(!key){
		if(!keylen)
			syntax_error("Cannot have empty key name!", state);
		return -1;
	}

	_rjp__add_member_no_alloc(&(state->curr->object), key, keylen);
	*state->target = json_target_colon;
	//inclen characters of string plus the two quotes
	return inclen+2;
}
|
||||
|
||||
//Handle one character of input while the ':' after a key is expected.
//Returns 1 when the character was consumed, or -1 after a syntax error.
int _rjp__handle_colon(const char* str, RJP_parse_state* state){
	const char first = *str;

	//colon after a key: a value comes next
	if(first == ':'){
		*state->target = json_target_value;
		return 1;
	}

	//whitespace before the colon is skipped
	if(_rjp__is_whitespace(first))
		return 1;

	//unrecognized character
	syntax_error( "Unexpected character, expected ':'!", state);
	return -1;
}
|
||||
//Handle one character of input while a ',' or the end of the current
//object/array is expected.
//Returns 1 when the character was consumed, or -1 after a syntax error.
int _rjp__handle_comma(const char* str, RJP_parse_state* state){
	const char first = *str;

	switch(first){
	//comma separating keys in an object or values in an array
	case ',':
		if(state->curr->type == json_array)
			*state->target = json_target_value;
		else
			*state->target = json_target_key;
		return 1;

	//end of object
	case '}':
		if(state->curr->type == json_array){
			syntax_error("Unexpected end of object within array!", state);
			return -1;
		}
		state->curr = state->curr->parent;
		if(state->target != state->target_stack)
			--state->target;
		return 1;

	//end of array; outside of an array ']' is treated like any other character
	case ']':
		if(state->curr->type == json_array){
			state->curr = state->curr->parent;
			return 1;
		}
		break;

	default:
		break;
	}

	//whitespace is skipped, everything else is an error
	if(_rjp__is_whitespace(first))
		return 1;
	syntax_error("Unexpected character, expected ','!", state);
	return -1;
}
|
||||
|
||||
//Handle the input at str while a value is expected.
//Recognizes the start of an object or array, the end of an empty array, strings,
//integers, decimals, true, false and null. The new value is attached to state->curr,
//or becomes the root when nothing has been parsed yet.
//str:   current position in the input
//state: parse state; root, curr, column and the active target are updated
//Returns the number of characters consumed, or -1 after a syntax error.
int _rjp__handle_value(const char* str, RJP_parse_state* state){
	char c = *str;

	//object
	if(c == '{'){
		if(!state->root){
			state->root = _rjp__add_value(NULL, rjp_object());
			state->curr = state->root;
			*state->target = json_target_key;
		}else{
			//every nested object takes one slot of the target stack; refuse to
			//go deeper rather than write past the end of the stack
			if(state->target - state->target_stack >= MAX_DEPTH - 1){
				syntax_error("Maximum object nesting depth exceeded!", state);
				return -1;
			}
			state->curr = _rjp__add_value(state->curr, rjp_object());
			*state->target = json_target_comma;
			++state->target;
			*state->target = json_target_key;
		}
		return 1;
	}
	//array
	else if(c == '['){
		if(!state->root){
			state->root = _rjp__add_value(NULL, rjp_array());
			state->curr = state->root;
		}else{
			state->curr = _rjp__add_value(state->curr, rjp_array());
		}
		return 1;
	}
	//empty array; curr is NULL when ']' shows up before any value was parsed
	else if(c == ']' && state->curr && state->curr->type == json_array){
		*state->target = json_target_comma;
		state->curr = state->curr->parent;
		return 1;
	}
	//strings
	else if(c == '"'){
		int vallen, inclen;
		char* new_string = _rjp__parse_string(state->root, str+1, &inclen, &vallen, &state->row, &state->column);
		if(!new_string){
			if(vallen == 0){
				//empty string value
				new_string = rjp_calloc(1, 1);
			}else{
				return -1;
			}
		}
		if(!state->curr){
			//a string as the whole document becomes the root, like the other scalars
			*state->target = json_target_none;
			state->root = state->curr = _rjp__add_value(state->curr, rjp_string(new_string, vallen));
		}else{
			*state->target = json_target_comma;
			_rjp__add_value(state->curr, rjp_string(new_string, vallen));
		}
		//inclen characters of string plus the two quotes
		return inclen+2;
	}
	//numbers
	else if((c >= '0' && c <= '9') || c == '-'){
		if(!state->curr)
			*state->target = json_target_none;
		else
			*state->target = json_target_comma;

		int numlen;
		int floating = 0; //is an int or a double
		for(numlen = 1;*(str+numlen) >= '0' && *(str+numlen) <= '9';++numlen);
		if(*(str+numlen) == '.'){ //if we have a decimal, make it a double and continue parsing as a number
			int i = ++numlen;
			for(;*(str+numlen) >= '0' && *(str+numlen) <= '9';++numlen);
			if(i == numlen){ //no number after decimal
				syntax_error("Missing numerals after decimal place!", state);
				return -1;
			}
			floating = 1;
		}
		if(*(str+numlen) == '\0' && state->curr){ //hit EOF early
			syntax_error("Unexpected EOF before end of object!", state);
			return -1;
		}
		if(c == '-' && numlen == 1){ //only have a '-' with no numbers
			syntax_error("Missing numerals after '-' sign!", state);
			return -1;
		}

		if(floating){
			if(!state->root){
				state->root = state->curr = _rjp__add_value(NULL, rjp_dfloat(strtod(str, NULL)));
			}else{
				_rjp__add_value(state->curr, rjp_dfloat(strtod(str, NULL)));
			}
		}else{
			if(!state->root){
				state->root = state->curr = _rjp__add_value(NULL, rjp_integer(strtol(str, NULL, 10)));
			}else{
				_rjp__add_value(state->curr, rjp_integer(strtol(str, NULL, 10)));
			}
		}
		state->column += numlen;
		return numlen;
	}
	//booleans and null
	else if(!strncmp(str, "true", 4)){
		if(!state->curr){
			*state->target = json_target_none;
			state->root = state->curr = _rjp__add_value(state->curr, rjp_boolean(1));
		}else{
			*state->target = json_target_comma;
			_rjp__add_value(state->curr, rjp_boolean(1));
		}
		state->column += 3;
		return 4;
	}else if(!strncmp(str, "false", 5)){
		if(!state->curr){
			*state->target = json_target_none;
			state->root = state->curr = _rjp__add_value(state->curr, rjp_boolean(0));
		}else{
			*state->target = json_target_comma;
			_rjp__add_value(state->curr, rjp_boolean(0));
		}
		state->column += 4;
		return 5;
	}else if(!strncmp(str, "null", 4)){
		if(!state->curr){
			*state->target = json_target_none;
			state->root = state->curr = _rjp__add_value(state->curr, rjp_null());
		}else{
			*state->target = json_target_comma;
			_rjp__add_value(state->curr, rjp_null());
		}
		state->column += 3;
		return 4;
	}
	//unrecognized character
	else if(!_rjp__is_whitespace(c)){
		syntax_error("Unexpected character!", state);
		return -1;
	}
	return 1;
}
|
||||
|
||||
RJP_value* rjp_parse(const char* str){
|
||||
RJP_parse_state state;
|
||||
_rjp__init_parse_state(&state);
|
||||
|
||||
//initially search for the root object
|
||||
*top = json_value;
|
||||
*state.target = json_target_value;
|
||||
|
||||
for(;*str != '\0';++str){
|
||||
int inc = 0;
|
||||
for(;*str != '\0';str += inc){
|
||||
char c = *str;
|
||||
|
||||
//keep track of position in input file
|
||||
if(c == '\n'){
|
||||
++row;
|
||||
column = 0;
|
||||
++state.row;
|
||||
state.column = 0;
|
||||
}else{
|
||||
++column;
|
||||
++state.column;
|
||||
}
|
||||
|
||||
//Handle comments
|
||||
if(in_line_comment){
|
||||
if(c == '\n')
|
||||
in_line_comment = 0;
|
||||
}
|
||||
else if(in_block_comment){
|
||||
if(c == '*' && *(str+1) == '/'){
|
||||
in_block_comment = 0;
|
||||
++str;
|
||||
}
|
||||
}
|
||||
else if(c == '/' && *(str+1) == '/'){
|
||||
in_line_comment = 1;
|
||||
++str;
|
||||
}
|
||||
else if(c == '/' && *(str+1) == '*'){
|
||||
in_block_comment = 1;
|
||||
++str;
|
||||
if((inc = _rjp__handle_comment(str, &state))){
|
||||
continue;
|
||||
}
|
||||
|
||||
else if(*top == json_key){
|
||||
//start of key
|
||||
if(c == '"'){
|
||||
if(curr == NULL)
|
||||
syntax_error("Key found outside of object definition!", row, column);
|
||||
|
||||
int keylen;
|
||||
int inclen;
|
||||
char* new_string = _rjp__parse_string(root, ++str, &inclen, &keylen, &row, &column);
|
||||
if(!new_string){
|
||||
if(!keylen)
|
||||
syntax_error("Cannot have empty key name!", row, column);
|
||||
return NULL;
|
||||
}
|
||||
_rjp__add_member_no_alloc(&curr->object, new_string, keylen);
|
||||
str += inclen;
|
||||
*top = json_colon;
|
||||
//end of this object (object is empty)
|
||||
}else if(c == '}'){
|
||||
curr = curr->parent;
|
||||
if(top != state_stack)
|
||||
--top;
|
||||
|
||||
//unrecognized character
|
||||
}else if(!_rjp__is_whitespace(c)){
|
||||
syntax_error("Unexpected character, expected '\"'!", row, column);
|
||||
switch(*state.target){
|
||||
case json_target_key:
|
||||
inc = _rjp__handle_key(str, &state);
|
||||
break;
|
||||
case json_target_colon:
|
||||
inc = _rjp__handle_colon(str, &state);
|
||||
break;
|
||||
case json_target_comma:
|
||||
inc = _rjp__handle_comma(str, &state);
|
||||
break;
|
||||
case json_target_value:
|
||||
inc = _rjp__handle_value(str, &state);
|
||||
break;
|
||||
case json_target_none:
|
||||
if(!_rjp__is_whitespace(*str)){
|
||||
syntax_error("Unexpected character!", &state);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
else if(*top == json_colon){
|
||||
//colon after a key
|
||||
if(c == ':'){
|
||||
*top = json_value;
|
||||
//unrecognized character
|
||||
}else if(!_rjp__is_whitespace(c)){
|
||||
syntax_error( "Unexpected character, expected ':'!", row, column);
|
||||
}
|
||||
}
|
||||
else if(*top == json_comma){
|
||||
//comma separating keys in an object or values in an array
|
||||
if(c == ','){
|
||||
*top = (curr->type == json_array ? json_value : json_key);
|
||||
|
||||
//end of object
|
||||
}else if(c == '}'){
|
||||
if(curr->type == json_array){
|
||||
syntax_error("Unexpected end of object within array!", row, column);
|
||||
}
|
||||
curr = curr->parent;
|
||||
if(top != state_stack)
|
||||
--top;
|
||||
//end of array
|
||||
}else if(c == ']' && curr->type == json_array){
|
||||
curr = curr->parent;
|
||||
//unrecognized character
|
||||
}else if(!_rjp__is_whitespace(c)){
|
||||
syntax_error("Unexpected character, expected ','!", row, column);
|
||||
}
|
||||
}
|
||||
else if(*top == json_value){
|
||||
//object
|
||||
if(c == '{'){
|
||||
if(!root){
|
||||
root = _rjp__add_value(NULL, rjp_object());
|
||||
curr = root;
|
||||
*top = json_key;
|
||||
}else{
|
||||
curr = _rjp__add_value(curr, rjp_object());
|
||||
*top = json_comma;
|
||||
++top;
|
||||
*top = json_key;
|
||||
}
|
||||
}
|
||||
else if(c == '['){
|
||||
if(!root){
|
||||
root = _rjp__add_value(NULL, rjp_array());
|
||||
curr = root;
|
||||
|
||||
}else{
|
||||
curr = _rjp__add_value(curr, rjp_array());
|
||||
}
|
||||
}
|
||||
else if(c == ']' && curr->type == json_array){ //empty array
|
||||
*top = json_comma;
|
||||
curr = curr->parent;
|
||||
}
|
||||
//strings
|
||||
else if(c == '"'){
|
||||
int vallen, inclen;
|
||||
++str;
|
||||
char* new_string = _rjp__parse_string(root, str, &inclen, &vallen, &row, &column);
|
||||
if(!new_string){
|
||||
if(vallen == 0){
|
||||
new_string = rjp_calloc(1, 1);
|
||||
}else{
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
_rjp__add_value(curr, rjp_string(new_string, vallen));
|
||||
str += inclen;
|
||||
*top = json_comma;
|
||||
}
|
||||
//numbers
|
||||
else if((c >= '0' && c <= '9') || c == '-'){
|
||||
if(!curr)
|
||||
*top = json_none;
|
||||
else
|
||||
*top = json_comma;
|
||||
int numlen;
|
||||
int floating = 0; //is an int or a double
|
||||
for(numlen = 1;*(str+numlen) >= '0' && *(str+numlen) <= '9';++numlen);
|
||||
if(*(str+numlen) == '.'){ //if we have a decimal, make it a double and continue parsing as a number
|
||||
int i = ++numlen;
|
||||
for(;*(str+numlen) >= '0' && *(str+numlen) <= '9';++numlen);
|
||||
if(i == numlen){ //no number after decimal
|
||||
syntax_error("Missing numerals after decimal place!", row, column);
|
||||
}
|
||||
floating = 1;
|
||||
}
|
||||
if(*(str+numlen) == '\0' && curr){ //hit EOF early
|
||||
syntax_error("Unexpected EOF before end of object!", row, column);
|
||||
}
|
||||
if(c == '-' && numlen == 1){ //only have a '-' with no numbers
|
||||
syntax_error("Missing numerals ofter '-' sign!", row, column);
|
||||
}
|
||||
if(floating){
|
||||
if(!root){
|
||||
root = curr = _rjp__add_value(NULL, rjp_dfloat(strtod(str, NULL)));
|
||||
}else{
|
||||
_rjp__add_value(curr, rjp_dfloat(strtod(str, NULL)));
|
||||
}
|
||||
}else{
|
||||
if(!root){
|
||||
root = curr = _rjp__add_value(NULL, rjp_integer(strtol(str, NULL, 10)));
|
||||
}else{
|
||||
_rjp__add_value(curr, rjp_integer(strtol(str, NULL, 10)));
|
||||
}
|
||||
}
|
||||
str += (numlen-1);
|
||||
column += numlen;
|
||||
}
|
||||
//booleans and null
|
||||
else if(!strncmp(str, "true", 4)){
|
||||
if(!curr){
|
||||
*top = json_none;
|
||||
root = curr = _rjp__add_value(curr, rjp_boolean(1));
|
||||
}else{
|
||||
*top = json_comma;
|
||||
_rjp__add_value(curr, rjp_boolean(1));
|
||||
}
|
||||
str += 3;column += 3;
|
||||
}else if(!strncmp(str, "false", 5)){
|
||||
if(!curr){
|
||||
*top = json_none;
|
||||
root = curr = _rjp__add_value(curr, rjp_boolean(0));
|
||||
}else{
|
||||
*top = json_comma;
|
||||
_rjp__add_value(curr, rjp_boolean(0));
|
||||
}
|
||||
str += 4;column += 4;
|
||||
}else if(!strncmp(str, "null", 4)){
|
||||
if(!curr){
|
||||
*top = json_none;
|
||||
root = curr = _rjp__add_value(curr, rjp_null());
|
||||
}else{
|
||||
*top = json_comma;
|
||||
_rjp__add_value(curr, rjp_null());
|
||||
}
|
||||
str += 3;column += 3;
|
||||
}
|
||||
//unrecognized character
|
||||
else if(!_rjp__is_whitespace(c)){
|
||||
syntax_error("Unexpected character!", row, column);
|
||||
}
|
||||
}else if(*top == json_none && !_rjp__is_whitespace(c)){
|
||||
syntax_error("Unexpected character!", row, column);
|
||||
}
|
||||
inc = 1;
|
||||
break;
|
||||
default:
|
||||
inc = 1;
|
||||
break;
|
||||
};
|
||||
}
|
||||
return root;
|
||||
return state.root;
|
||||
}
|
||||
//Parse one chunk of json data.
//With no previous chunk the text is parsed by rjp_parse; when a previous chunk
//is passed in, NULL is returned.
RJP_value* rjp_parse_chunked(const char* str, RJP_value* prev_chunk){
	return prev_chunk ? NULL : rjp_parse(str);
}
|
||||
|
||||
#undef syntax_error
|
||||
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user