435 lines
14 KiB
C
435 lines
14 KiB
C
/**
	rjp
	Copyright (C) 2018-2020 rexy712

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
|
|
|
|
//TODO: Scientific notation
|
|
|
|
#include "rjp.h"
|
|
#include "rjp_internal.h"
|
|
#include "rjp_value.h"
|
|
#include "rjp_string.h"
|
|
#include "rjp_lex.h"
|
|
#include <stdlib.h> //strtod, strtol
|
|
#include <string.h> //memcpy
|
|
|
|
|
|
#define RJP_INITIAL_PARSE_DEPTH 16
|
|
|
|
//Result codes produced by the parse routines in this file.
//rjp_parse_error_to_string turns most of them into a readable message.
typedef enum RJP_parse_status{
	RJP_PARSE_STATUS_SUC = 0,                 //token accepted / parse finished cleanly
	RJP_PARSE_STATUS_ERR = 1,                 //not returned by any code in this file
	RJP_PARSE_STATUS_MISSING_VALUE = 2,       //an array element or member value could not be created from the token
	RJP_PARSE_STATUS_MISSING_COMMA = 3,       //after a value, the token was neither a comma nor the closing bracket/brace
	RJP_PARSE_STATUS_INVALID = 4,             //lexer reported an invalid token, or input did not end on rjp_lex_end
	RJP_PARSE_STATUS_NO_ROOT_VALUE = 5,       //the first token could not start a root value
	RJP_PARSE_STATUS_MISSING_KEY = 6,         //a member key was expected but not found (or was empty)
	RJP_PARSE_STATUS_MISSING_COLON = 7,       //a key was not followed by a colon
	RJP_PARSE_STATUS_EXCESS_DATA = 8,         //more tokens followed the completed root value
	RJP_PARSE_STATUS_MISSING_CLOSE_BRACE = 9, //input ended while a container was still open
}RJP_parse_status;
|
|
|
|
|
|
//What the parser expects to see next. One entry per open container (plus the
//root entry) is kept on the RJP_parse_stack.
typedef enum RJP_parse_target{
	rjp_parse_end = 0,             //root value complete; any further token is excess data
	rjp_parse_start = 1,           //nothing parsed yet; expecting the root value
	rjp_parse_first_mem_key = 2,   //expecting a member key or a closing brace
	rjp_parse_mem_key = 3,         //expecting a member key
	rjp_parse_arr_first_value = 4, //expecting an array element or a closing bracket
	rjp_parse_arr_value = 5,       //expecting an array element
	rjp_parse_arr_comma = 6,       //expecting a comma or closing bracket after an element
	rjp_parse_key_colon = 7,       //expecting the colon after a member key
	rjp_parse_obj_value = 8,       //expecting the value of a member
	rjp_parse_obj_comma = 9        //expecting a comma or closing brace after a member value
}RJP_parse_target;
|
|
|
|
typedef struct RJP_parse_stack{
|
|
RJP_parse_target* stack;
|
|
RJP_index position;
|
|
RJP_index size;
|
|
}RJP_parse_stack;
|
|
typedef struct RJP_parse_state{
|
|
RJP_parse_stack target_stack;
|
|
RJP_value* root;
|
|
RJP_value* curr;
|
|
RJP_value* lastadded;
|
|
RJP_lex_state lexstate;
|
|
int row, column;
|
|
_Bool allow_comments;
|
|
_Bool allow_trail_comma;
|
|
}RJP_parse_state;
|
|
|
|
//Allocate the target stack with RJP_INITIAL_PARSE_DEPTH entries and seed it
//with a single rjp_parse_start entry.
//NOTE(review): the result of rjp_alloc is used without a NULL check.
static void irjp_init_parse_stack(RJP_parse_stack* s){
	s->size = RJP_INITIAL_PARSE_DEPTH;
	s->stack = rjp_alloc(sizeof(*s->stack) * s->size);

	//the bottom entry is the "expecting root value" target
	s->position = 0;
	s->stack[s->position] = rjp_parse_start;
}
|
|
//Release the stack's storage and clear the pointer. The struct itself is not freed.
static void irjp_delete_parse_stack(RJP_parse_stack* s){
	RJP_parse_target* storage = s->stack;
	s->stack = NULL;
	rjp_free(storage);
}
|
|
//Move the stack into a new allocation holding 'newsize' entries.
//All s->size existing entries are copied, so newsize is assumed to be at
//least s->size (the only caller in this file doubles the size).
//NOTE(review): the result of rjp_alloc is used without a NULL check.
static void irjp_resize_parse_stack(RJP_parse_stack* s, RJP_index newsize){
	RJP_parse_target* previous = s->stack;
	RJP_parse_target* grown = rjp_alloc(sizeof(RJP_parse_target) * newsize);

	memcpy(grown, previous, s->size*sizeof(RJP_parse_target));
	rjp_free(previous);

	s->size = newsize;
	s->stack = grown;
}
|
|
//Push a new target, doubling the storage first when the next slot would be
//the one just past the end of the array.
static void irjp_parse_stack_push(RJP_parse_stack* s, RJP_parse_target target){
	if((s->position + 1) == s->size){
		irjp_resize_parse_stack(s, s->size*2);
	}
	s->position += 1;
	s->stack[s->position] = target;
}
|
|
//Remove the top target and return it. No underflow check is performed.
static RJP_parse_target irjp_parse_stack_pop(RJP_parse_stack* s){
	const RJP_parse_target top = s->stack[s->position];
	s->position -= 1;
	return top;
}
|
|
//Return the top target without removing it.
static RJP_parse_target irjp_parse_stack_current(RJP_parse_stack* s){
	const RJP_parse_target* top = s->stack + s->position;
	return *top;
}
|
|
//Overwrite the top target in place (the stack depth does not change).
static void irjp_parse_stack_set(RJP_parse_stack* s, RJP_parse_target target){
	RJP_parse_target* top = s->stack + s->position;
	*top = target;
}
|
|
|
|
//Fill in 'newval' from the token the lexer currently points at.
//  newval: value to initialise
//  cat:    lex category of the current token
//  state:  parse state; supplies the token text and receives container bookkeeping
//Returns 0 on success, 1 if the token cannot start a value or string
//conversion failed.
//Opening a container pushes a new target and makes state->lastadded the
//current container, so callers must have set state->lastadded beforehand.
static int irjp_init_value(RJP_value* newval, RJP_lex_category cat, RJP_parse_state* state){
	const char* token = state->lexstate.str + state->lexstate.offset;
	const RJP_index toklen = state->lexstate.length;

	switch(cat){
	case rjp_lex_string:
		{
			RJP_index converted_len;
			//type is set before conversion, so a failed conversion leaves a string value with a NULL buffer
			newval->type = rjp_json_string;
			newval->string.value = irjp_convert_string(token, toklen, &converted_len);
			if(!newval->string.value)
				return 1;
			newval->string.length = converted_len;
		}
		return 0;

	case rjp_lex_number:
		newval->type = rjp_json_integer;
		newval->integer = strtoll(token, NULL, 10);
		return 0;

	case rjp_lex_fnumber:
	case rjp_lex_sci_num:
		newval->type = rjp_json_dfloat;
		newval->dfloat = strtod(token, NULL);
		return 0;

	case rjp_lex_true:
	case rjp_lex_false:
		newval->type = rjp_json_boolean;
		newval->boolean = (cat == rjp_lex_true);
		return 0;

	case rjp_lex_null:
		newval->type = rjp_json_null;
		return 0;

	case rjp_lex_obrace:
	case rjp_lex_obracket:
		{
			const int is_object = (cat == rjp_lex_obrace);
			newval->type = is_object ? rjp_json_object : rjp_json_array;
			irjp_parse_stack_push(&state->target_stack,
			                      is_object ? rjp_parse_first_mem_key : rjp_parse_arr_first_value);
			//descend: subsequent members/elements go into the value just added
			state->curr = state->lastadded;
		}
		return 0;

	default:
		break;
	}
	return 1;
}
|
|
//Append a new element to the current array and initialise it from the
//current token. state->lastadded is updated before initialisation because
//irjp_init_value reads it when the token opens a container.
//Returns the new element, or NULL if initialisation failed.
static RJP_value* irjp_add_value_to_array(RJP_lex_category cat, RJP_parse_state* state){
	RJP_value* element = rjp_new_element(state->curr);
	state->lastadded = element;

	if(irjp_init_value(element, cat, state) != 0){
		return NULL;
	}
	return element;
}
|
|
//Add a new member to the current object, using the converted form of 'key'
//as its name. The member's value is initialised later, once the value
//token arrives.
//  state:  parse state; state->curr is the object receiving the member and
//          state->lastadded is set to the new member on success
//  key:    raw key token text
//  keylen: length of the raw key token
//Returns the result of rjp_new_member_steal_key, or NULL when the key could
//not be converted or converts to an empty string.
static RJP_value* irjp_add_value_to_object(RJP_parse_state* state, const char* key, RJP_index keylen){
	//initialised so the emptiness check below never reads an indeterminate value
	RJP_index newlen = 0;
	char* newkey = irjp_convert_string(key, keylen, &newlen);

	//conversion can fail (irjp_init_value checks for the same NULL return)
	if(!newkey)
		return NULL;

	if(!newlen){ //cannot have empty key
		rjp_free(newkey);
		return NULL;
	}

	//ownership of newkey passes to the new member
	state->lastadded = rjp_new_member_steal_key(state->curr, newkey, newlen);
	return state->lastadded;
}
|
|
|
|
|
|
|
|
static RJP_lex_category irjp_convert_comment(_Bool allow_comments){
|
|
if(allow_comments)
|
|
return rjp_lex_spaces;
|
|
return rjp_lex_invalid;
|
|
}
|
|
|
|
//Prepare a parse state for a new run: position counters start at 1, the
//target stack is created, the lexer is pointed at 'str' and an empty root
//value is allocated which also serves as the initial curr/lastadded.
//The allow_* flags and the rest of the lex state are not touched here.
static void irjp_init_parse_state(RJP_parse_state* state, const char* str){
	state->row = 1;
	state->column = 1;

	irjp_init_parse_stack(&state->target_stack);
	state->lexstate.str = (char*)str;

	RJP_value* root = rjp_calloc(1, sizeof(RJP_value));
	state->lastadded = root;
	state->curr = root;
	state->root = root;
}
|
|
//Release the helper resources of a parse state (target stack and lexer
//state). The root value and the state struct itself are left alone.
static void irjp_delete_parse_state(RJP_parse_state* state){
	RJP_parse_stack* targets = &state->target_stack;
	RJP_lex_state* lexer = &state->lexstate;

	irjp_delete_parse_stack(targets);
	irjp_delete_lex_state(lexer);
}
|
|
//Like irjp_delete_parse_state, but also frees the (partially built) root
//value and clears state->root. Used on the failure paths.
static void irjp_delete_parse_state_no_preserve_root(RJP_parse_state* state){
	RJP_value* discarded;

	irjp_delete_parse_state(state);

	discarded = state->root;
	state->root = NULL;
	rjp_free_value(discarded);
}
|
|
|
|
//Leave the container that was just closed: drop its target from the stack
//and make its parent the current container again.
static void irjp_parse_leave_container(RJP_parse_state* state){
	irjp_parse_stack_pop(&state->target_stack);
	state->curr = state->curr->parent;
}

//Consume a member key token: the target becomes "expect colon" and a new
//member named after the current token is added to the current object.
static int irjp_parse_member_key(RJP_lex_category cat, RJP_parse_state* state){
	if(cat != rjp_lex_string)
		return RJP_PARSE_STATUS_MISSING_KEY;

	irjp_parse_stack_set(&state->target_stack, rjp_parse_key_colon);
	if(!irjp_add_value_to_object(state, state->lexstate.str+state->lexstate.offset, state->lexstate.length))
		return RJP_PARSE_STATUS_MISSING_KEY;
	return RJP_PARSE_STATUS_SUC;
}

//Consume an array element token. The target is switched to "expect comma"
//before the element is created so that a nested container is pushed on top
//of the already-updated entry.
static int irjp_parse_array_element(RJP_lex_category cat, RJP_parse_state* state){
	irjp_parse_stack_set(&state->target_stack, rjp_parse_arr_comma);
	if(!irjp_add_value_to_array(cat, state))
		return RJP_PARSE_STATUS_MISSING_VALUE;
	return RJP_PARSE_STATUS_SUC;
}

//Advance the parser by one token.
//  cat:   category of the token the lexer just produced
//  state: parse state to update
//Returns RJP_PARSE_STATUS_SUC when the token is acceptable in the current
//context, otherwise the RJP_parse_status describing what was expected.
static int irjp_parse_handle_lexcat(RJP_lex_category cat, RJP_parse_state* state){
	RJP_parse_stack* targets = &state->target_stack;

	//comments either vanish (treated as spaces) or are rejected, depending on the flag
	if(cat == rjp_lex_line_comment || cat == rjp_lex_block_comment)
		cat = irjp_convert_comment(state->allow_comments);

	if(cat == rjp_lex_spaces)
		return RJP_PARSE_STATUS_SUC;
	if(cat == rjp_lex_invalid)
		return RJP_PARSE_STATUS_INVALID;
	if(cat == rjp_lex_newlines){
		//NOTE(review): 'row' is reset and 'column' is bumped here; rjp_parse later
		//reports them swapped, so the fields hold the opposite of their names.
		state->row = 1;
		++(state->column);
		return RJP_PARSE_STATUS_SUC;
	}

	switch(irjp_parse_stack_current(targets)){

	case rjp_parse_start:
		//mark the root as done first; a container token pushes its own target on top
		irjp_parse_stack_set(targets, rjp_parse_end);
		if(irjp_init_value(state->root, cat, state))
			return RJP_PARSE_STATUS_NO_ROOT_VALUE;
		return RJP_PARSE_STATUS_SUC;

	case rjp_parse_first_mem_key:
		if(cat == rjp_lex_cbrace){
			irjp_parse_leave_container(state);
			return RJP_PARSE_STATUS_SUC;
		}
		return irjp_parse_member_key(cat, state);

	case rjp_parse_mem_key:
		return irjp_parse_member_key(cat, state);

	case rjp_parse_arr_first_value:
		if(cat == rjp_lex_cbracket){
			irjp_parse_leave_container(state);
			return RJP_PARSE_STATUS_SUC;
		}
		return irjp_parse_array_element(cat, state);

	case rjp_parse_arr_value:
		return irjp_parse_array_element(cat, state);

	case rjp_parse_key_colon:
		if(cat != rjp_lex_colon)
			return RJP_PARSE_STATUS_MISSING_COLON;
		irjp_parse_stack_set(targets, rjp_parse_obj_value);
		return RJP_PARSE_STATUS_SUC;

	case rjp_parse_obj_value:
		//the member was created when its key was read; now give it a value
		irjp_parse_stack_set(targets, rjp_parse_obj_comma);
		if(irjp_init_value(state->lastadded, cat, state))
			return RJP_PARSE_STATUS_MISSING_VALUE;
		return RJP_PARSE_STATUS_SUC;

	case rjp_parse_obj_comma:
		if(cat == rjp_lex_cbrace){
			irjp_parse_leave_container(state);
			return RJP_PARSE_STATUS_SUC;
		}
		if(cat != rjp_lex_comma)
			return RJP_PARSE_STATUS_MISSING_COMMA;
		//with trailing commas allowed, a closing brace may follow the comma
		irjp_parse_stack_set(targets, state->allow_trail_comma ? rjp_parse_first_mem_key : rjp_parse_mem_key);
		return RJP_PARSE_STATUS_SUC;

	case rjp_parse_arr_comma:
		if(cat == rjp_lex_cbracket){
			irjp_parse_leave_container(state);
			return RJP_PARSE_STATUS_SUC;
		}
		if(cat != rjp_lex_comma)
			return RJP_PARSE_STATUS_MISSING_COMMA;
		//with trailing commas allowed, a closing bracket may follow the comma
		irjp_parse_stack_set(targets, state->allow_trail_comma ? rjp_parse_arr_first_value : rjp_parse_arr_value);
		return RJP_PARSE_STATUS_SUC;

	case rjp_parse_end:
		//root already complete: a token starting on a non-NUL character is excess input
		if(state->lexstate.str[state->lexstate.offset] != 0)
			return RJP_PARSE_STATUS_EXCESS_DATA;
		return RJP_PARSE_STATUS_SUC;

	default:
		break;
	}
	return RJP_PARSE_STATUS_SUC;
}
|
|
//Handle the final token returned by the lexer. rjp_lex_end is a nonaccepting state to break the
|
|
//parse loop. it is a successful state though as it just indicates end of input.
|
|
static int irjp_handle_final_parse_token(RJP_parse_state* state, RJP_lex_category cat){
|
|
if(state->target_stack.position != 0)
|
|
return RJP_PARSE_STATUS_MISSING_CLOSE_BRACE;
|
|
if(cat == rjp_lex_end)
|
|
return RJP_PARSE_STATUS_SUC;
|
|
return RJP_PARSE_STATUS_INVALID;
|
|
}
|
|
|
|
//Basic parse loop
|
|
static int irjp_parse(RJP_parse_state* state){
|
|
RJP_lex_category cat;
|
|
RJP_parse_status status;
|
|
for(cat = irjp_lex(&state->lexstate);cat & rjp_lex_accept;cat = irjp_lex(&state->lexstate),state->row += state->lexstate.length){
|
|
if((status = irjp_parse_handle_lexcat(cat, state)) != RJP_PARSE_STATUS_SUC)
|
|
return status;
|
|
}
|
|
return irjp_handle_final_parse_token(state, cat);
|
|
}
|
|
|
|
//Callback parse loop
|
|
static int irjp_parse_cback(RJP_parse_state* state, RJP_parse_callback* cback){
|
|
RJP_lex_category cat;
|
|
RJP_parse_status status;
|
|
for(cat = irjp_lex_cback(&state->lexstate, cback);cat & rjp_lex_accept;cat = irjp_lex_cback(&state->lexstate, cback),state->row += state->lexstate.length){
|
|
if((status = irjp_parse_handle_lexcat(cat, state)) != RJP_PARSE_STATUS_SUC)
|
|
return status;
|
|
}
|
|
return irjp_handle_final_parse_token(state, cat);
|
|
}
|
|
|
|
char* rjp_parse_error_to_string(const RJP_parse_error* err){
|
|
const RJP_parse_state* state = (const RJP_parse_state*)err->parsestate;
|
|
RJP_parse_status status = err->errcode;
|
|
char* buffer = NULL;
|
|
const char* format = NULL;
|
|
switch(status){
|
|
case RJP_PARSE_STATUS_MISSING_VALUE:
|
|
format = "Expected value before '%.*s'";
|
|
buffer = rjp_alloc(snprintf(NULL, 0, format, (int)state->lexstate.length, (state->lexstate.str + state->lexstate.offset)) + 1);
|
|
sprintf(buffer, format, (int)state->lexstate.length, (state->lexstate.str + state->lexstate.offset));
|
|
break;
|
|
case RJP_PARSE_STATUS_MISSING_COMMA:
|
|
format = "Expected comma before '%.*s'";
|
|
buffer = rjp_alloc(snprintf(NULL, 0, format, (int)state->lexstate.length, (state->lexstate.str + state->lexstate.offset)) + 1);
|
|
sprintf(buffer, format, (int)state->lexstate.length, (state->lexstate.str + state->lexstate.offset));
|
|
break;
|
|
case RJP_PARSE_STATUS_INVALID:
|
|
format = "Invalid lex token '%.*s'";
|
|
buffer = rjp_alloc(snprintf(NULL, 0, format, (int)state->lexstate.length, (state->lexstate.str + state->lexstate.offset)) + 1);
|
|
sprintf(buffer, format, (int)state->lexstate.length, (state->lexstate.str + state->lexstate.offset));
|
|
break;
|
|
case RJP_PARSE_STATUS_NO_ROOT_VALUE:
|
|
format = "Missing root JSON value";
|
|
buffer = rjp_alloc(snprintf(NULL, 0, "%s", format) + 1);
|
|
sprintf(buffer, "%s", format);
|
|
break;
|
|
case RJP_PARSE_STATUS_MISSING_KEY:
|
|
format = "Expected key before '%.*s'";
|
|
buffer = rjp_alloc(snprintf(NULL, 0, format, (int)state->lexstate.length, (state->lexstate.str + state->lexstate.offset)) + 1);
|
|
sprintf(buffer, format, (int)state->lexstate.length, (state->lexstate.str + state->lexstate.offset));
|
|
break;
|
|
case RJP_PARSE_STATUS_MISSING_COLON:
|
|
format = "Expected colon before '%.*s'";
|
|
buffer = rjp_alloc(snprintf(NULL, 0, format, (int)state->lexstate.length, (state->lexstate.str + state->lexstate.offset)) + 1);
|
|
sprintf(buffer, format, (int)state->lexstate.length, (state->lexstate.str + state->lexstate.offset));
|
|
break;
|
|
case RJP_PARSE_STATUS_EXCESS_DATA:
|
|
format = "Excess data after JSON";
|
|
buffer = rjp_alloc(snprintf(NULL, 0, "%s", format) + 1);
|
|
sprintf(buffer, "%s", format);
|
|
break;
|
|
case RJP_PARSE_STATUS_MISSING_CLOSE_BRACE:
|
|
format = "Missing closing brace";
|
|
buffer = rjp_alloc(snprintf(NULL, 0, "%s", format) + 1);
|
|
sprintf(buffer, "%s", format);
|
|
break;
|
|
default:
|
|
break;
|
|
};
|
|
return buffer;
|
|
}
|
|
//Release the parse state that a failed rjp_parse / rjp_parse_cback stored
//in 'err', including the partially built root value.
//Safe to call with a NULL error or one whose parsestate is NULL, and safe to
//call twice on the same error: the pointer is cleared after it is freed so
//a repeated call cannot double free.
void rjp_delete_parse_error(RJP_parse_error* err){
	if(!err || !err->parsestate)
		return;

	irjp_delete_parse_state_no_preserve_root((RJP_parse_state*)err->parsestate);
	rjp_free(err->parsestate);
	err->parsestate = NULL;
}
|
|
|
|
RJP_value* rjp_simple_parse(const char* str){
|
|
return rjp_parse(str, RJP_PARSE_NONE, NULL);
|
|
}
|
|
|
|
//Parse a JSON document held in a string.
//  str:   input text handed to the lexer
//  flags: bitmask; RJP_PARSE_ALLOW_COMMENTS and RJP_PARSE_ALLOW_TRAILING_COMMA are honoured
//  err:   optional; on failure receives the error code, position and the
//         parse state (release it with rjp_delete_parse_error). Not written on success.
//Returns the root value on success, NULL on failure.
//NOTE(review): the result of rjp_calloc is used without a NULL check.
RJP_value* rjp_parse(const char* str, int flags, RJP_parse_error* err){
	RJP_parse_state* state = rjp_calloc(sizeof(RJP_parse_state), 1);
	state->allow_comments = ((flags & RJP_PARSE_ALLOW_COMMENTS) != 0);
	state->allow_trail_comma = ((flags & RJP_PARSE_ALLOW_TRAILING_COMMA) != 0);

	irjp_init_parse_state(state, str);
	irjp_init_lex_state(&state->lexstate);

	const int status = irjp_parse(state);

	if(status != RJP_PARSE_STATUS_SUC){
		if(!err){
			//nobody wants the details: tear everything down now
			irjp_delete_parse_state_no_preserve_root(state);
			rjp_free(state);
			return NULL;
		}
		//hand the state to the caller; the state's row/column fields are used
		//swapped throughout this file, hence the crossed assignment
		err->parsestate = state;
		err->errcode = status;
		err->row = state->column;
		err->column = state->row;
		return NULL;
	}

	//success: keep the root, drop everything else
	RJP_value* root = state->root;
	irjp_delete_parse_state(state);
	rjp_free(state);
	return root;
}
|
|
|
|
//Callback based parse. Runs identical to normal parsing except sets up callback
|
|
//lex state and calls callback lex function
|
|
RJP_value* rjp_parse_cback(int flags, RJP_parse_callback* cback, RJP_parse_error* err){
|
|
RJP_parse_state* state = rjp_calloc(sizeof(RJP_parse_state), 1);
|
|
state->allow_comments = (flags & RJP_PARSE_ALLOW_COMMENTS);
|
|
state->allow_trail_comma = (flags & RJP_PARSE_ALLOW_TRAILING_COMMA);
|
|
|
|
irjp_init_parse_state(state, NULL);
|
|
irjp_init_lex_cback_state(&state->lexstate);
|
|
int status = irjp_parse_cback(state, cback);
|
|
if(status == RJP_PARSE_STATUS_SUC){
|
|
irjp_delete_parse_state(state);
|
|
RJP_value* root = state->root;
|
|
rjp_free(state);
|
|
return root;
|
|
}else{
|
|
if(err){
|
|
err->parsestate = state;
|
|
err->errcode = status;
|
|
err->row = state->column;
|
|
err->column = state->row;
|
|
}else{
|
|
irjp_delete_parse_state_no_preserve_root(state);
|
|
rjp_free(state);
|
|
}
|
|
return NULL;
|
|
}
|
|
}
|