Change comment/excess comma support to runtime behavior

This commit is contained in:
rexy712 2020-03-11 12:55:30 -07:00
parent e947976340
commit dd76bc9253
8 changed files with 160 additions and 184 deletions

View File

@ -12,8 +12,6 @@ set(RJP_LIBFLAGS "-lrjp")
option(ENABLE_DIAGNOSTICS "Print diagnostic messages when parsing json to help identify issues" ON)
option(ENABLE_SHARED "Build shared library" ON)
option(ENABLE_COMMENTS "Allow C++ style comments in JSON" ON)
option(ENABLE_TRAILING_COMMA "Allow a comma after the last element of an array/object" OFF)
option(ENABLE_C++ "Build C++ wrapper library" ON)
option(BUILD_TESTS "Build test programs" OFF)
option(ENABLE_PROFILING "Enable asan" OFF)
@ -27,12 +25,6 @@ else()
add_library(rjp STATIC ${SOURCE_LIST})
endif()
if(ENABLE_COMMENTS)
set(RJP_ENABLE_COMMENTS 1)
endif()
if(ENABLE_TRAILING_COMMA)
set(RJP_ENABLE_TRAILING_COMMA 1)
endif()
if(ENABLE_DIAGNOSTICS)
set(RJP_ENABLE_DIAGNOSTICS 1)
endif()

1
TODO
View File

@ -1,2 +1 @@
Implement chunked reading
Runtime setting for comma/comment support

View File

@ -23,8 +23,6 @@
#define RJP_VERSION_MINOR @rjp_VERSION_MINOR@
#define RJP_VERSION_REVISION @rjp_VERSION_REVISION@
#cmakedefine RJP_ENABLE_COMMENTS
#cmakedefine RJP_ENABLE_TRAILING_COMMA
#cmakedefine RJP_ENABLE_DIAGNOSTICS
#endif

View File

@ -84,6 +84,11 @@ typedef struct RJP_array_iterator{
RJP_value* current;
}RJP_array_iterator;
//Bit flags passed to rjp_parse to switch on optional syntax extensions.
//Each extension occupies its own bit; rjp_parse tests them with bitwise AND.
typedef enum RJP_parse_flag{
	RJP_PARSE_NONE                 = 0,      //no extensions enabled
	RJP_PARSE_ALLOW_COMMENTS       = 1 << 0, //accept comments in the input
	RJP_PARSE_ALLOW_TRAILING_COMMA = 1 << 1  //accept a comma after the last array/object element
}RJP_parse_flag;
/***************** NON OBJECT OPERATIONS *******************/
@ -103,7 +108,7 @@ RJP_index rjp_escape_strlen(const char* str);
/***************** GENERIC OPERATIONS *******************/
//Convert C string consisting of json data into RJP's format
RJP_value* rjp_parse(const char* str);
RJP_value* rjp_parse(const char* str, RJP_parse_flag flags);
//RJP_value* rjp_parse_chunked(const char* str, RJP_value* prev_chunk);
char* rjp_to_json(const RJP_value* root, int pretty);

View File

@ -28,8 +28,8 @@
namespace rjp{
string to_json(const value& val, int format = RJP_FORMAT_PRETTY);
value parse_json(const rexy::string_base& str);
value parse_json(const char* str);
value parse_json(const rexy::string_base& str, RJP_parse_flag = RJP_PARSE_NONE);
value parse_json(const char* str, RJP_parse_flag = RJP_PARSE_NONE);
namespace detail{
template<class To, class From, bool = std::is_same<std::remove_reference_t<To>,std::remove_reference_t<From>>::value>

View File

@ -26,10 +26,10 @@ namespace rjp{
s.reset(rjp_to_json(val.raw(), format));
return s;
}
value parse_json(const rexy::string_base& str){
return value(rjp_parse(str.get()), true);
value parse_json(const rexy::string_base& str, RJP_parse_flag flags){
return value(rjp_parse(str.get(), flags), true);
}
value parse_json(const char* str){
return value(rjp_parse(str), true);
value parse_json(const char* str, RJP_parse_flag flags){
return value(rjp_parse(str, flags), true);
}
}

View File

@ -88,7 +88,8 @@ typedef enum RJP_lex_category{
rjp_lexcat_scientific,
rjp_lexcat_true,
rjp_lexcat_false,
rjp_lexcat_null
rjp_lexcat_null,
rjp_lexcat_comment,
}RJP_lex_category;
typedef enum RJP_yacc_target{
@ -121,6 +122,8 @@ typedef struct RJP_yacc_state{
RJP_value* lastadded;
RJP_lex_state lexstate;
int row, column;
const _Bool allow_comments;
const _Bool allow_trail_comma;
}RJP_yacc_state;
static void irjp_init_yacc_stack(RJP_yacc_stack* s){
@ -296,7 +299,6 @@ static RJP_lex_category irjp_lex(RJP_lex_state* state){
case rjp_lex_string:
return irjp_lex_accept(rjp_lexcat_string, state);
#ifdef RJP_ENABLE_COMMENTS
//comments
case rjp_lex_slash:
if(ch == '/')
@ -307,10 +309,8 @@ static RJP_lex_category irjp_lex(RJP_lex_state* state){
state->node = rjp_lex_invalid;
break;
case rjp_lex_line_comment:
if(ch == '\n' || ch == '\r')
state->node = rjp_lex_newlines;
else if(ch == 0)
return irjp_lex_accept(rjp_lexcat_space, state);
if(ch == '\n' || ch == '\r' || ch == 0) //don't consume this character
return irjp_lex_accept(rjp_lexcat_comment, state);
break;
case rjp_lex_block_comment:
if(ch == '*')
@ -323,8 +323,7 @@ static RJP_lex_category irjp_lex(RJP_lex_state* state){
state->node = rjp_lex_block_comment;
break;
case rjp_lex_block_comment_end2:
return irjp_lex_accept(rjp_lexcat_space, state);
#endif
return irjp_lex_accept(rjp_lexcat_comment, state);
//true
case rjp_lex_t:
@ -494,11 +493,18 @@ static RJP_value* irjp_add_value_to_object(RJP_yacc_state* state, const char* ke
return NULL; \
}while(0)
//Translate a lexed comment token into the category the parser loop acts on:
//a comment counts as whitespace when comments are allowed and as an invalid
//token otherwise.
static RJP_lex_category irjp_convert_comment(_Bool allow_comments){
	return allow_comments ? rjp_lexcat_space : rjp_lexcat_invalid;
}
RJP_value* rjp_parse(const char* str){
RJP_yacc_state state = {0};
state.column = 1;
state.row = 1;
RJP_value* rjp_parse(const char* str, RJP_parse_flag flags){
RJP_yacc_state state = {.column = 1,
.row = 1,
.allow_comments = (flags & RJP_PARSE_ALLOW_COMMENTS),
.allow_trail_comma = (flags & RJP_PARSE_ALLOW_TRAILING_COMMA)
};
irjp_init_yacc_stack(&state.target_stack);
state.lexstate.str = str;
state.root = state.curr = state.lastadded = rjp_calloc(1, sizeof(RJP_value));
@ -506,6 +512,8 @@ RJP_value* rjp_parse(const char* str){
RJP_lex_category cat;
do{
cat = irjp_lex(&state.lexstate);
if(cat == rjp_lexcat_comment)
cat = irjp_convert_comment(state.allow_comments);
if(cat == rjp_lexcat_newline){
++state.column;
@ -524,8 +532,9 @@ RJP_value* rjp_parse(const char* str){
for(;state.lexstate.str[state.lexstate.offset+state.lexstate.length];state.row += state.lexstate.length){
cat = irjp_lex(&state.lexstate);
if(cat == rjp_lexcat_invalid)
irjp_parse_error("Invalid token");
if(cat == rjp_lexcat_comment)
cat = irjp_convert_comment(state.allow_comments);
if(cat == rjp_lexcat_space)
continue;
if(cat == rjp_lexcat_newline){
@ -533,36 +542,11 @@ RJP_value* rjp_parse(const char* str){
++state.column;
continue;
}
if(cat == rjp_lexcat_invalid)
irjp_parse_error("Invalid token");
switch(irjp_yacc_stack_current(&state.target_stack)){
#ifdef RJP_ENABLE_TRAILING_COMMA
case rjp_yacc_first_mem_key:
case rjp_yacc_mem_key:
if(cat == rjp_lexcat_cbrace){
irjp_yacc_stack_pop(&state.target_stack);
state.curr = state.curr->parent;
}else if(cat == rjp_lexcat_string){
irjp_yacc_stack_set(&state.target_stack, rjp_yacc_key_colon);
if(!irjp_add_value_to_object(&state, state.lexstate.str+state.lexstate.offset, state.lexstate.length)){
irjp_parse_error("Expected member key");
}
}else{
irjp_parse_error("Expected member key");
}
break;
case rjp_yacc_arr_first_value:
case rjp_yacc_arr_value:
if(cat == rjp_lexcat_cbracket){
irjp_yacc_stack_pop(&state.target_stack);
state.curr = state.curr->parent;
}else{
irjp_yacc_stack_set(&state.target_stack, rjp_yacc_arr_comma);
if(!irjp_add_value_to_array(cat, &state))
irjp_parse_error("Expected value");
}
break;
#else
case rjp_yacc_first_mem_key:
if(cat == rjp_lexcat_cbrace){
irjp_yacc_stack_pop(&state.target_stack);
@ -592,7 +576,6 @@ RJP_value* rjp_parse(const char* str){
irjp_parse_error("Expected value");
}
break;
#endif
case rjp_yacc_key_colon:
if(cat != rjp_lexcat_colon)
@ -607,7 +590,7 @@ RJP_value* rjp_parse(const char* str){
break;
case rjp_yacc_obj_comma:
if(cat == rjp_lexcat_comma){
irjp_yacc_stack_set(&state.target_stack, rjp_yacc_mem_key);
irjp_yacc_stack_set(&state.target_stack, state.allow_trail_comma ? rjp_yacc_first_mem_key : rjp_yacc_mem_key);
}else if(cat == rjp_lexcat_cbrace){
irjp_yacc_stack_pop(&state.target_stack);
state.curr = state.curr->parent;
@ -618,7 +601,7 @@ RJP_value* rjp_parse(const char* str){
case rjp_yacc_arr_comma:
if(cat == rjp_lexcat_comma){
irjp_yacc_stack_set(&state.target_stack, rjp_yacc_arr_value);
irjp_yacc_stack_set(&state.target_stack, state.allow_trail_comma ? rjp_yacc_arr_first_value : rjp_yacc_arr_value);
}else if(cat == rjp_lexcat_cbracket){
irjp_yacc_stack_pop(&state.target_stack);
state.curr = state.curr->parent;
@ -639,7 +622,7 @@ RJP_value* rjp_parse(const char* str){
}
RJP_value* rjp_parse_chunked(const char* str, RJP_value* prev_chunk){
if(!prev_chunk){
return rjp_parse(str);
return rjp_parse(str, RJP_PARSE_NONE);
}
return NULL;
}

View File

@ -4,9 +4,9 @@
#include <stdio.h>
int test(const char* str){
int test(const char* str, RJP_parse_flag flags){
RJP_value* res;
res = rjp_parse(str);
res = rjp_parse(str, flags);
if(res){
fprintf(stderr, "Accepted\n");
}
@ -20,113 +20,112 @@ int test(const char* str){
return failed;
}
//One parser test case: the input text and the flags to parse it with.
//Field order matters: the test tables initialize entries positionally.
struct parse_pair{
const char* str; //text handed to test(), which forwards it to rjp_parse
RJP_parse_flag flags; //parse flags forwarded together with str
};
int main(){
const char* should_pass_strings[] = {
"{}",
"[]",
"\"s\"",
"\"\\n\"",
"\"\\\"\"",
"\"str\\nstr\"",
"\"\\uD83D\\uDE10\"",
"true",
"false",
"null",
"5",
"-5",
"+5",
"5.5",
"-5.5",
"+5.5",
"5.5e6",
"-5.5e6",
"+5.5e6",
"5.5e+6",
"-5.5e+6",
"+5.5e+6",
"5.5e-6",
"-5.5e-6",
"+5.5e-6",
" {}",
"\n{}\n",
" { \"key\" \t:\n\n\n5 \n\t\n } ",
" {\t }\n",
"5.5 ",
"{\"key\":5}",
"{\"key\":{}}",
"{\"\\uD83D\\uDE10\":5}",
"{\"😐\":5}",
"{\"key\":{\"key\":5}}",
"{\"key\":{\"key\":5,\"key2\":6}}",
"{\"key\":{\"key\":5},\"key2\":6}",
"[5, 6, 7, 8, 9, \"10\"]",
"[[5,6],[7,8],[9,\"10\"]]",
"{\"arr\":[5,6,6]}",
"[{\"arr\":[5,6,6]}]",
"[{\"arr\":[5,6,6]}, 6]",
"[5,6,6,6,6.6]",
#ifdef RJP_ENABLE_TRAILING_COMMA
"[6,7,]",
"{\"1\":1,\"2\":2,}",
"[6,]",
"{\"1\":1,}",
#endif
#ifdef RJP_ENABLE_COMMENTS
"//comment\n{}",
"{\"key\"://comment\n5}",
"{\"key\"//comment\n:5}",
"{//\"key\":5\n}",
"5 //comment*/",
"{/*\"key\":5*/\"key\":5}",
"[5, /*comment*/6]",
#endif
//Inputs that rjp_parse is expected to accept when called with the paired flags
struct parse_pair should_pass_strings[] = {
//bare values, strings and numbers with no extensions enabled
{"{}", RJP_PARSE_NONE},
{"[]", RJP_PARSE_NONE},
{"\"s\"", RJP_PARSE_NONE},
{"\"\\n\"", RJP_PARSE_NONE},
{"\"\\\"\"", RJP_PARSE_NONE},
{"\"str\\nstr\"", RJP_PARSE_NONE},
{"\"\\uD83D\\uDE10\"", RJP_PARSE_NONE},
{"true", RJP_PARSE_NONE},
{"false", RJP_PARSE_NONE},
{"null", RJP_PARSE_NONE},
{"5", RJP_PARSE_NONE},
{"-5", RJP_PARSE_NONE},
{"+5", RJP_PARSE_NONE},
{"5.5", RJP_PARSE_NONE},
{"-5.5", RJP_PARSE_NONE},
{"+5.5", RJP_PARSE_NONE},
{"5.5e6", RJP_PARSE_NONE},
{"-5.5e6", RJP_PARSE_NONE},
{"+5.5e6", RJP_PARSE_NONE},
{"5.5e+6", RJP_PARSE_NONE},
{"-5.5e+6", RJP_PARSE_NONE},
{"+5.5e+6", RJP_PARSE_NONE},
{"5.5e-6", RJP_PARSE_NONE},
{"-5.5e-6", RJP_PARSE_NONE},
{"+5.5e-6", RJP_PARSE_NONE},
//surrounding and embedded whitespace
{" {}", RJP_PARSE_NONE},
{"\n{}\n", RJP_PARSE_NONE},
{" { \"key\" \t:\n\n\n5 \n\t\n } ", RJP_PARSE_NONE},
{" {\t }\n", RJP_PARSE_NONE},
{"5.5 ", RJP_PARSE_NONE},
//objects and arrays, including nesting
{"{\"key\":5}", RJP_PARSE_NONE},
{"{\"key\":{}}", RJP_PARSE_NONE},
{"{\"\\uD83D\\uDE10\":5}", RJP_PARSE_NONE},
{"{\"😐\":5}", RJP_PARSE_NONE},
{"{\"key\":{\"key\":5}}", RJP_PARSE_NONE},
{"{\"key\":{\"key\":5,\"key2\":6}}", RJP_PARSE_NONE},
{"{\"key\":{\"key\":5},\"key2\":6}", RJP_PARSE_NONE},
{"[5, 6, 7, 8, 9, \"10\"]", RJP_PARSE_NONE},
{"[[5,6],[7,8],[9,\"10\"]]", RJP_PARSE_NONE},
{"{\"arr\":[5,6,6]}", RJP_PARSE_NONE},
{"[{\"arr\":[5,6,6]}]", RJP_PARSE_NONE},
{"[{\"arr\":[5,6,6]}, 6]", RJP_PARSE_NONE},
{"[5,6,6,6,6.6]", RJP_PARSE_NONE},
//trailing commas, parsed with RJP_PARSE_ALLOW_TRAILING_COMMA
{"[6,7,]", RJP_PARSE_ALLOW_TRAILING_COMMA},
{"{\"1\":1,\"2\":2,}", RJP_PARSE_ALLOW_TRAILING_COMMA},
{"[6,]", RJP_PARSE_ALLOW_TRAILING_COMMA},
{"{\"1\":1,}", RJP_PARSE_ALLOW_TRAILING_COMMA},
//line and block comments, parsed with RJP_PARSE_ALLOW_COMMENTS
{"//comment\n{}", RJP_PARSE_ALLOW_COMMENTS},
{"{\"key\"://comment\n5}", RJP_PARSE_ALLOW_COMMENTS},
{"{\"key\"//comment\n:5}", RJP_PARSE_ALLOW_COMMENTS},
{"{}//comment", RJP_PARSE_ALLOW_COMMENTS},
{"{//\"key\":5\n}", RJP_PARSE_ALLOW_COMMENTS},
{"5 //comment*/", RJP_PARSE_ALLOW_COMMENTS},
{"{/*\"key\":5*/\"key\":5}", RJP_PARSE_ALLOW_COMMENTS},
{"[5, /*comment*/6]", RJP_PARSE_ALLOW_COMMENTS},
};
const int should_pass_cnt = sizeof(should_pass_strings)/sizeof(should_pass_strings[0]);
const char* should_fail_strings[] = {
"{",
"}",
"[",
"]",
"6.",
"6.6e",
"6.6e+",
"{6}",
"[\"key\":5]",
"\"string\n\"",
"[3 4]",
"\"\\uD83D\\uDE1\"",
"\"\\uD83D\\uDE1Q\"",
"\"\\uD83\\uDE10\"",
"\"\\uF83D\\uDE10\"",
"\"\\uU83D\\uDE10\"",
"{\"key\":1 \"key2\":2}",
"{\"key\" 1}",
"6, 7",
"[,]",
"{,}",
"[1, 2],",
"{\"key\nkey\":5}",
"{\"key\":\"key\n\"}",
#ifndef RJP_ENABLE_TRAILING_COMMA
"[6,7,]",
"{\"1\":1,\"2\":2,}",
"[6,]",
"{\"1\":1,}",
#endif
"{//comment\"key\":\n5}",
"{/*\"key\":*/5}",
"[5, /*6*/, 7]",
"{/*comment}",
"{//comment}",
#ifndef RJP_ENABLE_COMMENTS
"//comment\n{}",
"{\"key\"://comment\n5}",
"{\"key\"//comment\n:5}",
"{//\"key\":5\n}",
"5 //comment*/",
"{/*\"key\":5*/\"key\":5}",
"[5, /*comment*/6]",
#endif
//Inputs that rjp_parse is expected to reject when called with the paired flags.
//Every entry uses RJP_PARSE_NONE, so comments and trailing commas are both
//disabled for the whole table.
struct parse_pair should_fail_strings[] = {
{"//comment\n{}", RJP_PARSE_NONE},
//unbalanced brackets and truncated numbers
{"{", RJP_PARSE_NONE},
{"}", RJP_PARSE_NONE},
{"[", RJP_PARSE_NONE},
{"]", RJP_PARSE_NONE},
{"6.", RJP_PARSE_NONE},
{"6.6e", RJP_PARSE_NONE},
{"6.6e+", RJP_PARSE_NONE},
//structurally malformed objects, arrays and strings
{"{6}", RJP_PARSE_NONE},
{"[\"key\":5]", RJP_PARSE_NONE},
{"\"string\n\"", RJP_PARSE_NONE},
{"[3 4]", RJP_PARSE_NONE},
{"\"\\uD83D\\uDE1\"", RJP_PARSE_NONE},
{"\"\\uD83D\\uDE1Q\"", RJP_PARSE_NONE},
{"\"\\uD83\\uDE10\"", RJP_PARSE_NONE},
{"\"\\uF83D\\uDE10\"", RJP_PARSE_NONE},
{"\"\\uU83D\\uDE10\"", RJP_PARSE_NONE},
{"{\"key\":1 \"key2\":2}", RJP_PARSE_NONE},
{"{\"key\" 1}", RJP_PARSE_NONE},
{"6, 7", RJP_PARSE_NONE},
{"[,]", RJP_PARSE_NONE},
//object holding only a comma (mirrors "[,]" above)
{"{,}", RJP_PARSE_NONE},
{"[1, 2],", RJP_PARSE_NONE},
{"{\"key\nkey\":5}", RJP_PARSE_NONE},
{"{\"key\":\"key\n\"}", RJP_PARSE_NONE},
//trailing commas without RJP_PARSE_ALLOW_TRAILING_COMMA; the same four
//strings appear in the should-pass table with that flag set
{"[6,7,]", RJP_PARSE_NONE},
{"{\"1\":1,\"2\":2,}", RJP_PARSE_NONE},
{"[6,]", RJP_PARSE_NONE},
{"{\"1\":1,}", RJP_PARSE_NONE},
//inputs containing comment markers
{"{//comment\"key\":\n5}", RJP_PARSE_NONE},
{"{/*\"key\":*/5}", RJP_PARSE_NONE},
{"[5, /*6*/, 7]", RJP_PARSE_NONE},
{"{/*comment}", RJP_PARSE_NONE},
{"{//comment}", RJP_PARSE_NONE},
//comment cases that the should-pass table accepts under RJP_PARSE_ALLOW_COMMENTS
{"{\"key\"://comment\n5}", RJP_PARSE_NONE},
{"{\"key\"//comment\n:5}", RJP_PARSE_NONE},
{"{}//comment", RJP_PARSE_NONE},
{"{//\"key\":5\n}", RJP_PARSE_NONE},
{"5 //comment*/", RJP_PARSE_NONE},
{"{/*\"key\":5*/\"key\":5}", RJP_PARSE_NONE},
{"[5, /*comment*/6]", RJP_PARSE_NONE},
};
const int should_fail_cnt = sizeof(should_fail_strings)/sizeof(should_fail_strings[0]);
const int total_tests = should_pass_cnt + should_fail_cnt;
@ -136,20 +135,20 @@ int main(){
fprintf(stderr, "Running %d tests that should pass...\n", should_pass_cnt);
for(unsigned i = 0;i < sizeof(should_pass_strings)/sizeof(should_pass_strings[0]);++i){
fprintf(stderr, "%8d) ", i+1);
if(!test(should_pass_strings[i])){
if(!test(should_pass_strings[i].str, should_pass_strings[i].flags)){
++passed;
}else{
fprintf(stderr, "%13s%s\n", "", should_pass_strings[i]);
fprintf(stderr, "%13s%s\n", "", should_pass_strings[i].str);
}
}
fprintf(stderr, "\n");
printf("Running %d tests that should fail...\n", should_fail_cnt);
for(unsigned i = 0;i < sizeof(should_fail_strings)/sizeof(should_fail_strings[0]);++i){
fprintf(stderr, "%8d) ", i+1);
if(test(should_fail_strings[i])){
if(test(should_fail_strings[i].str, should_fail_strings[i].flags)){
++passed;
}else{
fprintf(stderr, "%13s%s\n", "", should_fail_strings[i]);
fprintf(stderr, "%13s%s\n", "", should_fail_strings[i].str);
}
}
fprintf(stderr, "\nResults: %d/%d tests passed\n", passed, total_tests);