diff --git a/CMakeLists.txt b/CMakeLists.txt index 100c786..650efde 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,8 +12,6 @@ set(RJP_LIBFLAGS "-lrjp") option(ENABLE_DIAGNOSTICS "Print diagnostic messages when parsing json to help identify issues" ON) option(ENABLE_SHARED "Build shared library" ON) -option(ENABLE_COMMENTS "Allow C++ style comments in JSON" ON) -option(ENABLE_TRAILING_COMMA "Allow a comma after the last element of an array/object" OFF) option(ENABLE_C++ "Build C++ wrapper library" ON) option(BUILD_TESTS "Build test programs" OFF) option(ENABLE_PROFILING "Enable asan" OFF) @@ -27,12 +25,6 @@ else() add_library(rjp STATIC ${SOURCE_LIST}) endif() -if(ENABLE_COMMENTS) - set(RJP_ENABLE_COMMENTS 1) -endif() -if(ENABLE_TRAILING_COMMA) - set(RJP_ENABLE_TRAILING_COMMA 1) -endif() if(ENABLE_DIAGNOSTICS) set(RJP_ENABLE_DIAGNOSTICS 1) endif() diff --git a/TODO b/TODO index df68a6e..1554b51 100644 --- a/TODO +++ b/TODO @@ -1,2 +1 @@ Implement chunked reading -Runtime setting for comma/comment support diff --git a/include/config.h.in b/include/config.h.in index 987c7ac..2e7e4ff 100644 --- a/include/config.h.in +++ b/include/config.h.in @@ -23,8 +23,6 @@ #define RJP_VERSION_MINOR @rjp_VERSION_MINOR@ #define RJP_VERSION_REVISION @rjp_VERSION_REVISION@ -#cmakedefine RJP_ENABLE_COMMENTS -#cmakedefine RJP_ENABLE_TRAILING_COMMA #cmakedefine RJP_ENABLE_DIAGNOSTICS #endif diff --git a/include/rjp.h b/include/rjp.h index f25fcba..0b0b4c9 100644 --- a/include/rjp.h +++ b/include/rjp.h @@ -84,6 +84,11 @@ typedef struct RJP_array_iterator{ RJP_value* current; }RJP_array_iterator; +typedef enum RJP_parse_flag{ + RJP_PARSE_NONE = 0, + RJP_PARSE_ALLOW_COMMENTS = 1, + RJP_PARSE_ALLOW_TRAILING_COMMA = 2 +}RJP_parse_flag; /***************** NON OBJECT OPERATIONS *******************/ @@ -103,7 +108,7 @@ RJP_index rjp_escape_strlen(const char* str); /***************** GENERIC OPERATIONS *******************/ //Convert C string consisting of json data into RJP's format 
-RJP_value* rjp_parse(const char* str); +RJP_value* rjp_parse(const char* str, RJP_parse_flag flags); //RJP_value* rjp_parse_chunked(const char* str, RJP_value* prev_chunk); char* rjp_to_json(const RJP_value* root, int pretty); diff --git a/rjp++/include/rjp_internal.hpp b/rjp++/include/rjp_internal.hpp index ef9e1b3..b9866c6 100644 --- a/rjp++/include/rjp_internal.hpp +++ b/rjp++/include/rjp_internal.hpp @@ -28,8 +28,8 @@ namespace rjp{ string to_json(const value& val, int format = RJP_FORMAT_PRETTY); - value parse_json(const rexy::string_base& str); - value parse_json(const char* str); + value parse_json(const rexy::string_base& str, RJP_parse_flag = RJP_PARSE_NONE); + value parse_json(const char* str, RJP_parse_flag = RJP_PARSE_NONE); namespace detail{ template,std::remove_reference_t>::value> diff --git a/rjp++/src/rjp.cpp b/rjp++/src/rjp.cpp index 9be5e44..27e4e6a 100644 --- a/rjp++/src/rjp.cpp +++ b/rjp++/src/rjp.cpp @@ -26,10 +26,10 @@ namespace rjp{ s.reset(rjp_to_json(val.raw(), format)); return s; } - value parse_json(const rexy::string_base& str){ - return value(rjp_parse(str.get()), true); + value parse_json(const rexy::string_base& str, RJP_parse_flag flags){ + return value(rjp_parse(str.get(), flags), true); } - value parse_json(const char* str){ - return value(rjp_parse(str), true); + value parse_json(const char* str, RJP_parse_flag flags){ + return value(rjp_parse(str, flags), true); } } diff --git a/src/input.c b/src/input.c index 7eda5b8..72dce14 100644 --- a/src/input.c +++ b/src/input.c @@ -88,7 +88,8 @@ typedef enum RJP_lex_category{ rjp_lexcat_scientific, rjp_lexcat_true, rjp_lexcat_false, - rjp_lexcat_null + rjp_lexcat_null, + rjp_lexcat_comment, }RJP_lex_category; typedef enum RJP_yacc_target{ @@ -121,6 +122,8 @@ typedef struct RJP_yacc_state{ RJP_value* lastadded; RJP_lex_state lexstate; int row, column; + const _Bool allow_comments; + const _Bool allow_trail_comma; }RJP_yacc_state; static void irjp_init_yacc_stack(RJP_yacc_stack* s){ @@ 
-296,7 +299,6 @@ static RJP_lex_category irjp_lex(RJP_lex_state* state){ case rjp_lex_string: return irjp_lex_accept(rjp_lexcat_string, state); -#ifdef RJP_ENABLE_COMMENTS //comments case rjp_lex_slash: if(ch == '/') @@ -307,24 +309,21 @@ static RJP_lex_category irjp_lex(RJP_lex_state* state){ state->node = rjp_lex_invalid; break; case rjp_lex_line_comment: - if(ch == '\n' || ch == '\r') - state->node = rjp_lex_newlines; - else if(ch == 0) - return irjp_lex_accept(rjp_lexcat_space, state); + if(ch == '\n' || ch == '\r' || ch == 0) //don't consume this character + return irjp_lex_accept(rjp_lexcat_comment, state); break; - case rjp_lex_block_comment: - if(ch == '*') - state->node = rjp_lex_block_comment_end1; - break; - case rjp_lex_block_comment_end1: - if(ch == '/') - state->node = rjp_lex_block_comment_end2; - else - state->node = rjp_lex_block_comment; - break; - case rjp_lex_block_comment_end2: - return irjp_lex_accept(rjp_lexcat_space, state); -#endif + case rjp_lex_block_comment: + if(ch == '*') + state->node = rjp_lex_block_comment_end1; + break; + case rjp_lex_block_comment_end1: + if(ch == '/') + state->node = rjp_lex_block_comment_end2; + else + state->node = rjp_lex_block_comment; + break; + case rjp_lex_block_comment_end2: + return irjp_lex_accept(rjp_lexcat_comment, state); //true case rjp_lex_t: @@ -494,11 +493,18 @@ static RJP_value* irjp_add_value_to_object(RJP_yacc_state* state, const char* ke return NULL; \ }while(0) +static RJP_lex_category irjp_convert_comment(_Bool allow_comments){ + if(allow_comments) + return rjp_lexcat_space; + return rjp_lexcat_invalid; +} -RJP_value* rjp_parse(const char* str){ - RJP_yacc_state state = {0}; - state.column = 1; - state.row = 1; +RJP_value* rjp_parse(const char* str, RJP_parse_flag flags){ + RJP_yacc_state state = {.column = 1, + .row = 1, + .allow_comments = (flags & RJP_PARSE_ALLOW_COMMENTS), + .allow_trail_comma = (flags & RJP_PARSE_ALLOW_TRAILING_COMMA) + }; irjp_init_yacc_stack(&state.target_stack); 
state.lexstate.str = str; state.root = state.curr = state.lastadded = rjp_calloc(1, sizeof(RJP_value)); @@ -506,6 +512,8 @@ RJP_value* rjp_parse(const char* str){ RJP_lex_category cat; do{ cat = irjp_lex(&state.lexstate); + if(cat == rjp_lexcat_comment) + cat = irjp_convert_comment(state.allow_comments); if(cat == rjp_lexcat_newline){ ++state.column; @@ -524,8 +532,9 @@ RJP_value* rjp_parse(const char* str){ for(;state.lexstate.str[state.lexstate.offset+state.lexstate.length];state.row += state.lexstate.length){ cat = irjp_lex(&state.lexstate); - if(cat == rjp_lexcat_invalid) - irjp_parse_error("Invalid token"); + if(cat == rjp_lexcat_comment) + cat = irjp_convert_comment(state.allow_comments); + if(cat == rjp_lexcat_space) continue; if(cat == rjp_lexcat_newline){ @@ -533,36 +542,11 @@ RJP_value* rjp_parse(const char* str){ ++state.column; continue; } + if(cat == rjp_lexcat_invalid) + irjp_parse_error("Invalid token"); switch(irjp_yacc_stack_current(&state.target_stack)){ -#ifdef RJP_ENABLE_TRAILING_COMMA - case rjp_yacc_first_mem_key: - case rjp_yacc_mem_key: - if(cat == rjp_lexcat_cbrace){ - irjp_yacc_stack_pop(&state.target_stack); - state.curr = state.curr->parent; - }else if(cat == rjp_lexcat_string){ - irjp_yacc_stack_set(&state.target_stack, rjp_yacc_key_colon); - if(!irjp_add_value_to_object(&state, state.lexstate.str+state.lexstate.offset, state.lexstate.length)){ - irjp_parse_error("Expected member key"); - } - }else{ - irjp_parse_error("Expected member key"); - } - break; - case rjp_yacc_arr_first_value: - case rjp_yacc_arr_value: - if(cat == rjp_lexcat_cbracket){ - irjp_yacc_stack_pop(&state.target_stack); - state.curr = state.curr->parent; - }else{ - irjp_yacc_stack_set(&state.target_stack, rjp_yacc_arr_comma); - if(!irjp_add_value_to_array(cat, &state)) - irjp_parse_error("Expected value"); - } - break; -#else case rjp_yacc_first_mem_key: if(cat == rjp_lexcat_cbrace){ irjp_yacc_stack_pop(&state.target_stack); @@ -592,7 +576,6 @@ RJP_value* 
rjp_parse(const char* str){ irjp_parse_error("Expected value"); } break; -#endif case rjp_yacc_key_colon: if(cat != rjp_lexcat_colon) @@ -607,7 +590,7 @@ RJP_value* rjp_parse(const char* str){ break; case rjp_yacc_obj_comma: if(cat == rjp_lexcat_comma){ - irjp_yacc_stack_set(&state.target_stack, rjp_yacc_mem_key); + irjp_yacc_stack_set(&state.target_stack, state.allow_trail_comma ? rjp_yacc_first_mem_key : rjp_yacc_mem_key); }else if(cat == rjp_lexcat_cbrace){ irjp_yacc_stack_pop(&state.target_stack); state.curr = state.curr->parent; @@ -618,7 +601,7 @@ RJP_value* rjp_parse(const char* str){ case rjp_yacc_arr_comma: if(cat == rjp_lexcat_comma){ - irjp_yacc_stack_set(&state.target_stack, rjp_yacc_arr_value); + irjp_yacc_stack_set(&state.target_stack, state.allow_trail_comma ? rjp_yacc_arr_first_value : rjp_yacc_arr_value); }else if(cat == rjp_lexcat_cbracket){ irjp_yacc_stack_pop(&state.target_stack); state.curr = state.curr->parent; @@ -639,7 +622,7 @@ RJP_value* rjp_parse(const char* str){ } RJP_value* rjp_parse_chunked(const char* str, RJP_value* prev_chunk){ if(!prev_chunk){ - return rjp_parse(str); + return rjp_parse(str, RJP_PARSE_NONE); } return NULL; } diff --git a/src/test/test.c b/src/test/test.c index 6d28722..040940d 100644 --- a/src/test/test.c +++ b/src/test/test.c @@ -4,9 +4,9 @@ #include -int test(const char* str){ +int test(const char* str, RJP_parse_flag flags){ RJP_value* res; - res = rjp_parse(str); + res = rjp_parse(str, flags); if(res){ fprintf(stderr, "Accepted\n"); } @@ -20,113 +20,112 @@ int test(const char* str){ return failed; } +struct parse_pair{ + const char* str; + RJP_parse_flag flags; +}; + int main(){ - const char* should_pass_strings[] = { - "{}", - "[]", - "\"s\"", - "\"\\n\"", - "\"\\\"\"", - "\"str\\nstr\"", - "\"\\uD83D\\uDE10\"", - "true", - "false", - "null", - "5", - "-5", - "+5", - "5.5", - "-5.5", - "+5.5", - "5.5e6", - "-5.5e6", - "+5.5e6", - "5.5e+6", - "-5.5e+6", - "+5.5e+6", - "5.5e-6", - "-5.5e-6", - "+5.5e-6", - " 
{}", - "\n{}\n", - " { \"key\" \t:\n\n\n5 \n\t\n } ", - " {\t }\n", - "5.5 ", - "{\"key\":5}", - "{\"key\":{}}", - "{\"\\uD83D\\uDE10\":5}", - "{\"😐\":5}", - "{\"key\":{\"key\":5}}", - "{\"key\":{\"key\":5,\"key2\":6}}", - "{\"key\":{\"key\":5},\"key2\":6}", - "[5, 6, 7, 8, 9, \"10\"]", - "[[5,6],[7,8],[9,\"10\"]]", - "{\"arr\":[5,6,6]}", - "[{\"arr\":[5,6,6]}]", - "[{\"arr\":[5,6,6]}, 6]", - "[5,6,6,6,6.6]", -#ifdef RJP_ENABLE_TRAILING_COMMA - "[6,7,]", - "{\"1\":1,\"2\":2,}", - "[6,]", - "{\"1\":1,}", -#endif -#ifdef RJP_ENABLE_COMMENTS - "//comment\n{}", - "{\"key\"://comment\n5}", - "{\"key\"//comment\n:5}", - "{//\"key\":5\n}", - "5 //comment*/", - "{/*\"key\":5*/\"key\":5}", - "[5, /*comment*/6]", -#endif + struct parse_pair should_pass_strings[] = { + {"{}", RJP_PARSE_NONE}, + {"[]", RJP_PARSE_NONE}, + {"\"s\"", RJP_PARSE_NONE}, + {"\"\\n\"", RJP_PARSE_NONE}, + {"\"\\\"\"", RJP_PARSE_NONE}, + {"\"str\\nstr\"", RJP_PARSE_NONE}, + {"\"\\uD83D\\uDE10\"", RJP_PARSE_NONE}, + {"true", RJP_PARSE_NONE}, + {"false", RJP_PARSE_NONE}, + {"null", RJP_PARSE_NONE}, + {"5", RJP_PARSE_NONE}, + {"-5", RJP_PARSE_NONE}, + {"+5", RJP_PARSE_NONE}, + {"5.5", RJP_PARSE_NONE}, + {"-5.5", RJP_PARSE_NONE}, + {"+5.5", RJP_PARSE_NONE}, + {"5.5e6", RJP_PARSE_NONE}, + {"-5.5e6", RJP_PARSE_NONE}, + {"+5.5e6", RJP_PARSE_NONE}, + {"5.5e+6", RJP_PARSE_NONE}, + {"-5.5e+6", RJP_PARSE_NONE}, + {"+5.5e+6", RJP_PARSE_NONE}, + {"5.5e-6", RJP_PARSE_NONE}, + {"-5.5e-6", RJP_PARSE_NONE}, + {"+5.5e-6", RJP_PARSE_NONE}, + {" {}", RJP_PARSE_NONE}, + {"\n{}\n", RJP_PARSE_NONE}, + {" { \"key\" \t:\n\n\n5 \n\t\n } ", RJP_PARSE_NONE}, + {" {\t }\n", RJP_PARSE_NONE}, + {"5.5 ", RJP_PARSE_NONE}, + {"{\"key\":5}", RJP_PARSE_NONE}, + {"{\"key\":{}}", RJP_PARSE_NONE}, + {"{\"\\uD83D\\uDE10\":5}", RJP_PARSE_NONE}, + {"{\"😐\":5}", RJP_PARSE_NONE}, + {"{\"key\":{\"key\":5}}", RJP_PARSE_NONE}, + {"{\"key\":{\"key\":5,\"key2\":6}}", RJP_PARSE_NONE}, + {"{\"key\":{\"key\":5},\"key2\":6}", RJP_PARSE_NONE}, + {"[5, 6, 
7, 8, 9, \"10\"]", RJP_PARSE_NONE}, + {"[[5,6],[7,8],[9,\"10\"]]", RJP_PARSE_NONE}, + {"{\"arr\":[5,6,6]}", RJP_PARSE_NONE}, + {"[{\"arr\":[5,6,6]}]", RJP_PARSE_NONE}, + {"[{\"arr\":[5,6,6]}, 6]", RJP_PARSE_NONE}, + {"[5,6,6,6,6.6]", RJP_PARSE_NONE}, + {"[6,7,]", RJP_PARSE_ALLOW_TRAILING_COMMA}, + {"{\"1\":1,\"2\":2,}", RJP_PARSE_ALLOW_TRAILING_COMMA}, + {"[6,]", RJP_PARSE_ALLOW_TRAILING_COMMA}, + {"{\"1\":1,}", RJP_PARSE_ALLOW_TRAILING_COMMA}, + {"//comment\n{}", RJP_PARSE_ALLOW_COMMENTS}, + {"{\"key\"://comment\n5}", RJP_PARSE_ALLOW_COMMENTS}, + {"{\"key\"//comment\n:5}", RJP_PARSE_ALLOW_COMMENTS}, + {"{}//comment", RJP_PARSE_ALLOW_COMMENTS}, + {"{//\"key\":5\n}", RJP_PARSE_ALLOW_COMMENTS}, + {"5 //comment*/", RJP_PARSE_ALLOW_COMMENTS}, + {"{/*\"key\":5*/\"key\":5}", RJP_PARSE_ALLOW_COMMENTS}, + {"[5, /*comment*/6]", RJP_PARSE_ALLOW_COMMENTS}, }; const int should_pass_cnt = sizeof(should_pass_strings)/sizeof(should_pass_strings[0]); - const char* should_fail_strings[] = { - "{", - "}", - "[", - "]", - "6.", - "6.6e", - "6.6e+", - "{6}", - "[\"key\":5]", - "\"string\n\"", - "[3 4]", - "\"\\uD83D\\uDE1\"", - "\"\\uD83D\\uDE1Q\"", - "\"\\uD83\\uDE10\"", - "\"\\uF83D\\uDE10\"", - "\"\\uU83D\\uDE10\"", - "{\"key\":1 \"key2\":2}", - "{\"key\" 1}", - "6, 7", - "[,]", - "{,}", - "[1, 2],", - "{\"key\nkey\":5}", - "{\"key\":\"key\n\"}", -#ifndef RJP_ENABLE_TRAILING_COMMA - "[6,7,]", - "{\"1\":1,\"2\":2,}", - "[6,]", - "{\"1\":1,}", -#endif - "{//comment\"key\":\n5}", - "{/*\"key\":*/5}", - "[5, /*6*/, 7]", - "{/*comment}", - "{//comment}", -#ifndef RJP_ENABLE_COMMENTS - "//comment\n{}", - "{\"key\"://comment\n5}", - "{\"key\"//comment\n:5}", - "{//\"key\":5\n}", - "5 //comment*/", - "{/*\"key\":5*/\"key\":5}", - "[5, /*comment*/6]", -#endif + struct parse_pair should_fail_strings[] = { + {"//comment\n{}", RJP_PARSE_NONE}, + {"{", RJP_PARSE_NONE}, + {"}", RJP_PARSE_NONE}, + {"[", RJP_PARSE_NONE}, + {"]", RJP_PARSE_NONE}, + {"6.", RJP_PARSE_NONE}, + {"6.6e", 
RJP_PARSE_NONE}, + {"6.6e+", RJP_PARSE_NONE}, + {"{6}", RJP_PARSE_NONE}, + {"[\"key\":5]", RJP_PARSE_NONE}, + {"\"string\n\"", RJP_PARSE_NONE}, + {"[3 4]", RJP_PARSE_NONE}, + {"\"\\uD83D\\uDE1\"", RJP_PARSE_NONE}, + {"\"\\uD83D\\uDE1Q\"", RJP_PARSE_NONE}, + {"\"\\uD83\\uDE10\"", RJP_PARSE_NONE}, + {"\"\\uF83D\\uDE10\"", RJP_PARSE_NONE}, + {"\"\\uU83D\\uDE10\"", RJP_PARSE_NONE}, + {"{\"key\":1 \"key2\":2}", RJP_PARSE_NONE}, + {"{\"key\" 1}", RJP_PARSE_NONE}, + {"6, 7", RJP_PARSE_NONE}, + {"[,]", RJP_PARSE_NONE}, + {"{,}", RJP_PARSE_NONE}, + {"[1, 2],", RJP_PARSE_NONE}, + {"{\"key\nkey\":5}", RJP_PARSE_NONE}, + {"{\"key\":\"key\n\"}", RJP_PARSE_NONE}, + {"[6,7,]", RJP_PARSE_NONE}, + {"{\"1\":1,\"2\":2,}", RJP_PARSE_NONE}, + {"[6,]", RJP_PARSE_NONE}, + {"{\"1\":1,}", RJP_PARSE_NONE}, + {"{//comment\"key\":\n5}", RJP_PARSE_NONE}, + {"{/*\"key\":*/5}", RJP_PARSE_NONE}, + {"[5, /*6*/, 7]", RJP_PARSE_NONE}, + {"{/*comment}", RJP_PARSE_NONE}, + {"{//comment}", RJP_PARSE_NONE}, + {"{\"key\"://comment\n5}", RJP_PARSE_NONE}, + {"{\"key\"//comment\n:5}", RJP_PARSE_NONE}, + {"{}//comment", RJP_PARSE_NONE}, + {"{//\"key\":5\n}", RJP_PARSE_NONE}, + {"5 //comment*/", RJP_PARSE_NONE}, + {"{/*\"key\":5*/\"key\":5}", RJP_PARSE_NONE}, + {"[5, /*comment*/6]", RJP_PARSE_NONE}, }; const int should_fail_cnt = sizeof(should_fail_strings)/sizeof(should_fail_strings[0]); const int total_tests = should_pass_cnt + should_fail_cnt; @@ -136,20 +135,20 @@ int main(){ fprintf(stderr, "Running %d tests that should pass...\n", should_pass_cnt); for(unsigned i = 0;i < sizeof(should_pass_strings)/sizeof(should_pass_strings[0]);++i){ fprintf(stderr, "%8d) ", i+1); - if(!test(should_pass_strings[i])){ + if(!test(should_pass_strings[i].str, should_pass_strings[i].flags)){ ++passed; }else{ - fprintf(stderr, "%13s%s\n", "", should_pass_strings[i]); + fprintf(stderr, "%13s%s\n", "", should_pass_strings[i].str); } } fprintf(stderr, "\n"); printf("Running %d tests 
that should fail...\n", should_fail_cnt); for(unsigned i = 0;i < sizeof(should_fail_strings)/sizeof(should_fail_strings[0]);++i){ fprintf(stderr, "%8d) ", i+1); - if(test(should_fail_strings[i])){ + if(test(should_fail_strings[i].str, should_fail_strings[i].flags)){ ++passed; }else{ - fprintf(stderr, "%13s%s\n", "", should_fail_strings[i]); + fprintf(stderr, "%13s%s\n", "", should_fail_strings[i].str); } } fprintf(stderr, "\nResults: %d/%d tests passed\n", passed, total_tests);