Separated lexing logic further. Added file for lexical analysis functions
This commit is contained in:
parent
21e4583827
commit
8e454b3d5a
@ -17,7 +17,7 @@ option(BUILD_TESTS "Build test programs" OFF)
|
|||||||
option(ENABLE_PROFILING "Enable asan" OFF)
|
option(ENABLE_PROFILING "Enable asan" OFF)
|
||||||
mark_as_advanced(ENABLE_PROFILING)
|
mark_as_advanced(ENABLE_PROFILING)
|
||||||
|
|
||||||
set(SOURCE_LIST "src/rjp_ordered_object.c" "src/rjp_unordered_object.c" "src/input.c" "src/output.c" "src/rjp_array.c" "src/rjp.c" "src/rjp_object.c" "src/rjp_string.c" "src/tree.c")
|
set(SOURCE_LIST "src/rjp_lex.c" "src/rjp_ordered_object.c" "src/rjp_unordered_object.c" "src/rjp_parse.c" "src/output.c" "src/rjp_array.c" "src/rjp.c" "src/rjp_object.c" "src/rjp_string.c" "src/tree.c")
|
||||||
if(ENABLE_SHARED)
|
if(ENABLE_SHARED)
|
||||||
add_library(rjp SHARED ${SOURCE_LIST})
|
add_library(rjp SHARED ${SOURCE_LIST})
|
||||||
set_target_properties(rjp PROPERTIES SOVERSION "${rjp_VERSION_MAJOR}.${rjp_VERSION_MINOR}.${rjp_VERSION_REVISION}")
|
set_target_properties(rjp PROPERTIES SOVERSION "${rjp_VERSION_MAJOR}.${rjp_VERSION_MINOR}.${rjp_VERSION_REVISION}")
|
||||||
|
|||||||
59
include/rjp_lex.h
Normal file
59
include/rjp_lex.h
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
#ifndef RJP_LEX_H
|
||||||
|
#define RJP_LEX_H
|
||||||
|
|
||||||
|
#include "rjp.h"
|
||||||
|
|
||||||
|
#define rjp_lex_accept 1
|
||||||
|
/*
 * Nodes of the lexer state machine. The same values double as the token
 * category reported by irjp_lex() once no further transition is possible.
 *
 * NOTE(review): the explicit numbering looks like it reserves the low bit
 * (see rjp_lex_accept) to mark accepting nodes -- confirm before renumbering.
 */
typedef enum RJP_lex_category{
	rjp_lex_start               = 0,    /* initial node, nothing consumed yet */

	/* structural characters */
	rjp_lex_obracket            = 3,    /* '[' */
	rjp_lex_obrace              = 5,    /* '{' */
	rjp_lex_cbracket            = 7,    /* ']' */
	rjp_lex_cbrace              = 9,    /* '}' */

	rjp_lex_spaces              = 11,   /* run of ' ', '\t', '\v', '\f' */
	rjp_lex_quote               = 12,   /* inside a string, closing '"' not yet seen */

	/* keyword prefixes and complete keywords */
	rjp_lex_t                   = 14,
	rjp_lex_tr                  = 16,
	rjp_lex_tru                 = 18,
	rjp_lex_true                = 19,
	rjp_lex_f                   = 20,
	rjp_lex_fa                  = 22,
	rjp_lex_fal                 = 24,
	rjp_lex_fals                = 26,
	rjp_lex_false               = 27,
	rjp_lex_n                   = 28,
	rjp_lex_nu                  = 30,
	rjp_lex_nul                 = 32,
	rjp_lex_null                = 33,

	rjp_lex_escaped             = 34,   /* inside a string, just after a backslash */
	rjp_lex_string              = 35,   /* complete string, closing '"' consumed */
	rjp_lex_comma               = 37,   /* ',' */
	rjp_lex_colon               = 39,   /* ':' */

	/* numbers */
	rjp_lex_number              = 41,   /* integer digits */
	rjp_lex_decimal             = 42,   /* digits followed by '.' */
	rjp_lex_fnumber             = 43,   /* digits after the '.' */
	rjp_lex_fnum_e              = 44,   /* fractional number followed by 'e'/'E' */
	rjp_lex_sci_num             = 45,   /* exponent digits */

	/* comments */
	rjp_lex_slash               = 46,   /* single '/' */
	rjp_lex_line_comment        = 47,   /* after "//" */

	rjp_lex_signed_number       = 49,   /* leading '+' or '-' */
	rjp_lex_sci_num_signed      = 51,   /* exponent sign consumed */
	rjp_lex_newlines            = 53,   /* '\n' or '\r' */

	rjp_lex_block_comment_start = 54,   /* inside a block comment body */
	rjp_lex_block_comment_end1  = 56,   /* inside a block comment, '*' just seen */
	rjp_lex_block_comment       = 57,   /* complete block comment */

	/* special results */
	rjp_lex_invalid             = 1000, /* no transition for this character */
	rjp_lex_unrecognized_word   = 1002, /* alphanumeric run that is not a keyword/number */
	rjp_lex_end                 = 1004, /* NUL terminator reached from the start node */
}RJP_lex_category;
|
||||||
|
|
||||||
|
typedef struct RJP_lex_state{
|
||||||
|
const char* str;
|
||||||
|
RJP_lex_category node;
|
||||||
|
RJP_index length;
|
||||||
|
RJP_index offset;
|
||||||
|
}RJP_lex_state;
|
||||||
|
|
||||||
|
/*
 * Scan the next token from state->str.
 *
 * Advances state->offset past the previous token (by state->length), then
 * consumes characters until the state machine has no further transition or
 * the NUL terminator is reached. On return state->length holds the number of
 * characters consumed and state->node has been reset to rjp_lex_start.
 *
 * Returns the node the state machine was in when scanning stopped.
 */
RJP_lex_category irjp_lex(RJP_lex_state* state);
|
||||||
|
|
||||||
|
#endif
|
||||||
281
src/rjp_lex.c
Normal file
281
src/rjp_lex.c
Normal file
@ -0,0 +1,281 @@
|
|||||||
|
#include "rjp_lex.h"
|
||||||
|
#include "rjp.h"
|
||||||
|
#include <ctype.h> //isalpha, etc
|
||||||
|
|
||||||
|
/*
 * Finish the current token: rewind the state machine to its start node and
 * hand back the category that was recognized.
 */
static RJP_lex_category irjp_lex_accept(RJP_lex_category val, RJP_lex_state* state){
	const RJP_lex_category accepted = val;

	state->node = rjp_lex_start;
	return accepted;
}
|
||||||
|
/*
 * Returns 1 for horizontal/vertical blank characters (space, tab, form feed,
 * vertical tab) and 0 otherwise. Line breaks are deliberately not included.
 */
static int irjp_is_space(char ch){
	return ch == ' ' || ch == '\t' || ch == '\f' || ch == '\v';
}
|
||||||
|
static inline RJP_lex_category irjp_lex_do_start(char ch){
|
||||||
|
switch(ch){
|
||||||
|
case 0:
|
||||||
|
return rjp_lex_end;
|
||||||
|
case '{':
|
||||||
|
return rjp_lex_obrace;
|
||||||
|
case '}':
|
||||||
|
return rjp_lex_cbrace;
|
||||||
|
case '[':
|
||||||
|
return rjp_lex_obracket;
|
||||||
|
case ']':
|
||||||
|
return rjp_lex_cbracket;
|
||||||
|
case '"':
|
||||||
|
return rjp_lex_quote;
|
||||||
|
case ',':
|
||||||
|
return rjp_lex_comma;
|
||||||
|
case ':':
|
||||||
|
return rjp_lex_colon;
|
||||||
|
case 't':
|
||||||
|
return rjp_lex_t;
|
||||||
|
case 'f':
|
||||||
|
return rjp_lex_f;
|
||||||
|
case 'n':
|
||||||
|
return rjp_lex_n;
|
||||||
|
case '/':
|
||||||
|
return rjp_lex_slash;
|
||||||
|
case '+':
|
||||||
|
case '-':
|
||||||
|
return rjp_lex_signed_number;
|
||||||
|
case '\n':
|
||||||
|
case '\r':
|
||||||
|
return rjp_lex_newlines;
|
||||||
|
case ' ':
|
||||||
|
case '\t':
|
||||||
|
case '\v':
|
||||||
|
case '\f':
|
||||||
|
return rjp_lex_spaces;
|
||||||
|
case '0':
|
||||||
|
case '1':
|
||||||
|
case '2':
|
||||||
|
case '3':
|
||||||
|
case '4':
|
||||||
|
case '5':
|
||||||
|
case '6':
|
||||||
|
case '7':
|
||||||
|
case '8':
|
||||||
|
case '9':
|
||||||
|
return rjp_lex_number;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
};
|
||||||
|
return rjp_lex_invalid;
|
||||||
|
}
|
||||||
|
static inline RJP_lex_category irjp_lex_do_spaces(char ch){
|
||||||
|
if(irjp_is_space(ch))
|
||||||
|
return rjp_lex_spaces;
|
||||||
|
return rjp_lex_invalid;
|
||||||
|
}
|
||||||
|
static inline RJP_lex_category irjp_lex_do_number(char ch){
|
||||||
|
if(isdigit(ch))
|
||||||
|
return rjp_lex_number;
|
||||||
|
if(ch == '.')
|
||||||
|
return rjp_lex_decimal;
|
||||||
|
if(isalpha(ch))
|
||||||
|
return rjp_lex_unrecognized_word;
|
||||||
|
return rjp_lex_invalid;
|
||||||
|
}
|
||||||
|
static inline RJP_lex_category irjp_lex_do_signed_num(char ch){
|
||||||
|
switch(ch){
|
||||||
|
case '-':
|
||||||
|
case '+':
|
||||||
|
return rjp_lex_number;
|
||||||
|
};
|
||||||
|
return irjp_lex_do_number(ch);
|
||||||
|
}
|
||||||
|
static inline RJP_lex_category irjp_lex_do_decimal(char ch){
|
||||||
|
if(isdigit(ch))
|
||||||
|
return rjp_lex_fnumber;
|
||||||
|
return rjp_lex_invalid;
|
||||||
|
}
|
||||||
|
static inline RJP_lex_category irjp_lex_do_fnumber(char ch){
|
||||||
|
if(isdigit(ch))
|
||||||
|
return rjp_lex_fnumber;
|
||||||
|
if(ch == 'e' || ch == 'E')
|
||||||
|
return rjp_lex_fnum_e;
|
||||||
|
if(isalpha(ch))
|
||||||
|
return rjp_lex_unrecognized_word;
|
||||||
|
return rjp_lex_invalid;
|
||||||
|
}
|
||||||
|
static inline RJP_lex_category irjp_lex_do_fnum_e(char ch){
|
||||||
|
if(ch == '-' || ch == '+')
|
||||||
|
return rjp_lex_sci_num_signed;
|
||||||
|
if(isdigit(ch))
|
||||||
|
return rjp_lex_sci_num;
|
||||||
|
return rjp_lex_invalid;
|
||||||
|
}
|
||||||
|
static inline RJP_lex_category irjp_lex_do_sci_num_signed(char ch){
|
||||||
|
if(isdigit(ch))
|
||||||
|
return rjp_lex_sci_num;
|
||||||
|
return rjp_lex_invalid;
|
||||||
|
}
|
||||||
|
static inline RJP_lex_category irjp_lex_do_sci_num(char ch){
|
||||||
|
if(isdigit(ch))
|
||||||
|
return rjp_lex_sci_num;
|
||||||
|
if(isalpha(ch))
|
||||||
|
return rjp_lex_unrecognized_word;
|
||||||
|
return rjp_lex_invalid;
|
||||||
|
}
|
||||||
|
static inline RJP_lex_category irjp_lex_do_quote(char ch){
|
||||||
|
switch(ch){
|
||||||
|
case '\\':
|
||||||
|
return rjp_lex_escaped;
|
||||||
|
case '\n':
|
||||||
|
case '\r':
|
||||||
|
return rjp_lex_invalid;
|
||||||
|
case '"':
|
||||||
|
return rjp_lex_string;
|
||||||
|
};
|
||||||
|
return rjp_lex_quote;
|
||||||
|
}
|
||||||
|
static inline RJP_lex_category irjp_lex_do_slash(char ch){
|
||||||
|
switch(ch){
|
||||||
|
case '/':
|
||||||
|
return rjp_lex_line_comment;
|
||||||
|
case '*':
|
||||||
|
return rjp_lex_block_comment_start;
|
||||||
|
};
|
||||||
|
return rjp_lex_invalid;
|
||||||
|
}
|
||||||
|
static inline RJP_lex_category irjp_lex_do_line_comment(char ch){
|
||||||
|
switch(ch){
|
||||||
|
case '\n':
|
||||||
|
case '\r':
|
||||||
|
case 0:
|
||||||
|
return rjp_lex_invalid;
|
||||||
|
};
|
||||||
|
return rjp_lex_line_comment;
|
||||||
|
}
|
||||||
|
static inline RJP_lex_category irjp_lex_do_block_comment_start(char ch){
|
||||||
|
if(ch == '*')
|
||||||
|
return rjp_lex_block_comment_end1;
|
||||||
|
return rjp_lex_block_comment_start;
|
||||||
|
}
|
||||||
|
static inline RJP_lex_category irjp_lex_do_block_comment_end1(char ch){
|
||||||
|
if(ch == '/')
|
||||||
|
return rjp_lex_block_comment;
|
||||||
|
return rjp_lex_block_comment_start;
|
||||||
|
}
|
||||||
|
/*
 * One step of a literal keyword (true/false/null): advance to `next` only
 * when the expected letter arrives, otherwise the text is an unknown word.
 */
static inline RJP_lex_category irjp_lex_keyword_step(char ch, char expected, RJP_lex_category next){
	return (ch == expected) ? next : rjp_lex_unrecognized_word;
}

/*
 * Core transition function of the lexer state machine.
 *
 * ch:   the character being examined
 * node: the node the machine is currently in
 *
 * Returns the node to move to, or rjp_lex_invalid when `node` has no
 * transition for `ch` (the caller then ends the token without consuming ch).
 */
static RJP_lex_category irjp_lex_char(char ch, RJP_lex_category node){
	switch(node){
	case rjp_lex_start:
		return irjp_lex_do_start(ch);

	//whitespace
	case rjp_lex_spaces:
		return irjp_lex_do_spaces(ch);

	//numbers
	case rjp_lex_signed_number:
		return irjp_lex_do_signed_num(ch);
	case rjp_lex_number:
		return irjp_lex_do_number(ch);
	case rjp_lex_decimal:
		return irjp_lex_do_decimal(ch);
	case rjp_lex_fnumber:
		return irjp_lex_do_fnumber(ch);
	case rjp_lex_fnum_e:
		return irjp_lex_do_fnum_e(ch);
	case rjp_lex_sci_num_signed:
		return irjp_lex_do_sci_num_signed(ch);
	case rjp_lex_sci_num:
		return irjp_lex_do_sci_num(ch);

	//strings
	case rjp_lex_quote:
		return irjp_lex_do_quote(ch);
	case rjp_lex_escaped:
		//whatever follows a backslash is consumed as-is and the string continues
		return rjp_lex_quote;

	//comments
	case rjp_lex_slash:
		return irjp_lex_do_slash(ch);
	case rjp_lex_line_comment:
		return irjp_lex_do_line_comment(ch);
	case rjp_lex_block_comment_start:
		return irjp_lex_do_block_comment_start(ch);
	case rjp_lex_block_comment_end1:
		return irjp_lex_do_block_comment_end1(ch);

	//true
	case rjp_lex_t:
		return irjp_lex_keyword_step(ch, 'r', rjp_lex_tr);
	case rjp_lex_tr:
		return irjp_lex_keyword_step(ch, 'u', rjp_lex_tru);
	case rjp_lex_tru:
		return irjp_lex_keyword_step(ch, 'e', rjp_lex_true);

	//false
	case rjp_lex_f:
		return irjp_lex_keyword_step(ch, 'a', rjp_lex_fa);
	case rjp_lex_fa:
		return irjp_lex_keyword_step(ch, 'l', rjp_lex_fal);
	case rjp_lex_fal:
		return irjp_lex_keyword_step(ch, 's', rjp_lex_fals);
	case rjp_lex_fals:
		return irjp_lex_keyword_step(ch, 'e', rjp_lex_false);

	//null
	case rjp_lex_n:
		return irjp_lex_keyword_step(ch, 'u', rjp_lex_nu);
	case rjp_lex_nu:
		return irjp_lex_keyword_step(ch, 'l', rjp_lex_nul);
	case rjp_lex_nul:
		return irjp_lex_keyword_step(ch, 'l', rjp_lex_null);

	//a finished keyword, or an unknown word, keeps absorbing alphanumerics
	//as an unrecognized word; any other character ends the token.
	//cast: <ctype.h> functions are undefined for negative plain char values
	case rjp_lex_true:
	case rjp_lex_false:
	case rjp_lex_null:
	case rjp_lex_unrecognized_word:
		if(isalnum((unsigned char)ch))
			return rjp_lex_unrecognized_word;
		return rjp_lex_invalid;

	//every remaining node is a complete single-step token
	default:
		return rjp_lex_invalid;
	};
}
|
||||||
|
/*
 * Scan the next token.
 *
 * The previous token is skipped by advancing state->offset by state->length,
 * then characters are fed to the state machine one at a time. Scanning stops
 * when a character has no transition (it is left unconsumed) or after the
 * NUL terminator has been processed (it is not counted in state->length).
 *
 * Returns the node reached when scanning stopped; state->node is reset to
 * rjp_lex_start for the next call.
 */
RJP_lex_category irjp_lex(RJP_lex_state* state){
	const char* cursor;

	state->offset += state->length;
	state->length = 0;
	cursor = state->str + state->offset;

	while(1){
		const char current = *cursor;
		const RJP_lex_category next = irjp_lex_char(current, state->node);

		if(next == rjp_lex_invalid)
			break;
		state->node = next;
		if(current == 0)
			break;
		++cursor;
		++state->length;
	}
	return irjp_lex_accept(state->node, state);
}
|
||||||
@ -23,55 +23,12 @@
|
|||||||
#include "rjp_value.h"
|
#include "rjp_value.h"
|
||||||
#include "rjp_string.h"
|
#include "rjp_string.h"
|
||||||
#include "memory.h"
|
#include "memory.h"
|
||||||
|
#include "rjp_lex.h"
|
||||||
#include <stdlib.h> //strtod, strtol
|
#include <stdlib.h> //strtod, strtol
|
||||||
#include <ctype.h> //isalpha, etc
|
|
||||||
#include <string.h> //memcpy
|
#include <string.h> //memcpy
|
||||||
|
|
||||||
#define RJP_INITIAL_PARSE_DEPTH 16
|
|
||||||
|
|
||||||
#define rjp_lex_accept 1
|
#define RJP_INITIAL_PARSE_DEPTH 16
|
||||||
typedef enum RJP_lex_category{
|
|
||||||
rjp_lex_start = 0,
|
|
||||||
rjp_lex_obracket = 3,
|
|
||||||
rjp_lex_obrace = 5,
|
|
||||||
rjp_lex_cbracket = 7,
|
|
||||||
rjp_lex_cbrace = 9,
|
|
||||||
rjp_lex_spaces = 11,
|
|
||||||
rjp_lex_quote = 12,
|
|
||||||
rjp_lex_t = 14,
|
|
||||||
rjp_lex_tr = 16,
|
|
||||||
rjp_lex_tru = 18,
|
|
||||||
rjp_lex_true = 19,
|
|
||||||
rjp_lex_f = 20,
|
|
||||||
rjp_lex_fa = 22,
|
|
||||||
rjp_lex_fal = 24,
|
|
||||||
rjp_lex_fals = 26,
|
|
||||||
rjp_lex_false = 27,
|
|
||||||
rjp_lex_n = 28,
|
|
||||||
rjp_lex_nu = 30,
|
|
||||||
rjp_lex_nul = 32,
|
|
||||||
rjp_lex_null = 33,
|
|
||||||
rjp_lex_escaped = 34,
|
|
||||||
rjp_lex_string = 35,
|
|
||||||
rjp_lex_comma = 37,
|
|
||||||
rjp_lex_colon = 39,
|
|
||||||
rjp_lex_number = 41,
|
|
||||||
rjp_lex_decimal = 42,
|
|
||||||
rjp_lex_fnumber = 43,
|
|
||||||
rjp_lex_fnum_e = 44,
|
|
||||||
rjp_lex_sci_num = 45,
|
|
||||||
rjp_lex_slash = 46,
|
|
||||||
rjp_lex_line_comment = 47,
|
|
||||||
rjp_lex_signed_number = 49,
|
|
||||||
rjp_lex_sci_num_signed = 51,
|
|
||||||
rjp_lex_newlines = 53,
|
|
||||||
rjp_lex_block_comment_start = 54,
|
|
||||||
rjp_lex_block_comment_end1 = 56,
|
|
||||||
rjp_lex_block_comment = 57,
|
|
||||||
rjp_lex_invalid = 1000,
|
|
||||||
rjp_lex_unrecognized_word = 1002,
|
|
||||||
rjp_lex_end = 1004,
|
|
||||||
}RJP_lex_category;
|
|
||||||
|
|
||||||
typedef enum RJP_yacc_target{
|
typedef enum RJP_yacc_target{
|
||||||
rjp_yacc_end,
|
rjp_yacc_end,
|
||||||
@ -86,12 +43,6 @@ typedef enum RJP_yacc_target{
|
|||||||
rjp_yacc_obj_comma
|
rjp_yacc_obj_comma
|
||||||
}RJP_yacc_target;
|
}RJP_yacc_target;
|
||||||
|
|
||||||
typedef struct RJP_lex_state{
|
|
||||||
const char* str;
|
|
||||||
RJP_lex_category node;
|
|
||||||
RJP_index length;
|
|
||||||
RJP_index offset;
|
|
||||||
}RJP_lex_state;
|
|
||||||
typedef struct RJP_yacc_stack{
|
typedef struct RJP_yacc_stack{
|
||||||
RJP_yacc_target* stack;
|
RJP_yacc_target* stack;
|
||||||
RJP_index position;
|
RJP_index position;
|
||||||
@ -140,208 +91,6 @@ static void irjp_yacc_stack_set(RJP_yacc_stack* s, RJP_yacc_target target){
|
|||||||
s->stack[s->position] = target;
|
s->stack[s->position] = target;
|
||||||
}
|
}
|
||||||
|
|
||||||
static RJP_lex_category irjp_lex_accept(RJP_lex_category val, RJP_lex_state* state){
|
|
||||||
state->node = rjp_lex_start;
|
|
||||||
return val;
|
|
||||||
}
|
|
||||||
static RJP_lex_category irjp_lex_char(char ch, RJP_lex_category node){
|
|
||||||
switch(node){
|
|
||||||
case rjp_lex_start:
|
|
||||||
if(ch == 0)
|
|
||||||
return rjp_lex_end;
|
|
||||||
else if(ch == '{')
|
|
||||||
return rjp_lex_obrace;
|
|
||||||
else if(ch == '}')
|
|
||||||
return rjp_lex_cbrace;
|
|
||||||
else if(ch == '[')
|
|
||||||
return rjp_lex_obracket;
|
|
||||||
else if(ch == ']')
|
|
||||||
return rjp_lex_cbracket;
|
|
||||||
else if(ch == '"')
|
|
||||||
return rjp_lex_quote;
|
|
||||||
else if(ch == ',')
|
|
||||||
return rjp_lex_comma;
|
|
||||||
else if(ch == ':')
|
|
||||||
return rjp_lex_colon;
|
|
||||||
else if(isdigit(ch))
|
|
||||||
return rjp_lex_number;
|
|
||||||
else if(ch == '+' || ch == '-')
|
|
||||||
return rjp_lex_signed_number;
|
|
||||||
else if(ch == '\n' || ch == '\r')
|
|
||||||
return rjp_lex_newlines;
|
|
||||||
else if(isspace(ch))
|
|
||||||
return rjp_lex_spaces;
|
|
||||||
else if(ch == 't')
|
|
||||||
return rjp_lex_t;
|
|
||||||
else if(ch == 'f')
|
|
||||||
return rjp_lex_f;
|
|
||||||
else if(ch == 'n')
|
|
||||||
return rjp_lex_n;
|
|
||||||
else if(ch == '/')
|
|
||||||
return rjp_lex_slash;
|
|
||||||
return rjp_lex_invalid;
|
|
||||||
|
|
||||||
//whitespace
|
|
||||||
case rjp_lex_spaces:
|
|
||||||
if(isspace(ch))
|
|
||||||
break;
|
|
||||||
return rjp_lex_invalid;
|
|
||||||
|
|
||||||
//numbers
|
|
||||||
case rjp_lex_signed_number:
|
|
||||||
if(ch == '-' || ch == '+'){
|
|
||||||
return rjp_lex_number;
|
|
||||||
}else{
|
|
||||||
//fallthrough
|
|
||||||
case rjp_lex_number:
|
|
||||||
if(isdigit(ch))
|
|
||||||
return rjp_lex_number;
|
|
||||||
if(ch == '.')
|
|
||||||
return rjp_lex_decimal;
|
|
||||||
else if(isalpha(ch))
|
|
||||||
return rjp_lex_unrecognized_word;
|
|
||||||
}
|
|
||||||
return rjp_lex_invalid;
|
|
||||||
case rjp_lex_decimal:
|
|
||||||
if(isdigit(ch))
|
|
||||||
return rjp_lex_fnumber;
|
|
||||||
return rjp_lex_invalid;
|
|
||||||
case rjp_lex_fnumber:
|
|
||||||
if(isdigit(ch))
|
|
||||||
break;
|
|
||||||
else if(ch == 'e' || ch == 'E')
|
|
||||||
return rjp_lex_fnum_e;
|
|
||||||
else if(isalpha(ch))
|
|
||||||
return rjp_lex_unrecognized_word;
|
|
||||||
return rjp_lex_invalid;
|
|
||||||
case rjp_lex_fnum_e:
|
|
||||||
if(ch == '-' || ch == '+')
|
|
||||||
return rjp_lex_sci_num_signed;
|
|
||||||
else if(isdigit(ch))
|
|
||||||
return rjp_lex_sci_num;
|
|
||||||
return rjp_lex_invalid;
|
|
||||||
case rjp_lex_sci_num_signed:
|
|
||||||
if(isdigit(ch))
|
|
||||||
return rjp_lex_sci_num;
|
|
||||||
return rjp_lex_unrecognized_word;
|
|
||||||
case rjp_lex_sci_num:
|
|
||||||
if(isdigit(ch))
|
|
||||||
break;
|
|
||||||
else if(isalpha(ch))
|
|
||||||
return rjp_lex_unrecognized_word;
|
|
||||||
return rjp_lex_invalid;
|
|
||||||
|
|
||||||
//strings
|
|
||||||
case rjp_lex_quote:
|
|
||||||
if(ch == '\\')
|
|
||||||
return rjp_lex_escaped;
|
|
||||||
else if(ch == '\n' || ch == '\r')
|
|
||||||
return rjp_lex_invalid;
|
|
||||||
else if(ch == '"')
|
|
||||||
return rjp_lex_string;
|
|
||||||
break;
|
|
||||||
case rjp_lex_escaped:
|
|
||||||
return rjp_lex_quote;
|
|
||||||
case rjp_lex_string:
|
|
||||||
return rjp_lex_invalid;
|
|
||||||
|
|
||||||
//comments
|
|
||||||
case rjp_lex_slash:
|
|
||||||
if(ch == '/')
|
|
||||||
return rjp_lex_line_comment;
|
|
||||||
else if(ch == '*')
|
|
||||||
return rjp_lex_block_comment_start;
|
|
||||||
return rjp_lex_invalid;
|
|
||||||
case rjp_lex_line_comment:
|
|
||||||
if(ch == '\n' || ch == '\r' || ch == 0) //don't consume this character
|
|
||||||
return rjp_lex_invalid;
|
|
||||||
break;
|
|
||||||
case rjp_lex_block_comment_start:
|
|
||||||
if(ch == '*')
|
|
||||||
return rjp_lex_block_comment_end1;
|
|
||||||
break;
|
|
||||||
case rjp_lex_block_comment_end1:
|
|
||||||
if(ch == '/')
|
|
||||||
return rjp_lex_block_comment;
|
|
||||||
return rjp_lex_block_comment_start;
|
|
||||||
|
|
||||||
//true
|
|
||||||
case rjp_lex_t:
|
|
||||||
if(ch != 'r')
|
|
||||||
return rjp_lex_unrecognized_word;
|
|
||||||
return rjp_lex_tr;
|
|
||||||
case rjp_lex_tr:
|
|
||||||
if(ch != 'u')
|
|
||||||
return rjp_lex_unrecognized_word;
|
|
||||||
return rjp_lex_tru;
|
|
||||||
case rjp_lex_tru:
|
|
||||||
if(ch != 'e')
|
|
||||||
return rjp_lex_unrecognized_word;
|
|
||||||
return rjp_lex_true;
|
|
||||||
|
|
||||||
//false
|
|
||||||
case rjp_lex_f:
|
|
||||||
if(ch != 'a')
|
|
||||||
return rjp_lex_unrecognized_word;
|
|
||||||
return rjp_lex_fa;
|
|
||||||
case rjp_lex_fa:
|
|
||||||
if(ch != 'l')
|
|
||||||
return rjp_lex_unrecognized_word;
|
|
||||||
return rjp_lex_fal;
|
|
||||||
case rjp_lex_fal:
|
|
||||||
if(ch != 's')
|
|
||||||
return rjp_lex_unrecognized_word;
|
|
||||||
return rjp_lex_fals;
|
|
||||||
case rjp_lex_fals:
|
|
||||||
if(ch != 'e')
|
|
||||||
return rjp_lex_unrecognized_word;
|
|
||||||
return rjp_lex_false;
|
|
||||||
|
|
||||||
//null
|
|
||||||
case rjp_lex_n:
|
|
||||||
if(ch != 'u')
|
|
||||||
return rjp_lex_unrecognized_word;
|
|
||||||
return rjp_lex_nu;
|
|
||||||
case rjp_lex_nu:
|
|
||||||
if(ch != 'l')
|
|
||||||
return rjp_lex_unrecognized_word;
|
|
||||||
return rjp_lex_nul;
|
|
||||||
case rjp_lex_nul:
|
|
||||||
if(ch != 'l')
|
|
||||||
return rjp_lex_unrecognized_word;
|
|
||||||
return rjp_lex_null;
|
|
||||||
|
|
||||||
case rjp_lex_true:
|
|
||||||
case rjp_lex_false:
|
|
||||||
case rjp_lex_null:
|
|
||||||
if(!isalnum(ch))
|
|
||||||
return rjp_lex_invalid;
|
|
||||||
return rjp_lex_unrecognized_word;
|
|
||||||
|
|
||||||
case rjp_lex_unrecognized_word:
|
|
||||||
if(isalnum(ch))
|
|
||||||
break;
|
|
||||||
return rjp_lex_invalid;
|
|
||||||
//fallthrough
|
|
||||||
default:
|
|
||||||
return rjp_lex_invalid;
|
|
||||||
};
|
|
||||||
return node;
|
|
||||||
}
|
|
||||||
static RJP_lex_category irjp_lex(RJP_lex_state* state){
|
|
||||||
state->offset += state->length;
|
|
||||||
state->length = 0;
|
|
||||||
for(const char* c = state->str+state->offset;1;++c,++state->length){
|
|
||||||
RJP_lex_category cat = irjp_lex_char(*c, state->node);
|
|
||||||
if(cat == rjp_lex_invalid)
|
|
||||||
return irjp_lex_accept(state->node, state);
|
|
||||||
state->node = cat;
|
|
||||||
if(*c == 0)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
return irjp_lex_accept(state->node, state);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int irjp_init_value(RJP_value* newval, RJP_lex_category cat, RJP_yacc_state* state){
|
static int irjp_init_value(RJP_value* newval, RJP_lex_category cat, RJP_yacc_state* state){
|
||||||
RJP_index length = state->lexstate.length;
|
RJP_index length = state->lexstate.length;
|
||||||
RJP_index offset = state->lexstate.offset;
|
RJP_index offset = state->lexstate.offset;
|
||||||
Loading…
x
Reference in New Issue
Block a user