Separate tokenizing logic from loop
This commit is contained in:
parent
e38245261e
commit
21e4583827
441
src/input.c
441
src/input.c
@ -144,264 +144,199 @@ static RJP_lex_category irjp_lex_accept(RJP_lex_category val, RJP_lex_state* sta
|
||||
state->node = rjp_lex_start;
|
||||
return val;
|
||||
}
|
||||
static RJP_lex_category irjp_lex_error(RJP_lex_state* state){
|
||||
return irjp_lex_accept(rjp_lex_invalid, state);
|
||||
//Compute a single transition of the lexer state machine.
//
//ch:   the next input character (0 marks the end of the input string).
//node: the state the lexer is currently in.
//
//Returns the state reached after consuming ch. Returns rjp_lex_invalid when
//node has no transition on ch; the calling loop treats that as "stop here and
//accept the current node" without consuming ch. Returning node itself means
//the character is consumed and the state is unchanged.
static RJP_lex_category irjp_lex_char(char ch, RJP_lex_category node){
	//The <ctype.h> classifiers require a value representable as unsigned char
	//(or EOF). Plain char may be signed, so bytes >= 0x80 must be converted
	//before they are classified.
	const unsigned char uch = (unsigned char)ch;

	switch(node){
	case rjp_lex_start:
		switch(ch){
		case 0:
			return rjp_lex_end;
		case '{':
			return rjp_lex_obrace;
		case '}':
			return rjp_lex_cbrace;
		case '[':
			return rjp_lex_obracket;
		case ']':
			return rjp_lex_cbracket;
		case '"':
			return rjp_lex_quote;
		case ',':
			return rjp_lex_comma;
		case ':':
			return rjp_lex_colon;
		case '+':
		case '-':
			return rjp_lex_signed_number;
		case '\n':
		case '\r':
			//line breaks are tested before isspace so they get their own category
			return rjp_lex_newlines;
		case 't':
			return rjp_lex_t;
		case 'f':
			return rjp_lex_f;
		case 'n':
			return rjp_lex_n;
		case '/':
			return rjp_lex_slash;
		default:
			break;
		}
		if(isdigit(uch))
			return rjp_lex_number;
		if(isspace(uch))
			return rjp_lex_spaces;
		return rjp_lex_invalid;

	//whitespace
	case rjp_lex_spaces:
		if(isspace(uch))
			break;
		return rjp_lex_invalid;

	//numbers
	case rjp_lex_signed_number:
		//NOTE(review): a second sign right after the first is accepted here
		//(e.g. "+-1"); kept from the original, confirm this is intended.
		if(ch == '-' || ch == '+')
			return rjp_lex_number;
		//fallthrough
	case rjp_lex_number:
		if(isdigit(uch))
			return rjp_lex_number;
		if(ch == '.')
			return rjp_lex_decimal;
		if(isalpha(uch))
			return rjp_lex_unrecognized_word;
		return rjp_lex_invalid;
	case rjp_lex_decimal:
		//at least one digit must follow the decimal point
		if(isdigit(uch))
			return rjp_lex_fnumber;
		return rjp_lex_invalid;
	case rjp_lex_fnumber:
		if(isdigit(uch))
			break;
		if(ch == 'e' || ch == 'E')
			return rjp_lex_fnum_e;
		if(isalpha(uch))
			return rjp_lex_unrecognized_word;
		return rjp_lex_invalid;
	case rjp_lex_fnum_e:
		if(ch == '-' || ch == '+')
			return rjp_lex_sci_num_signed;
		if(isdigit(uch))
			return rjp_lex_sci_num;
		return rjp_lex_invalid;
	case rjp_lex_sci_num_signed:
		if(isdigit(uch))
			return rjp_lex_sci_num;
		return rjp_lex_unrecognized_word;
	case rjp_lex_sci_num:
		if(isdigit(uch))
			break;
		if(isalpha(uch))
			return rjp_lex_unrecognized_word;
		return rjp_lex_invalid;

	//strings
	case rjp_lex_quote:
		if(ch == '\\')
			return rjp_lex_escaped;
		if(ch == '\n' || ch == '\r')
			return rjp_lex_invalid;
		if(ch == '"')
			return rjp_lex_string;
		break;
	case rjp_lex_escaped:
		//whatever follows a backslash is consumed and the string continues
		return rjp_lex_quote;
	case rjp_lex_string:
		//closing quote already consumed; nothing may extend the token
		return rjp_lex_invalid;

	//comments
	case rjp_lex_slash:
		if(ch == '/')
			return rjp_lex_line_comment;
		if(ch == '*')
			return rjp_lex_block_comment_start;
		return rjp_lex_invalid;
	case rjp_lex_line_comment:
		if(ch == '\n' || ch == '\r' || ch == 0) //don't consume this character
			return rjp_lex_invalid;
		break;
	case rjp_lex_block_comment_start:
		if(ch == '*')
			return rjp_lex_block_comment_end1;
		break;
	case rjp_lex_block_comment_end1:
		if(ch == '/')
			return rjp_lex_block_comment;
		//a run of '*' keeps the terminator pending, so that a comment
		//ending in "**/" is still closed by the final "*/"
		if(ch == '*')
			break;
		return rjp_lex_block_comment_start;

	//true
	case rjp_lex_t:
		if(ch != 'r')
			return rjp_lex_unrecognized_word;
		return rjp_lex_tr;
	case rjp_lex_tr:
		if(ch != 'u')
			return rjp_lex_unrecognized_word;
		return rjp_lex_tru;
	case rjp_lex_tru:
		if(ch != 'e')
			return rjp_lex_unrecognized_word;
		return rjp_lex_true;

	//false
	case rjp_lex_f:
		if(ch != 'a')
			return rjp_lex_unrecognized_word;
		return rjp_lex_fa;
	case rjp_lex_fa:
		if(ch != 'l')
			return rjp_lex_unrecognized_word;
		return rjp_lex_fal;
	case rjp_lex_fal:
		if(ch != 's')
			return rjp_lex_unrecognized_word;
		return rjp_lex_fals;
	case rjp_lex_fals:
		if(ch != 'e')
			return rjp_lex_unrecognized_word;
		return rjp_lex_false;

	//null
	case rjp_lex_n:
		if(ch != 'u')
			return rjp_lex_unrecognized_word;
		return rjp_lex_nu;
	case rjp_lex_nu:
		if(ch != 'l')
			return rjp_lex_unrecognized_word;
		return rjp_lex_nul;
	case rjp_lex_nul:
		if(ch != 'l')
			return rjp_lex_unrecognized_word;
		return rjp_lex_null;

	//a completed keyword ends at the first non-alphanumeric character;
	//any further letter or digit turns it into an unrecognized word
	case rjp_lex_true:
	case rjp_lex_false:
	case rjp_lex_null:
		if(!isalnum(uch))
			return rjp_lex_invalid;
		return rjp_lex_unrecognized_word;

	case rjp_lex_unrecognized_word:
		if(isalnum(uch))
			break;
		return rjp_lex_invalid;

	//every other state has no outgoing transition
	default:
		return rjp_lex_invalid;
	}
	//reached via break: ch is consumed and the state does not change
	return node;
}
|
||||
static RJP_lex_category irjp_lex(RJP_lex_state* state){
|
||||
state->offset += state->length;
|
||||
state->length = 0;
|
||||
for(const char* c = state->str+state->offset;1;++c,++state->length){
|
||||
char ch = *c;
|
||||
switch(state->node){
|
||||
case rjp_lex_start:
|
||||
if(ch == '{')
|
||||
state->node = rjp_lex_obrace;
|
||||
else if(ch == '}')
|
||||
state->node = rjp_lex_cbrace;
|
||||
else if(ch == '[')
|
||||
state->node = rjp_lex_obracket;
|
||||
else if(ch == ']')
|
||||
state->node = rjp_lex_cbracket;
|
||||
else if(ch == '"')
|
||||
state->node = rjp_lex_quote;
|
||||
else if(ch == ',')
|
||||
state->node = rjp_lex_comma;
|
||||
else if(ch == ':')
|
||||
state->node = rjp_lex_colon;
|
||||
else if(isdigit(ch))
|
||||
state->node = rjp_lex_number;
|
||||
else if(ch == '+' || ch == '-')
|
||||
state->node = rjp_lex_signed_number;
|
||||
else if(ch == '\n' || ch == '\r')
|
||||
state->node = rjp_lex_newlines;
|
||||
else if(isspace(ch))
|
||||
state->node = rjp_lex_spaces;
|
||||
else if(ch == 't')
|
||||
state->node = rjp_lex_t;
|
||||
else if(ch == 'f')
|
||||
state->node = rjp_lex_f;
|
||||
else if(ch == 'n')
|
||||
state->node = rjp_lex_n;
|
||||
else if(ch == '/')
|
||||
state->node = rjp_lex_slash;
|
||||
else if(ch == 0)
|
||||
return irjp_lex_accept(rjp_lex_end, state);
|
||||
else
|
||||
state->node = rjp_lex_invalid;
|
||||
break;
|
||||
|
||||
//punctuation
|
||||
case rjp_lex_obracket:
|
||||
return irjp_lex_accept(rjp_lex_obracket, state);
|
||||
case rjp_lex_obrace:
|
||||
return irjp_lex_accept(rjp_lex_obrace, state);
|
||||
case rjp_lex_cbracket:
|
||||
return irjp_lex_accept(rjp_lex_cbracket, state);
|
||||
case rjp_lex_cbrace:
|
||||
return irjp_lex_accept(rjp_lex_cbrace, state);
|
||||
case rjp_lex_comma:
|
||||
return irjp_lex_accept(rjp_lex_comma, state);
|
||||
case rjp_lex_colon:
|
||||
return irjp_lex_accept(rjp_lex_colon, state);
|
||||
|
||||
//whitespace
|
||||
case rjp_lex_newlines:
|
||||
return irjp_lex_accept(rjp_lex_newlines, state);
|
||||
case rjp_lex_spaces:
|
||||
if(isspace(ch))
|
||||
break;
|
||||
return irjp_lex_accept(rjp_lex_spaces, state);
|
||||
|
||||
//numbers
|
||||
case rjp_lex_signed_number:
|
||||
if(ch == '-' || ch == '+'){
|
||||
state->node = rjp_lex_number;
|
||||
break;
|
||||
}else{
|
||||
//fallthrough
|
||||
case rjp_lex_number:
|
||||
if(isdigit(ch))
|
||||
break;
|
||||
if(ch == '.')
|
||||
state->node = rjp_lex_decimal;
|
||||
else if(isalpha(ch))
|
||||
state->node = rjp_lex_unrecognized_word;
|
||||
else
|
||||
return irjp_lex_accept(rjp_lex_number, state);
|
||||
}
|
||||
break;
|
||||
case rjp_lex_decimal:
|
||||
if(isdigit(ch))
|
||||
state->node = rjp_lex_fnumber;
|
||||
else
|
||||
state->node = rjp_lex_invalid;
|
||||
break;
|
||||
case rjp_lex_fnumber:
|
||||
if(isdigit(ch))
|
||||
break;
|
||||
if(ch == 'e' || ch == 'E')
|
||||
state->node = rjp_lex_fnum_e;
|
||||
else if(isalpha(ch))
|
||||
state->node = rjp_lex_unrecognized_word;
|
||||
else
|
||||
return irjp_lex_accept(rjp_lex_fnumber, state);
|
||||
break;
|
||||
case rjp_lex_fnum_e:
|
||||
if(ch == '-' || ch == '+')
|
||||
state->node = rjp_lex_sci_num_signed;
|
||||
else if(isdigit(ch))
|
||||
state->node = rjp_lex_sci_num;
|
||||
else
|
||||
state->node = rjp_lex_invalid;
|
||||
break;
|
||||
case rjp_lex_sci_num_signed:
|
||||
if(isdigit(ch))
|
||||
state->node = rjp_lex_sci_num;
|
||||
else
|
||||
state->node = rjp_lex_unrecognized_word;
|
||||
break;
|
||||
case rjp_lex_sci_num:
|
||||
if(isdigit(ch))
|
||||
break;
|
||||
else if(isalpha(ch))
|
||||
state->node = rjp_lex_unrecognized_word;
|
||||
else
|
||||
return irjp_lex_accept(rjp_lex_sci_num, state);
|
||||
break;
|
||||
|
||||
//strings
|
||||
case rjp_lex_quote:
|
||||
if(ch == '\\')
|
||||
state->node = rjp_lex_escaped;
|
||||
else if(ch == '\n' || ch == '\r')
|
||||
state->node = rjp_lex_invalid;
|
||||
else if(ch == '"')
|
||||
state->node = rjp_lex_string;
|
||||
break;
|
||||
case rjp_lex_escaped:
|
||||
state->node = rjp_lex_quote;
|
||||
break;
|
||||
case rjp_lex_string:
|
||||
return irjp_lex_accept(rjp_lex_string, state);
|
||||
|
||||
//comments
|
||||
case rjp_lex_slash:
|
||||
if(ch == '/')
|
||||
state->node = rjp_lex_line_comment;
|
||||
else if(ch == '*')
|
||||
state->node = rjp_lex_block_comment_start;
|
||||
else
|
||||
state->node = rjp_lex_invalid;
|
||||
break;
|
||||
case rjp_lex_line_comment:
|
||||
if(ch == '\n' || ch == '\r' || ch == 0) //don't consume this character
|
||||
return irjp_lex_accept(rjp_lex_line_comment, state);
|
||||
break;
|
||||
case rjp_lex_block_comment_start:
|
||||
if(ch == '*')
|
||||
state->node = rjp_lex_block_comment_end1;
|
||||
break;
|
||||
case rjp_lex_block_comment_end1:
|
||||
if(ch == '/')
|
||||
state->node = rjp_lex_block_comment;
|
||||
else
|
||||
state->node = rjp_lex_block_comment_start;
|
||||
break;
|
||||
case rjp_lex_block_comment:
|
||||
return irjp_lex_accept(rjp_lex_block_comment, state);
|
||||
|
||||
//true
|
||||
case rjp_lex_t:
|
||||
if(ch != 'r')
|
||||
state->node = rjp_lex_unrecognized_word;
|
||||
else
|
||||
state->node = rjp_lex_tr;
|
||||
break;
|
||||
case rjp_lex_tr:
|
||||
if(ch != 'u')
|
||||
state->node = rjp_lex_unrecognized_word;
|
||||
else
|
||||
state->node = rjp_lex_tru;
|
||||
break;
|
||||
case rjp_lex_tru:
|
||||
if(ch != 'e')
|
||||
state->node = rjp_lex_unrecognized_word;
|
||||
else
|
||||
state->node = rjp_lex_true;
|
||||
break;
|
||||
case rjp_lex_true:
|
||||
if(!isalnum(ch))
|
||||
return irjp_lex_accept(rjp_lex_true, state);
|
||||
else
|
||||
state->node = rjp_lex_unrecognized_word;
|
||||
break;
|
||||
|
||||
//false
|
||||
case rjp_lex_f:
|
||||
if(ch != 'a')
|
||||
state->node = rjp_lex_unrecognized_word;
|
||||
else
|
||||
state->node = rjp_lex_fa;
|
||||
break;
|
||||
case rjp_lex_fa:
|
||||
if(ch != 'l')
|
||||
state->node = rjp_lex_unrecognized_word;
|
||||
else
|
||||
state->node = rjp_lex_fal;
|
||||
break;
|
||||
case rjp_lex_fal:
|
||||
if(ch != 's')
|
||||
state->node = rjp_lex_unrecognized_word;
|
||||
else
|
||||
state->node = rjp_lex_fals;
|
||||
break;
|
||||
case rjp_lex_fals:
|
||||
if(ch != 'e')
|
||||
state->node = rjp_lex_unrecognized_word;
|
||||
else
|
||||
state->node = rjp_lex_false;
|
||||
break;
|
||||
case rjp_lex_false:
|
||||
if(!isalnum(ch))
|
||||
return irjp_lex_accept(rjp_lex_false, state);
|
||||
state->node = rjp_lex_unrecognized_word;
|
||||
break;
|
||||
|
||||
//null
|
||||
case rjp_lex_n:
|
||||
if(ch != 'u')
|
||||
state->node = rjp_lex_unrecognized_word;
|
||||
else
|
||||
state->node = rjp_lex_nu;
|
||||
break;
|
||||
case rjp_lex_nu:
|
||||
if(ch != 'l')
|
||||
state->node = rjp_lex_unrecognized_word;
|
||||
else
|
||||
state->node = rjp_lex_nul;
|
||||
break;
|
||||
case rjp_lex_nul:
|
||||
if(ch != 'l')
|
||||
state->node = rjp_lex_unrecognized_word;
|
||||
else
|
||||
state->node = rjp_lex_null;
|
||||
break;
|
||||
case rjp_lex_null:
|
||||
if(!isalnum(ch))
|
||||
return irjp_lex_accept(rjp_lex_null, state);
|
||||
state->node = rjp_lex_unrecognized_word;
|
||||
break;
|
||||
|
||||
case rjp_lex_unrecognized_word:
|
||||
if(isalnum(ch))
|
||||
break;
|
||||
state->node = rjp_lex_invalid;
|
||||
//fallthrough
|
||||
case rjp_lex_invalid:
|
||||
default:
|
||||
return irjp_lex_error(state);
|
||||
};
|
||||
if(ch == 0)
|
||||
RJP_lex_category cat = irjp_lex_char(*c, state->node);
|
||||
if(cat == rjp_lex_invalid)
|
||||
return irjp_lex_accept(state->node, state);
|
||||
state->node = cat;
|
||||
if(*c == 0)
|
||||
break;
|
||||
}
|
||||
return irjp_lex_accept(state->node, state);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user