RRG-Proxmark3/client/deps/liblua/llex.c

/*
** $Id: llex.c $
** Lexical Analyzer
** See Copyright Notice in lua.h
*/

#define llex_c
#define LUA_CORE

#include "lprefix.h"


#include <locale.h>
#include <string.h>

#include "lua.h"

#include "lctype.h"
#include "ldebug.h"
#include "ldo.h"
#include "lgc.h"
#include "llex.h"
#include "lobject.h"
#include "lparser.h"
#include "lstate.h"
#include "lstring.h"
#include "ltable.h"
#include "lzio.h"


#define next(ls)	(ls->current = zgetc(ls->z))


#define currIsNewline(ls)	(ls->current == '\n' || ls->current == '\r')


/* ORDER RESERVED */
static const char *const luaX_tokens [] = {
    "and", "break", "do", "else", "elseif",
    "end", "false", "for", "function", "goto", "if",
    "in", "local", "nil", "not", "or", "repeat",
    "return", "then", "true", "until", "while",
    "//", "..", "...", "==", ">=", "<=", "~=",
    "<<", ">>", "::", "<eof>",
    "<number>", "<integer>", "<name>", "<string>"
};


#define save_and_next(ls) (save(ls, ls->current), next(ls))


static l_noret lexerror(LexState *ls, const char *msg, int token);


static void save(LexState *ls, int c) {
    Mbuffer *b = ls->buff;
    if (luaZ_bufflen(b) + 1 > luaZ_sizebuffer(b)) {
        size_t newsize;
        if (luaZ_sizebuffer(b) >= MAX_SIZE / 2)
            lexerror(ls, "lexical element too long", 0);
        newsize = luaZ_sizebuffer(b) * 2;
        luaZ_resizebuffer(ls->L, b, newsize);
    }
    b->buffer[luaZ_bufflen(b)++] = cast_char(c);
}


void luaX_init(lua_State *L) {
    int i;
    TString *e = luaS_newliteral(L, LUA_ENV);  /* create env name */
    luaC_fix(L, obj2gco(e));  /* never collect this name */
    for (i = 0; i < NUM_RESERVED; i++) {
        TString *ts = luaS_new(L, luaX_tokens[i]);
        luaC_fix(L, obj2gco(ts));  /* reserved words are never collected */
        ts->extra = cast_byte(i + 1); /* reserved word */
    }
}


const char *luaX_token2str(LexState *ls, int token) {
    if (token < FIRST_RESERVED) {  /* single-byte symbols? */
        if (lisprint(token))
            return luaO_pushfstring(ls->L, "'%c'", token);
        else  /* control character */
            return luaO_pushfstring(ls->L, "'<\\%d>'", token);
    } else {
        const char *s = luaX_tokens[token - FIRST_RESERVED];
        if (token < TK_EOS)  /* fixed format (symbols and reserved words)? */
            return luaO_pushfstring(ls->L, "'%s'", s);
        else  /* names, strings, and numerals */
            return s;
    }
}


static const char *txtToken(LexState *ls, int token) {
    switch (token) {
        case TK_NAME:
        case TK_STRING:
        case TK_FLT:
        case TK_INT:
            save(ls, '\0');
            return luaO_pushfstring(ls->L, "'%s'", luaZ_buffer(ls->buff));
        default:
            return luaX_token2str(ls, token);
    }
}


static l_noret lexerror(LexState *ls, const char *msg, int token) {
    msg = luaG_addinfo(ls->L, msg, ls->source, ls->linenumber);
    if (token)
        luaO_pushfstring(ls->L, "%s near %s", msg, txtToken(ls, token));
    luaD_throw(ls->L, LUA_ERRSYNTAX);
}


l_noret luaX_syntaxerror(LexState *ls, const char *msg) {
    lexerror(ls, msg, ls->t.token);
}


/*
** Creates a new string and anchors it in scanner's table so that it
** will not be collected until the end of the compilation; by that time
** it should be anchored somewhere. It also internalizes long strings,
** ensuring there is only one copy of each unique string.  The table
** here is used as a set: the string enters as the key, while its value
** is irrelevant. We use the string itself as the value only because it
** is a TValue readily available. Later, the code generation can change
** this value.
*/
TString *luaX_newstring(LexState *ls, const char *str, size_t l) {
    lua_State *L = ls->L;
    TString *ts = luaS_newlstr(L, str, l);  /* create new string */
    const TValue *o = luaH_getstr(ls->h, ts);
    if (!ttisnil(o))  /* string already present? */
        ts = keystrval(nodefromval(o));  /* get saved copy */
    else {  /* not in use yet */
        TValue *stv = s2v(L->top.p++);  /* reserve stack space for string */
        setsvalue(L, stv, ts);  /* temporarily anchor the string */
        luaH_finishset(L, ls->h, stv, o, stv);  /* t[string] = string */
        /* table is not a metatable, so it does not need to invalidate cache */
        luaC_checkGC(L);
        L->top.p--;  /* remove string from stack */
    }
    return ts;
}


/*
** increment line number and skips newline sequence (any of
** \n, \r, \n\r, or \r\n)
*/
static void inclinenumber(LexState *ls) {
    int old = ls->current;
    lua_assert(currIsNewline(ls));
    next(ls);  /* skip '\n' or '\r' */
    if (currIsNewline(ls) && ls->current != old)
        next(ls);  /* skip '\n\r' or '\r\n' */
    if (++ls->linenumber >= MAX_INT)
        lexerror(ls, "chunk has too many lines", 0);
}


void luaX_setinput(lua_State *L, LexState *ls, ZIO *z, TString *source,
                   int firstchar) {
    ls->t.token = 0;
    ls->L = L;
    ls->current = firstchar;
    ls->lookahead.token = TK_EOS;  /* no look-ahead token */
    ls->z = z;
    ls->fs = NULL;
    ls->linenumber = 1;
    ls->lastline = 1;
    ls->source = source;
    ls->envn = luaS_newliteral(L, LUA_ENV);  /* get env name */
    luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER);  /* initialize buffer */
}


/*
** =======================================================
** LEXICAL ANALYZER
** =======================================================
*/


static int check_next1(LexState *ls, int c) {
    if (ls->current == c) {
        next(ls);
        return 1;
    } else return 0;
}


/*
** Check whether current char is in set 'set' (with two chars) and
** saves it
*/
static int check_next2(LexState *ls, const char *set) {
    lua_assert(set[2] == '\0');
    if (ls->current == set[0] || ls->current == set[1]) {
        save_and_next(ls);
        return 1;
    } else return 0;
}


/* LUA_NUMBER */
/*
** This function is quite liberal in what it accepts, as 'luaO_str2num'
** will reject ill-formed numerals. Roughly, it accepts the following
** pattern:
**
**   %d(%x|%.|([Ee][+-]?))* | 0[Xx](%x|%.|([Pp][+-]?))*
**
** The only tricky part is to accept [+-] only after a valid exponent
** mark, to avoid reading '3-4' or '0xe+1' as a single number.
**
** The caller might have already read an initial dot.
*/
static int read_numeral(LexState *ls, SemInfo *seminfo) {
    TValue obj;
    const char *expo = "Ee";
    int first = ls->current;
    lua_assert(lisdigit(ls->current));
    save_and_next(ls);
    if (first == '0' && check_next2(ls, "xX"))  /* hexadecimal? */
        expo = "Pp";
    for (;;) {
        if (check_next2(ls, expo))  /* exponent mark? */
            check_next2(ls, "-+");  /* optional exponent sign */
        else if (lisxdigit(ls->current) || ls->current == '.')  /* '%x|%.' */
            save_and_next(ls);
        else break;
    }
    if (lislalpha(ls->current))  /* is numeral touching a letter? */
        save_and_next(ls);  /* force an error */
    save(ls, '\0');
    if (luaO_str2num(luaZ_buffer(ls->buff), &obj) == 0)  /* format error? */
        lexerror(ls, "malformed number", TK_FLT);
    if (ttisinteger(&obj)) {
        seminfo->i = ivalue(&obj);
        return TK_INT;
    } else {
        lua_assert(ttisfloat(&obj));
        seminfo->r = fltvalue(&obj);
        return TK_FLT;
    }
}


/*
** read a sequence '[=*[' or ']=*]', leaving the last bracket. If
** sequence is well formed, return its number of '='s + 2; otherwise,
** return 1 if it is a single bracket (no '='s and no 2nd bracket);
** otherwise (an unfinished '[==...') return 0.
*/
static size_t skip_sep(LexState *ls) {
    size_t count = 0;
    int s = ls->current;
    lua_assert(s == '[' || s == ']');
    save_and_next(ls);
    while (ls->current == '=') {
        save_and_next(ls);
        count++;
    }
    return (ls->current == s) ? count + 2
           : (count == 0) ? 1
           : 0;
}


static void read_long_string(LexState *ls, SemInfo *seminfo, size_t sep) {
    int line = ls->linenumber;  /* initial line (for error message) */
    save_and_next(ls);  /* skip 2nd '[' */
    if (currIsNewline(ls))  /* string starts with a newline? */
        inclinenumber(ls);  /* skip it */
    for (;;) {
        switch (ls->current) {
            case EOZ: {  /* error */
                const char *what = (seminfo ? "string" : "comment");
                const char *msg = luaO_pushfstring(ls->L,
                                                   "unfinished long %s (starting at line %d)", what, line);
                lexerror(ls, msg, TK_EOS);
                break;  /* to avoid warnings */
            }
            case ']': {
                if (skip_sep(ls) == sep) {
                    save_and_next(ls);  /* skip 2nd ']' */
                    goto endloop;
                }
                break;
            }
            case '\n':
            case '\r': {
                save(ls, '\n');
                inclinenumber(ls);
                if (!seminfo) luaZ_resetbuffer(ls->buff);  /* avoid wasting space */
                break;
            }
            default: {
                if (seminfo) save_and_next(ls);
                else next(ls);
            }
        }
    }
endloop:
    if (seminfo)
        seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + sep,
                                     luaZ_bufflen(ls->buff) - 2 * sep);
}


static void esccheck(LexState *ls, int c, const char *msg) {
    if (!c) {
        if (ls->current != EOZ)
            save_and_next(ls);  /* add current to buffer for error message */
        lexerror(ls, msg, TK_STRING);
    }
}


static int gethexa(LexState *ls) {
    save_and_next(ls);
    esccheck(ls, lisxdigit(ls->current), "hexadecimal digit expected");
    return luaO_hexavalue(ls->current);
}


static int readhexaesc(LexState *ls) {
    int r = gethexa(ls);
    r = (r << 4) + gethexa(ls);
    luaZ_buffremove(ls->buff, 2);  /* remove saved chars from buffer */
    return r;
}


static unsigned long readutf8esc(LexState *ls) {
    unsigned long r;
    int i = 4;  /* chars to be removed: '\', 'u', '{', and first digit */
    save_and_next(ls);  /* skip 'u' */
    esccheck(ls, ls->current == '{', "missing '{'");
    r = gethexa(ls);  /* must have at least one digit */
    while (cast_void(save_and_next(ls)), lisxdigit(ls->current)) {
        i++;
        esccheck(ls, r <= (0x7FFFFFFFu >> 4), "UTF-8 value too large");
        r = (r << 4) + luaO_hexavalue(ls->current);
    }
    esccheck(ls, ls->current == '}', "missing '}'");
    next(ls);  /* skip '}' */
    luaZ_buffremove(ls->buff, i);  /* remove saved chars from buffer */
    return r;
}


static void utf8esc(LexState *ls) {
    char buff[UTF8BUFFSZ];
    int n = luaO_utf8esc(buff, readutf8esc(ls));
    for (; n > 0; n--)  /* add 'buff' to string */
        save(ls, buff[UTF8BUFFSZ - n]);
}


static int readdecesc(LexState *ls) {
    int i;
    int r = 0;  /* result accumulator */
    for (i = 0; i < 3 && lisdigit(ls->current); i++) {  /* read up to 3 digits */
        r = 10 * r + ls->current - '0';
        save_and_next(ls);
    }
    esccheck(ls, r <= UCHAR_MAX, "decimal escape too large");
    luaZ_buffremove(ls->buff, i);  /* remove read digits from buffer */
    return r;
}


static void read_string(LexState *ls, int del, SemInfo *seminfo) {
    save_and_next(ls);  /* keep delimiter (for error messages) */
    while (ls->current != del) {
        switch (ls->current) {
            case EOZ:
                lexerror(ls, "unfinished string", TK_EOS);
                break;  /* to avoid warnings */
            case '\n':
            case '\r':
                lexerror(ls, "unfinished string", TK_STRING);
                break;  /* to avoid warnings */
            case '\\': {  /* escape sequences */
                int c;  /* final character to be saved */
                save_and_next(ls);  /* keep '\\' for error messages */
                switch (ls->current) {
                    case 'a':
                        c = '\a';
                        goto read_save;
                    case 'b':
                        c = '\b';
                        goto read_save;
                    case 'f':
                        c = '\f';
                        goto read_save;
                    case 'n':
                        c = '\n';
                        goto read_save;
                    case 'r':
                        c = '\r';
                        goto read_save;
                    case 't':
                        c = '\t';
                        goto read_save;
                    case 'v':
                        c = '\v';
                        goto read_save;
                    case 'x':
                        c = readhexaesc(ls);
                        goto read_save;
                    case 'u':
                        utf8esc(ls);
                        goto no_save;
                    case '\n':
                    case '\r':
                        inclinenumber(ls);
                        c = '\n';
                        goto only_save;
                    case '\\':
                    case '\"':
                    case '\'':
                        c = ls->current;
                        goto read_save;
                    case EOZ:
                        goto no_save;  /* will raise an error next loop */
                    case 'z': {  /* zap following span of spaces */
                        luaZ_buffremove(ls->buff, 1);  /* remove '\\' */
                        next(ls);  /* skip the 'z' */
                        while (lisspace(ls->current)) {
                            if (currIsNewline(ls)) inclinenumber(ls);
                            else next(ls);
                        }
                        goto no_save;
                    }
                    default: {
                        esccheck(ls, lisdigit(ls->current), "invalid escape sequence");
                        c = readdecesc(ls);  /* digital escape '\ddd' */
                        goto only_save;
                    }
                }
read_save:
                next(ls);
                /* go through */
only_save:
                luaZ_buffremove(ls->buff, 1);  /* remove '\\' */
                save(ls, c);
                /* go through */
no_save:
                break;
            }
            default:
                save_and_next(ls);
        }
    }
    save_and_next(ls);  /* skip delimiter */
    seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,
                                 luaZ_bufflen(ls->buff) - 2);
}


static int llex(LexState *ls, SemInfo *seminfo) {
    luaZ_resetbuffer(ls->buff);
    for (;;) {
        switch (ls->current) {
            case '\n':
            case '\r': {  /* line breaks */
                inclinenumber(ls);
                break;
            }
            case ' ':
            case '\f':
            case '\t':
            case '\v': {  /* spaces */
                next(ls);
                break;
            }
            case '-': {  /* '-' or '--' (comment) */
                next(ls);
                if (ls->current != '-') return '-';
                /* else is a comment */
                next(ls);
                if (ls->current == '[') {  /* long comment? */
                    size_t sep = skip_sep(ls);
                    luaZ_resetbuffer(ls->buff);  /* 'skip_sep' may dirty the buffer */
                    if (sep >= 2) {
                        read_long_string(ls, NULL, sep);  /* skip long comment */
                        luaZ_resetbuffer(ls->buff);  /* previous call may dirty the buff. */
                        break;
                    }
                }
                /* else short comment */
                while (!currIsNewline(ls) && ls->current != EOZ)
                    next(ls);  /* skip until end of line (or end of file) */
                break;
            }
            case '[': {  /* long string or simply '[' */
                size_t sep = skip_sep(ls);
                if (sep >= 2) {
                    read_long_string(ls, seminfo, sep);
                    return TK_STRING;
                } else if (sep == 0) /* '[=...' missing second bracket? */
                    lexerror(ls, "invalid long string delimiter", TK_STRING);
                return '[';
            }
            case '=': {
                next(ls);
                if (check_next1(ls, '=')) return TK_EQ;  /* '==' */
                else return '=';
            }
            case '<': {
                next(ls);
                if (check_next1(ls, '=')) return TK_LE;  /* '<=' */
                else if (check_next1(ls, '<')) return TK_SHL;  /* '<<' */
                else return '<';
            }
            case '>': {
                next(ls);
                if (check_next1(ls, '=')) return TK_GE;  /* '>=' */
                else if (check_next1(ls, '>')) return TK_SHR;  /* '>>' */
                else return '>';
            }
            case '/': {
                next(ls);
                if (check_next1(ls, '/')) return TK_IDIV;  /* '//' */
                else return '/';
            }
            case '~': {
                next(ls);
                if (check_next1(ls, '=')) return TK_NE;  /* '~=' */
                else return '~';
            }
            case ':': {
                next(ls);
                if (check_next1(ls, ':')) return TK_DBCOLON;  /* '::' */
                else return ':';
            }
            case '"':
            case '\'': {  /* short literal strings */
                read_string(ls, ls->current, seminfo);
                return TK_STRING;
            }
            case '.': {  /* '.', '..', '...', or number */
                save_and_next(ls);
                if (check_next1(ls, '.')) {
                    if (check_next1(ls, '.'))
                        return TK_DOTS;   /* '...' */
                    else return TK_CONCAT;   /* '..' */
                } else if (!lisdigit(ls->current)) return '.';
                else return read_numeral(ls, seminfo);
            }
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9': {
                return read_numeral(ls, seminfo);
            }
            case EOZ: {
                return TK_EOS;
            }
            default: {
                if (lislalpha(ls->current)) {  /* identifier or reserved word? */
                    TString *ts;
                    do {
                        save_and_next(ls);
                    } while (lislalnum(ls->current));
                    ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
                                        luaZ_bufflen(ls->buff));
                    seminfo->ts = ts;
                    if (isreserved(ts))  /* reserved word? */
                        return ts->extra - 1 + FIRST_RESERVED;
                    else {
                        return TK_NAME;
                    }
                } else { /* single-char tokens ('+', '*', '%', '{', '}', ...) */
                    int c = ls->current;
                    next(ls);
                    return c;
                }
            }
        }
    }
}


void luaX_next(LexState *ls) {
    ls->lastline = ls->linenumber;
    if (ls->lookahead.token != TK_EOS) {  /* is there a look-ahead token? */
        ls->t = ls->lookahead;  /* use this one */
        ls->lookahead.token = TK_EOS;  /* and discharge it */
    } else
        ls->t.token = llex(ls, &ls->t.seminfo);  /* read next token */
}


int luaX_lookahead(LexState *ls) {
    lua_assert(ls->lookahead.token == TK_EOS);
    ls->lookahead.token = llex(ls, &ls->lookahead.seminfo);
    return ls->lookahead.token;
}