// UPDOC
//The E grammar specifically treats updoc test sequences as comments.  Any line
//whose first non-whitespace character is '?' is considered by the lexer to be
//the start of an UPDOC test case.  The test case will include any continguous
//lines that are whitespace, or that begin with '#' or '>'.

//----------------------------------------------------------------------------
// The E Lexer
//----------------------------------------------------------------------------
//class EALexer extends Lexer("antlr.AstroLexer");
class EALexer extends Lexer("antlr.SwitchingLexer");

options {
	importVocab=E;
	exportVocab=EALexer;
	//testLiterals=false;    // don't automatically test for literals
	k=3;                   // four characters of lookahead
	charVocabulary='\3'..'\377';
	//charVocabulary='\u0003'..'\u7FFE';
	// without inlining some bitset tests, couldn't do unicode;
	// I need to make ANTLR generate smaller bitsets; see
	// bottom of JavaLexer.java
	codeGenBitsetTestThreshold=20;
}
tokens {
    SR;
    GE;
    SR_ASSIGN;
}
{   // to control whether CRs are consumed as whitespace on a per token basis
    protected boolean myContinue;
    //public void traceIn(String rname) throws CharStreamException {    }
}

// OPERATORS
QUASIOPEN:        '`' {selector.push("quasi");}  ;
LPAREN:              '('    BR ;
RPAREN:              ')'    ;
LBRACK:              '['    BR ;
RBRACK:              ']'    ;
LCURLY:              '{'    BR  {selector.enterBrace();} ;
RCURLY:              '}'    {selector.exitBrace();} ;
AT:                  '@'    ;
ATCURLY:             "@{"   ;
DOLLARCURLY:         "${"   ;
// a question at the beginning of a line indicates an updoc line, and the line is ignored.
QUESTION:            '?'    ({atLineStart()}? SKIPLINE {$setType(Token.SKIP);} | BR) ;
COLON:               ':'    BR ;
COMMA:               ','    BR ;
DOT:                 '.'    BR ;
THRU:                ".."   BR ;
TILL:                "..!"  BR ;
SAME:                "=="   BR ;
EQ:                  '='    BR ;
LNOT:                '!'    BR ;
BNOT:                '~'    BR ;
NOTSAME:             "!="   BR ;
DIV:                 '/'    BR ;
FLOORDIV:            "//"   BR ;
PLUS:                '+'    BR ;
MINUS:               '-'    BR ;
INC:                 "++"   ;
DEC:                 "--"   ;
STAR:                '*'    BR ;
REM:                 '%'    BR ;
MOD:                 "%%"   BR ;
SL:                  "<<"   BR ;
LE:                  "<="   BR ;
ABA:                 "<=>"  BR ;
BXOR:                '^'    BR ;
BOR:                 '|'    BR ;
LOR:                 "||"   BR ;
BAND:                '&'    BR ;
BUTNOT:              "&!"   BR ;
LAND:                "&&"   BR ;
SEMI:                ';'    ;
POW:                 "**"   BR ;

ASSIGN:              ":="   BR ;
FLOORDIV_ASSIGN:     "//="  BR ;
DIV_ASSIGN:          "/="   BR ;
PLUS_ASSIGN:         "+="   BR ;
MINUS_ASSIGN:        "-="   BR ;
STAR_ASSIGN:         "*="   BR ;
REM_ASSIGN:          "%="   BR ;
MOD_ASSIGN:          "%%="  BR ;
POW_ASSIGN:          "**="  BR ;
SL_ASSIGN:           "<<="  BR ;
BXOR_ASSIGN:         "^="   BR ;
BOR_ASSIGN:          "|="   BR ;
BAND_ASSIGN:         "&="   BR ;

// Other tokes
SEND:                "<-"   BR ;
WHEN:                "->"   BR ;
MAPSTO:              "=>"   BR ;
MATCHBIND:           "=~"   BR ;
MISMATCH:            "!~"   BR ;
SCOPE:               "::"   BR ;
SCOPESLOT:           "::&"  BR ;

//SR:                  ">>"   BR ;
//GE:                  ">="   BR ;
//SR_ASSIGN       :    ">>="  BR ;
GT:                  '>'    ({atLineStart()}? SKIPLINE {$setType(Token.SKIP);}
                            | '>'  BR {$setType(SR);}
                            | '='  BR {$setType(GE);}
                            | ">=" BR {$setType(SR_ASSIGN);}
                            | ); // shoudl have BR, except for terminating a URI

LT:                  ('<'  IDENT ('>' | ':')) =>
                      '<'! IDENT ( '>'! {$setType(URIGetter);}
                                 | ':' (    (ANYWS)=> BR {$setType(URIStart);}
                                       |   URI '>'!  {$setType(URI);}))
                |    '<' BR ;

// Whitespace -- ignored
WS:         (' '|'\t'|'\f'|ESCWS)+    {$setType(Token.SKIP);} ;

protected
ESCWS:      '\\' (' '|'\t'|'\f')* EOL    ;

protected
ANYWS:      ' '|'\t'|'\f'|'\r'|'\n' ;

LINESEP:        (EOL)+    ;

// Single-line comments
SL_COMMENT  :   "#" (~('\n'|'\r'))*  {$setType(Token.SKIP);} ;

protected
SKIPLINE:       (~('\n'|'\r'))* EOL ;

// multiple-line comments
DOC_COMMENT
    :    "/**"
        (    //    '\r' '\n' can be matched in one alternative or by matching
            //    '\r' in one iteration and '\n' in another.  I am trying to
            //    handle any flavor of newline that comes in, but the language
            //    that allows both "\r\n" and "\r" and "\n" to all be valid
            //    newline is ambiguous.  Consequently, the resulting grammar
            //    must be ambiguous.  I'm shutting this warning off.
            options {
                generateAmbigWarnings=false;
            }
        :   { LA(2)!='/' }? '*'
        |   EOL
        |   ~('*'|'\n'|'\r')
        )*
        '*' '/'
        BR
        {$setText("**comment hidden**");}
        //{$setType(Token.SKIP);}
    ;


// character literals
CHAR_LITERAL    :    '\'' ( ESC | ~('\''|'\n'|'\r'|'\\') ) '\''   ;

// string literals
STRING: '"' (   ESC
            |   EOL
            |    ~('"'|'\\'|'\n'|'\r')
        )* '"'  ;

// escape sequence -- note that this is protected; it can only be called
//   from another lexer rule -- it will not ever directly return a token to
//   the parser
// There are various ambiguities hushed in this rule.  The optional
// '0'...'9' digit matches should be matched here rather than letting
// them go back to STRING to be matched.  ANTLR does the
// right thing by matching immediately; hence, it's ok to shut off
// the FOLLOW ambig warnings.
protected
ESC     {int ifWhitespace = text.length();} // to ignore escaped whitespace
    :   '\\'
        (    'n'
        |    'r'
        |    't'
        |    'b'
        |    'f'
        |    '"'
        |    '\''
        |    '\\'
        |    ('u')+ HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT
        |    '0'..'3'
            (
                options {
                    warnWhenFollowAmbig = false;
                }
            :    '0'..'7'
                (
                    options {
                        warnWhenFollowAmbig = false;
                    }
                :    '0'..'7'
                )?
            )?
        |    '4'..'7'
            (
                options {
                    warnWhenFollowAmbig = false;
                }
            :    '0'..'7'
            )?
        | (' '|'\t'|'\f')* EOL
            ({text.setLength(ifWhitespace);}:)//ignores the ecaped whitespace
        )
;


// hexadecimal digit
protected
HEX_DIGIT   :    ('0'..'9'|'A'..'F'|'a'..'f')  ;

// an identifier.  Note that testLiterals is set to true!  This means
// that after we match the rule, we look in the literals table to see
// if it's a literal or really an identifer
IDENT       options {testLiterals=true;}
            :    ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
            ;

// a numeric literal
INT:            ("0x") => "0x" (HEX_DIGIT)+ { $setType(HEX); }
            |   ('0' ('0'..'9')) => ('0'..'7')+  { $setType(OCTAL); }
            |   (FLOAT64) => FLOAT64  { $setType(FLOAT64); }
            |   POSINT
            ;

// an integer
protected
POSINT:         ('0'..'9') ('0'..'9'|'_'!)* ;

protected
FLOAT64     :   POSINT ('.' POSINT | ('e' | 'E') EXPONENT)  ;

protected
EXPONENT:       ('+'|'-')? POSINT  ;

protected
BR:      (   {_saveIndex=text.length();}:)
             (' ' | '\t' | "#" (options {greedy=true;}:(~('\n'|'\r'|'#')!))* | EOL
         )*
         ({text.setLength(_saveIndex);}:)
         ;

protected
EOL:     (options {generateAmbigWarnings=false;} : "\r\n" | '\r' | '\n' ) { newline(); } ;


protected
URI:            (  'a'..'z'|'A'..'Z'|'_'|'0'..'9'
                |';'|'/'|'?'|':'|'@'|'&'|'='|'+'|'$'|','|'-'
                |'.'|'!'|'~'|'*'|'\''|'('|')'|'%'|'\\'|'|'|'#'  )+
            ;
/**/