|
JavaScript 2.0
Formal Description
Lexer Semantics
|
Thursday, July 22, 1999
The lexer semantics describe the actions the lexer takes in order to transform an input stream of Unicode characters into a stream of tokens. For convenience, the lexer grammar is repeated here. See also the description of the semantic notation.
This document is also available as a Word 98 rtf file.
The start symbols are NextToken^re and NextToken^div, depending on whether a / should be interpreted as the start of a regular expression or as a division operator.
«TAB» | «VT» | «FF» | «SP» | «u00A0» | «u2000» | «u2001» | «u2002» | «u2003» | «u2004» | «u2005» | «u2006» | «u2007» | «u2008» | «u2009» | «u200A» | «u200B» | «u3000»
action DecimalValue[ASCIIDigit] : Integer = digitValue(ASCIIDigit)
type RegExp = tuple {reBody: String; reFlags: String}
type Quantity = tuple {amount: Double; unit: String}
type Token
= oneof {
identifier: String;
keyword: String;
punctuator: String;
number: Double;
quantity: Quantity;
string: String;
regularExpression: RegExp;
end}
action Token[NextTokent] : Token
Token[NextTokent WhiteSpace Tokent] = Token[Tokent]
action RegExpMayFollow[NextTokent] : Boolean
RegExpMayFollow[NextTokent WhiteSpace Tokent] = RegExpMayFollow[Tokent]
action Token[Tokent] : Token
Token[Tokent IdentifierOrReservedWord] = Token[IdentifierOrReservedWord]
Token[Tokent Punctuator] = Token[Punctuator]
Token[Tokendiv DivisionPunctuator] = punctuator Punctuator[DivisionPunctuator]
Token[Tokent NumericLiteral] = number DoubleValue[NumericLiteral]
Token[Tokent QuantityLiteral] = quantity QuantityValue[QuantityLiteral]
Token[Tokent StringLiteral] = string StringValue[StringLiteral]
Token[Tokenre RegExpLiteralslash] = regularExpression REValue[RegExpLiteralslash]
Token[Tokent RegExpLiteralguillemet]
= regularExpression REValue[RegExpLiteralguillemet]
Token[Tokent EndOfInput] = end
action RegExpMayFollow[Tokent] : Boolean
RegExpMayFollow[Tokent IdentifierOrReservedWord]
= RegExpMayFollow[IdentifierOrReservedWord]
RegExpMayFollow[Tokent Punctuator] = RegExpMayFollow[Punctuator]
RegExpMayFollow[Tokendiv DivisionPunctuator] = true
RegExpMayFollow[Tokent NumericLiteral] = false
RegExpMayFollow[Tokent QuantityLiteral] = false
RegExpMayFollow[Tokent StringLiteral] = false
RegExpMayFollow[Tokenre RegExpLiteralslash] = false
RegExpMayFollow[Tokent RegExpLiteralguillemet] = false
RegExpMayFollow[Tokent EndOfInput] = true
action Name[IdentifierName] : String
Name[IdentifierName InitialIdentifierCharacter]
= [CharacterValue[InitialIdentifierCharacter]]
Name[IdentifierName IdentifierName1 ContinuingIdentifierCharacter]
= Name[IdentifierName1] ⊕ [CharacterValue[ContinuingIdentifierCharacter]]
action ContainsEscapes[IdentifierName] : Boolean
ContainsEscapes[IdentifierName InitialIdentifierCharacter]
= ContainsEscapes[InitialIdentifierCharacter]
ContainsEscapes[IdentifierName IdentifierName1 ContinuingIdentifierCharacter]
= ContainsEscapes[IdentifierName1] or ContainsEscapes[ContinuingIdentifierCharacter]
action CharacterValue[InitialIdentifierCharacter] : Character
CharacterValue[InitialIdentifierCharacter OrdinaryInitialIdentifierCharacter]
= OrdinaryInitialIdentifierCharacter
CharacterValue[InitialIdentifierCharacter \ HexEscape]
= if isOrdinaryInitialIdentifierCharacter(CharacterValue[HexEscape])
then CharacterValue[HexEscape]
else ⊥
action ContainsEscapes[InitialIdentifierCharacter] : Boolean
ContainsEscapes[InitialIdentifierCharacter OrdinaryInitialIdentifierCharacter] = false
ContainsEscapes[InitialIdentifierCharacter \ HexEscape] = true
action CharacterValue[ContinuingIdentifierCharacter] : Character
CharacterValue[ContinuingIdentifierCharacter OrdinaryContinuingIdentifierCharacter]
= OrdinaryContinuingIdentifierCharacter
CharacterValue[ContinuingIdentifierCharacter \ HexEscape]
= if isOrdinaryContinuingIdentifierCharacter(CharacterValue[HexEscape])
then CharacterValue[HexEscape]
else ⊥
action ContainsEscapes[ContinuingIdentifierCharacter] : Boolean
ContainsEscapes[ContinuingIdentifierCharacter OrdinaryContinuingIdentifierCharacter]
= false
ContainsEscapes[ContinuingIdentifierCharacter \ HexEscape] = true
reservedWordsRE : String[]
= [“abstract”,
“break”,
“case”,
“catch”,
“class”,
“const”,
“continue”,
“debugger”,
“default”,
“delete”,
“do”,
“else”,
“enum”,
“eval”,
“export”,
“extends”,
“field”,
“final”,
“finally”,
“for”,
“function”,
“goto”,
“if”,
“implements”,
“import”,
“in”,
“instanceof”,
“native”,
“new”,
“package”,
“private”,
“protected”,
“public”,
“return”,
“static”,
“switch”,
“synchronized”,
“throw”,
“throws”,
“transient”,
“try”,
“typeof”,
“var”,
“volatile”,
“while”,
“with”]
reservedWordsDiv : String[] = [“false”, “null”, “super”, “this”, “true”]
nonReservedWords : String[]
= [“constructor”, “getter”, “method”, “override”, “setter”, “traditional”, “version”]
keywords : String[] = reservedWordsRE ⊕ reservedWordsDiv ⊕ nonReservedWords
member(id: String, list: String[]) : Boolean
= if |list| = 0
then false
else if id = list[0]
then true
else member(id, list[1 ...])
action Token[IdentifierOrReservedWord] : Token
Token[IdentifierOrReservedWord IdentifierName]
= let id: String = Name[IdentifierName]
in if member(id, keywords) and not ContainsEscapes[IdentifierName]
then keyword id
else identifier id
action RegExpMayFollow[IdentifierOrReservedWord] : Boolean
RegExpMayFollow[IdentifierOrReservedWord IdentifierName]
= let id: String = Name[IdentifierName]
in member(id, reservedWordsRE) and not ContainsEscapes[IdentifierName]
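PunctuatorRE ⇒ “!” | “!=” | “!==” | “#” | “%” | “%=” | “&” | “&&” | “&&=” | “&=” | “(” | “*” | “*=” | “+” | “+=” | “,” | “-” | “-=” | “->” | “.” | “..” | “...” | “:” | “::” | “;” | “<” | “<<” | “<<=” | “<=” | “=” | “==” | “===” | “>” | “>=” | “>>” | “>>=” | “>>>” | “>>>=” | “?” | “@” | “[” | “^” | “^=” | “^^” | “^^=” | “{” | “|” | “|=” | “||” | “||=” | “~”
action Token[Punctuator] : Token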
Token[Punctuator PunctuatorRE] = punctuator Punctuator[PunctuatorRE]
Token[Punctuator PunctuatorDiv] = punctuator Punctuator[PunctuatorDiv]
action RegExpMayFollow[Punctuator] : Boolean
RegExpMayFollow[Punctuator PunctuatorRE] = true
RegExpMayFollow[Punctuator PunctuatorDiv] = false
action Punctuator[PunctuatorRE] : String
Punctuator[PunctuatorRE !] = “!”
Punctuator[PunctuatorRE ! =] = “!=”
Punctuator[PunctuatorRE ! = =] = “!==”
Punctuator[PunctuatorRE #] = “#”
Punctuator[PunctuatorRE %] = “%”
Punctuator[PunctuatorRE % =] = “%=”
Punctuator[PunctuatorRE &] = “&”
Punctuator[PunctuatorRE & &] = “&&”
Punctuator[PunctuatorRE & & =] = “&&=”
Punctuator[PunctuatorRE & =] = “&=”
Punctuator[PunctuatorRE (] = “(”
Punctuator[PunctuatorRE *] = “*”
Punctuator[PunctuatorRE * =] = “*=”
Punctuator[PunctuatorRE +] = “+”
Punctuator[PunctuatorRE + =] = “+=”
Punctuator[PunctuatorRE ,] = “,”
Punctuator[PunctuatorRE -] = “-”
Punctuator[PunctuatorRE - =] = “-=”
Punctuator[PunctuatorRE - >] = “->”
Punctuator[PunctuatorRE .] = “.”
Punctuator[PunctuatorRE . .] = “..”
Punctuator[PunctuatorRE . . .] = “...”
Punctuator[PunctuatorRE :] = “:”
Punctuator[PunctuatorRE : :] = “::”
Punctuator[PunctuatorRE ;] = “;”
Punctuator[PunctuatorRE <] = “<”
Punctuator[PunctuatorRE < <] = “<<”
Punctuator[PunctuatorRE < < =] = “<<=”
Punctuator[PunctuatorRE < =] = “<=”
Punctuator[PunctuatorRE =] = “=”
Punctuator[PunctuatorRE = =] = “==”
Punctuator[PunctuatorRE = = =] = “===”
Punctuator[PunctuatorRE >] = “>”
Punctuator[PunctuatorRE > =] = “>=”
Punctuator[PunctuatorRE > >] = “>>”
Punctuator[PunctuatorRE > > =] = “>>=”
Punctuator[PunctuatorRE > > >] = “>>>”
Punctuator[PunctuatorRE > > > =] = “>>>=”
Punctuator[PunctuatorRE ?] = “?”
Punctuator[PunctuatorRE @] = “@”
Punctuator[PunctuatorRE [] = “[”
Punctuator[PunctuatorRE ^] = “^”
Punctuator[PunctuatorRE ^ =] = “^=”
Punctuator[PunctuatorRE ^ ^] = “^^”
Punctuator[PunctuatorRE ^ ^ =] = “^^=”
Punctuator[PunctuatorRE {] = “{”
Punctuator[PunctuatorRE |] = “|”
Punctuator[PunctuatorRE | =] = “|=”
Punctuator[PunctuatorRE | |] = “||”
Punctuator[PunctuatorRE | | =] = “||=”
Punctuator[PunctuatorRE ~] = “~”
action Punctuator[PunctuatorDiv] : String
Punctuator[PunctuatorDiv )] = “)”
Punctuator[PunctuatorDiv + +] = “++”
Punctuator[PunctuatorDiv - -] = “--”
Punctuator[PunctuatorDiv ]] = “]”
Punctuator[PunctuatorDiv }] = “}”
action Punctuator[DivisionPunctuator] : String
Punctuator[DivisionPunctuator /] = “/”
Punctuator[DivisionPunctuator / =] = “/=”
action DoubleValue[NumericLiteral] : Double
DoubleValue[NumericLiteral DecimalLiteral]
= rationalToDouble(RationalValue[DecimalLiteral])
DoubleValue[NumericLiteral HexIntegerLiteral [lookahead∉{HexDigit}]]
= rationalToDouble(IntegerValue[HexIntegerLiteral])
DoubleValue[NumericLiteral OctalIntegerLiteral]
= rationalToDouble(IntegerValue[OctalIntegerLiteral])
expt(base: Rational, exponent: Integer) : Rational
= if exponent = 0
then 1
else if exponent < 0
then 1/expt(base, -exponent)
else base*expt(base, exponent - 1)
Mantissa ⇒ DecimalIntegerLiteral | DecimalIntegerLiteral . | DecimalIntegerLiteral . Fraction | . Fraction
action RationalValue[DecimalLiteral] : Rational
RationalValue[DecimalLiteral Mantissa] = RationalValue[Mantissa]
RationalValue[DecimalLiteral Mantissa LetterE SignedInteger]
= RationalValue[Mantissa]*expt(10, IntegerValue[SignedInteger])
action RationalValue[Mantissa] : Rational
RationalValue[Mantissa DecimalIntegerLiteral] = IntegerValue[DecimalIntegerLiteral]
RationalValue[Mantissa DecimalIntegerLiteral .] = IntegerValue[DecimalIntegerLiteral]
RationalValue[Mantissa DecimalIntegerLiteral . Fraction]
= IntegerValue[DecimalIntegerLiteral] + RationalValue[Fraction]
RationalValue[Mantissa . Fraction] = RationalValue[Fraction]
action IntegerValue[DecimalIntegerLiteral] : Integer
IntegerValue[DecimalIntegerLiteral 0] = 0
IntegerValue[DecimalIntegerLiteral NonZeroDecimalDigits]
= IntegerValue[NonZeroDecimalDigits]
action IntegerValue[NonZeroDecimalDigits] : Integer
IntegerValue[NonZeroDecimalDigits NonZeroDigit] = DecimalValue[NonZeroDigit]
IntegerValue[NonZeroDecimalDigits NonZeroDecimalDigits1 ASCIIDigit]
= 10*IntegerValue[NonZeroDecimalDigits1] + DecimalValue[ASCIIDigit]
action DecimalValue[NonZeroDigit] : Integer = digitValue(NonZeroDigit)
action RationalValue[Fraction] : Rational
RationalValue[Fraction DecimalDigits]
= IntegerValue[DecimalDigits]/expt(10, NDigits[DecimalDigits])
action IntegerValue[SignedInteger] : Integer
IntegerValue[SignedInteger DecimalDigits] = IntegerValue[DecimalDigits]
IntegerValue[SignedInteger + DecimalDigits] = IntegerValue[DecimalDigits]
IntegerValue[SignedInteger - DecimalDigits] = -IntegerValue[DecimalDigits]
action IntegerValue[DecimalDigits] : Integer
IntegerValue[DecimalDigits ASCIIDigit] = DecimalValue[ASCIIDigit]
IntegerValue[DecimalDigits DecimalDigits1 ASCIIDigit]
= 10*IntegerValue[DecimalDigits1] + DecimalValue[ASCIIDigit]
action NDigits[DecimalDigits] : Integer
NDigits[DecimalDigits ASCIIDigit] = 1
NDigits[DecimalDigits DecimalDigits1 ASCIIDigit] = NDigits[DecimalDigits1] + 1
action IntegerValue[HexIntegerLiteral] : Integer
IntegerValue[HexIntegerLiteral 0 LetterX HexDigit] = HexValue[HexDigit]
IntegerValue[HexIntegerLiteral HexIntegerLiteral1 HexDigit]
= 16*IntegerValue[HexIntegerLiteral1] + HexValue[HexDigit]
action HexValue[HexDigit] : Integer = digitValue(HexDigit)
action IntegerValue[OctalIntegerLiteral] : Integer
IntegerValue[OctalIntegerLiteral 0 OctalDigit] = OctalValue[OctalDigit]
IntegerValue[OctalIntegerLiteral OctalIntegerLiteral1 OctalDigit]
= 8*IntegerValue[OctalIntegerLiteral1] + OctalValue[OctalDigit]
action OctalValue[OctalDigit] : Integer = digitValue(OctalDigit)
action QuantityValue[QuantityLiteral] : Quantity
QuantityValue[QuantityLiteral NumericLiteral QuantityName]
= amount DoubleValue[NumericLiteral], unit Name[QuantityName]
action Name[QuantityName] : String
Name[QuantityName [lookahead∉{LetterE, LetterX}] IdentifierName]
= Name[IdentifierName]
action StringValue[StringLiteral] : String
StringValue[StringLiteral ' StringCharssingle '] = StringValue[StringCharssingle]
StringValue[StringLiteral " StringCharsdouble "] = StringValue[StringCharsdouble]
action StringValue[StringCharsq] : String
StringValue[StringCharsq «empty»] = “”
StringValue[StringCharsq StringCharsq1 StringCharq]
= StringValue[StringCharsq1] ⊕ [CharacterValue[StringCharq]]
action CharacterValue[StringCharq] : Character
CharacterValue[StringCharq LiteralStringCharq] = LiteralStringCharq
CharacterValue[StringCharq \ StringEscape] = CharacterValue[StringEscape]
action CharacterValue[StringEscape] : Character
CharacterValue[StringEscape ControlEscape] = CharacterValue[ControlEscape]
CharacterValue[StringEscape OctalEscape] = CharacterValue[OctalEscape]
CharacterValue[StringEscape HexEscape] = CharacterValue[HexEscape]
CharacterValue[StringEscape IdentityEscape] = IdentityEscape
action CharacterValue[ControlEscape] : Character
CharacterValue[ControlEscape b] = ‘«BS»’
CharacterValue[ControlEscape f] = ‘«FF»’
CharacterValue[ControlEscape n] = ‘«LF»’
CharacterValue[ControlEscape r] = ‘«CR»’
CharacterValue[ControlEscape t] = ‘«TAB»’
CharacterValue[ControlEscape v] = ‘«VT»’
action CharacterValue[OctalEscape] : Character
CharacterValue[OctalEscape OctalDigit [lookahead∉{OctalDigit}]]
= codeToCharacter(OctalValue[OctalDigit])
CharacterValue[OctalEscape ZeroToThree OctalDigit [lookahead∉{OctalDigit}]]
= codeToCharacter(8*OctalValue[ZeroToThree] + OctalValue[OctalDigit])
CharacterValue[OctalEscape FourToSeven OctalDigit]
= codeToCharacter(8*OctalValue[FourToSeven] + OctalValue[OctalDigit])
CharacterValue[OctalEscape ZeroToThree OctalDigit1 OctalDigit2]
= codeToCharacter(
64*OctalValue[ZeroToThree] + 8*OctalValue[OctalDigit1] + OctalValue[OctalDigit2])
action OctalValue[ZeroToThree] : Integer = digitValue(ZeroToThree)
action OctalValue[FourToSeven] : Integer = digitValue(FourToSeven)
action CharacterValue[HexEscape] : Character
CharacterValue[HexEscape x HexDigit1 HexDigit2]
= codeToCharacter(16*HexValue[HexDigit1] + HexValue[HexDigit2])
CharacterValue[HexEscape u HexDigit1 HexDigit2 HexDigit3 HexDigit4]
= codeToCharacter(
4096*HexValue[HexDigit1] + 256*HexValue[HexDigit2] + 16*HexValue[HexDigit3] +
HexValue[HexDigit4])
action REValue[RegExpLiteralr] : RegExp
REValue[RegExpLiteralr RegExpBodyr RegExpFlags]
= reBody REBody[RegExpBodyr], reFlags REFlags[RegExpFlags]
action REFlags[RegExpFlags] : String
REFlags[RegExpFlags «empty»] = “”
REFlags[RegExpFlags RegExpFlags1 ContinuingIdentifierCharacter]
= REFlags[RegExpFlags1] ⊕ [CharacterValue[ContinuingIdentifierCharacter]]
action REBody[RegExpBodyr] : String
REBody[RegExpBodyslash / RegExpFirstChar RegExpCharsslash /]
= REBody[RegExpFirstChar] ⊕ REBody[RegExpCharsslash]
REBody[RegExpBodyguillemet «u00AB» RegExpCharsguillemet «u00BB»]
= REBody[RegExpCharsguillemet]
action REBody[RegExpFirstChar] : String
REBody[RegExpFirstChar OrdinaryRegExpFirstChar] = [OrdinaryRegExpFirstChar]
REBody[RegExpFirstChar \ NonTerminator] = [‘\’, NonTerminator]
action REBody[RegExpCharsr] : String
REBody[RegExpCharsr «empty»] = “”
REBody[RegExpCharsr RegExpCharsr1 RegExpCharr]
= REBody[RegExpCharsr1] ⊕ REBody[RegExpCharr]
action REBody[RegExpCharr] : String
REBody[RegExpCharr OrdinaryRegExpCharr] = [OrdinaryRegExpCharr]
REBody[RegExpCharr \ NonTerminator] = [‘\’, NonTerminator]