// NOTE(review): page-scrape residue, not part of the module ("Spaces: Runtime error / Runtime error").
'use strict';

// Token type constants passed to the `onToken` callback.
const types = require('./types.cjs');
// Code point classifiers and the category lookup used by the scanner's switches.
const charCodeDefinitions = require('./char-code-definitions.cjs');
// Low-level "consume …" / "find …" scanners that return the offset where a construct ends.
const utils = require('./utils.cjs');
const names = require('./names.cjs');
const OffsetToLocation = require('./OffsetToLocation.cjs');
const TokenStream = require('./TokenStream.cjs');
/**
 * Scan CSS source text and report every token to `onToken`, following the
 * "consume a token" algorithm of CSS Syntax Module Level 3
 * (https://drafts.csswg.org/css-syntax-3/#consume-token).
 *
 * Tokens are not collected: each one is reported as soon as it has been
 * scanned. Comments are reported as `types.Comment` tokens, and no EOF token
 * is emitted — scanning simply stops at the end of the input.
 *
 * @param {*} source - CSS text. Coerced with `String(source || '')`, so every
 *   falsy value (including `0`) is tokenized as the empty string.
 * @param {function(*, number, number): void} onToken - Called once per token
 *   as `onToken(type, start, end)`, where `type` is one of the `types.*`
 *   values and `[start, end)` is the token's half-open offset range in the
 *   coerced source string.
 * @returns {void}
 */
function tokenize(source, onToken) {
    // ensure source is a string
    source = String(source || '');

    const sourceLength = source.length;

    // The result of isBOM() is used directly as the initial offset
    // (presumably 1 when the input starts with a byte order mark, 0 otherwise).
    let start = charCodeDefinitions.isBOM(getCharCode(0));
    // Scanner state shared with the consume* helpers below: `offset` is the
    // position of the next unread code unit, `type` is the type of the token
    // that is currently being scanned.
    let offset = start;
    let type;

    // Code unit at `offset`, or 0 when `offset` is past the end of the input.
    function getCharCode(offset) {
        return offset < sourceLength ? source.charCodeAt(offset) : 0;
    }

    // § 4.3.3. Consume a numeric token
    function consumeNumericToken() {
        // Consume a number and let number be the result.
        offset = utils.consumeNumber(source, offset);

        // If the next 3 input code points would start an identifier, then:
        if (charCodeDefinitions.isIdentifierStart(getCharCode(offset), getCharCode(offset + 1), getCharCode(offset + 2))) {
            // Create a <dimension-token> with the same value and type flag as number,
            // consume a name as its unit, and return the <dimension-token>.
            type = types.Dimension;
            offset = utils.consumeName(source, offset);
            return;
        }

        // Otherwise, if the next input code point is U+0025 PERCENTAGE SIGN (%), consume it.
        if (getCharCode(offset) === 0x0025) {
            // Create a <percentage-token> with the same value as number, and return it.
            type = types.Percentage;
            offset++;
            return;
        }

        // Otherwise, create a <number-token> with the same value and type flag as number, and return it.
        type = types.Number;
    }

    // § 4.3.4. Consume an ident-like token
    function consumeIdentLikeToken() {
        const nameStartOffset = offset;

        // Consume a name, and let string be the result.
        offset = utils.consumeName(source, offset);

        // If string's value is an ASCII case-insensitive match for "url",
        // and the next input code point is U+0028 LEFT PARENTHESIS ((), consume it.
        if (utils.cmpStr(source, nameStartOffset, offset, 'url') && getCharCode(offset) === 0x0028) {
            // While the next two input code points are whitespace, consume the next input code point.
            offset = utils.findWhiteSpaceEnd(source, offset + 1);

            // If the next one or two input code points are U+0022 QUOTATION MARK ("), U+0027 APOSTROPHE ('),
            // or whitespace followed by U+0022 QUOTATION MARK (") or U+0027 APOSTROPHE ('),
            // then create a <function-token> with its value set to string and return it.
            if (getCharCode(offset) === 0x0022 ||
                getCharCode(offset) === 0x0027) {
                type = types.Function;
                // Rewind to just after `url(` so that the skipped whitespace and
                // the quoted string are scanned as separate tokens.
                offset = nameStartOffset + 4;
                return;
            }

            // Otherwise, consume a url token, and return it.
            consumeUrlToken();
            return;
        }

        // Otherwise, if the next input code point is U+0028 LEFT PARENTHESIS ((), consume it.
        // Create a <function-token> with its value set to string and return it.
        if (getCharCode(offset) === 0x0028) {
            type = types.Function;
            offset++;
            return;
        }

        // Otherwise, create an <ident-token> with its value set to string and return it.
        type = types.Ident;
    }

    // § 4.3.5. Consume a string token
    function consumeStringToken(endingCodePoint) {
        // This algorithm may be called with an ending code point, which denotes the code point
        // that ends the string. If an ending code point is not specified,
        // the current input code point is used.
        if (!endingCodePoint) {
            endingCodePoint = getCharCode(offset++);
        }

        // Initially create a <string-token> with its value set to the empty string.
        type = types.String;

        // Repeatedly consume the next input code point from the stream.
        // Reaching EOF is a parse error, but the <string-token> is still returned:
        // the loop just ends.
        for (; offset < sourceLength; offset++) {
            const code = source.charCodeAt(offset);

            switch (charCodeDefinitions.charCodeCategory(code)) {
                // ending code point
                case endingCodePoint:
                    // Return the <string-token>.
                    offset++;
                    return;

                // newline
                case charCodeDefinitions.WhiteSpaceCategory:
                    if (charCodeDefinitions.isNewline(code)) {
                        // This is a parse error: create a <bad-string-token> and return it.
                        // Note that the newline is included in the token range here.
                        offset += utils.getNewlineLength(source, offset, code);
                        type = types.BadString;
                        return;
                    }
                    break;

                // U+005C REVERSE SOLIDUS (\)
                case 0x005C: {
                    // If the next input code point is EOF, do nothing.
                    if (offset === sourceLength - 1) {
                        break;
                    }

                    const nextCode = getCharCode(offset + 1);

                    if (charCodeDefinitions.isNewline(nextCode)) {
                        // Otherwise, if the next input code point is a newline, consume it.
                        offset += utils.getNewlineLength(source, offset + 1, nextCode);
                    } else if (charCodeDefinitions.isValidEscape(code, nextCode)) {
                        // Otherwise, (the stream starts with a valid escape) consume
                        // an escaped code point. The `- 1` compensates for the loop's `offset++`.
                        offset = utils.consumeEscaped(source, offset) - 1;
                    }
                    break;
                }

                // anything else
                // Append the current input code point to the <string-token>'s value.
            }
        }
    }

    // § 4.3.6. Consume a url token
    // Note: This algorithm assumes that the initial "url(" has already been consumed.
    // This algorithm also assumes that it's being called to consume an "unquoted" value, like url(foo).
    // A quoted value, like url("foo"), is parsed as a <function-token>. Consume an ident-like token
    // automatically handles this distinction; this algorithm shouldn't be called directly otherwise.
    function consumeUrlToken() {
        // Initially create a <url-token> with its value set to the empty string.
        type = types.Url;

        // Consume as much whitespace as possible.
        offset = utils.findWhiteSpaceEnd(source, offset);

        // Repeatedly consume the next input code point from the stream.
        // Reaching EOF is a parse error, but the <url-token> is still returned:
        // the loop just ends.
        for (; offset < sourceLength; offset++) {
            const code = source.charCodeAt(offset);

            switch (charCodeDefinitions.charCodeCategory(code)) {
                // U+0029 RIGHT PARENTHESIS ())
                case 0x0029:
                    // Return the <url-token>.
                    offset++;
                    return;

                // whitespace
                case charCodeDefinitions.WhiteSpaceCategory:
                    // Consume as much whitespace as possible.
                    offset = utils.findWhiteSpaceEnd(source, offset);

                    // If the next input code point is U+0029 RIGHT PARENTHESIS ()) or EOF,
                    // consume it and return the <url-token>
                    // (if EOF was encountered, this is a parse error);
                    if (getCharCode(offset) === 0x0029 || offset >= sourceLength) {
                        if (offset < sourceLength) {
                            offset++;
                        }
                        return;
                    }

                    // otherwise, consume the remnants of a bad url, create a <bad-url-token>,
                    // and return it.
                    offset = utils.consumeBadUrlRemnants(source, offset);
                    type = types.BadUrl;
                    return;

                // U+0022 QUOTATION MARK (")
                // U+0027 APOSTROPHE (')
                // U+0028 LEFT PARENTHESIS (()
                // non-printable code point
                case 0x0022:
                case 0x0027:
                case 0x0028:
                case charCodeDefinitions.NonPrintableCategory:
                    // This is a parse error. Consume the remnants of a bad url,
                    // create a <bad-url-token>, and return it.
                    offset = utils.consumeBadUrlRemnants(source, offset);
                    type = types.BadUrl;
                    return;

                // U+005C REVERSE SOLIDUS (\)
                case 0x005C:
                    // If the stream starts with a valid escape, consume an escaped code point.
                    // The `- 1` compensates for the loop's `offset++`.
                    if (charCodeDefinitions.isValidEscape(code, getCharCode(offset + 1))) {
                        offset = utils.consumeEscaped(source, offset) - 1;
                        break;
                    }

                    // Otherwise, this is a parse error. Consume the remnants of a bad url,
                    // create a <bad-url-token>, and return it.
                    offset = utils.consumeBadUrlRemnants(source, offset);
                    type = types.BadUrl;
                    return;

                // anything else
                // Append the current input code point to the <url-token>'s value.
            }
        }
    }

    // https://drafts.csswg.org/css-syntax-3/#consume-token
    // § 4.3.1. Consume a token
    // EOF needs no case of its own: the loop condition ends the scan.
    while (offset < sourceLength) {
        const code = source.charCodeAt(offset);

        switch (charCodeDefinitions.charCodeCategory(code)) {
            // whitespace
            case charCodeDefinitions.WhiteSpaceCategory:
                // Consume as much whitespace as possible. Return a <whitespace-token>.
                type = types.WhiteSpace;
                offset = utils.findWhiteSpaceEnd(source, offset + 1);
                break;

            // U+0022 QUOTATION MARK (")
            case 0x0022:
                // Consume a string token and return it.
                consumeStringToken();
                break;

            // U+0023 NUMBER SIGN (#)
            case 0x0023:
                // If the next input code point is a name code point or the next two input code points are a valid escape, then:
                if (charCodeDefinitions.isName(getCharCode(offset + 1)) || charCodeDefinitions.isValidEscape(getCharCode(offset + 1), getCharCode(offset + 2))) {
                    // Create a <hash-token>. (The spec's "id" type flag is not tracked.)
                    type = types.Hash;

                    // Consume a name, and set the <hash-token>'s value to the returned string.
                    offset = utils.consumeName(source, offset + 1);

                    // Return the <hash-token>.
                } else {
                    // Otherwise, return a <delim-token> with its value set to the current input code point.
                    type = types.Delim;
                    offset++;
                }
                break;

            // U+0027 APOSTROPHE (')
            case 0x0027:
                // Consume a string token and return it.
                consumeStringToken();
                break;

            // U+0028 LEFT PARENTHESIS (()
            case 0x0028:
                // Return a <(-token>.
                type = types.LeftParenthesis;
                offset++;
                break;

            // U+0029 RIGHT PARENTHESIS ())
            case 0x0029:
                // Return a <)-token>.
                type = types.RightParenthesis;
                offset++;
                break;

            // U+002B PLUS SIGN (+)
            case 0x002B:
                // If the input stream starts with a number, ...
                if (charCodeDefinitions.isNumberStart(code, getCharCode(offset + 1), getCharCode(offset + 2))) {
                    // ... reconsume the current input code point, consume a numeric token, and return it.
                    consumeNumericToken();
                } else {
                    // Otherwise, return a <delim-token> with its value set to the current input code point.
                    type = types.Delim;
                    offset++;
                }
                break;

            // U+002C COMMA (,)
            case 0x002C:
                // Return a <comma-token>.
                type = types.Comma;
                offset++;
                break;

            // U+002D HYPHEN-MINUS (-)
            case 0x002D:
                if (charCodeDefinitions.isNumberStart(code, getCharCode(offset + 1), getCharCode(offset + 2))) {
                    // If the input stream starts with a number, reconsume the current input code point,
                    // consume a numeric token, and return it.
                    consumeNumericToken();
                } else if (getCharCode(offset + 1) === 0x002D &&
                           getCharCode(offset + 2) === 0x003E) {
                    // Otherwise, if the next 2 input code points are U+002D HYPHEN-MINUS
                    // U+003E GREATER-THAN SIGN (->), consume them and return a <CDC-token>.
                    type = types.CDC;
                    offset = offset + 3;
                } else if (charCodeDefinitions.isIdentifierStart(code, getCharCode(offset + 1), getCharCode(offset + 2))) {
                    // Otherwise, if the input stream starts with an identifier, reconsume the current
                    // input code point, consume an ident-like token, and return it.
                    consumeIdentLikeToken();
                } else {
                    // Otherwise, return a <delim-token> with its value set to the current input code point.
                    type = types.Delim;
                    offset++;
                }
                break;

            // U+002E FULL STOP (.)
            case 0x002E:
                // If the input stream starts with a number, ...
                if (charCodeDefinitions.isNumberStart(code, getCharCode(offset + 1), getCharCode(offset + 2))) {
                    // ... reconsume the current input code point, consume a numeric token, and return it.
                    consumeNumericToken();
                } else {
                    // Otherwise, return a <delim-token> with its value set to the current input code point.
                    type = types.Delim;
                    offset++;
                }
                break;

            // U+002F SOLIDUS (/)
            case 0x002F:
                // If the next two input code points are U+002F SOLIDUS (/) followed by a U+002A ASTERISK (*),
                if (getCharCode(offset + 1) === 0x002A) {
                    // ... consume them and all following code points up to and including the first U+002A ASTERISK (*)
                    // followed by a U+002F SOLIDUS (/), or up to an EOF code point.
                    type = types.Comment;
                    offset = source.indexOf('*/', offset + 2);
                    // An unterminated comment extends to the end of the input.
                    offset = offset === -1 ? sourceLength : offset + 2;
                } else {
                    type = types.Delim;
                    offset++;
                }
                break;

            // U+003A COLON (:)
            case 0x003A:
                // Return a <colon-token>.
                type = types.Colon;
                offset++;
                break;

            // U+003B SEMICOLON (;)
            case 0x003B:
                // Return a <semicolon-token>.
                type = types.Semicolon;
                offset++;
                break;

            // U+003C LESS-THAN SIGN (<)
            case 0x003C:
                // If the next 3 input code points are U+0021 EXCLAMATION MARK U+002D HYPHEN-MINUS U+002D HYPHEN-MINUS (!--), ...
                if (getCharCode(offset + 1) === 0x0021 &&
                    getCharCode(offset + 2) === 0x002D &&
                    getCharCode(offset + 3) === 0x002D) {
                    // ... consume them and return a <CDO-token>.
                    type = types.CDO;
                    offset = offset + 4;
                } else {
                    // Otherwise, return a <delim-token> with its value set to the current input code point.
                    type = types.Delim;
                    offset++;
                }
                break;

            // U+0040 COMMERCIAL AT (@)
            case 0x0040:
                // If the next 3 input code points would start an identifier, ...
                if (charCodeDefinitions.isIdentifierStart(getCharCode(offset + 1), getCharCode(offset + 2), getCharCode(offset + 3))) {
                    // ... consume a name, create an <at-keyword-token> with its value set to the returned value, and return it.
                    type = types.AtKeyword;
                    offset = utils.consumeName(source, offset + 1);
                } else {
                    // Otherwise, return a <delim-token> with its value set to the current input code point.
                    type = types.Delim;
                    offset++;
                }
                break;

            // U+005B LEFT SQUARE BRACKET ([)
            case 0x005B:
                // Return a <[-token>.
                type = types.LeftSquareBracket;
                offset++;
                break;

            // U+005C REVERSE SOLIDUS (\)
            case 0x005C:
                // If the input stream starts with a valid escape, ...
                if (charCodeDefinitions.isValidEscape(code, getCharCode(offset + 1))) {
                    // ... reconsume the current input code point, consume an ident-like token, and return it.
                    consumeIdentLikeToken();
                } else {
                    // Otherwise, this is a parse error. Return a <delim-token> with its value set to the current input code point.
                    type = types.Delim;
                    offset++;
                }
                break;

            // U+005D RIGHT SQUARE BRACKET (])
            case 0x005D:
                // Return a <]-token>.
                type = types.RightSquareBracket;
                offset++;
                break;

            // U+007B LEFT CURLY BRACKET ({)
            case 0x007B:
                // Return a <{-token>.
                type = types.LeftCurlyBracket;
                offset++;
                break;

            // U+007D RIGHT CURLY BRACKET (})
            case 0x007D:
                // Return a <}-token>.
                type = types.RightCurlyBracket;
                offset++;
                break;

            // digit
            case charCodeDefinitions.DigitCategory:
                // Reconsume the current input code point, consume a numeric token, and return it.
                consumeNumericToken();
                break;

            // name-start code point
            case charCodeDefinitions.NameStartCategory:
                // Reconsume the current input code point, consume an ident-like token, and return it.
                consumeIdentLikeToken();
                break;

            // anything else
            default:
                // Return a <delim-token> with its value set to the current input code point.
                type = types.Delim;
                offset++;
        }

        // Report the token, then make its end the start of the next one.
        onToken(type, start, offset);
        start = offset;
    }
}
// Public API of the tokenizer module.
// Plain `exports.name = value` assignments are used on purpose: this form lets
// tooling detect the named exports statically.

// Token type constants, re-exported one by one from ./types.cjs ...
exports.AtKeyword = types.AtKeyword;
exports.BadString = types.BadString;
exports.BadUrl = types.BadUrl;
exports.CDC = types.CDC;
exports.CDO = types.CDO;
exports.Colon = types.Colon;
exports.Comma = types.Comma;
exports.Comment = types.Comment;
exports.Delim = types.Delim;
exports.Dimension = types.Dimension;
exports.EOF = types.EOF;
exports.Function = types.Function;
exports.Hash = types.Hash;
exports.Ident = types.Ident;
exports.LeftCurlyBracket = types.LeftCurlyBracket;
exports.LeftParenthesis = types.LeftParenthesis;
exports.LeftSquareBracket = types.LeftSquareBracket;
exports.Number = types.Number;
exports.Percentage = types.Percentage;
exports.RightCurlyBracket = types.RightCurlyBracket;
exports.RightParenthesis = types.RightParenthesis;
exports.RightSquareBracket = types.RightSquareBracket;
exports.Semicolon = types.Semicolon;
exports.String = types.String;
exports.Url = types.Url;
exports.WhiteSpace = types.WhiteSpace;
// ... and as a whole object.
exports.tokenTypes = types;

// Code point categories and classifiers from ./char-code-definitions.cjs.
exports.DigitCategory = charCodeDefinitions.DigitCategory;
exports.EofCategory = charCodeDefinitions.EofCategory;
exports.NameStartCategory = charCodeDefinitions.NameStartCategory;
exports.NonPrintableCategory = charCodeDefinitions.NonPrintableCategory;
exports.WhiteSpaceCategory = charCodeDefinitions.WhiteSpaceCategory;
exports.charCodeCategory = charCodeDefinitions.charCodeCategory;
exports.isBOM = charCodeDefinitions.isBOM;
exports.isDigit = charCodeDefinitions.isDigit;
exports.isHexDigit = charCodeDefinitions.isHexDigit;
exports.isIdentifierStart = charCodeDefinitions.isIdentifierStart;
exports.isLetter = charCodeDefinitions.isLetter;
exports.isLowercaseLetter = charCodeDefinitions.isLowercaseLetter;
exports.isName = charCodeDefinitions.isName;
exports.isNameStart = charCodeDefinitions.isNameStart;
exports.isNewline = charCodeDefinitions.isNewline;
exports.isNonAscii = charCodeDefinitions.isNonAscii;
exports.isNonPrintable = charCodeDefinitions.isNonPrintable;
exports.isNumberStart = charCodeDefinitions.isNumberStart;
exports.isUppercaseLetter = charCodeDefinitions.isUppercaseLetter;
exports.isValidEscape = charCodeDefinitions.isValidEscape;
exports.isWhiteSpace = charCodeDefinitions.isWhiteSpace;

// Scanning helpers from ./utils.cjs.
exports.cmpChar = utils.cmpChar;
exports.cmpStr = utils.cmpStr;
exports.consumeBadUrlRemnants = utils.consumeBadUrlRemnants;
exports.consumeEscaped = utils.consumeEscaped;
exports.consumeName = utils.consumeName;
exports.consumeNumber = utils.consumeNumber;
exports.decodeEscaped = utils.decodeEscaped;
exports.findDecimalNumberEnd = utils.findDecimalNumberEnd;
exports.findWhiteSpaceEnd = utils.findWhiteSpaceEnd;
exports.findWhiteSpaceStart = utils.findWhiteSpaceStart;
exports.getNewlineLength = utils.getNewlineLength;

// Remaining modules and the tokenizer entry point defined in this file.
exports.tokenNames = names;
exports.OffsetToLocation = OffsetToLocation.OffsetToLocation;
exports.TokenStream = TokenStream.TokenStream;
exports.tokenize = tokenize;