'use strict';

const adoptBuffer = require('./adopt-buffer.cjs');
const utils = require('./utils.cjs');
const names = require('./names.cjs');
const types = require('./types.cjs');

const OFFSET_MASK = 0x00FFFFFF;
const TYPE_SHIFT = 24;
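
// Each entry of `offsetAndType` packs one token into a single integer:
// the token type in the upper 8 bits (<< TYPE_SHIFT) and the token's end
// offset in the lower 24 bits (& OFFSET_MASK), which caps representable
// offsets at 2^24 - 1.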
const balancePair = new Map([
    [types.Function, types.RightParenthesis],
    [types.LeftParenthesis, types.RightParenthesis],
    [types.LeftSquareBracket, types.RightSquareBracket],
    [types.LeftCurlyBracket, types.RightCurlyBracket]
]);
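
// `balancePair` maps each opening token type to the closing type that
// terminates it. `setSource()` uses it to fill the `balance` array, where an
// opening token ends up pointing at the index of its matching closing token,
// the closing token points back at its opener, and tokens in between point at
// the closing token of their innermost enclosing block.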

class TokenStream {
    constructor(source, tokenize) {
        this.setSource(source, tokenize);
    }
    reset() {
        this.eof = false;
        this.tokenIndex = -1;
        this.tokenType = 0;
        this.tokenStart = this.firstCharOffset;
        this.tokenEnd = this.firstCharOffset;
    }
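    // setSource() tokenizes the whole input up front: it runs `tokenize` once,
    // packing every token into `offsetAndType` and recording block nesting in
    // `balance`, then positions the cursor on the first token.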
    setSource(source = '', tokenize = () => {}) {
        source = String(source || '');

        const sourceLength = source.length;
        const offsetAndType = adoptBuffer.adoptBuffer(this.offsetAndType, source.length + 1); // +1 because of eof-token
        const balance = adoptBuffer.adoptBuffer(this.balance, source.length + 1);
        let tokenCount = 0;
        let balanceCloseType = 0;
        let balanceStart = 0;
        let firstCharOffset = -1;

        // capture buffers
        this.offsetAndType = null;
        this.balance = null;

        tokenize(source, (type, start, end) => {
            switch (type) {
                default:
                    balance[tokenCount] = sourceLength;
                    break;

                case balanceCloseType: {
                    let balancePrev = balanceStart & OFFSET_MASK;
                    balanceStart = balance[balancePrev];
                    balanceCloseType = balanceStart >> TYPE_SHIFT;
                    balance[tokenCount] = balancePrev;
                    balance[balancePrev++] = tokenCount;

                    for (; balancePrev < tokenCount; balancePrev++) {
                        if (balance[balancePrev] === sourceLength) {
                            balance[balancePrev] = tokenCount;
                        }
                    }

                    break;
                }

                case types.LeftParenthesis:
                case types.Function:
                case types.LeftSquareBracket:
                case types.LeftCurlyBracket:
                    balance[tokenCount] = balanceStart;
                    balanceCloseType = balancePair.get(type);
                    balanceStart = (balanceCloseType << TYPE_SHIFT) | tokenCount;
                    break;
            }

            offsetAndType[tokenCount++] = (type << TYPE_SHIFT) | end;
            if (firstCharOffset === -1) {
                firstCharOffset = start;
            }
        });

        // finalize buffers
        offsetAndType[tokenCount] = (types.EOF << TYPE_SHIFT) | sourceLength; // <EOF-token>
        balance[tokenCount] = sourceLength;
        balance[sourceLength] = sourceLength; // prevents false positive balance match with any token
        while (balanceStart !== 0) {
            const balancePrev = balanceStart & OFFSET_MASK;
            balanceStart = balance[balancePrev];
            balance[balancePrev] = sourceLength;
        }

        this.source = source;
        this.firstCharOffset = firstCharOffset === -1 ? 0 : firstCharOffset;
        this.tokenCount = tokenCount;
        this.offsetAndType = offsetAndType;
        this.balance = balance;
        this.reset();
        this.next();
    }
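
    // The lookup* methods peek at a token relative to the current position
    // without moving the cursor: `offset` is added to `tokenIndex`, and any
    // position past the last token falls back to EOF, the end of the source,
    // or false respectively.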
    lookupType(offset) {
        offset += this.tokenIndex;

        if (offset < this.tokenCount) {
            return this.offsetAndType[offset] >> TYPE_SHIFT;
        }

        return types.EOF;
    }
    lookupOffset(offset) {
        offset += this.tokenIndex;

        if (offset < this.tokenCount) {
            return this.offsetAndType[offset - 1] & OFFSET_MASK;
        }

        return this.source.length;
    }
    lookupValue(offset, referenceStr) {
        offset += this.tokenIndex;

        if (offset < this.tokenCount) {
            return utils.cmpStr(
                this.source,
                this.offsetAndType[offset - 1] & OFFSET_MASK,
                this.offsetAndType[offset] & OFFSET_MASK,
                referenceStr
            );
        }

        return false;
    }
    getTokenStart(tokenIndex) {
        if (tokenIndex === this.tokenIndex) {
            return this.tokenStart;
        }

        if (tokenIndex > 0) {
            return tokenIndex < this.tokenCount
                ? this.offsetAndType[tokenIndex - 1] & OFFSET_MASK
                : this.offsetAndType[this.tokenCount] & OFFSET_MASK;
        }

        return this.firstCharOffset;
    }
    substrToCursor(start) {
        return this.source.substring(start, this.tokenStart);
    }
    isBalanceEdge(pos) {
        return this.balance[this.tokenIndex] < pos;
    }
    isDelim(code, offset) {
        if (offset) {
            return (
                this.lookupType(offset) === types.Delim &&
                this.source.charCodeAt(this.lookupOffset(offset)) === code
            );
        }

        return (
            this.tokenType === types.Delim &&
            this.source.charCodeAt(this.tokenStart) === code
        );
    }
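
    // skip() and next() advance the cursor and refresh the cached
    // tokenIndex/tokenType/tokenStart/tokenEnd fields; moving past the last
    // token switches the stream into the EOF state.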
    skip(tokenCount) {
        let next = this.tokenIndex + tokenCount;

        if (next < this.tokenCount) {
            this.tokenIndex = next;
            this.tokenStart = this.offsetAndType[next - 1] & OFFSET_MASK;
            next = this.offsetAndType[next];
            this.tokenType = next >> TYPE_SHIFT;
            this.tokenEnd = next & OFFSET_MASK;
        } else {
            this.tokenIndex = this.tokenCount;
            this.next();
        }
    }
    next() {
        let next = this.tokenIndex + 1;

        if (next < this.tokenCount) {
            this.tokenIndex = next;
            this.tokenStart = this.tokenEnd;
            next = this.offsetAndType[next];
            this.tokenType = next >> TYPE_SHIFT;
            this.tokenEnd = next & OFFSET_MASK;
        } else {
            this.eof = true;
            this.tokenIndex = this.tokenCount;
            this.tokenType = types.EOF;
            this.tokenStart = this.tokenEnd = this.source.length;
        }
    }
    skipSC() {
        while (this.tokenType === types.WhiteSpace || this.tokenType === types.Comment) {
            this.next();
        }
    }
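
    // skipUntilBalanced() consumes tokens starting at `startToken` until either
    // the enclosing block would be left (balance edge before the start token)
    // or `stopConsume(charCode)` asks to stop: 1 = stop before the current
    // token, 2 = stop after consuming it, any other value = keep scanning and
    // jump over complete balanced blocks using the `balance` array.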
    skipUntilBalanced(startToken, stopConsume) {
        let cursor = startToken;
        let balanceEnd;
        let offset;

        loop:
        for (; cursor < this.tokenCount; cursor++) {
            balanceEnd = this.balance[cursor];

            // stop scanning on balance edge that points to offset before start token
            if (balanceEnd < startToken) {
                break loop;
            }

            offset = cursor > 0 ? this.offsetAndType[cursor - 1] & OFFSET_MASK : this.firstCharOffset;

            // check stop condition
            switch (stopConsume(this.source.charCodeAt(offset))) {
                case 1: // just stop
                    break loop;

                case 2: // stop & included
                    cursor++;
                    break loop;

                default:
                    // fast forward to the end of balanced block
                    if (this.balance[balanceEnd] === cursor) {
                        cursor = balanceEnd;
                    }
            }
        }

        this.skip(cursor - this.tokenIndex);
    }
    forEachToken(fn) {
        for (let i = 0, offset = this.firstCharOffset; i < this.tokenCount; i++) {
            const start = offset;
            const item = this.offsetAndType[i];
            const end = item & OFFSET_MASK;
            const type = item >> TYPE_SHIFT;

            offset = end;

            fn(type, start, end, i);
        }
    }
    dump() {
        const tokens = new Array(this.tokenCount);

        this.forEachToken((type, start, end, index) => {
            tokens[index] = {
                idx: index,
                type: names[type],
                chunk: this.source.substring(start, end),
                balance: this.balance[index]
            };
        });

        return tokens;
    }
}

exports.TokenStream = TokenStream;
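
// Usage sketch (assumption: a sibling tokenizer module exposes a
// `tokenize(source, onToken)` function that reports (type, start, end)
// triples, as css-tree's tokenizer does; the require path below is
// hypothetical):
//
//   const { tokenize } = require('./index.cjs');
//   const stream = new TokenStream('a { color: red }', tokenize);
//
//   while (!stream.eof) {
//       console.log(stream.tokenType, stream.source.substring(stream.tokenStart, stream.tokenEnd));
//       stream.next();
//   }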