Spaces:
Runtime error
Runtime error
| ; | |
| const adoptBuffer = require('./adopt-buffer.cjs'); | |
| const utils = require('./utils.cjs'); | |
| const names = require('./names.cjs'); | |
| const types = require('./types.cjs'); | |
// Layout of one `offsetAndType` entry: the token type is stored shifted left
// by TYPE_SHIFT bits, the token end offset occupies the bits selected by
// OFFSET_MASK.
const TYPE_SHIFT = 24;
const OFFSET_MASK = 0x00FFFFFF;

// Maps every block-opening token type to the token type that closes it.
const balancePair = new Map();
balancePair.set(types.Function, types.RightParenthesis);
balancePair.set(types.LeftParenthesis, types.RightParenthesis);
balancePair.set(types.LeftSquareBracket, types.RightSquareBracket);
balancePair.set(types.LeftCurlyBracket, types.RightCurlyBracket);
/**
 * Cursor over the tokens produced by a tokenizer for a source string.
 *
 * Tokens are stored in two parallel numeric buffers:
 *  - `offsetAndType[i]` packs the type of token `i` (shifted by TYPE_SHIFT)
 *    together with its end offset (masked by OFFSET_MASK). The start of token
 *    `i` is the end of token `i - 1`, or `firstCharOffset` for token 0.
 *  - `balance[i]` links bracket pairs: an opening token holds the index of its
 *    closing token and vice versa; a token inside a closed block that is not
 *    itself part of a matched pair holds the index of the closing token of its
 *    innermost enclosing block; every other token (including unclosed openers)
 *    holds `source.length`.
 *
 * An extra EOF entry is stored at index `tokenCount`.
 */
class TokenStream {
    /**
     * @param {string} source - Text to tokenize.
     * @param {function(string, function(number, number, number)): void} tokenize -
     *   Tokenizer; invoked as `tokenize(source, onToken)` where `onToken`
     *   receives `(type, start, end)` for each token.
     */
    constructor(source, tokenize) {
        this.setSource(source, tokenize);
    }

    /**
     * Rewinds the cursor to the position before the first token.
     * Call `next()` afterwards to load the first token.
     */
    reset() {
        this.eof = false;
        this.tokenIndex = -1;
        this.tokenType = 0;
        this.tokenStart = this.firstCharOffset;
        this.tokenEnd = this.firstCharOffset;
    }

    /**
     * Replaces the source, re-tokenizes it and positions the cursor on the
     * first token (or on EOF when there are no tokens).
     *
     * @param {string} [source=''] - Text to tokenize; falsy values become ''.
     * @param {Function} [tokenize] - See the constructor.
     */
    setSource(source = '', tokenize = () => {}) {
        source = String(source || '');

        const sourceLength = source.length;
        const offsetAndType = adoptBuffer.adoptBuffer(this.offsetAndType, source.length + 1); // +1 because of eof-token
        const balance = adoptBuffer.adoptBuffer(this.balance, source.length + 1);
        let tokenCount = 0;
        // Type of the token that would close the innermost open block (0 = none).
        let balanceCloseType = 0;
        // Packed (closeType << TYPE_SHIFT) | index of the innermost open block's
        // opening token; 0 when no block is open.
        let balanceStart = 0;
        let firstCharOffset = -1;

        // capture buffers
        this.offsetAndType = null;
        this.balance = null;

        tokenize(source, (type, start, end) => {
            switch (type) {
                default:
                    // Not a bracket: provisionally unbalanced.
                    balance[tokenCount] = sourceLength;
                    break;

                case balanceCloseType: {
                    // Closing token of the innermost open block: pop the block
                    // and cross-link the opening and closing tokens.
                    let balancePrev = balanceStart & OFFSET_MASK;
                    balanceStart = balance[balancePrev];
                    balanceCloseType = balanceStart >> TYPE_SHIFT;
                    balance[tokenCount] = balancePrev;
                    balance[balancePrev++] = tokenCount;
                    // Point still-unassigned tokens inside the block at its closer.
                    for (; balancePrev < tokenCount; balancePrev++) {
                        if (balance[balancePrev] === sourceLength) {
                            balance[balancePrev] = tokenCount;
                        }
                    }
                    break;
                }

                case types.LeftParenthesis:
                case types.Function:
                case types.LeftSquareBracket:
                case types.LeftCurlyBracket:
                    // Opening token: push a new block, temporarily storing the
                    // previous stack entry in this token's balance slot.
                    balance[tokenCount] = balanceStart;
                    balanceCloseType = balancePair.get(type);
                    balanceStart = (balanceCloseType << TYPE_SHIFT) | tokenCount;
                    break;
            }

            offsetAndType[tokenCount++] = (type << TYPE_SHIFT) | end;
            if (firstCharOffset === -1) {
                firstCharOffset = start;
            }
        });

        // finalize buffers
        offsetAndType[tokenCount] = (types.EOF << TYPE_SHIFT) | sourceLength; // <EOF-token>
        balance[tokenCount] = sourceLength;
        balance[sourceLength] = sourceLength; // prevents false positive balance match with any token
        // Blocks that were never closed: unwind the stack and mark their
        // opening tokens as unbalanced.
        while (balanceStart !== 0) {
            const balancePrev = balanceStart & OFFSET_MASK;
            balanceStart = balance[balancePrev];
            balance[balancePrev] = sourceLength;
        }

        this.source = source;
        this.firstCharOffset = firstCharOffset === -1 ? 0 : firstCharOffset;
        this.tokenCount = tokenCount;
        this.offsetAndType = offsetAndType;
        this.balance = balance;

        this.reset();
        this.next();
    }

    /**
     * @param {number} offset - Token distance relative to the current token.
     * @returns {number} Type of that token, or `types.EOF` past the last token.
     */
    lookupType(offset) {
        offset += this.tokenIndex;

        if (offset < this.tokenCount) {
            return this.offsetAndType[offset] >> TYPE_SHIFT;
        }

        return types.EOF;
    }

    /**
     * @param {number} offset - Token distance relative to the current token.
     * @returns {number} Start offset of that token in the source, or
     *   `source.length` past the last token.
     */
    lookupOffset(offset) {
        offset += this.tokenIndex;

        if (offset < this.tokenCount) {
            // Token 0 has no predecessor entry to read its start from.
            return offset > 0
                ? this.offsetAndType[offset - 1] & OFFSET_MASK
                : this.firstCharOffset;
        }

        return this.source.length;
    }

    /**
     * Compares the text of a token with a reference string via `utils.cmpStr`.
     *
     * @param {number} offset - Token distance relative to the current token.
     * @param {string} referenceStr - String to compare the token text with.
     * @returns {boolean} Result of `utils.cmpStr`, or `false` past the last token.
     */
    lookupValue(offset, referenceStr) {
        offset += this.tokenIndex;

        if (offset < this.tokenCount) {
            return utils.cmpStr(
                this.source,
                // Token 0 has no predecessor entry to read its start from.
                offset > 0
                    ? this.offsetAndType[offset - 1] & OFFSET_MASK
                    : this.firstCharOffset,
                this.offsetAndType[offset] & OFFSET_MASK,
                referenceStr
            );
        }

        return false;
    }

    /**
     * @param {number} tokenIndex - Absolute token index.
     * @returns {number} Start offset of the token; the EOF offset for indexes
     *   at or past `tokenCount`; `firstCharOffset` for index 0 and below.
     */
    getTokenStart(tokenIndex) {
        if (tokenIndex === this.tokenIndex) {
            return this.tokenStart;
        }

        if (tokenIndex > 0) {
            return tokenIndex < this.tokenCount
                ? this.offsetAndType[tokenIndex - 1] & OFFSET_MASK
                : this.offsetAndType[this.tokenCount] & OFFSET_MASK;
        }

        return this.firstCharOffset;
    }

    /**
     * @param {number} start - Source offset to start from.
     * @returns {string} Source text from `start` up to the current token start.
     */
    substrToCursor(start) {
        return this.source.substring(start, this.tokenStart);
    }

    /**
     * @param {number} pos - Token index to compare against.
     * @returns {boolean} `true` when the current token's balance link points
     *   to a token before `pos`.
     */
    isBalanceEdge(pos) {
        return this.balance[this.tokenIndex] < pos;
    }

    /**
     * Tests whether a token is a Delim token whose first char code is `code`.
     *
     * @param {number} code - Expected char code.
     * @param {number} [offset] - Token distance relative to the current token;
     *   a falsy value checks the current token.
     * @returns {boolean}
     */
    isDelim(code, offset) {
        if (offset) {
            return (
                this.lookupType(offset) === types.Delim &&
                this.source.charCodeAt(this.lookupOffset(offset)) === code
            );
        }

        return (
            this.tokenType === types.Delim &&
            this.source.charCodeAt(this.tokenStart) === code
        );
    }

    /**
     * Moves the cursor forward by `tokenCount` tokens, stopping at EOF.
     *
     * @param {number} tokenCount - Number of tokens to advance.
     */
    skip(tokenCount) {
        let next = this.tokenIndex + tokenCount;

        if (next < this.tokenCount) {
            this.tokenIndex = next;
            // Token 0 has no predecessor entry to read its start from.
            this.tokenStart = next > 0
                ? this.offsetAndType[next - 1] & OFFSET_MASK
                : this.firstCharOffset;
            next = this.offsetAndType[next];
            this.tokenType = next >> TYPE_SHIFT;
            this.tokenEnd = next & OFFSET_MASK;
        } else {
            this.tokenIndex = this.tokenCount;
            this.next();
        }
    }

    /**
     * Moves the cursor to the next token, or switches to the EOF state when
     * there are no more tokens.
     */
    next() {
        let next = this.tokenIndex + 1;

        if (next < this.tokenCount) {
            this.tokenIndex = next;
            this.tokenStart = this.tokenEnd;
            next = this.offsetAndType[next];
            this.tokenType = next >> TYPE_SHIFT;
            this.tokenEnd = next & OFFSET_MASK;
        } else {
            this.eof = true;
            this.tokenIndex = this.tokenCount;
            this.tokenType = types.EOF;
            this.tokenStart = this.tokenEnd = this.source.length;
        }
    }

    /**
     * Advances the cursor past consecutive WhiteSpace and Comment tokens.
     */
    skipSC() {
        while (this.tokenType === types.WhiteSpace || this.tokenType === types.Comment) {
            this.next();
        }
    }

    /**
     * Scans forward from `startToken`, jumping over balanced blocks, until the
     * enclosing block ends, the tokens run out, or `stopConsume` asks to stop;
     * then moves the cursor to the token where the scan stopped.
     *
     * @param {number} startToken - Absolute index of the token to start from.
     * @param {function(number): number} stopConsume - Receives the first char
     *   code of each visited token; return 1 to stop before the token, 2 to
     *   stop after it, anything else to continue.
     */
    skipUntilBalanced(startToken, stopConsume) {
        let cursor = startToken;
        let balanceEnd;
        let offset;

        loop:
        for (; cursor < this.tokenCount; cursor++) {
            balanceEnd = this.balance[cursor];

            // stop scanning on balance edge that points to offset before start token
            if (balanceEnd < startToken) {
                break loop;
            }

            offset = cursor > 0 ? this.offsetAndType[cursor - 1] & OFFSET_MASK : this.firstCharOffset;

            // check stop condition
            switch (stopConsume(this.source.charCodeAt(offset))) {
                case 1: // just stop
                    break loop;

                case 2: // stop & included
                    cursor++;
                    break loop;

                default:
                    // fast forward to the end of balanced block
                    if (this.balance[balanceEnd] === cursor) {
                        cursor = balanceEnd;
                    }
            }
        }

        this.skip(cursor - this.tokenIndex);
    }

    /**
     * Calls `fn(type, start, end, index)` for every token in order
     * (the EOF entry is not included).
     *
     * @param {function(number, number, number, number): void} fn
     */
    forEachToken(fn) {
        for (let i = 0, offset = this.firstCharOffset; i < this.tokenCount; i++) {
            const start = offset;
            const item = this.offsetAndType[i];
            const end = item & OFFSET_MASK;
            const type = item >> TYPE_SHIFT;

            offset = end;

            fn(type, start, end, i);
        }
    }

    /**
     * @returns {Array<{idx: number, type: *, chunk: string, balance: number}>}
     *   One descriptor per token: index, `names[type]`, source text and
     *   balance link.
     */
    dump() {
        const tokens = new Array(this.tokenCount);

        this.forEachToken((type, start, end, index) => {
            tokens[index] = {
                idx: index,
                type: names[type],
                chunk: this.source.substring(start, end),
                balance: this.balance[index]
            };
        });

        return tokens;
    }
}
| exports.TokenStream = TokenStream; | |