Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
| import { InferenceOutputError } from "../../lib/InferenceOutputError"; | |
| import type { BaseArgs, Options } from "../../types"; | |
| import { toArray } from "../../utils/toArray"; | |
| import { request } from "../custom/request"; | |
/**
 * Arguments accepted by `tokenClassification`: the shared `BaseArgs` plus the
 * text to analyse and optional task parameters.
 */
export type TokenClassificationArgs = BaseArgs & {
	/** The text whose tokens should be classified. */
	inputs: string;
	parameters?: {
		/**
		 * How token-level predictions are merged into entities (default: `simple`).
		 *
		 * - `none`: no aggregation; every token is classified on its own.
		 * - `simple`: entities are grouped following the default schema
		 *   (B-, I- tags get merged when the tag is similar).
		 * - `first`: like `simple`, but a word cannot end up with different tags;
		 *   on ambiguity the word takes the tag of its first token.
		 * - `average`: like `simple`, but a word cannot end up with different tags;
		 *   scores are averaged across tokens and the maximum label is applied.
		 * - `max`: like `simple`, but a word cannot end up with different tags;
		 *   the word entity is the token with the maximum score.
		 */
		aggregation_strategy?: "none" | "simple" | "first" | "average" | "max";
	};
};
/**
 * One recognized entity, as validated by `tokenClassification`.
 */
export interface TokenClassificationOutputValue {
	/**
	 * Offset in the input string where the captured span starts. Helps tell
	 * occurrences apart when the same word appears more than once.
	 */
	start: number;
	/**
	 * Offset in the input string where the captured span ends. Helps tell
	 * occurrences apart when the same word appears more than once.
	 */
	end: number;
	/** The text that was captured. */
	word: string;
	/** The entity type that was recognized (model specific). */
	entity_group: string;
	/** How likely it is that the entity was recognized. */
	score: number;
}
/** The list of entities returned by `tokenClassification`. */
export type TokenClassificationOutput = Array<TokenClassificationOutputValue>;
/**
 * Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text. Recommended model: dbmdz/bert-large-cased-finetuned-conll03-english
 *
 * @param args - The text to classify plus optional task parameters.
 * @param options - Forwarded to `request`; `taskHint` is always set to
 *   `"token-classification"`, overriding any value supplied here.
 * @returns The response normalized to an array via `toArray`, once every
 *   element has been checked to have the expected fields.
 * @throws InferenceOutputError if any element is not an object with numeric
 *   `start`/`end`/`score` and string `entity_group`/`word`.
 */
export async function tokenClassification(
	args: TokenClassificationArgs,
	options?: Options
): Promise<TokenClassificationOutput> {
	const res = toArray(
		await request<TokenClassificationOutput[number] | TokenClassificationOutput>(args, {
			...options,
			taskHint: "token-classification",
		})
	);
	const isValidOutput =
		Array.isArray(res) &&
		res.every(
			(x) =>
				// Static types are erased at runtime, so an element may still be
				// null/undefined or a primitive. Guard first so a malformed response
				// surfaces as InferenceOutputError rather than a TypeError raised by
				// the property reads below.
				typeof x === "object" &&
				x !== null &&
				typeof x.end === "number" &&
				typeof x.entity_group === "string" &&
				typeof x.score === "number" &&
				typeof x.start === "number" &&
				typeof x.word === "string"
		);
	if (!isValidOutput) {
		throw new InferenceOutputError(
			"Expected Array<{end: number, entity_group: string, score: number, start: number, word: string}>"
		);
	}
	return res;
}