File size: 2,088 Bytes
b2ecf7d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import type { TaskDataCustom } from "../Types";

/**
 * Static metadata describing the token classification task: example
 * datasets, a demo input/output pair, evaluation metrics, example models
 * and spaces, a prose summary, the widget model list and a YouTube video id.
 */
const taskData: TaskDataCustom = {
	// Example datasets for the task, each with a short human-readable description.
	datasets: [
		{
			description: "A widely used dataset useful to benchmark named entity recognition models.",
			id: "conll2003",
		},
		{
			description:
				"A multilingual dataset of Wikipedia articles annotated for named entity recognition in over 150 different languages.",
			id: "wikiann",
		},
	],
	// Demo: one text input and the same text annotated with entity spans.
	demo: {
		inputs: [
			{
				label: "Input",
				content: "My name is Omar and I live in Zürich.",
				type: "text",
			},
		],
		outputs: [
			{
				text: "My name is Omar and I live in Zürich.",
				// `start`/`end` are character offsets into `text`, with `end` exclusive:
				// text.slice(11, 15) === "Omar" and text.slice(30, 36) === "Zürich".
				tokens: [
					{
						type: "PERSON",
						start: 11,
						end: 15,
					},
					{
						type: "GPE",
						start: 30,
						end: 36,
					},
				],
				type: "text-with-tokens",
			},
		],
	},
	// NOTE(review): every metric description is an empty string; presumably a
	// default description is supplied by whatever consumes this object — confirm.
	metrics: [
		{
			description: "",
			id: "accuracy",
		},
		{
			description: "",
			id: "recall",
		},
		{
			description: "",
			id: "precision",
		},
		{
			description: "",
			id: "f1",
		},
	],
	// Example models for the task.
	models: [
		{
			description:
				"A robust performance model to identify people, locations, organizations and names of miscellaneous entities.",
			id: "dslim/bert-base-NER",
		},
		{
			description: "Flair models are typically the state of the art in named entity recognition tasks.",
			id: "flair/ner-english",
		},
	],
	// Example spaces (applications) for the task.
	spaces: [
		{
			description:
				"An application that can recognize entities, extract noun chunks and recognize various linguistic features of each token.",
			id: "spacy/gradio_pipeline_visualizer",
		},
	],
	summary:
		"Token classification is a natural language understanding task in which a label is assigned to some tokens in a text. Some popular token classification subtasks are Named Entity Recognition (NER) and Part-of-Speech (PoS) tagging. NER models could be trained to identify specific entities in a text, such as dates, individuals and places; and PoS tagging would identify, for example, which words in a text are verbs, nouns, and punctuation marks.",
	// Same id as the first entry in `models`.
	widgetModels: ["dslim/bert-base-NER"],
	youtubeId: "wVHdVlPScxA",
};

export default taskData;