File size: 1,428 Bytes
6090e79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
from fastapi import FastAPI
from pydantic import BaseModel
import joblib
from datasets import load_dataset

# --- Load model and label map ---
# Deserialize the trained CRF from disk (path is relative to the CWD the
# server is launched from). NOTE(review): joblib.load unpickles arbitrary
# code — only load model files from a trusted source.
crf = joblib.load("crf_model.pkl")
# Download/load the Few-NERD supervised split just to recover the
# int -> string tag decoder; this pulls the full dataset at import time,
# which slows startup — consider persisting the label list alongside the model.
raw = load_dataset("DFKI-SLT/few-nerd", "supervised")
# Callable mapping an integer ner_tag id to its string label.
# NOTE(review): label_map is never used below — presumably the CRF already
# predicts string labels; confirm and either use it in /predict or drop it.
label_map = raw['train'].features['ner_tags'].feature.int2str

def word2features(tokens, i):
    """Build the CRF feature dict for the token at position *i* of *tokens*.

    Features cover the token's surface form (case/shape) plus the lowercased
    neighbors; sentence boundaries are marked with BOS/EOS flags instead.
    """
    token = tokens[i]
    features = {
        'bias': 1.0,
        'word.lower()': token.lower(),
        'word.isupper()': token.isupper(),
        'word.istitle()': token.istitle(),
        'word.isdigit()': token.isdigit(),
    }
    # Left context: previous token, or a beginning-of-sentence marker.
    if i == 0:
        features['BOS'] = True
    else:
        features['prev.lower()'] = tokens[i - 1].lower()
    # Right context: next token, or an end-of-sentence marker.
    if i == len(tokens) - 1:
        features['EOS'] = True
    else:
        features['next.lower()'] = tokens[i + 1].lower()
    return features

def sentence_to_features(tokens):
    """Featurize every token position of *tokens* for the CRF, in order."""
    feats = []
    for idx in range(len(tokens)):
        feats.append(word2features(tokens, idx))
    return feats

# --- API schema ---
class SentenceRequest(BaseModel):
    """Request body for POST /predict: a single, already-tokenized sentence."""

    # Sentence tokens in order; the caller is responsible for tokenization
    # (see the /split endpoint for a naive whitespace tokenizer).
    tokens: list[str]

# --- Initialize app ---
# FastAPI application instance; the title appears in the auto-generated
# OpenAPI docs (/docs).
app = FastAPI(title="NER with CRF")

@app.post("/predict")
def predict(req: SentenceRequest):
    """Tag the request's tokens with the CRF and return per-token labels.

    Returns a JSON object with the echoed tokens and one predicted label
    per token, in the same order.
    """
    sentence_feats = sentence_to_features(req.tokens)
    # crf.predict expects a batch of sentences; wrap and unwrap a batch of one.
    labels = crf.predict([sentence_feats])[0]

    # Coerce each label to a plain Python str so the response is
    # JSON-serializable regardless of the model's label dtype.
    labels = [str(label) for label in labels]

    return {
        "tokens": req.tokens,
        "predicted_labels": labels,
    }

@app.post("/split")
def split(sent: str):
    """Tokenize *sent* on whitespace and return the token list.

    Fix: the parameter was previously unannotated, so FastAPI inferred an
    untyped required query parameter; the explicit ``str`` annotation keeps
    the same runtime behavior (required query param) while adding input
    validation and a correct type in the generated OpenAPI schema.

    Note this is a naive whitespace tokenizer; punctuation stays attached
    to words.
    """
    tokens = sent.split()
    return {"tokens": tokens}