# simple_NER/app_NER.py
from fastapi import FastAPI
from pydantic import BaseModel
import joblib
from datasets import load_dataset

# --- Load model and label map ---
crf = joblib.load("crf_model.pkl")
raw = load_dataset("DFKI-SLT/few-nerd", "supervised")
# int2str maps the dataset's integer ner_tags to their string label names
label_map = raw['train'].features['ner_tags'].feature.int2str

def word2features(tokens, i):
    """Build the CRF feature dict for the token at position i."""
    w = tokens[i]
    f = {
        'word.lower()': w.lower(),
        'word.isupper()': w.isupper(),
        'word.istitle()': w.istitle(),
        'word.isdigit()': w.isdigit(),
        'bias': 1.0,
    }
    if i > 0:
        f['prev.lower()'] = tokens[i - 1].lower()
    else:
        f['BOS'] = True  # beginning of sentence
    if i < len(tokens) - 1:
        f['next.lower()'] = tokens[i + 1].lower()
    else:
        f['EOS'] = True  # end of sentence
    return f

def sentence_to_features(tokens):
    return [word2features(tokens, i) for i in range(len(tokens))]
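
# For illustration, word2features(["Barack", "Obama"], 0) yields a dict like:
#   {'word.lower()': 'barack', 'word.isupper()': False, 'word.istitle()': True,
#    'word.isdigit()': False, 'bias': 1.0, 'BOS': True, 'next.lower()': 'obama'}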

# --- API schema ---
class SentenceRequest(BaseModel):
    tokens: list[str]


# --- Initialize app ---
app = FastAPI(title="NER with CRF")

@app.post("/predict")
def predict(req: SentenceRequest):
features = [sentence_to_features(req.tokens)]
y_pred = crf.predict(features)[0]
# Convert to plain Python list
y_pred = list(map(str, y_pred))
# Return JSON-serializable dict
return {
"tokens": req.tokens,
"predicted_labels": y_pred
}
@app.post("/split")
def split(sent):
tokens = sent.split()
return {"tokens": tokens}