quangbmk66dsai02 committed on
Commit
6090e79
·
1 Parent(s): 0d2e9ac
Files changed (4) hide show
  1. Dockerfile +23 -0
  2. app_NER.py +57 -0
  3. crf_model.pkl +3 -0
  4. requirements.txt +6 -0
Dockerfile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Base image
FROM python:3.10-slim

# Set working directory
WORKDIR /app

# Install system deps if needed (optional, but useful for datasets).
# The apt package lists are removed in the same layer so they do not
# end up in the final image.
RUN apt-get update \
    && apt-get install -y git \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first (better caching)
COPY requirements.txt .

# Install dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy app code together with the serialized CRF model. app_NER.py loads
# "crf_model.pkl" by relative path at import time, so the file must sit
# next to it in /app or the server fails on startup.
COPY app_NER.py crf_model.pkl ./

# Expose port
EXPOSE 7860

# Start FastAPI with uvicorn
CMD ["uvicorn", "app_NER:app", "--host", "0.0.0.0", "--port", "7860"]
app_NER.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from pydantic import BaseModel
3
+ import joblib
4
+ from datasets import load_dataset
5
+
6
+ # --- Load model and label map ---
7
# The CRF is deserialized once at import time and shared by every request.
# NOTE(review): joblib.load unpickles the file, so this path must only ever
# point at a trusted artifact shipped with the app.
crf = joblib.load("crf_model.pkl")

# Only the tag-id -> tag-name mapping is needed here, so read the dataset
# metadata through the builder instead of downloading and preparing every
# split of Few-NERD on each cold start.
from datasets import load_dataset_builder

_few_nerd_builder = load_dataset_builder("DFKI-SLT/few-nerd", "supervised")
label_map = _few_nerd_builder.info.features['ner_tags'].feature.int2str
10
+
11
def word2features(tokens: list[str], i: int) -> dict[str, object]:
    """Build the CRF feature dict for the token at position ``i``.

    The feature names are part of the trained model's input contract and
    must stay exactly as they are.

    Args:
        tokens: The tokens of one sentence.
        i: Index of the token to describe.

    Returns:
        A dict with the lowercased word, three casing/digit flags and a
        constant bias, plus the lowercased previous/next token when one
        exists, or a ``BOS``/``EOS`` marker when it does not.

    Raises:
        IndexError: If ``i`` is outside ``tokens``.
    """
    word = tokens[i]
    features = {
        'word.lower()': word.lower(),
        'word.isupper()': word.isupper(),
        'word.istitle()': word.istitle(),
        'word.isdigit()': word.isdigit(),
        'bias': 1.0,
    }

    # Left context: the previous token, or a beginning-of-sentence marker.
    if i > 0:
        features['prev.lower()'] = tokens[i - 1].lower()
    else:
        features['BOS'] = True

    # Right context: the next token, or an end-of-sentence marker.
    if i < len(tokens) - 1:
        features['next.lower()'] = tokens[i + 1].lower()
    else:
        features['EOS'] = True

    return features
29
+
30
def sentence_to_features(tokens):
    """Return one feature dict per token of ``tokens``, in sentence order.

    Each position is passed to ``word2features`` together with the whole
    token list so it can look at neighbouring tokens. An empty token list
    yields an empty list.
    """
    return [word2features(tokens, position) for position in range(len(tokens))]
32
+
33
+ # --- API schema ---
34
class SentenceRequest(BaseModel):
    # Request body for /predict: a single sentence, already split into tokens.
    tokens: list[str]
36
+
37
+ # --- Initialize app ---
38
+ app = FastAPI(title="NER with CRF")
39
+
40
@app.post("/predict")
def predict(req: SentenceRequest):
    """Tag one tokenized sentence with the CRF model.

    Args:
        req: Request body carrying the sentence tokens.

    Returns:
        A JSON-serializable dict with the input ``tokens`` and a parallel
        list ``predicted_labels`` holding one label string per token.
    """
    # Nothing to tag: answer directly instead of sending an empty sequence
    # to the model.
    if not req.tokens:
        return {
            "tokens": req.tokens,
            "predicted_labels": [],
        }

    # crf.predict works on a batch of sentences; wrap the single sentence
    # in a list and take the first (only) result.
    features = [sentence_to_features(req.tokens)]
    y_pred = crf.predict(features)[0]

    # Convert to a plain Python list of strings so the response is
    # JSON-serializable.
    predicted_labels = [str(label) for label in y_pred]

    return {
        "tokens": req.tokens,
        "predicted_labels": predicted_labels,
    }
53
+
54
@app.post("/split")
def split(sent: str):
    """Split a raw sentence into whitespace-separated tokens.

    Args:
        sent: The sentence text. With no body model declared, FastAPI reads
            this from the ``sent`` query parameter.

    Returns:
        A dict whose ``tokens`` entry is the result of ``sent.split()``.
    """
    tokens = sent.split()
    return {"tokens": tokens}
crf_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6be71e2c46e92d9e05dc22ace52affb88da6088518cad7e3047240fc09d26e45
3
+ size 23248083
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ joblib
4
+ datasets
5
+ scikit-learn
6
+ scikit-learn-crfsuite