adrienbrdne committed
Commit 45f7e41 · verified · 1 Parent(s): db48e00

Update scoring/specificity.py

Files changed (1)
  1. scoring/specificity.py +116 -117
scoring/specificity.py CHANGED
@@ -1,118 +1,117 @@
-import os
-import uvicorn
-from fastapi import FastAPI, HTTPException
-from pydantic import BaseModel
-from typing import List, Dict, Union
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
-import torch
-
-
-# Definition of Pydantic data models
-class ProblematicItem(BaseModel):
-    text: str
-
-class ProblematicList(BaseModel):
-    problematics: List[str]
-
-class PredictionResponse(BaseModel):
-    predicted_class: str
-    score: float
-
-class PredictionsResponse(BaseModel):
-    results: List[Dict[str, Union[str, float]]]
-
-# Model environment variables
-MODEL_NAME = os.getenv("MODEL_NAME", "votre-compte/votre-modele")
-LABEL_0 = os.getenv("LABEL_0", "Classe A")
-LABEL_1 = os.getenv("LABEL_1", "Classe B")
-
-# Loading the model and tokenizer
-tokenizer = None
-model = None
-
-def load_model():
-    global tokenizer, model
-    try:
-        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-        model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
-        return True
-    except Exception as e:
-        print(f"Error loading model: {e}")
-        return False
-
-
-def health_check():
-    global model, tokenizer
-    if model is None or tokenizer is None:
-        success = load_model()
-        if not success:
-            raise HTTPException(status_code=503, detail="Model not available")
-    return {"status": "ok", "model": MODEL_NAME}
-
-
-def predict_single(item: ProblematicItem):
-    global model, tokenizer
-
-    if model is None or tokenizer is None:
-        success = load_model()
-        if not success:
-            print('Error loading the model.')
-
-    try:
-        # Tokenization
-        inputs = tokenizer(item.text, padding=True, truncation=True, return_tensors="pt")
-
-        # Prediction
-        with torch.no_grad():
-            outputs = model(**inputs)
-            probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
-            predicted_class = torch.argmax(probabilities, dim=1).item()
-            confidence_score = probabilities[0][predicted_class].item()
-
-        # Associate the correct label
-        predicted_label = LABEL_0 if predicted_class == 0 else LABEL_1
-
-        return PredictionResponse(predicted_class=predicted_label, score=confidence_score)
-
-    except Exception as e:
-        print(f"Error during prediction: {str(e)}")
-
-def predict_batch(items: ProblematicList):
-    global model, tokenizer
-
-    if model is None or tokenizer is None:
-        success = load_model()
-        if not success:
-            print("Model not available")
-
-    try:
-        results = []
-
-        # Batch processing
-        batch_size = 8
-        for i in range(0, len(items.problematics), batch_size):
-            batch_texts = items.problematics[i:i+batch_size]
-
-            # Tokenization
-            inputs = tokenizer(batch_texts, padding=True, truncation=True, return_tensors="pt")
-
-            # Prediction
-            with torch.no_grad():
-                outputs = model(**inputs)
-                probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
-                predicted_classes = torch.argmax(probabilities, dim=1).tolist()
-                confidence_scores = [probabilities[j][predicted_classes[j]].item() for j in range(len(predicted_classes))]
-
-            # Converting numerical predictions into labels
-            for j, (pred_class, score) in enumerate(zip(predicted_classes, confidence_scores)):
-                predicted_label = LABEL_0 if pred_class == 0 else LABEL_1
-                results.append({
-                    "text": batch_texts[j],
-                    "class": predicted_label,
-                    "score": score
-                })
-
-        return PredictionsResponse(results=results)
-
-    except Exception as e:
-        print(f"Error during prediction: {str(e)}")
+import os
+from pydantic import BaseModel
+from typing import List, Dict, Union
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+
+
+# Definition of Pydantic data models
+class ProblematicItem(BaseModel):
+    text: str
+
+class ProblematicList(BaseModel):
+    problematics: List[str]
+
+class PredictionResponse(BaseModel):
+    predicted_class: str
+    score: float
+
+class PredictionsResponse(BaseModel):
+    results: List[Dict[str, Union[str, float]]]
+
+# Model environment variables
+MODEL_NAME = os.getenv("MODEL_NAME", "votre-compte/votre-modele")
+LABEL_0 = os.getenv("LABEL_0", "Classe A")
+LABEL_1 = os.getenv("LABEL_1", "Classe B")
+
+# Loading the model and tokenizer
+tokenizer = None
+model = None
+
+
+def load_model():
+    global tokenizer, model
+    try:
+        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+        model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
+        return True
+    except Exception as e:
+        print(f"Error loading model: {e}")
+        return False
+
+
+def health_check():
+    global model, tokenizer
+    if model is None or tokenizer is None:
+        success = load_model()
+        if not success:
+            print("Model not available")
+    return {"status": "ok", "model": MODEL_NAME}
+
+
+def predict_single(item: ProblematicItem):
+    global model, tokenizer
+
+    if model is None or tokenizer is None:
+        success = load_model()
+        if not success:
+            print('Error loading the model.')
+
+    try:
+        # Tokenization
+        inputs = tokenizer(item.text, padding=True, truncation=True, return_tensors="pt")
+
+        # Prediction
+        with torch.no_grad():
+            outputs = model(**inputs)
+            probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
+            predicted_class = torch.argmax(probabilities, dim=1).item()
+            confidence_score = probabilities[0][predicted_class].item()
+
+        # Associate the correct label
+        predicted_label = LABEL_0 if predicted_class == 0 else LABEL_1
+
+        return PredictionResponse(predicted_class=predicted_label, score=confidence_score)
+
+    except Exception as e:
+        print(f"Error during prediction: {str(e)}")
+
+def predict_batch(items: ProblematicList):
+    global model, tokenizer
+
+    if model is None or tokenizer is None:
+        success = load_model()
+        if not success:
+            print("Model not available")
+
+    try:
+        results = []
+
+        # Batch processing
+        batch_size = 8
+        for i in range(0, len(items.problematics), batch_size):
+            batch_texts = items.problematics[i:i+batch_size]
+
+            # Tokenization
+            inputs = tokenizer(batch_texts, padding=True, truncation=True, return_tensors="pt")
+
+            # Prediction
+            with torch.no_grad():
+                outputs = model(**inputs)
+                probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
+                predicted_classes = torch.argmax(probabilities, dim=1).tolist()
+                confidence_scores = [probabilities[j][predicted_classes[j]].item() for j in range(len(predicted_classes))]
+
+            # Converting numerical predictions into labels
+            for j, (pred_class, score) in enumerate(zip(predicted_classes, confidence_scores)):
+                predicted_label = LABEL_0 if pred_class == 0 else LABEL_1
+                results.append({
+                    "text": batch_texts[j],
+                    "class": predicted_label,
+                    "score": score
+                })
+
+        return PredictionsResponse(results=results)
+
+    except Exception as e:
+        print(f"Error during prediction: {str(e)}")