mp-02 committed on
Commit
f3df04e
·
verified ·
1 Parent(s): 4093517

Update inference.py

Browse files
Files changed (1) hide show
  1. inference.py +21 -14
inference.py CHANGED
@@ -17,29 +17,36 @@ model.to(device)
17
  import json
18
 
19
  def token2json(words, labels):
20
- result = []
21
  current_entity = None
22
-
23
- for token, label in zip(words, labels):
 
24
  if label.startswith("B-"):
25
  if current_entity:
26
- result.append(current_entity)
27
- current_entity = {"type": label[2:], "text": token}
 
 
28
  elif label.startswith("I-"):
29
- if current_entity and current_entity["type"] == label[2:]:
30
- current_entity["text"] += " " + token
31
  else:
 
32
  if current_entity:
33
- result.append(current_entity)
34
- current_entity = {"type": label[2:], "text": token}
35
- else: # "O" label
 
36
  if current_entity:
37
- result.append(current_entity)
38
  current_entity = None
39
-
 
 
40
  if current_entity:
41
- result.append(current_entity)
42
-
43
  return json.dumps(result, ensure_ascii=False, indent=2)
44
 
45
 
 
17
  import json
18
 
19
  def token2json(words, labels):
20
+ result = {}
21
  current_entity = None
22
+ current_text = []
23
+
24
+ for word, label in zip(words, labels):
25
  if label.startswith("B-"):
26
  if current_entity:
27
+ result[current_entity] = " ".join(current_text).strip()
28
+ current_text = []
29
+ current_entity = label[2:].lower()
30
+ current_text = [word]
31
  elif label.startswith("I-"):
32
+ if current_entity == label[2:].lower():
33
+ current_text.append(word)
34
  else:
35
+ # Gestione di sequenze I- non precedute da B-
36
  if current_entity:
37
+ result[current_entity] = " ".join(current_text).strip()
38
+ current_entity = label[2:].lower()
39
+ current_text = [word]
40
+ else: # Label "O"
41
  if current_entity:
42
+ result[current_entity] = " ".join(current_text).strip()
43
  current_entity = None
44
+ current_text = []
45
+
46
+ # Aggiunge l'ultima entità se presente
47
  if current_entity:
48
+ result[current_entity] = " ".join(current_text).strip()
49
+
50
  return json.dumps(result, ensure_ascii=False, indent=2)
51
 
52