Spaces:

IISRFactCheck
/

claim_detection

Runtime error

JasonLiao committed on Apr 12, 2023

Commit

e473617

1 Parent(s): 88b532e

Update code/prediction.py

Add the threshold_num and threshold_rate parameters to the function "add_sentence_table", and use (new_start, new_end) as the post-processing indices.

Files changed (1) hide show

code/prediction.py +30 -12

code/prediction.py CHANGED Viewed

@@ -66,9 +66,7 @@ def test_predict(test_loader, device, model, min_label=1, max_label=3):
   model.train()
   return result
-def add_sentence_table(result):
-  pattern =":；。，？!～！： "
   for sample in result:
     boundary_list = []
     for i, char in enumerate(sample['text_a']):
@@ -77,16 +75,36 @@ def add_sentence_table(result):
     boundary_list.append(len(sample['text_a'])+1)
     start=0
     end =0
-    pre_states =False
     sample["predict_sentence_table"] = torch.zeros(len(sample["predict_span_table"]))
     for boundary in boundary_list:
       end = boundary
-      if(sum(sample["predict_span_table"][start:end])>0):
-        if pre_states:
-          sample["predict_sentence_table"][start-1:end] = 2
         else:
-          sample["predict_sentence_table"][start:end] = 2
-          sample["predict_sentence_table"][start] = 1
-        pre_states=True
-      else: pre_states =False
-      start = end+1

   model.train()
   return result
+def add_sentence_table(result, pattern =":；。，？!～！： ", threshold_num=5, threshold_rate=0.5):
   for sample in result:
     boundary_list = []
     for i, char in enumerate(sample['text_a']):
     boundary_list.append(len(sample['text_a'])+1)
     start=0
     end =0
+    fist_sentence = True
     sample["predict_sentence_table"] = torch.zeros(len(sample["predict_span_table"]))
     for boundary in boundary_list:
       end = boundary
+      new_start = start
+      new_end = end
+      for i, predict_char in enumerate(sample["predict_span_table"][start:end]):
+        if predict_char!=0:
+          prrdict_num+=1
+      prrdict_num = sum(sample["predict_span_table"][start:end])
+      sentence_num = len(sample["predict_span_table"][start:end])
+      if(prrdict_num > threshold_num) or (prrdict_num > sentence_num*threshold_rate):
+        while(sample["predict_span_table"][new_start]==0): new_start+=1
+        while(sample["predict_span_table"][new_end-1]==0): new_end-=1
+        if fist_sentence:
+          sample["predict_sentence_table"][new_start:new_end] = 2
+          sample["predict_sentence_table"][new_start] = 1
         else:
+          sample["predict_sentence_table"][new_start-1:new_end] = 2
+        fist_sentence=False
+      else: fist_sentence =True
+      start = end+1
def add_doc_id(result, test_data):
  """Attach the originating document id to every predicted sample.

  Samples are matched on their exact ``"text_a"`` string: a lookup table
  ``text_a -> docid`` is built from ``test_data`` and then applied to
  ``result``. Each sample in ``result`` is modified in place.

  Args:
    result: iterable of dict-like samples, each with a ``"text_a"`` key;
      a ``"docid"`` key is written on every one of them.
    test_data: iterable of dict-like samples, each with ``"text_a"`` and
      ``"docid"`` keys.

  Returns:
    The same ``result`` object, so calls can be chained.

  Raises:
    KeyError: if a sample in ``result`` has a ``"text_a"`` that does not
      occur in ``test_data``.
  """
  # When several test samples share the same text, the last one wins,
  # because later entries overwrite earlier ones in the mapping.
  text_to_id = {sample["text_a"]: sample["docid"] for sample in test_data}

  for sample in result:
    sample["docid"] = text_to_id[sample["text_a"]]
  return result