Spaces:
Runtime error
Runtime error
ajit
committed on
Commit
·
db6839f
1
Parent(s):
026e9d4
CS predictions unconditionally override CI predictions for the non-ensemble
Browse files
use case. TBD: re-examine this for ensembling too. In general, if CS
predictions are very confident, then CI is subservient to them. CS
predictions will not be confident when there is true ambiguity in that
position for a model trained for that domain. So overriding CS with CI
should be done only when CS allows for it by not being very
confident. This needs to be factored into the ensemble case too.
- aggregate_server_json.py +6 -3
aggregate_server_json.py
CHANGED
@@ -248,7 +248,8 @@ class AggregateNER:
|
|
248 |
n1 = flip_category(orig_cs_entity)
|
249 |
n1["e"] = prefix + n1["e"]
|
250 |
n2 = flip_category(consolidated_entity)
|
251 |
-
|
|
|
252 |
return ret_obj
|
253 |
else:
|
254 |
#if we come here consolidated is same as cs prediction. So we try to either use ci or the second cs prediction if ci is out of domain
|
@@ -262,6 +263,7 @@ class AggregateNER:
|
|
262 |
n1["e"] = prefix + n1["e"]
|
263 |
n2 = flip_category(orig_ci_entity)
|
264 |
n2["e"] = prefix + n2["e"]
|
|
|
265 |
ret_obj["e"] = n1["e"] + "/" + n2["e"]
|
266 |
return ret_obj
|
267 |
else:
|
@@ -287,7 +289,7 @@ class AggregateNER:
|
|
287 |
else:
|
288 |
return flip_category(results[server_index]["ner"][run_index])
|
289 |
else:
|
290 |
-
#here cs and ci are same. So use two cs predictions if meaningful
|
291 |
if (len(results[server_index]["orig_cs_prediction_details"][pivot_index]['cs_distribution']) >= 2):
|
292 |
ret_arr = self.get_predictions_above_threshold(results[server_index]["orig_cs_prediction_details"][pivot_index])
|
293 |
orig_cs_second_entity = results[server_index]["orig_cs_prediction_details"][pivot_index]['cs_distribution'][1]
|
@@ -302,7 +304,8 @@ class AggregateNER:
|
|
302 |
n1["e"] = prefix + n1["e"]
|
303 |
n2 = flip_category(orig_cs_entity)
|
304 |
n2["e"] = prefix + n2["e"]
|
305 |
-
|
|
|
306 |
return ret_obj
|
307 |
else:
|
308 |
return flip_category(results[server_index]["ner"][run_index])
|
|
|
248 |
n1 = flip_category(orig_cs_entity)
|
249 |
n1["e"] = prefix + n1["e"]
|
250 |
n2 = flip_category(consolidated_entity)
|
251 |
+
print("consolidated != orig cs. P1 case. Emit orig cs first")
|
252 |
+
ret_obj["e"] = n1["e"] + "/" + n2["e"] #emit orig cs first
|
253 |
return ret_obj
|
254 |
else:
|
255 |
#if we come here consolidated is same as cs prediction. So we try to either use ci or the second cs prediction if ci is out of domain
|
|
|
263 |
n1["e"] = prefix + n1["e"]
|
264 |
n2 = flip_category(orig_ci_entity)
|
265 |
n2["e"] = prefix + n2["e"]
|
266 |
+
print("consolidated == orig cs. P2 case. Emit orig cs first. Then ci")
|
267 |
ret_obj["e"] = n1["e"] + "/" + n2["e"]
|
268 |
return ret_obj
|
269 |
else:
|
|
|
289 |
else:
|
290 |
return flip_category(results[server_index]["ner"][run_index])
|
291 |
else:
|
292 |
+
#here cs and ci are same. So use two consecutive cs predictions if meaningful
|
293 |
if (len(results[server_index]["orig_cs_prediction_details"][pivot_index]['cs_distribution']) >= 2):
|
294 |
ret_arr = self.get_predictions_above_threshold(results[server_index]["orig_cs_prediction_details"][pivot_index])
|
295 |
orig_cs_second_entity = results[server_index]["orig_cs_prediction_details"][pivot_index]['cs_distribution'][1]
|
|
|
304 |
n1["e"] = prefix + n1["e"]
|
305 |
n2 = flip_category(orig_cs_entity)
|
306 |
n2["e"] = prefix + n2["e"]
|
307 |
+
print("consolidated == orig cs. P3 case. Emit orig cs first. Then ci")
|
308 |
+
ret_obj["e"] = n2["e"] + "/" + n1["e"] #when using single server twice, best to keep cs first
|
309 |
return ret_obj
|
310 |
else:
|
311 |
return flip_category(results[server_index]["ner"][run_index])
|