|
|
|
""" |
|
Created on Mon Mar 1 15:33:54 2021

@author: luol2
|
""" |
|
|
|
|
|
def Rel_Evaluation(prefile):
    """Print entity-level precision, recall and F1 for a tagged prediction file.

    The file is read as blank-line-separated sentences with one token per
    line. Each line is split on tabs into: token text, gold label, predicted
    label. Every span opened by a ``<GENE>`` token is scored as one entity.
    Labels other than ``'O'`` have their first two characters removed before
    comparison (presumably a ``B-``/``I-`` prefix -- confirm against the code
    that writes this file).

    Scoring per entity:
        * gold and prediction carry the same non-'O' type -> TP
        * both are non-'O' but differ                     -> FP and FN
        * only the prediction is non-'O'                  -> FP
        * only the gold label is non-'O'                  -> FN

    Args:
        prefile: Path to the UTF-8 encoded prediction file.

    Returns:
        None. Counts and scores are printed to stdout.
    """
    # Context manager so the handle is released even if reading fails.
    with open(prefile, 'r', encoding='utf-8') as fin:
        all_in = fin.read().strip().split('\n\n')

    TP = 0
    FP = 0
    FN = 0
    for sentence in all_in:
        tokens = sentence.split('\n')
        token_id = 0
        while token_id < len(tokens):
            seg = tokens[token_id].split('\t')
            if seg[0] == '<GENE>':
                # Labels on the opening tag seed the entity's gold/predicted type.
                temp_gold = seg[1] if seg[1] == 'O' else seg[1][2:]
                temp_pre = seg[2] if seg[2] == 'O' else seg[2][2:]

                token_id += 1
                if token_id < len(tokens):
                    seg = tokens[token_id].split('\t')
                    # NOTE(review): the original indentation was lost; this
                    # assumes the two label checks below ran inside the scan
                    # loop (first non-'O' label wins). Confirm against the
                    # upstream file.
                    # The bounds test stops an unterminated span at the end of
                    # the sentence instead of raising IndexError.
                    while seg[0] != '</GENE>' and token_id + 1 < len(tokens):
                        token_id += 1
                        seg = tokens[token_id].split('\t')
                        if seg[1] != 'O' and temp_gold == 'O':
                            temp_gold = seg[1][2:]
                        if seg[2] != 'O' and temp_pre == 'O':
                            temp_pre = seg[2][2:]

                if temp_pre != 'O' and temp_gold != 'O':
                    if temp_pre == temp_gold:
                        TP += 1
                    else:
                        # Wrong type: a spurious prediction and a missed gold.
                        FP += 1
                        FN += 1
                elif temp_pre != 'O':
                    FP += 1
                elif temp_gold != 'O':
                    FN += 1
            token_id += 1

    # Each ratio falls back to 0 when its denominator is empty.
    P = 0 if TP + FP == 0 else TP / (TP + FP)
    R = 0 if TP + FN == 0 else TP / (TP + FN)
    F1 = 0 if P + R == 0 else 2 * P * R / (P + R)
    print('TP,FP,FN:', TP, FP, FN)
    print('P,R,F1:', P, R, F1)
|
|
|
|
|
def Rel_Evaluation_fn(prefile):
    """Score entity-level relation predictions and return the F1 value.

    The file is read as blank-line-separated sentences with one token per
    line. Each line is split on tabs into: token text, gold label, predicted
    label. Every span opened by a ``<GENE>`` token is scored as one entity.
    Labels other than ``'O'`` have their first two characters removed before
    comparison (presumably a ``B-``/``I-`` prefix -- confirm against the code
    that writes this file).

    Scoring per entity:
        * gold and prediction carry the same non-'O' type -> TP
        * both are non-'O' but differ                     -> FP only
        * only the prediction is non-'O'                  -> FP
        * only the gold label is non-'O'                  -> FN

    Args:
        prefile: Path to the UTF-8 encoded prediction file.

    Returns:
        The F1 score (``0`` when precision + recall is zero). Counts and
        scores are also printed to stdout.
    """
    # Context manager so the handle is released even if reading fails.
    with open(prefile, 'r', encoding='utf-8') as fin:
        all_in = fin.read().strip().split('\n\n')

    TP = 0
    FP = 0
    FN = 0
    for sentence in all_in:
        tokens = sentence.split('\n')
        token_id = 0
        while token_id < len(tokens):
            seg = tokens[token_id].split('\t')
            if seg[0] == '<GENE>':
                # Labels on the opening tag seed the entity's gold/predicted type.
                temp_gold = seg[1] if seg[1] == 'O' else seg[1][2:]
                temp_pre = seg[2] if seg[2] == 'O' else seg[2][2:]

                token_id += 1
                if token_id < len(tokens):
                    seg = tokens[token_id].split('\t')
                    # NOTE(review): the original indentation was lost; this
                    # assumes the two label checks below ran inside the scan
                    # loop (first non-'O' label wins). Confirm against the
                    # upstream file.
                    # The bounds test stops an unterminated span at the end of
                    # the sentence instead of raising IndexError.
                    while seg[0] != '</GENE>' and token_id + 1 < len(tokens):
                        token_id += 1
                        seg = tokens[token_id].split('\t')
                        if seg[1] != 'O' and temp_gold == 'O':
                            temp_gold = seg[1][2:]
                        if seg[2] != 'O' and temp_pre == 'O':
                            temp_pre = seg[2][2:]

                if temp_pre != 'O' and temp_gold != 'O':
                    if temp_pre == temp_gold:
                        TP += 1
                    else:
                        # NOTE(review): a type mismatch is counted as FP only
                        # here, whereas Rel_Evaluation also adds an FN for the
                        # same case. Kept as-is; confirm this is intentional.
                        FP += 1
                elif temp_pre != 'O':
                    FP += 1
                elif temp_gold != 'O':
                    FN += 1
            token_id += 1

    print('TP,FP,FN:', TP, FP, FN)

    # Each ratio falls back to 0 when its denominator is empty.
    P = 0 if TP + FP == 0 else TP / (TP + FP)
    R = 0 if TP + FN == 0 else TP / (TP + FN)
    F1 = 0 if P + R == 0 else 2 * P * R / (P + R)

    print('P,R,F1:', P, R, F1)
    return F1
|
|
|
def Rel_Evaluation_Hugface_fn(prefile,ARG2_label='gene1s'):
    """Score relation labels on marker tokens, overall and per label.

    The file is read as blank-line-separated sentences with one token per
    line. Each line is split on tabs into: token text, gold label, predicted
    label. Only lines whose token text equals ``ARG2_label`` are scored, and a
    label counts as a relation when it contains the substring ``'ARG2'``.

    Scoring per marker token:
        * gold is a relation and prediction equals it     -> TP
        * gold is a relation, prediction is another one   -> FN (gold label)
                                                             and FP (predicted label)
        * gold is a relation, prediction is not           -> FN
        * gold is not a relation, prediction is           -> FP

    Args:
        prefile: Path to the UTF-8 encoded prediction file.
        ARG2_label: Token text that marks a scored position.

    Returns:
        A tuple ``([P, R, F1], rel_metrics)``. The first element holds the
        overall scores rounded to 4 decimals; ``rel_metrics`` maps each label
        string to its own ``[p, r, f1]`` rounded to 4 decimals. Results are
        also printed to stdout.
    """
    # Context manager so the handle is released even if reading fails.
    with open(prefile, 'r', encoding='utf-8') as fin:
        all_in = fin.read().strip().split('\n\n')

    TP = 0
    FP = 0
    FN = 0
    result_dict = {}  # label -> [tp, fp, fn]

    for sentence in all_in:
        for token in sentence.split('\n'):
            seg = token.split('\t')
            if seg[0] != ARG2_label:
                continue
            gold, pred = seg[1], seg[2]
            gold_is_rel = 'ARG2' in gold
            pred_is_rel = 'ARG2' in pred

            if gold_is_rel and pred == gold:
                result_dict.setdefault(gold, [0, 0, 0])[0] += 1
                TP += 1
                continue
            # Gold is registered before the prediction so labels keep their
            # first-seen order in the returned dictionary.
            if gold_is_rel:
                result_dict.setdefault(gold, [0, 0, 0])[2] += 1
                FN += 1
            if pred_is_rel:
                result_dict.setdefault(pred, [0, 0, 0])[1] += 1
                FP += 1

    rel_metrics = {}
    for rel_type, (tp, fp, fn) in result_dict.items():
        p = 0 if tp + fp == 0 else tp / (tp + fp)
        r = 0 if tp + fn == 0 else tp / (tp + fn)
        f1 = 0 if p + r == 0 else 2 * p * r / (p + r)
        rel_metrics[rel_type] = [round(p, 4), round(r, 4), round(f1, 4)]

    # Each ratio falls back to 0 when its denominator is empty.
    P = 0 if TP + FP == 0 else TP / (TP + FP)
    R = 0 if TP + FN == 0 else TP / (TP + FN)
    F1 = 0 if P + R == 0 else 2 * P * R / (P + R)
    P = round(P, 4)
    R = round(R, 4)
    F1 = round(F1, 4)

    print('mertics:\n', rel_metrics)
    print('\nTP,FP,FN:', TP, FP, FN)
    print('Overall P,R,F1:', P, R, F1)
    return [P, R, F1], rel_metrics
|
|
|
def Rel_Evaluation_AIO_fn(prefile, entity_tag='<GENE>'):
    """Score relation labels carried by entity-tag tokens.

    The file is read as blank-line-separated sentences with one token per
    line. Each line is split on tabs into: token text, gold label, predicted
    label. Only lines whose token text equals ``entity_tag`` are scored, and a
    label counts as a relation when it contains the substring ``'ARG2-'``.

    Scoring per tag token:
        * gold is a relation and prediction equals it     -> TP
        * gold is a relation, prediction is another one   -> FP and FN
        * gold is a relation, prediction is not           -> FN
        * gold is not a relation, prediction is           -> FP

    Args:
        prefile: Path to the UTF-8 encoded prediction file.
        entity_tag: Token text that marks a scored position. Defaults to
            ``'<GENE>'``, the value that used to be hard-coded.

    Returns:
        ``[P, R, F1]``, each rounded to 4 decimals. Counts and scores are also
        printed to stdout.
    """
    # Context manager so the handle is released even if reading fails.
    with open(prefile, 'r', encoding='utf-8') as fin:
        all_in = fin.read().strip().split('\n\n')

    TP = 0
    FP = 0
    FN = 0
    for sentence in all_in:
        for token in sentence.split('\n'):
            seg = token.split('\t')
            if seg[0] != entity_tag:
                continue
            gold, pred = seg[1], seg[2]
            gold_is_rel = 'ARG2-' in gold
            pred_is_rel = 'ARG2-' in pred

            if gold_is_rel and pred == gold:
                TP += 1
                continue
            if gold_is_rel:
                FN += 1
            if pred_is_rel:
                FP += 1

    # Each ratio falls back to 0 when its denominator is empty.
    P = 0 if TP + FP == 0 else TP / (TP + FP)
    R = 0 if TP + FN == 0 else TP / (TP + FN)
    F1 = 0 if P + R == 0 else 2 * P * R / (P + R)
    P = round(P, 4)
    R = round(R, 4)
    F1 = round(F1, 4)

    print('TP,FP,FN:', TP, FP, FN)
    print('P,R,F1:', P, R, F1)
    return [P, R, F1]
|
|
|
def Rel_Evaluation_AIO_GC_fn(prefile, entity_tag='<CHEMICAL>'):
    """Score relation labels carried by chemical-tag tokens.

    The file is read as blank-line-separated sentences with one token per
    line. Each line is split on tabs into: token text, gold label, predicted
    label. Only lines whose token text equals ``entity_tag`` are scored, and a
    label counts as a relation when it contains the substring ``'ARG2-'``.

    Scoring per tag token:
        * gold is a relation and prediction equals it     -> TP
        * gold is a relation, prediction is another one   -> FP and FN
        * gold is a relation, prediction is not           -> FN
        * gold is not a relation, prediction is           -> FP

    Args:
        prefile: Path to the UTF-8 encoded prediction file.
        entity_tag: Token text that marks a scored position. Defaults to
            ``'<CHEMICAL>'``, the value that used to be hard-coded.

    Returns:
        ``[P, R, F1]``, each rounded to 4 decimals. Counts and scores are also
        printed to stdout.
    """
    # Context manager so the handle is released even if reading fails.
    with open(prefile, 'r', encoding='utf-8') as fin:
        all_in = fin.read().strip().split('\n\n')

    TP = 0
    FP = 0
    FN = 0
    for sentence in all_in:
        for token in sentence.split('\n'):
            seg = token.split('\t')
            if seg[0] != entity_tag:
                continue
            gold, pred = seg[1], seg[2]
            gold_is_rel = 'ARG2-' in gold
            pred_is_rel = 'ARG2-' in pred

            if gold_is_rel and pred == gold:
                TP += 1
                continue
            if gold_is_rel:
                FN += 1
            if pred_is_rel:
                FP += 1

    # Each ratio falls back to 0 when its denominator is empty.
    P = 0 if TP + FP == 0 else TP / (TP + FP)
    R = 0 if TP + FN == 0 else TP / (TP + FN)
    F1 = 0 if P + R == 0 else 2 * P * R / (P + R)
    P = round(P, 4)
    R = round(R, 4)
    F1 = round(F1, 4)

    print('TP,FP,FN:', TP, FP, FN)
    print('P,R,F1:', P, R, F1)
    return [P, R, F1]
|
|
|
def office_evaluation(goldfile,prefile):
    """Compare gold and predicted relation lines and print P/R/F1.

    Both files are read as one relation per line. Each line is split on tabs
    and its second column is used as the relation type. Lines are grouped by
    type and a predicted line matches only when it is textually identical to a
    gold line of the same type. Duplicate lines within a file count once.

    Only relation types that occur in the gold file are scored; predicted
    lines whose type never appears in the gold file are not counted at all.

    Printed output: the raw ``{type: [TP, FP, FN]}`` table, one line of scores
    per type, the unweighted mean of the per-type F1 values (``ave_f1``), and
    the overall counts and scores pooled over all types.

    Args:
        goldfile: Path to the UTF-8 encoded gold relation file.
        prefile: Path to the UTF-8 encoded predicted relation file.

    Returns:
        None. All results are printed to stdout.
    """
    # Context managers so both handles are released even if reading fails.
    with open(goldfile, 'r', encoding='utf-8') as fin_gold:
        all_gold = fin_gold.read().strip().split('\n')
    with open(prefile, 'r', encoding='utf-8') as fin_pre:
        all_pre = fin_pre.read().strip().split('\n')

    gold_result = {}  # relation type -> set of gold lines
    pre_result = {}   # relation type -> set of predicted lines
    all_result = {}   # relation type -> [TP, FP, FN]

    for line in all_gold:
        # An empty file yields a single '' entry; skip it rather than fail on
        # the missing second column.
        if not line.strip():
            continue
        rel_type = line.split('\t')[1]
        all_result.setdefault(rel_type, [0, 0, 0])
        gold_result.setdefault(rel_type, set()).add(line)

    for line in all_pre:
        if not line.strip():
            continue
        pre_result.setdefault(line.split('\t')[1], set()).add(line)

    for rel_type, gold_set in gold_result.items():
        pre_set = pre_result.get(rel_type, set())
        counts = all_result[rel_type]
        counts[0] += len(gold_set & pre_set)   # exact-line matches
        counts[1] += len(pre_set - gold_set)   # predicted but not in gold
        counts[2] += len(gold_set - pre_set)   # in gold but not predicted

    ave_f = 0
    TP, FP, FN = 0, 0, 0
    print(all_result)
    for rel_type, (tp, fp, fn) in all_result.items():
        TP += tp
        FP += fp
        FN += fn
        tem_p = 0 if tp + fp == 0 else tp / (tp + fp)
        tem_r = 0 if tp + fn == 0 else tp / (tp + fn)
        tem_f = 0 if tem_p + tem_r == 0 else 2 * tem_p * tem_r / (tem_p + tem_r)
        ave_f += tem_f
        print('%s:p=%.4f,r=%.4f,f=%.4f' % (rel_type, tem_p, tem_r, tem_f))

    P = 0 if TP + FP == 0 else TP / (TP + FP)
    R = 0 if TP + FN == 0 else TP / (TP + FN)
    F1 = 0 if P + R == 0 else 2 * P * R / (P + R)

    # The per-type F1 values are summed exactly once, inside the loop above.
    # The previous version added the last type's F1 a second time here, which
    # inflated the reported average.
    macro_f = ave_f / len(all_result) if all_result else 0

    print('Overall:')
    print('ave_f1:', macro_f)
    print('TP=%d, FP=%d, FN=%d' % (TP, FP, FN))
    print('P=%.4f, R=%.4f, F1=%.4f' % (P, R, F1))
|
|
|
|
|
if __name__=='__main__':
    # Ad-hoc driver: scores one relation file pair, then one tagged
    # prediction file. The paths are site-specific absolute locations.
    path='//panfs/pan1/bionlplab/luol2/BC7DrugProt/results/'
    office_evaluation(path+'dev/dev_gold_relations.tsv',path+'drugprot_dev_LSTM-CRF-ES_pre.tsv')
    print('............')
    # NOTE(review): Rel_Evaluation_check is not defined in the visible part of
    # this file; unless it is provided elsewhere this call raises NameError.
    # Confirm which evaluation function was intended here.
    Rel_Evaluation_check('//panfs/pan1/bionlplab/luol2/BC7DrugProt/check/dev_pre_temp.conll')
|
|