File size: 2,630 Bytes
9ce8688
 
 
 
 
 
 
 
 
 
 
 
9d8a166
9ce8688
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ba5d229
9ce8688
ba5d229
9ce8688
 
 
ba5d229
9ce8688
 
 
 
 
 
 
 
 
 
ba5d229
9ce8688
ba5d229
9ce8688
 
 
 
a3a678d
9ce8688
 
 
 
 
 
 
 
 
 
 
 
 
 
b532178
9ce8688
 
 
 
ba5d229
9ce8688
ba5d229
9ce8688
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import gradio as gr
import pandas as pd


from transformers import pipeline

# Fill-mask pipelines that predict() queries side by side. They are created
# once at import time so every request reuses the already-loaded models.
#   pipe  -> reported by predict() as score_finetuned_CBERT
#   pipe2 -> reported by predict() as score_Bio_CBERT
#   pipe3 -> reported by predict() as score_CBERT
_FILL_MASK_TASK = "fill-mask"

pipe = pipeline(_FILL_MASK_TASK, model="aminghias/Clinical-BERT-finetuned")
pipe2 = pipeline(_FILL_MASK_TASK, model="emilyalsentzer/Bio_ClinicalBERT")
pipe3 = pipeline(_FILL_MASK_TASK, model="medicalai/ClinicalBERT")





def predict(text):
    """Fill the single masked word in ``text`` using three fill-mask models.

    The text is sent to the module-level pipelines ``pipe``, ``pipe2`` and
    ``pipe3``. Their candidate tokens are outer-joined on ``token_str``; a
    model that did not propose a given token contributes a score of 0 for
    it, and candidates are ranked by the mean of the three scores.

    Args:
        text: Sentence containing exactly one mask token.

    Returns:
        A ``(sequence, table)`` tuple. ``sequence`` is the completed sentence
        of the top-ranked candidate. ``table`` is a DataFrame holding the
        first five ranked candidates with the columns ``token_str``,
        ``score_average``, ``score_finetuned_CBERT``, ``score_Bio_CBERT``
        and ``score_CBERT``.

    Raises:
        gr.Error: If ``text`` does not contain exactly one mask token.
    """
    # Validate up front: without a mask the pipelines raise, and with several
    # masks they return nested lists that the table-building below cannot use.
    mask_token = pipe.tokenizer.mask_token
    if not isinstance(text, str) or text.count(mask_token) != 1:
        raise gr.Error(
            f"Please enter a sentence containing exactly one {mask_token} token."
        )

    finetuned = _score_frame(pipe(text), 'score_finetuned_CBERT', 'sequence_finetuned')
    bio = _score_frame(pipe2(text), 'score_Bio_CBERT', 'sequence_bio')
    cbert = _score_frame(pipe3(text), 'score_CBERT', 'sequence_cbert')

    # Outer joins keep every token proposed by at least one model.
    merged = pd.merge(finetuned, bio, on='token_str', how='outer')
    merged = pd.merge(cbert, merged, on='token_str', how='outer')

    # Pick the completed sentence from the first model that proposed the
    # token, preferring the fine-tuned model, then Bio, then the base model.
    merged['sum_sequence'] = (
        merged['sequence_finetuned']
        .fillna(merged['sequence_bio'])
        .fillna(merged['sequence_cbert'])
    )

    # A missing score means that model did not propose the token: count it as 0.
    score_columns = ['score_finetuned_CBERT', 'score_Bio_CBERT', 'score_CBERT']
    merged[score_columns] = merged[score_columns].fillna(0)
    merged['score_average'] = merged[score_columns].sum(axis=1) / len(score_columns)

    ranked = merged.sort_values(by='score_average', ascending=False)
    ranked = ranked.reset_index(drop=True)

    top_candidates = ranked[['token_str', 'score_average', *score_columns]].head()
    return (ranked['sum_sequence'].iloc[0], top_candidates)


def _score_frame(predictions, score_column, sequence_column):
    """Tabulate one model's predictions with model-specific column names."""
    frame = pd.DataFrame(predictions)
    # Unique names avoid depending on pandas' automatic _x/_y merge suffixes.
    frame = frame.rename(
        columns={'score': score_column, 'sequence': sequence_column}
    )
    return frame[['token_str', score_column, sequence_column]]

    
  # return (pipe(text)[0]['sequence'],pipe2(text)[0]['sequence'])
    
# Example prompts offered in the UI; each one contains a single [MASK].
_EXAMPLES = [
    ['The  high blood pressure was due to [MASK]  which is critical.'],
    ['The  patient is suffering from throat infection causing [MASK] and cough.'],
]

# Web UI: one text input, and two outputs matching predict()'s return value
# (the completed sentence as text, and the candidate table as a dataframe).
demo = gr.Interface(
    fn=predict,
    inputs='text',
    outputs=['text', gr.Dataframe()],
    title="Filling Missing Clinical/Medical Data ",
    description="This application fills any missing words in the medical domain",
    examples=_EXAMPLES,
)

demo.launch()