# NOTE(review): removed non-Python web-scrape residue (Hugging Face Spaces
# page header: "Runtime error", file size, commit hashes, line-number gutter)
# that prevented this file from parsing.
import gradio as gr
import pysbd
from sentence_transformers import CrossEncoder
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline

# Generative QA model: produces a free-text answer from question + evidence.
# AutoModelWithLMHead is deprecated; AutoModelForSeq2SeqLM dispatches to the
# same T5ForConditionalGeneration class for this checkpoint.
model_name = "MaRiOrOsSi/t5-base-finetuned-question-answering"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Rule-based sentence segmenter, used to pull evidence sentences out of a
# paragraph; clean=False keeps the original text untouched.
sentence_segmenter = pysbd.Segmenter(language='en', clean=False)
# Cross-encoder that scores (question, paragraph) relevance for retrieval.
passage_retreival_model = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
# Extractive QA model that locates an answer span within a paragraph.
qa_model = pipeline("question-answering", 'a-ware/bart-squadv2')
def fetch_answers(question, clincal_note ):
    """Answer `question` against `clincal_note`; return markdown text.

    Pipeline: split the note into paragraphs, rank (question, paragraph)
    pairs with the cross-encoder, keep the top 5 paragraphs (best first),
    locate an answer span in each with the extractive QA model, collect the
    sentences containing that span as evidence, and let the generative T5
    model compose the final answer from question + evidence.

    Args:
        question: free-text question string.
        clincal_note: the full document text (parameter name kept as-is for
            interface compatibility, despite the typo).

    Returns:
        Markdown string with up to five "# ANSWER n" / "REFERENCE" sections.
    """
    paragraphs = clincal_note.splitlines()
    query_paragraph_list = [(question, para) for para in paragraphs
                            if para.strip()]
    # Guard: CrossEncoder.predict raises on an empty batch (blank note).
    if not query_paragraph_list:
        return "No text found in the document."

    scores = passage_retreival_model.predict(query_paragraph_list)
    # argsort is ascending, so the last 5 indices are the highest scores;
    # reverse to iterate best-first.
    top_indices = scores.argsort()[-5:]
    top_pairs = [query_paragraph_list[i] for i in top_indices]
    top_pairs.reverse()

    results = []
    for count, (query, passage) in enumerate(top_pairs, start=1):
        passage_sentences = sentence_segmenter.segment(passage)
        answer = qa_model(question=query, context=passage)['answer']
        # The extractive model sometimes returns leading punctuation; strip
        # it BEFORE matching so the first sentence is compared against the
        # cleaned answer (the original stripped lazily inside the loop and
        # could miss a match in the first sentence).
        while answer.startswith(('.', ':')):
            answer = answer[1:].strip()
        evidence_sentence = "".join(
            " " + sentence for sentence in passage_sentences
            if answer in sentence
        )

        model_input = f"question: {query} context: {evidence_sentence}"
        encoded_input = tokenizer([model_input],
                                  return_tensors='pt',
                                  max_length=512,
                                  truncation=True)
        output = model.generate(input_ids=encoded_input.input_ids,
                                attention_mask=encoded_input.attention_mask)
        output_answer = tokenizer.decode(output[0], skip_special_tokens=True)

        results.append(f"# ANSWER {count}: {output_answer}\n"
                       f"REFERENCE: {evidence_sentence}\n\n")

    # join() instead of repeated += avoids quadratic string building.
    return "".join(results)
# Gradio UI: question + document text in, markdown answer/evidence list out.
demo = gr.Interface(
    fn=fetch_answers,
    # TODO: accept real-time audio (Whisper speech-to-text) for the question,
    # and file upload (.txt/.doc/.docx) for the clinical note.
    inputs=[gr.Textbox(lines=2, label='Question', show_label=True),
            gr.Textbox(lines=10, label='Document Text', show_label=True)],
    outputs="markdown",
    # NOTE(review): the previous examples='.' made Gradio treat the working
    # directory as an examples folder (it expects a log.csv inside) and
    # raised at startup — removed; pass a list of [question, document] pairs
    # to restore examples.
    title='Document Question Answering System with Evidence from document',
)
demo.launch()
|