import re

import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# T5 checkpoint that emits "question ... ? answer ..." as a single decoded string.
MODEL_NAME = "potsawee/t5-large-generation-squad-QuestionAnswer"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)


def inference(input_text):
    """Generate question/answer pairs, one per sentence of *input_text*.

    Each sentence is fed to the seq2seq model, whose decoded output is
    expected to be "question? answer". Pairs are kept only when the model
    produced a "?" separator and the answer text actually appears in the
    input (a cheap hallucination guard).

    Args:
        input_text: Raw text from the Gradio textbox (may be None/empty).

    Returns:
        A string of "Question: ...\nAnswer: ...\n\n" entries, or a prompt
        asking the user to provide text.
    """
    if not input_text:
        return "Please upload a text"

    # Split after sentence-ending punctuation; drop empty/whitespace-only
    # fragments (the lookbehind split leaves a trailing "" after the last '.').
    sentences = [s.strip() for s in re.split(r"(?<=[.!?])", input_text) if s.strip()]

    pairs = []
    for sentence in sentences:
        input_ids = tokenizer.encode(sentence, return_tensors="pt")
        outputs = model.generate(input_ids, max_length=100, num_return_sequences=1)
        decoded = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

        # partition() never raises; split("?")[1] crashed when the model
        # emitted no question mark.
        question, sep, answer = decoded.partition("?")
        question = question.strip()
        answer = answer.strip()
        if not sep or not question:
            continue  # malformed output for this sentence; keep going
        if answer not in input_text:
            continue  # answer not grounded in the source text; skip the pair
        pairs.append(f"Question: {question}?\nAnswer: {answer}\n\n")

    return "".join(pairs)


title = "Question Answer Pairs Generator"
input_text = gr.Textbox(lines=4, label="Text:")

interface = gr.Interface(
    fn=inference,
    inputs=[input_text],
    outputs="text",
    title=title,
)

if __name__ == "__main__":
    interface.launch()