# Spaces: Build error
import datetime
import html
import os
import random
import string
import sys
import traceback

import nltk
import numpy as np
import pandas as pd
import pke
import requests
import streamlit as st
import torch
from flashtext import KeywordProcessor
from textwrap3 import wrap
from transformers import T5ForConditionalGeneration, T5Tokenizer

from helper import (
    get_final_option_list,
    get_keywords,
    get_nouns_multipartite,
    get_question,
    get_related_word,
    load_raw_text,
    postprocesstext,
    summarizer,
)
def set_seed(seed: int) -> None:
    """Seed Python's, NumPy's and PyTorch's random number generators.

    Args:
        seed: Value passed unchanged to ``random.seed``, ``np.random.seed``,
            ``torch.manual_seed`` and ``torch.cuda.manual_seed_all``.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
# Fix the seed of every RNG once, before any model code runs.
set_seed(42)
def load_model():
    """Download the required NLTK data and load the two T5 models.

    The summarization and question-generation checkpoints are loaded with
    ``from_pretrained`` and moved to CUDA when it is available, otherwise
    they stay on the CPU.

    Returns:
        A 4-tuple ``(summary_model, summary_tokenizer, question_tokenizer,
        question_model)``. Note the order of the last two items: the
        question *tokenizer* comes before the question *model*.
    """
    # NOTE(review): nothing here caches the result, so every call repeats the
    # downloads and model loading. If the Streamlit version in use provides a
    # resource cache (e.g. ``st.cache_resource``), consider applying it.
    # Each resource is requested once ('wordnet' used to be listed twice).
    for resource in ("punkt", "brown", "wordnet", "stopwords", "omw-1.4"):
        nltk.download(resource)

    # Checkpoint names are hard-coded; an earlier revision read them from the
    # "summary_mod_name" / "question_mod_name" environment variables.
    summary_mod_name = "mrm8488/t5-base-finetuned-summarize-news"
    question_mod_name = "mrm8488/t5-base-finetuned-question-generation-ap"

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    summary_model = T5ForConditionalGeneration.from_pretrained(summary_mod_name)
    summary_tokenizer = T5Tokenizer.from_pretrained(summary_mod_name)
    summary_model = summary_model.to(device)

    question_model = T5ForConditionalGeneration.from_pretrained(question_mod_name)
    question_tokenizer = T5Tokenizer.from_pretrained(question_mod_name)
    question_model = question_model.to(device)

    return summary_model, summary_tokenizer, question_tokenizer, question_model
# NLTK entry points; none of them is referenced in the visible part of this
# file, so they are presumably kept for code elsewhere — verify before removing.
from nltk.corpus import wordnet as wn
from nltk.tokenize import sent_tokenize
from nltk.corpus import stopwords
# def csv_downloader(df):
#     res = df.to_csv(index=False,sep="\t").encode('utf-8')
#     st.download_button(
#         label="Download logs data as CSV separated by tab",
#         data=res,
#         file_name='df_quiz_log_file_v1.csv',
#         mime='text/csv')
def load_file():
    """Load text from an uploaded ``.txt`` file.

    Returns:
        The upload's bytes decoded as UTF-8 when a file whose reported type
        is ``text/plain`` has been provided; ``None`` when nothing has been
        uploaded or the upload has a different type.
    """
    uploaded_file = st.file_uploader("Paste text", type=['txt'])
    # Bail out explicitly instead of falling through to an unbound variable.
    if uploaded_file is None or uploaded_file.type != "text/plain":
        return None
    return str(uploaded_file.read(), "utf-8")
def _render_question_html(number, question, options, answer_index):
    """Build the HTML snippet for one question and its answer options.

    Args:
        number: Label shown in front of the question.
        question: Question text.
        options: Answer options, rendered in the given order.
        answer_index: Position in ``options`` of the correct answer; that
            option is rendered in green.

    Returns:
        The HTML string. Question and option text are HTML-escaped because
        the snippet is rendered with ``unsafe_allow_html=True``.
    """
    parts = [
        "\n<div>\n<p>\n"
        f"{number}: <b> {html.escape(str(question))} </b>\n"
        "</p>\n</div>\n"
    ]
    for position, option in enumerate(options):
        style = ' style="color:Green;"' if position == answer_index else ""
        parts.append(f" <p{style}><b> {html.escape(str(option))} </b></p> ")
    parts.append("\n")
    return "".join(parts)


# Loading Model
summary_model, summary_tokenizer, question_tokenizer, question_model = load_model()

# App title and description
st.title("P's Prototye")
st.write("Get multiple choice questions from random facts")
st.text("Disclaimer: This is early version. sorry if there's still bugs")

# Input text, pre-filled with whatever load_raw_text() returns
default_text = load_raw_text()
raw_text = st.text_area(
    "Enter text here - press Ctrl + enter to submit",
    default_text,
    height=100,
    max_chars=1000,
)
# raw_text = load_file()
start_time = str(datetime.datetime.now())

# Questions already shown, plus one list per option column. The option lists
# are only appended to in the visible part of the script; they are kept (and
# always initialised) in case later code reads them.
questions = []
option1 = []
option2 = []
option3 = []
option4 = []

# Skip generation for missing, empty or whitespace-only input.
if raw_text and raw_text.strip():
    summary_text = summarizer(raw_text, summary_model, summary_tokenizer)
    ans_list = get_keywords(raw_text, summary_text)

    for idx, ans in enumerate(ans_list):
        ques = get_question(summary_text, ans, question_model, question_tokenizer)
        other_options = get_related_word(ans)
        final_options, ans_index = get_final_option_list(ans, other_options)

        # NOTE(review): assumes get_final_option_list always yields at least
        # four options — confirm against the helper.
        option1.append(final_options[0])
        option2.append(final_options[1])
        option3.append(final_options[2])
        option4.append(final_options[3])

        # Show each distinct question only once.
        if ques not in questions:
            html_str = _render_question_html(
                idx + 1, ques, final_options[:4], ans_index
            )
            st.markdown(html_str, unsafe_allow_html=True)
            st.markdown("-----")
            questions.append(ques)

# st.dataframe(pd.read_csv(output_path,sep="\t").tail(5))