Adding all basic files
- app.py +172 -0
- doc_loading.py +51 -0
- llm_functions.py +77 -0
- requirements.txt +11 -0
- utils.py +28 -0
app.py
ADDED
@@ -0,0 +1,172 @@
import streamlit as st
from doc_loading import get_article_text, read_pdf_text
from utils import get_topics
from llm_functions import generate_qa_pairs, evaluate_answer, get_conversational_chain, get_topics_from_chunk

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain_openai import ChatOpenAI

from dotenv import load_dotenv
load_dotenv()


# Setting up basics
st.set_page_config(page_title="LLM UC")
st.header("Let's check what you know")

# Greeting the user
st.write("Welcome!!")

# st.session_state  # debug: uncomment to inspect the session state

# Selectbox for choosing the input method; the widget key keeps the
# choice in st.session_state across reruns
option = st.selectbox("How are you going to input your document?",
                      ("Upload PDF", "Blog link", "YouTube Link", "Paste copied article"),
                      key="option")

# Conditionally show components based on the user's choice
file_name = ""
main_text = ""

if option == "Upload PDF":
    uploaded_file = st.file_uploader("Choose a PDF file", type=["pdf"], key="uploading_pdf")
    if uploaded_file is not None:
        # Process the uploaded file
        st.write("PDF uploaded successfully! We will read only the first 5 pages.")
        main_text = read_pdf_text(uploaded_file)

elif option == "Paste copied article":
    main_text = st.text_area("Paste your article here", key="article_key")
    if st.button('Submit'):
        # Process the pasted text
        st.write("Text submitted successfully!")

else:
    link_input = st.text_input("Paste your blog/YouTube link here.", key='link_key')
    if st.button('Submit'):
        # Process the link
        st.write("Link submitted.")
        if option == "YouTube Link":
            st.write("This functionality is under construction.")
            main_text = ""
        else:
            article_text = get_article_text(link_input)
            if article_text:
                main_text = ""
                st.write("This functionality is under construction.")
            else:
                st.write("Unable to fetch text from the URL. Can you please check the link?")


# Show a warning if the uploaded file is not a PDF
if option == "Upload PDF":
    if uploaded_file is not None and uploaded_file.type != "application/pdf":
        st.error("Please choose a PDF file only.")

if "total_text" not in st.session_state:
    st.session_state['total_text'] = main_text

if len(main_text) > 0:
    # Split the article into overlapping chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=100,
        length_function=len,
        is_separator_regex=False
    )
    texts = text_splitter.create_documents([main_text])

    # Embed the chunks and store them in a vector database
    embeddings = OpenAIEmbeddings()
    db = FAISS.from_documents(texts, embeddings)

    hash_name = f"{option.replace(' ', '-')}"
    db.save_local(f'faiss_{hash_name}_index')

    # Toggle between quiz mode and free-form Q&A mode
    toggle_button_state = st.checkbox("Check this if I should quiz you")

    if toggle_button_state:
        # Build the quiz
        st.write("Quiz incoming...")

        # Let the user pick the question toughness; the widget key keeps
        # the choice in st.session_state across reruns
        toughness_selection = st.selectbox("Select the question toughness",
                                           ("Easy", "Moderate", "Tough"),
                                           key="toughness_selection")

        # Extract candidate topics once and cache them in the session
        if "top_topics" not in st.session_state:
            topics_chain = get_topics_from_chunk()
            top_topics = get_topics(texts[:10], topics_chain)
            top_topics.append("Any")
            st.session_state['top_topics'] = top_topics

        top_topics = st.session_state['top_topics']
        topic_selection = st.selectbox("Select the topic I should quiz you on!",
                                       tuple(top_topics), key="topic_selection")

        # Generate one question/answer pair and cache it in the session
        if "response" not in st.session_state:
            ques_chain = generate_qa_pairs()
            # st.write(f"here we go, a {toughness_selection} level question from {topic_selection} topic.")
            docs_for_questions = db.similarity_search(topic_selection, k=5)
            response = ques_chain.invoke({"context": docs_for_questions,
                                          "topic": topic_selection,
                                          "toughness": toughness_selection})
            st.session_state['response'] = response[0]

        if "scoring" not in st.session_state:
            eval_chain = evaluate_answer()
            response = st.session_state['response']
            st.write(f"\n Question: {response['question']}")
            user_answer = st.text_input("Answer here: ", key="my_ans")

            if st.button("Evaluate"):
                score = eval_chain({"context": response['answer'],
                                    "answer": user_answer})
                st.write(f"You scored {score['score']}/10")
                if int(score['score']) < 6:
                    st.write(f"The correct answer would be: {response['answer']}")
                else:
                    st.write("Good job!!!")

                st.session_state['scoring'] = score['score']
            elif st.button("Don't know"):
                st.write(f"The correct answer would be: {response['answer']}")

    else:
        st.write("What's your question?")
        # Let the user ask a question against the document
        if "input_question" not in st.session_state or st.session_state['input_question'] == "":
            input_question = st.text_input("Here, input your question and click `Answer this`", key="Question")
            st.session_state['input_question'] = input_question
        answer_button = st.button('Answer this', key='my_question')

        if answer_button:
            input_question = st.session_state['input_question']
            docs = db.similarity_search(input_question, k=5)
            chain = get_conversational_chain()
            response = chain({"input_documents": docs,
                              "question": input_question,
                              })
            st.write(response['output_text'])
            del st.session_state['input_question']
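app.py persists the index with db.save_local but never reloads it, so every rerun rebuilds embeddings. A minimal sketch of how a later run could reuse the saved index, assuming the same embedding model; the folder name mirrors the hash_name scheme above, and newer langchain releases additionally require an allow_dangerous_deserialization=True argument:

from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS

# Reload an index previously written by db.save_local(...).
# "faiss_Upload-PDF_index" is the name app.py would produce for the
# "Upload PDF" option; adjust to whichever option was used.
embeddings = OpenAIEmbeddings()
db = FAISS.load_local("faiss_Upload-PDF_index", embeddings)
docs = db.similarity_search("some topic", k=5)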
doc_loading.py
ADDED
@@ -0,0 +1,51 @@
import requests
from bs4 import BeautifulSoup

# Use 1: when people want to share a webpage and ask you to generate questions from it

def get_article_text(url):
    # Fetch the webpage content
    response = requests.get(url)
    if response.status_code != 200:
        print(f"Failed to fetch the webpage. Status code: {response.status_code}")
        return None

    # Parse the HTML content
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find the main article body
    article_body = soup.find('body')

    # Extract text from the body
    article_text = article_body.get_text()

    return article_text


# Use 2: when people want to upload a PDF and ask you to generate questions from it
import os
from PyPDF2 import PdfReader

# pdf_directory = os.getcwd()

# # Opening the pdf
# for filename in os.listdir(pdf_directory):
#     if filename.endswith(".pdf"):
#         pdf_file_name = filename
#         pdf_file = open(os.path.join(pdf_directory, filename), "rb")

def read_pdf_text(pdf_file):
    # Read the PDF, limited to the first 5 pages
    pdf_reader = PdfReader(pdf_file)
    all_text = ""
    for idx, page in enumerate(pdf_reader.pages):
        all_text += page.extract_text()
        if idx > 4:
            break
    return all_text

# print(all_text)

# Use 3: let the user paste a story to generate questions from

# Use 4: let the user share a video link to generate questions from
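A quick smoke test of the two loaders outside Streamlit; a sketch assuming a reachable URL and a local PDF (both the URL and "sample.pdf" are placeholders):

from doc_loading import get_article_text, read_pdf_text

# Webpage path: returns all visible body text, or None on a non-200 status.
text = get_article_text("https://example.com")
if text:
    print(text[:200])

# PDF path: reads at most the first five pages.
with open("sample.pdf", "rb") as f:
    print(read_pdf_text(f)[:200])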
llm_functions.py
ADDED
@@ -0,0 +1,77 @@
from langchain.chains.question_answering import load_qa_chain
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain_core.output_parsers import JsonOutputParser
import streamlit as st


def get_conversational_chain():
    prompt_template = """Answer the question in as much detail as possible from the provided context; make sure to provide all the details. If the answer is not present in the document,
    feel free to say "try asking something else, this information is not available". Don't provide a wrong answer no matter what is present in the question.\n\n
    Context:\n {context}?\n
    Question: \n{question}\n

    Answer:
    """
    model = ChatOpenAI(temperature=0.7)
    prompt = PromptTemplate(template=prompt_template,
                            input_variables=["context", "question"])
    chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)

    return chain


def get_topics_from_chunk():
    prompt_template = """
    I will give you a context, and you have to tell me the top 3 topics the text might belong to.
    If you are unable to find any, you can respond with <no_topic>, but don't output any rubbish topics.
    Do not write anything other than the topic names. Also, give the topics in a comma-separated way.\n\n
    context:\n{context}\n
    Answer:
    """
    model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.5)
    prompt = PromptTemplate(template=prompt_template,
                            input_variables=['context'])
    response = LLMChain(llm=model, prompt=prompt, output_key='top_topics')
    return response


def generate_qa_pairs():
    prompt_template = """
    Given a context, I want you to generate 1 question of toughness level {toughness} out of these three levels:
    Easy, Moderate and Tough. The question must belong to the topic: {topic}.
    Make sure the answer to the question you generate belongs to the context provided.
    Give me the list of question-answer pairs in JSON format, with each element of the list being a dict with keys "question" and "answer".\n\n
    {format_instructions}\n
    context:\n{context}\n
    """
    model = ChatOpenAI(model='gpt-3.5-turbo', temperature=1)
    parser = JsonOutputParser()
    prompt = PromptTemplate(template=prompt_template,
                            input_variables=['context', 'topic', 'toughness'],
                            partial_variables={'format_instructions': parser.get_format_instructions()})

    chain = prompt | model | parser

    return chain


def evaluate_answer():
    prompt_template = """
    You are a good teacher. Suppose there are two texts: 1. real_answer and 2. user_answer. I want you to score the user_answer based on the real_answer.
    You have to give an integer score; the max score is 10 and the min score is 0. Be lenient in scoring,
    but if someone gives a rubbish/out-of-context answer, feel free to give a score of 0.
    Give the final output as an integer value only; we don't want an explanation from you.

    real_answer:\n{context}\n
    user_answer:\n{answer}\n

    score:
    """
    model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.2)
    prompt = PromptTemplate(template=prompt_template,
                            input_variables=['context', 'answer'])
    response = LLMChain(llm=model, prompt=prompt, output_key="score")
    return response
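generate_qa_pairs returns an LCEL pipeline (prompt | model | parser), so it can be exercised outside Streamlit. A minimal sketch, assuming OPENAI_API_KEY is set and that the model follows the prompt and returns the requested JSON list (the context string here is a placeholder):

from llm_functions import generate_qa_pairs

chain = generate_qa_pairs()
pairs = chain.invoke({
    "context": "The mitochondria is the powerhouse of the cell...",
    "topic": "biology",
    "toughness": "Easy",
})
# JsonOutputParser yields Python objects: per the prompt, a list of
# {"question": ..., "answer": ...} dicts, which is why app.py indexes [0].
print(pairs[0]["question"])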
requirements.txt
ADDED
@@ -0,0 +1,11 @@
langchain
openai
langchain_openai
PyPDF2
python-dotenv
requests
beautifulsoup4
faiss-cpu
streamlit
# random and collections.Counter are part of the Python standard
# library and need no pip entry.
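A quick way to confirm the dependencies resolve after pip install -r requirements.txt; a sketch, noting that a few import names differ from their PyPI package names:

# Import names corresponding to the requirements entries.
import langchain
import openai
import langchain_openai
import PyPDF2
import dotenv      # provided by python-dotenv
import requests
import bs4         # provided by beautifulsoup4
import faiss       # provided by faiss-cpu
import streamlit
print("all requirements import cleanly")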
utils.py
ADDED
@@ -0,0 +1,28 @@
from collections import Counter
import streamlit as st

def get_topics(texts, chain):
    # Ask the topics chain about each chunk and collect every topic it names
    all_topics = []
    for t in texts:
        response = chain(t.page_content)
        response = response['top_topics'].split(", ")
        all_topics.extend([x.strip() for x in response])
    # Keep the three most frequently suggested topics (case-insensitive)
    most_common_words = Counter([x.lower() for x in all_topics]).most_common(3)
    most_common_words_without_count = [word for word, _ in most_common_words]
    return most_common_words_without_count


# def question_answering(response, eval_chain):
#     for i in range(len(response)):
#         st.write(f"\n Question {i+1}: {response[i]['question']}")
#         user_answer = st.text_input(f"Answer {i+1} here: ")
#         if st.button(f"Evaluate {i+1}"):
#             score = eval_chain({"context": response[i]['answer'],
#                                 "answer": user_answer})
#             st.write(f"You scored {score['score']}/10")
#             if int(score['score']) < 6:
#                 st.write(f"The correct answer would be: {response[i]['answer']}")
#             else:
#                 st.write("Good Job!!!")
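get_topics only needs something callable on a chunk's text that returns a dict with a 'top_topics' string, so it can be unit-tested without a live LLMChain. A sketch with hypothetical stand-ins (the stub_chain and the fake chunks are not part of this repo):

from types import SimpleNamespace
from utils import get_topics

# Fake document chunks exposing the .page_content attribute utils expects.
chunks = [SimpleNamespace(page_content="text about the French Revolution"),
          SimpleNamespace(page_content="text about the Industrial Revolution")]

def stub_chain(text):
    # Mimics LLMChain's output dict, keyed by output_key='top_topics'.
    return {"top_topics": "history, Science, history"}

print(get_topics(chunks, stub_chain))  # -> ['history', 'science']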