Spaces:
Running
Running
File size: 2,389 Bytes
bba13a8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
import os
import tempfile

import chromadb
import PyPDF2
import streamlit as st
from langchain_openai import OpenAI, OpenAIEmbeddings
# Function to extract text from PDF
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_path: Either a filesystem path to a PDF, or an already-open
            binary file-like object (anything exposing ``read``), such as an
            in-memory upload.

    Returns:
        A single string with the text of all pages in page order. Pages that
        yield no text contribute an empty string.
    """
    if hasattr(pdf_path, "read"):
        # Already a binary stream: hand it to the reader without reopening.
        return _pdf_stream_to_text(pdf_path)
    with open(pdf_path, "rb") as file:
        return _pdf_stream_to_text(file)


def _pdf_stream_to_text(stream):
    """Read all pages from an open binary PDF stream and join their text."""
    # PdfReader / .pages / .extract_text() replace the PdfFileReader /
    # numPages / getPage / extractText names removed in PyPDF2 3.0.
    reader = PyPDF2.PdfReader(stream)
    # `or ""` guards against a falsy result for a page without extractable
    # text, so join never receives None.
    return "".join(page.extract_text() or "" for page in reader.pages)
# Function to create chunks of text
def create_text_chunks(text, chunk_size=1000, overlap_size=100):
    """Split text into fixed-size chunks where consecutive chunks overlap.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk. Must be positive.
        overlap_size: Number of characters shared between one chunk and the
            next. Must satisfy ``0 <= overlap_size < chunk_size``.

    Returns:
        A list of chunks in document order. The empty string yields an empty
        list. The final chunk may be shorter than ``chunk_size``.

    Raises:
        ValueError: If ``chunk_size`` is not positive, or ``overlap_size`` is
            negative or not smaller than ``chunk_size`` (the window could
            not advance, or would skip text).
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap_size < chunk_size:
        raise ValueError(
            "overlap_size must satisfy 0 <= overlap_size < chunk_size"
        )

    step = chunk_size - overlap_size
    text_length = len(text)
    chunks = []
    for start in range(0, text_length, step):
        end = start + chunk_size
        chunks.append(text[start:end])
        # Once a chunk reaches the end of the text, any further window would
        # lie entirely inside this chunk's overlap region, so stop here
        # instead of emitting a redundant tail fragment.
        if end >= text_length:
            break
    return chunks
# Function to save chunks to chromadb vector database
def save_to_chromadb(chunks, quiz_name, quiz_topic):
    """Embed text chunks and store them in a Chroma collection for the quiz.

    Args:
        chunks: Iterable of text chunks to index.
        quiz_name: Name of the quiz; used as the Chroma collection name.
            NOTE(review): Chroma validates collection names (length and
            allowed characters) - confirm user-entered names satisfy that.
        quiz_topic: Topic label stored as metadata on every chunk and used
            as the prefix of each chunk id.

    Returns:
        None. Does nothing when ``chunks`` is empty.
    """
    chunk_list = list(chunks)
    if not chunk_list:
        # Nothing to embed or store for an empty chunk list.
        return

    # In-process client; swap for chromadb.PersistentClient(path=...) or
    # chromadb.HttpClient(host=..., port=...) to keep data across restarts.
    client = chromadb.Client()
    collection = client.get_or_create_collection(name=quiz_name)

    # One embedding vector per chunk, in the same order as chunk_list.
    # NOTE(review): presumably needs OpenAI credentials configured in the
    # environment - confirm deployment settings.
    vectors = OpenAIEmbeddings().embed_documents(chunk_list)

    indices = range(len(chunk_list))
    # upsert (rather than add) keeps the call idempotent: Streamlit re-runs
    # the script on every interaction, so the same ids are written repeatedly.
    collection.upsert(
        ids=[f"{quiz_topic}-{i}" for i in indices],
        embeddings=vectors,
        documents=chunk_list,
        metadatas=[{"topic": quiz_topic, "chunk_index": i} for i in indices],
    )
# Function to generate questions using the OpenAI LLM wrapper
def generate_questions(topic):
    """Ask the language model to generate questions about a topic.

    Args:
        topic: Free-text topic or query to generate questions for.

    Returns:
        The model's completion text with surrounding whitespace stripped.
    """
    prompt = f"Generate questions on the topic: {topic}"
    # NOTE(review): this uses the wrapper's default completion model. An
    # earlier comment mentioned gpt-3.5-turbo-16k, which is a chat model and
    # would need the chat wrapper instead - confirm which model is wanted.
    llm = OpenAI()
    # invoke() on the LLM wrapper returns the completion as a plain string.
    return llm.invoke(prompt).strip()
# Streamlit interface
def main():
    """Render the quiz-generator page.

    Flow: collect the quiz name, topic and a PDF upload; extract the PDF
    text, chunk it and store the chunks; then, once the user has entered a
    query and the topic occurs in the document, generate and display
    questions. Each step is skipped until its inputs are available, because
    Streamlit re-runs this function on every widget interaction.
    """
    st.title("Quiz Generator")

    # User inputs
    quiz_name = st.text_input("Enter Quiz Name:")
    quiz_topic = st.text_input("Enter Quiz Topic:")
    # NOTE(review): num_questions is collected but not consumed yet, because
    # generate_questions() only accepts a topic.
    num_questions = st.number_input("Number of Questions:", value=5, min_value=1)
    pdf_path = st.file_uploader("Upload PDF File:", type=["pdf"])

    if not pdf_path:
        return
    if not (quiz_name.strip() and quiz_topic.strip()):
        st.info("Enter a quiz name and a quiz topic to process the uploaded PDF.")
        return

    # Spool the in-memory upload to a real file so extract_text_from_pdf can
    # be handed a filesystem path; the file is removed once the text is read.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
        tmp.write(pdf_path.getvalue())
    try:
        # Extract text from PDF
        pdf_text = extract_text_from_pdf(tmp.name)
    finally:
        os.unlink(tmp.name)

    # Create and save text chunks to ChromaDB
    text_chunks = create_text_chunks(pdf_text)
    if text_chunks:
        save_to_chromadb(text_chunks, quiz_name, quiz_topic)

    # User input for query
    user_query = st.text_input("Enter Query for Question Generation:")
    if not user_query.strip():
        # Nothing to generate until the user has typed a query.
        return

    # No vector-store handle is available in this function, so the topic is
    # matched (case-insensitively) against the extracted text, which is what
    # the warning below reports on.
    if quiz_topic.strip().casefold() in pdf_text.casefold():
        # Generate questions for the user's query
        generated_questions = generate_questions(user_query)
        st.subheader("Generated Questions:")
        st.write(generated_questions)
    else:
        st.warning("Specified topic not found in the document.")
# Start the Streamlit page only when this file is run as a script.
if __name__ == "__main__":
    main()
|