# Spaces:
# Running
# Running
import os
import tempfile

import chromadb
import PyPDF2
import streamlit as st
from langchain_openai import OpenAI, OpenAIEmbeddings
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of a PDF.

    Args:
        pdf_path: A filesystem path to the PDF, or an already-open binary
            file-like object (for example the upload object returned by
            ``st.file_uploader``). ``PyPDF2.PdfReader`` accepts both, so no
            explicit ``open()`` is needed here.

    Returns:
        The text of all pages joined in page order. A page without any
        extractable text contributes an empty string.
    """
    # PdfReader / .pages / extract_text() replace PdfFileReader / numPages /
    # getPage / extractText, which were removed in PyPDF2 3.0.
    reader = PyPDF2.PdfReader(pdf_path)
    # ``or ""`` guards against a page yielding no text; join avoids the
    # quadratic cost of repeated string concatenation on large documents.
    return "".join(page.extract_text() or "" for page in reader.pages)
def create_text_chunks(text, chunk_size=1000, overlap_size=100):
    """Split ``text`` into fixed-size chunks that overlap their neighbours.

    Consecutive chunks start ``chunk_size - overlap_size`` characters apart,
    so each chunk repeats the last ``overlap_size`` characters of the one
    before it. The final chunk may be shorter than ``chunk_size``.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk; must be > 0.
        overlap_size: Number of characters shared by adjacent chunks; must
            satisfy ``0 <= overlap_size < chunk_size``.

    Returns:
        A list of chunks in document order; ``[]`` for empty ``text``.

    Raises:
        ValueError: If ``chunk_size`` or ``overlap_size`` is out of range.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    # An overlap >= chunk_size would give a zero or negative stride (no
    # forward progress); a negative overlap would skip text between chunks.
    if not 0 <= overlap_size < chunk_size:
        raise ValueError(
            "overlap_size must satisfy 0 <= overlap_size < chunk_size"
        )
    if not text:
        return []

    stride = chunk_size - overlap_size
    # A chunk starting at ``s`` is only useful if the previous chunk (which
    # ends at ``s + overlap_size``) did not already reach the end of the
    # text; otherwise it would be a pure duplicate of that chunk's tail.
    # ``max(..., 1)`` keeps the first chunk for texts shorter than the overlap.
    start_limit = max(len(text) - overlap_size, 1)
    return [
        text[start:start + chunk_size]
        for start in range(0, start_limit, stride)
    ]
def save_to_chromadb(chunks, quiz_name, quiz_topic):
    """Embed text chunks with OpenAI and store them in a ChromaDB collection.

    Each chunk is stored with its embedding, its raw text, and metadata
    recording the quiz name, quiz topic and chunk position, so entries can
    later be filtered by quiz or topic.

    Args:
        chunks: Iterable of text chunks to store.
        quiz_name: Name of the quiz the chunks belong to.
        quiz_topic: Topic label recorded alongside every chunk.

    Returns:
        None.
    """
    chunks = list(chunks)
    if not chunks:
        # Nothing to embed; also avoids sending an empty batch to the API.
        return

    # One batched embedding request instead of one request per chunk.
    # OpenAIEmbeddings reads the API key from the environment.
    vectors = OpenAIEmbeddings().embed_documents(chunks)

    # NOTE(review): chromadb.Client() is the in-process client; switch to
    # chromadb.PersistentClient(path=...) or chromadb.HttpClient(host=...)
    # if the data must outlive this process or live on a server.
    client = chromadb.Client()
    # A single fixed collection is used because user-typed quiz names may
    # not satisfy ChromaDB's collection-name rules; the quiz name goes into
    # the metadata instead.
    collection = client.get_or_create_collection("quiz_chunks")

    positions = range(len(chunks))
    # upsert (rather than add) keeps repeated saves of the same quiz
    # idempotent instead of colliding on existing ids.
    collection.upsert(
        ids=[f"{quiz_name}:{quiz_topic}:{i}" for i in positions],
        embeddings=vectors,
        documents=chunks,
        metadatas=[
            {"quiz_name": quiz_name, "quiz_topic": quiz_topic, "chunk_index": i}
            for i in positions
        ],
    )
def generate_questions(topic):
    """Ask the OpenAI completion model for questions about ``topic``.

    Args:
        topic: Free-text topic or query the questions should cover.

    Returns:
        The model's reply as a string with surrounding whitespace removed.
    """
    prompt = f"Generate questions on the topic: {topic}"
    # Use the ``OpenAI`` LLM class this file imports; the module name
    # ``langchain_openai`` itself is never bound, and the LangChain LLM
    # returns a plain string rather than a ``.choices`` response object.
    # NOTE(review): the original comment mentioned gpt-3.5-turbo-16k, which
    # is a chat model and would presumably need ChatOpenAI - confirm which
    # model is intended.
    llm = OpenAI()
    return llm.invoke(prompt).strip()
def main():
    """Render the Streamlit quiz-generator page.

    Collects a quiz name, topic and question count. Once a PDF is uploaded,
    its text is extracted, chunked and passed to ``save_to_chromadb``. When
    the user then enters a query and the quiz topic occurs in the document
    text, questions generated for that query are displayed; otherwise a
    warning is shown.
    """
    st.title("Quiz Generator")

    # User inputs
    quiz_name = st.text_input("Enter Quiz Name:")
    quiz_topic = st.text_input("Enter Quiz Topic:")
    num_questions = st.number_input("Number of Questions:", value=5, min_value=1)
    uploaded_pdf = st.file_uploader("Upload PDF File:", type=["pdf"])
    if uploaded_pdf is None:
        return

    # Streamlit reruns this function on every widget interaction. Remember
    # what was already processed so the PDF is not re-parsed and re-embedded
    # each time the user edits the query box.
    upload_key = (uploaded_pdf.name, uploaded_pdf.size, quiz_name, quiz_topic)
    if st.session_state.get("indexed_upload") != upload_key:
        # The uploader returns an in-memory file object, not a filesystem
        # path, so spill it to a temporary file for the path-based extractor.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp_file:
            tmp_file.write(uploaded_pdf.getvalue())
        try:
            extracted = extract_text_from_pdf(tmp_file.name)
        finally:
            os.remove(tmp_file.name)

        # Create and save text chunks to ChromaDB
        save_to_chromadb(create_text_chunks(extracted), quiz_name, quiz_topic)
        st.session_state["indexed_upload"] = upload_key
        st.session_state["indexed_text"] = extracted
    pdf_text = st.session_state["indexed_text"]

    # User input for query
    user_query = st.text_input("Enter Query for Question Generation:")
    if not (quiz_topic.strip() and user_query.strip()):
        # Nothing to look up or ask yet; avoid calling the model with a
        # blank prompt on the first render.
        return

    # No database handle exists in this scope, so the topic is looked up
    # directly in the extracted document text (case-insensitively).
    if quiz_topic.strip().lower() in pdf_text.lower():
        # generate_questions takes a single text argument, so the requested
        # count is carried inside the request text.
        request = f"{user_query} (exactly {int(num_questions)} questions)"
        generated_questions = generate_questions(request)
        st.subheader("Generated Questions:")
        st.write(generated_questions)
    else:
        st.warning("Specified topic not found in the document.")
# Launch the page only when this file is executed as a script (which is how
# ``streamlit run`` executes it), not when it is imported as a module.
if __name__ == "__main__":
    main()