|
import os |
|
import gradio as gr |
|
from langchain.chains import ConversationalRetrievalChain |
|
from langchain.text_splitter import CharacterTextSplitter |
|
from langchain.document_loaders import PyPDFLoader |
|
from langchain.vectorstores import Chroma |
|
from langchain.chat_models import ChatOpenAI |
|
from langchain.embeddings.openai import OpenAIEmbeddings |
|
|
|
# Module-level cache for the most recently built Chroma vector store.
# None until a PDF has been processed; set by process_pdf_upload, and may
# hold an error-message string when processing failed.
vectordb = None
|
|
|
|
|
def load_and_process_pdf(pdf_file, api_key):
    """Load a PDF, split it into chunks, and build a Chroma vector store.

    Args:
        pdf_file: Uploaded file object exposing a ``.name`` path (gradio File).
        api_key: OpenAI API key used by the embedding model.

    Returns:
        A Chroma vector store on success, or an error-message string on
        failure (callers distinguish the cases with ``isinstance(x, str)``).
    """
    # Guard against a missing upload so the user sees a clear message
    # instead of the text of an AttributeError from ``pdf_file.name``.
    if pdf_file is None:
        return "η‘ζ³θηζδ»ΆοΌι―θͺ€: ζͺζδΎ PDF ζδ»Ά"

    try:
        loader = PyPDFLoader(pdf_file.name)
        documents = loader.load()

        # Overlapping chunks preserve context that straddles a split boundary.
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
        docs = text_splitter.split_documents(documents)

        embeddings = OpenAIEmbeddings(openai_api_key=api_key)
        # persist_directory lets Chroma write the index to disk for reuse.
        vectordb = Chroma.from_documents(docs, embedding=embeddings, persist_directory="./data")
        return vectordb
    except Exception as e:
        # Surface the failure as a string; the UI shows it to the user.
        return f"η‘ζ³θηζδ»ΆοΌι―θͺ€: {str(e)}"
|
|
|
|
|
def handle_query(api_key, user_message, pdf_vectordb, chat_history):
    """Answer ``user_message`` using the PDF vector store.

    Args:
        api_key: OpenAI API key for the chat model.
        user_message: The user's question.
        pdf_vectordb: Chroma store from load_and_process_pdf, or an error
            string / None when no valid PDF has been processed yet.
        chat_history: List of (question, answer) tuples; appended to in place
            on success.

    Returns:
        Tuple of (chat_history, answer_or_error_message).
    """
    # A string here means PDF processing failed (see load_and_process_pdf).
    if not pdf_vectordb or isinstance(pdf_vectordb, str):
        return chat_history, "θ«ε…ˆδΈε³ζζη PDF ζδ»Άγ"

    try:
        # Retrieve the 5 most relevant chunks for the question.
        retriever = pdf_vectordb.as_retriever(search_kwargs={"k": 5})
        pdf_qa_chain = ConversationalRetrievalChain.from_llm(
            ChatOpenAI(temperature=0.7, model_name="gpt-4", openai_api_key=api_key),
            retriever=retriever
        )
        result = pdf_qa_chain({"question": user_message, "chat_history": chat_history})
        answer = result.get("answer", "ζ±ζοΌζη‘ζ³ζδΎηζ‘γ")
        chat_history.append((user_message, answer))
        return chat_history, answer
    except Exception as e:
        return chat_history, f"εΊηΎι―θͺ€: {str(e)}"
|
|
|
|
|
def process_pdf_upload(api_key, pdf_file):
    """Gradio callback: (re)build the global vector store from an upload.

    Args:
        api_key: OpenAI API key forwarded to the embedding model.
        pdf_file: The uploaded gradio File object.

    Returns:
        A status message for the UI: the error string produced by
        load_and_process_pdf on failure, or a success prompt.
    """
    global vectordb
    vectordb = load_and_process_pdf(pdf_file, api_key)
    # load_and_process_pdf signals failure by returning a message string.
    if isinstance(vectordb, str):
        return vectordb
    return "ι£θε·²ζεθΌε…₯οΌθ«ιε§ι»θγ"
|
|
|
|
|
# --- Gradio UI: API key + PDF upload on top, chat window and answer box below.
with gr.Blocks() as demo:
    gr.Markdown("## π ειε€§ε»ζδ½ εθ ")

    with gr.Row():
        api_key = gr.Textbox(label="π θ«θΌΈε…₯ζ¨η OpenAI API Key", type="password")
        pdf_file = gr.File(label="π δΈε³ PDF ι£θ", file_types=[".pdf"])

    chatbot = gr.Chatbot(label="π¬ θ倩ε")
    # Per-session (question, answer) history passed to the retrieval chain.
    state = gr.State([])

    with gr.Row():
        user_input = gr.Textbox(show_label=False, placeholder="θ«θΌΈε…₯ει‘...", lines=2)
        send_btn = gr.Button("ζδΊ€ει‘")

    response = gr.Textbox(label="π’ εζ", interactive=False, lines=4)

    # Rebuild the global vector store whenever a new PDF is chosen.
    pdf_file.change(process_pdf_upload, inputs=[api_key, pdf_file], outputs=response)

    def handle_user_input(api_key, user_message, chat_history):
        """Route a chat message: require a loaded PDF, short-circuit the
        'θ¬θ¬' easter egg, otherwise delegate to handle_query."""
        if not vectordb:
            return chat_history, "θ«ε…ˆδΈε³ PDF ι£θγ"
        if user_message.strip().lower() == "θ¬θ¬":
            return chat_history, "ε ζ²Ή~~~"
        return handle_query(api_key, user_message, vectordb, chat_history)

    # Both the button and pressing Enter submit the question.
    send_btn.click(handle_user_input, inputs=[api_key, user_input, state], outputs=[chatbot, response])
    user_input.submit(handle_user_input, inputs=[api_key, user_input, state], outputs=[chatbot, response])

demo.launch()