# PDF recipe Q&A chatbot: LangChain (ConversationalRetrievalChain + Chroma) with a Gradio UI.
import os
import gradio as gr
from langchain.chains import ConversationalRetrievalChain
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
from langchain.vectorstores import Chroma
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
vectordb = None
# θη PDF ζδ»ΆδΈ¦εε§εειζΈζεΊ«
def load_and_process_pdf(pdf_file, api_key):
try:
loader = PyPDFLoader(pdf_file.name)
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
docs = text_splitter.split_documents(documents)
# ε³ι API Key
embeddings = OpenAIEmbeddings(openai_api_key=api_key)
vectordb = Chroma.from_documents(docs, embedding=embeddings, persist_directory="./data")
return vectordb
except Exception as e:
return f"η‘ζ³θηζδ»ΆοΌι―θͺ€: {str(e)}"
# ζ₯θ©’θηε½ζΈ
def handle_query(api_key, user_message, pdf_vectordb, chat_history):
if not pdf_vectordb or isinstance(pdf_vectordb, str):
return chat_history, "θ«ε
δΈε³ζζη PDF ζδ»Άγ"
try:
retriever = pdf_vectordb.as_retriever(search_kwargs={"k": 5})
pdf_qa_chain = ConversationalRetrievalChain.from_llm(
ChatOpenAI(temperature=0.7, model_name="gpt-4", openai_api_key=api_key),
retriever=retriever
)
result = pdf_qa_chain({"question": user_message, "chat_history": chat_history})
answer = result.get("answer", "ζ±ζοΌζη‘ζ³ζδΎηζ‘γ")
chat_history.append((user_message, answer))
return chat_history, answer
except Exception as e:
return chat_history, f"εΊηΎι―θͺ€: {str(e)}"
# ηΆη¨ζΆδΈε³ PDF ζθη
def process_pdf_upload(api_key, pdf_file):
global vectordb
vectordb = load_and_process_pdf(pdf_file, api_key)
if isinstance(vectordb, str):
return vectordb
return "ι£θε·²ζεθΌε
₯οΌθ«ιε§ι»θγ"
# δΈ»η¨εΌ - Gradio δ»ι’
with gr.Blocks() as demo:
gr.Markdown("## π ειε€§ε»ζδ½ εθ ")
with gr.Row():
api_key = gr.Textbox(label="π θ«θΌΈε
₯ζ¨η OpenAI API Key", type="password")
pdf_file = gr.File(label="π δΈε³ PDF ι£θ", file_types=[".pdf"])
chatbot = gr.Chatbot(label="π¬ θ倩ε")
state = gr.State([])
with gr.Row():
user_input = gr.Textbox(show_label=False, placeholder="θ«θΌΈε
₯ει‘...", lines=2)
send_btn = gr.Button("ζδΊ€ει‘")
response = gr.Textbox(label="π’ εζ", interactive=False, lines=4)
pdf_file.change(process_pdf_upload, inputs=[api_key, pdf_file], outputs=response)
def handle_user_input(api_key, user_message, chat_history):
if not vectordb:
return chat_history, "θ«ε
δΈε³ PDF ι£θγ"
if user_message.strip().lower() == "θ¬θ¬":
return chat_history, "ε ζ²Ή~~~"
return handle_query(api_key, user_message, vectordb, chat_history)
send_btn.click(handle_user_input, inputs=[api_key, user_input, state], outputs=[chatbot, response])
user_input.submit(handle_user_input, inputs=[api_key, user_input, state], outputs=[chatbot, response])
demo.launch() |