# NOTE: the "Spaces: Running / Running" header that preceded this file was
# web-page residue from the Hugging Face Space it was scraped from — it is
# not part of the source code and has been removed.
import os
from uuid import uuid4

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_huggingface import HuggingFaceEndpoint
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

from setup.environment import default_model

# NOTE(review): these .get() calls discard their result, so they do NOT
# validate that the keys are actually set. Presumably intended as a
# reminder of required env vars — confirm, and either remove them or fail
# fast when a variable is missing.
os.environ.get("OPENAI_API_KEY")
os.environ.get("HUGGINGFACEHUB_API_TOKEN")

# Shared Chroma vector store; documents added via create_retriever are
# embedded with OpenAI embeddings into the "documents" collection.
vectorstore = Chroma(
    collection_name="documents",
    embedding_function=OpenAIEmbeddings(),
)

# Ids of every chunk added via getPDF (kept so vectors can be tracked or
# deleted later).
allIds = []
def getPDF(file_path):
    """Load a PDF, split it into overlapping chunks, and tag each chunk.

    Each resulting Document chunk is assigned a fresh UUID as its ``id``;
    the same id is appended to the module-level ``allIds`` list so callers
    can later track or delete the vectors produced from this file.

    Args:
        file_path: Path to the PDF file on disk.

    Returns:
        list: The split Document chunks, each with a unique ``id``.
    """
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    loader = PyPDFLoader(file_path, extract_images=False)
    pages = loader.load_and_split(text_splitter)
    for page in pages:
        # One UUID per chunk; recorded globally before being attached.
        document_id = str(uuid4())
        allIds.append(document_id)
        page.id = document_id
    return pages
def create_retriever(documents):
    """Add *documents* to the shared vector store and return a retriever.

    Args:
        documents: Iterable of langchain Document objects (e.g. the output
            of ``getPDF``).

    Returns:
        A retriever backed by the module-level ``vectorstore`` with default
        search settings.
    """
    vectorstore.add_documents(documents=documents)
    return vectorstore.as_retriever()
def create_prompt_llm_chain(system_prompt, modelParam):
    """Build a "stuff documents" question-answering chain for a model.

    Args:
        system_prompt: System instructions for the model. ``"{context}"``
            is appended so retrieved documents can be stuffed into the
            prompt by the chain.
        modelParam: Model identifier. When equal to ``default_model`` an
            OpenAI chat model is used; otherwise it is treated as a
            Hugging Face Hub repo id served via an inference endpoint.

    Returns:
        The runnable chain produced by ``create_stuff_documents_chain``.
    """
    if modelParam == default_model:
        model = ChatOpenAI(model=modelParam)
    else:
        model = HuggingFaceEndpoint(
            repo_id=modelParam,
            task="text-generation",
            max_new_tokens=100,
            do_sample=False,  # deterministic (greedy) generation
            huggingfacehub_api_token=os.environ.get("HUGGINGFACEHUB_API_TOKEN"),
        )
    prompt = ChatPromptTemplate.from_messages(
        [
            # "{context}" placeholder receives the stuffed documents.
            ("system", system_prompt + "\n\n" + "{context}"),
            ("human", "{input}"),
        ]
    )
    return create_stuff_documents_chain(model, prompt)