# Source: Hugging Face file view -- author rohan13, commit 6009c1f ("msg fix"), 18.7 kB
import os
import pickle
import langchain
import faiss
from langchain import HuggingFaceHub
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import DirectoryLoader, TextLoader, UnstructuredHTMLLoader
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceHubEmbeddings
from langchain.memory import ConversationBufferWindowMemory
from langchain.prompts.chat import (
ChatPromptTemplate,
HumanMessagePromptTemplate,
SystemMessagePromptTemplate,
)
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores.faiss import FAISS
from langchain.cache import InMemoryCache
# Map each local document path (as stored in a loaded document's "source" metadata) to its public URL.
# fetch_data_for_embeddings() copies the matching URL into each document's "url" metadata.
file_url_mapping = {
'docs/01_course-orientation/01_about-the-course/01_introduction-to-the-3d-printing-specialization.en.txt':
'https://www.coursera.org/learn/3d-printing-revolution/lecture/mLvWU/introduction-to-the-3d-printing-specialization',
'docs/01_course-orientation/01_about-the-course/02_welcome-to-the-3d-printing-revolution.en.txt':
'https://www.coursera.org/learn/3d-printing-revolution/lecture/zQ7lG/welcome-to-the-3d-printing-revolution',
'docs/03_module-2-why-is-it-revolutionary/03_the-3d-printing-revolution-facts-concepts/02_how-will-3d-printing-change-business.en.txt':
'https://www.coursera.org/learn/3d-printing-revolution/lecture/D8VUj/how-will-3d-printing-change-business',
'docs/03_module-2-why-is-it-revolutionary/03_the-3d-printing-revolution-facts-concepts/01_whats-special-about-3d-printing.en.txt':
'https://www.coursera.org/learn/3d-printing-revolution/lecture/WmJQL/whats-special-about-3d-printing',
'docs/03_module-2-why-is-it-revolutionary/03_the-3d-printing-revolution-facts-concepts/04_remixing-products-exercise-overview.en.txt':
'https://www.coursera.org/learn/3d-printing-revolution/lecture/J3Tq3/remixing-products-exercise-overview',
'docs/03_module-2-why-is-it-revolutionary/03_the-3d-printing-revolution-facts-concepts/03_the-future-of-3d-printing.en.txt':
'https://www.coursera.org/learn/3d-printing-revolution/lecture/tjSlh/the-future-of-3d-printing',
'docs/03_module-2-why-is-it-revolutionary/02_an-early-look-at-the-coming-revolution/01_tour-of-the-illinois-makerlab-vishal-sachdev.en.txt':
'https://www.coursera.org/learn/3d-printing-revolution/lecture/rcVnN/tour-of-the-illinois-makerlab-vishal-sachdev',
'docs/03_module-2-why-is-it-revolutionary/02_an-early-look-at-the-coming-revolution/02_meet-the-makers-arielle-rausin-cameron-alberg-scott-zelman.en.txt':
'https://www.coursera.org/learn/3d-printing-revolution/lecture/J2nD2/meet-the-makers-arielle-rausin-cameron-alberg-scott-zelman',
'docs/03_module-2-why-is-it-revolutionary/04_the-revolutionaries/03_shapeways-lauren-slowik.en.txt':
'https://www.coursera.org/learn/3d-printing-revolution/lecture/mi1zr/shapeways-lauren-slowik',
'docs/03_module-2-why-is-it-revolutionary/04_the-revolutionaries/04_3d-fashion-francis-bitonti.en.txt':
'https://www.coursera.org/learn/3d-printing-revolution/lecture/KmYxf/3d-fashion-francis-bitonti',
'docs/03_module-2-why-is-it-revolutionary/04_the-revolutionaries/06_whats-next-hod-lipson.en.txt':
'https://www.coursera.org/learn/3d-printing-revolution/lecture/3zKRN/whats-next-hod-lipson',
'docs/03_module-2-why-is-it-revolutionary/04_the-revolutionaries/05_3d-printed-battery-paul-braun.en.txt':
'https://www.coursera.org/learn/3d-printing-revolution/lecture/mrEAK/3d-printed-battery-paul-braun',
'docs/03_module-2-why-is-it-revolutionary/04_the-revolutionaries/02_normal-nikki-kaufman.en.txt':
'https://www.coursera.org/learn/3d-printing-revolution/lecture/I8QXq/normal-nikki-kaufman',
'docs/03_module-2-why-is-it-revolutionary/04_the-revolutionaries/01_voodoo-manufacturing-max-friefeld.en.txt':
'https://www.coursera.org/learn/3d-printing-revolution/lecture/atrTP/voodoo-manufacturing-max-friefeld',
'docs/04_course-conclusion/01_course-wrap-up-whats-next/01_the-3d-printing-revolution-wrap-up.en.txt':
'https://www.coursera.org/learn/3d-printing-revolution/lecture/f6XvX/the-3d-printing-revolution-wrap-up',
'docs/04_course-conclusion/01_course-wrap-up-whats-next/03_gies-online-programs.en.txt':
'https://www.coursera.org/learn/3d-printing-revolution/lecture/JdvZk/gies-online-programs',
'docs/02_module-1-what-is-3d-printing/05_optional-content/01_like-this-course-learn-more-with-the-imba-optional.en.txt':
'https://www.coursera.org/learn/3d-printing-revolution/lecture/MIATt/like-this-course-learn-more-with-the-imba-optional',
'docs/02_module-1-what-is-3d-printing/03_3d-printing-facts-concepts/04_where-designs-come-from.en.txt':
'https://www.coursera.org/learn/3d-printing-revolution/lecture/0fU1x/where-designs-come-from',
'docs/02_module-1-what-is-3d-printing/03_3d-printing-facts-concepts/05_where-to-find-3d-printers.en.txt':
'https://www.coursera.org/learn/3d-printing-revolution/lecture/3XTPj/where-to-find-3d-printers',
'docs/02_module-1-what-is-3d-printing/03_3d-printing-facts-concepts/01_history-of-3d-printing.en.txt':
'https://www.coursera.org/learn/3d-printing-revolution/lecture/HMvuh/history-of-3d-printing',
'docs/02_module-1-what-is-3d-printing/03_3d-printing-facts-concepts/02_how-3d-printers-work.en.txt':
'https://www.coursera.org/learn/3d-printing-revolution/lecture/dNpvw/how-3d-printers-work',
'docs/02_module-1-what-is-3d-printing/03_3d-printing-facts-concepts/03_materials-costs.en.txt':
'https://www.coursera.org/learn/3d-printing-revolution/lecture/j63D2/materials-costs',
'docs/02_module-1-what-is-3d-printing/03_3d-printing-facts-concepts/06_3d-printing-applications.en.txt':
'https://www.coursera.org/learn/3d-printing-revolution/lecture/EJdi9/3d-printing-applications',
'docs/02_module-1-what-is-3d-printing/04_more-3d-printing-insights/03_selective-laser-melting-rodrigo-gutierrez.en.txt':
'https://www.coursera.org/learn/3d-printing-revolution/lecture/kkpwz/selective-laser-melting-rodrigo-gutierrez',
'docs/02_module-1-what-is-3d-printing/04_more-3d-printing-insights/04_3d-printing-ecosystem-aaron-roy.en.txt':
'https://www.coursera.org/learn/3d-printing-revolution/lecture/2zjzy/3d-printing-ecosystem-aaron-roy',
'docs/02_module-1-what-is-3d-printing/04_more-3d-printing-insights/01_3d-printing-demonstration-danny-lohan.en.txt':
'https://www.coursera.org/learn/3d-printing-revolution/lecture/5b8IG/3d-printing-demonstration-danny-lohan',
'docs/02_module-1-what-is-3d-printing/04_more-3d-printing-insights/02_3d-printing-vs-additive-manufacturing-mark-cotteleer.en.txt':
'https://www.coursera.org/learn/3d-printing-revolution/lecture/qBvTb/3d-printing-vs-additive-manufacturing-mark-cotteleer',
'docs/02_module-1-what-is-3d-printing/02_3d-printing-insights/02_my-3d-printing-story-aric-rindfleisch.en.txt':
'https://www.coursera.org/learn/3d-printing-revolution/lecture/hhWc8/my-3d-printing-story-aric-rindfleisch',
'docs/02_module-1-what-is-3d-printing/02_3d-printing-insights/04_3d-printing-the-maker-movement-hackerspaces-chris-meyer.en.txt':
'https://www.coursera.org/learn/3d-printing-revolution/lecture/8HO88/3d-printing-the-maker-movement-hackerspaces-chris-meyer',
'docs/02_module-1-what-is-3d-printing/02_3d-printing-insights/06_what-would-you-make-exercise-overview.en.txt':
'https://www.coursera.org/learn/3d-printing-revolution/lecture/kfDp2/what-would-you-make-exercise-overview',
'docs/02_module-1-what-is-3d-printing/02_3d-printing-insights/05_the-birth-of-desktop-3d-printing-matt-griffin.en.txt':
'https://www.coursera.org/learn/3d-printing-revolution/lecture/RlvEF/the-birth-of-desktop-3d-printing-matt-griffin',
'docs/02_module-1-what-is-3d-printing/02_3d-printing-insights/03_tour-of-sector67-chris-meyer.en.txt':
'https://www.coursera.org/learn/3d-printing-revolution/lecture/HXJrm/tour-of-sector67-chris-meyer',
'docs/02_module-1-what-is-3d-printing/02_3d-printing-insights/01_views-on-3d-printing-champaign-new-york.en.txt':
'https://www.coursera.org/learn/3d-printing-revolution/lecture/jvULv/views-on-3d-printing-champaign-new-york',
'docs/01_course-orientation/01_about-the-course/05_glossary_instructions.html':
'https://www.coursera.org/learn/3d-printing-revolution/supplement/515pX/glossary',
'docs/01_course-orientation/01_about-the-course/03_syllabus_instructions.html':
'https://www.coursera.org/learn/3d-printing-revolution/supplement/l2J8d/syllabus',
'docs/01_course-orientation/01_about-the-course/06_learner-stories_instructions.html':
'https://www.coursera.org/learn/3d-printing-revolution/supplement/goE8a/learner-stories',
'docs/01_course-orientation/01_about-the-course/04_about-the-discussion-forums_instructions.html':
'https://www.coursera.org/learn/3d-printing-revolution/supplement/HA7H7/about-the-discussion-forums',
'docs/01_course-orientation/02_about-your-classmates/02_social-media_illinois.edu.html':
'https://www.facebook.com/illinois.edu',
'docs/01_course-orientation/02_about-your-classmates/01_updating-your-profile_instructions.html':
'https://www.coursera.org/learn/3d-printing-revolution/supplement/f2iFO/updating-your-profile',
'docs/01_course-orientation/02_about-your-classmates/02_social-media_termsDknlfPe-5xKJJaHWl4j6kveY2GjKiZxK_vd-JDIN3-6piB5w.2PhI_l8TrILDNPPI-v-VPg.kolgdmfhbhzcbgm3-lv_.html':
'https://www.coursera.org/about/termsDknlfPe-5xKJJaHWl4j6kveY2GjKiZxK_vd-JDIN3-6piB5w.2PhI_l8TrILDNPPI-v-VPg.kOLGdmfhbhzCbGM3-lv_j477mwb9Ffuxi5xhEac6Biqu1NvspELKPGjNRjoAzuv8LasLtq22lxzrgdd9C8Y4JQ4gHm7FuRqTL4rlby3Pb_N4mpVXMkT83a3Ob_0QWVHv7LiZghGDTwCWYxU4lMfZpfsqsm7PoQ7HfKUFvHwgUIbOGox3ZZgJtBE2t-TDkbegcktpcn6k2VqZZ0WYvQTad7oijs5WHJLfL7EYiUGb01udFqMaOLIPP1msztyo496GDNUgBSvsJcPfHE20dluqe5_KzaSoXzKxXdiW12DjKJk_XDNc14mf41U17h5HgMXg',
'docs/01_course-orientation/02_about-your-classmates/02_social-media_instructions.html':
'https://www.coursera.org/learn/3d-printing-revolution/supplement/1vga3/social-media',
'docs/03_module-2-why-is-it-revolutionary/03_the-3d-printing-revolution-facts-concepts/05_remixing-products-exercise_peer_assignment_instructions.html':
'https://www.coursera.org/learn/3d-printing-revolution/peer/FAZ0P/remixing-products-exercise',
'docs/03_module-2-why-is-it-revolutionary/01_module-2-information/01_module-2-overview_instructions.html':
'https://www.coursera.org/learn/3d-printing-revolution/supplement/UwuNp/module-2-overview',
'docs/04_course-conclusion/01_course-wrap-up-whats-next/02_congratulations_instructions.html':
'https://www.coursera.org/learn/3d-printing-revolution/supplement/vISgA/congratulations',
'docs/05_Resources/03_3d-printing-services-and-products/01__resources.html':
'https://www.coursera.org/learn/3d-printing-revolution/resources/aPz3d',
'docs/05_Resources/04_3d-printing-community/01__resources.html':
'https://www.coursera.org/learn/3d-printing-revolution/resources/lfbM9',
'docs/05_Resources/02_3d-printing-softwares/01__resources.html':
'https://www.coursera.org/learn/3d-printing-revolution/resources/ltlHt',
'docs/05_Resources/01_books-articles/01__resources.html':
'https://www.coursera.org/learn/3d-printing-revolution/resources/FH3x3',
'docs/05_Resources/05_explore-the-imba/01__resources.html':
'https://www.coursera.org/learn/3d-printing-revolution/resources/0AejF',
'docs/02_module-1-what-is-3d-printing/01_module-1-overview/01_module-1-overview_instructions.html':
'https://www.coursera.org/learn/3d-printing-revolution/supplement/HZXB5/module-1-overview',
'docs/02_module-1-what-is-3d-printing/02_3d-printing-insights/07_what-would-you-make-exercise_peer_assignment_instructions.html':
'https://www.coursera.org/learn/3d-printing-revolution/peer/t8bqq/what-would-you-make-exercise'}
# ---------------------------------------------------------------------------
# Module-level configuration and shared state used by the functions below.
# ---------------------------------------------------------------------------

# Install an in-memory cache object as langchain's module-level LLM cache.
langchain.llm_cache = InMemoryCache()
# NOTE(review): a `global` statement at module scope has no effect, and
# `model_name` is never assigned anywhere in the visible code.
global model_name
# Display names of the selectable models; these are the strings compared
# against in set_model(), set_embeddings() and get_file_path().
models = ["GPT-3.5", "Flan UL2", "GPT-4", "Flan T5"]
# File-name suffixes and folder used by get_file_path() to build the paths
# of the persisted search index ("models/openai_vs.pkl", "models/hf_vs.index", ...).
pickle_file = "_vs.pkl"
index_file = "_vs.index"
models_folder = "models/"
# Default chat model and embeddings; set_model() / set_embeddings() rebind these globals.
llm = ChatOpenAI(model_name="gpt-4", temperature=0.1)
embeddings = OpenAIEmbeddings(model='text-embedding-ada-002')
# Conversation turns passed to the chain by generate_answer() as (question, answer) tuples.
chat_history = []
# NOTE(review): this memory object is not referenced by any function visible in this file.
memory = ConversationBufferWindowMemory(memory_key="chat_history", k=10)
# Search index currently in use; populated by get_search_index().
vectorstore_index = None
# System prompt for the answer-generation step; "{context}" is a template placeholder.
system_template = """You are Coursera QA Bot. Have a conversation with a human, answering the following questions as best you can.
You are a teaching assistant for a Coursera Course: The 3D Printing Revolution and can answer any question about that using vectorstore or context.
Use the following pieces of context to answer the users question.
----------------
{context}"""
# Chat prompt = system message (with context) followed by the user's question.
messages = [
SystemMessagePromptTemplate.from_template(system_template),
HumanMessagePromptTemplate.from_template("{question}"),
]
# Passed to the chain in get_qa_chain() via combine_docs_chain_kwargs.
CHAT_PROMPT = ChatPromptTemplate.from_messages(messages)
def set_model_and_embeddings(model):
    """Switch the active LLM to ``model`` and start a fresh conversation.

    Despite the name, only the model is switched: the call that would also
    switch the embeddings (``set_embeddings(model)``) is disabled.
    """
    global chat_history

    set_model(model)
    # Turns recorded with the previous model are dropped once the switch succeeded.
    chat_history = list()
def set_model(model):
    """Rebind the module-level ``llm`` to the backend selected by ``model``.

    Args:
        model: Display name of the model: "GPT-3.5", "GPT-4", "Flan UL2" or
            "Flan T5". Any other value falls back to GPT-3.5.
    """
    global llm
    print("Setting model to " + str(model))
    if model == "GPT-3.5":
        print("Loading GPT-3.5")
        llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.1)
    elif model == "GPT-4":
        print("Loading GPT-4")
        llm = ChatOpenAI(model_name="gpt-4", temperature=0.1)
    elif model == "Flan UL2":
        print("Loading Flan-UL2")
        llm = HuggingFaceHub(
            repo_id="google/flan-ul2",
            model_kwargs={"temperature": 0.1, "max_new_tokens": 500},
        )
    elif model == "Flan T5":
        print("Loading Flan T5")
        llm = HuggingFaceHub(
            repo_id="google/flan-t5-base",
            model_kwargs={"temperature": 0.1},
        )
    else:
        # Unknown name: fall back to GPT-3.5, as the log message announces.
        # The previous fallback passed "text-davinci-002" (a completions-only
        # model) to the chat wrapper, which does not match the message and is
        # not a chat model.
        print("Loading GPT-3.5 from else")
        llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.1)
def set_embeddings(model):
    """Rebind the module-level ``embeddings`` to match the given model family.

    OpenAI models get OpenAI embeddings, the Flan models get Hugging Face Hub
    embeddings; any other value leaves ``embeddings`` unchanged.
    """
    global embeddings

    if model in ("GPT-3.5", "GPT-4"):
        print("Loading OpenAI embeddings")
        embeddings = OpenAIEmbeddings(model='text-embedding-ada-002')
    elif model in ("Flan UL2", "Flan T5"):
        print("Loading Hugging Face embeddings")
        embeddings = HuggingFaceHubEmbeddings(repo_id="sentence-transformers/all-MiniLM-L6-v2")
def get_search_index(model):
    """Return the search index for ``model``, loading it from disk when possible.

    The persisted copy is used only when both the pickle and the index file
    exist and the pickle is non-empty; otherwise the index is rebuilt via
    ``create_index``. The result is also stored in the module-level
    ``vectorstore_index``.
    """
    global vectorstore_index

    pickle_path = get_file_path(model, pickle_file)
    index_path = get_file_path(model, index_file)
    has_saved_index = (
        os.path.isfile(pickle_path)
        and os.path.isfile(index_path)
        and os.path.getsize(pickle_path) > 0
    )

    if not has_saved_index:
        search_index = create_index(model)
        print("Created index")
    else:
        # The pickle holds the whole search index object written by create_index().
        with open(pickle_path, "rb") as saved:
            search_index = pickle.load(saved)
        print("Loaded index")

    vectorstore_index = search_index
    return search_index
def create_index(model):
    """Build the search index from the documents and persist it to disk.

    The raw FAISS index is written to the ``index_file`` path and the whole
    search index object is pickled to the ``pickle_file`` path, both resolved
    through ``get_file_path`` for ``model``.

    Args:
        model: Display name of the model; selects the on-disk file names.

    Returns:
        The newly created search index.
    """
    source_chunks = create_chunk_documents()
    search_index = search_index_from_docs(source_chunks)

    index_path = get_file_path(model, index_file)
    pickle_path = get_file_path(model, pickle_file)

    # Writing fails if the target folder is missing (e.g. on a fresh
    # checkout), so create it before persisting anything.
    for path in (index_path, pickle_path):
        folder = os.path.dirname(path)
        if folder:
            os.makedirs(folder, exist_ok=True)

    faiss.write_index(search_index.index, index_path)
    # Save index to pickle file
    with open(pickle_path, "wb") as f:
        pickle.dump(search_index, f)
    return search_index
def get_file_path(model, file):
    """Return the path of a persisted index file for ``model``.

    The path is ``models_folder`` + provider prefix + ``file``, where the
    prefix is "openai" for the GPT models and "hf" for everything else.
    """
    provider = "openai" if model in ("GPT-3.5", "GPT-4") else "hf"
    return models_folder + provider + file
def search_index_from_docs(source_chunks):
    """Build a FAISS vector store from ``source_chunks`` using the module-level ``embeddings``."""
    return FAISS.from_documents(source_chunks, embeddings)
def get_html_files():
    """Load every ``.html`` file found recursively under ``docs`` and return the documents."""
    html_loader = DirectoryLoader(
        'docs',
        glob="**/*.html",
        loader_cls=UnstructuredHTMLLoader,
        recursive=True,
    )
    return html_loader.load()
def fetch_data_for_embeddings():
    """Load all text and HTML documents and tag each one with its public URL.

    The URL is looked up in ``file_url_mapping`` by the document's "source"
    metadata and stored under the "url" metadata key (``None`` when the
    source has no entry in the mapping).
    """
    documents = get_text_files()
    documents.extend(get_html_files())

    for doc in documents:
        source_path = doc.metadata["source"]
        doc.metadata["url"] = file_url_mapping.get(source_path)

    print("document list: " + str(len(documents)))
    return documents
def get_text_files():
    """Load every ``.txt`` file found recursively under ``docs`` and return the documents."""
    text_loader = DirectoryLoader(
        'docs',
        glob="**/*.txt",
        loader_cls=TextLoader,
        recursive=True,
    )
    return text_loader.load()
def create_chunk_documents():
    """Load the documents and split them into chunks for embedding.

    Splitting is done on single spaces with a chunk size of 800 and no overlap.
    """
    documents = fetch_data_for_embeddings()
    chunker = CharacterTextSplitter(separator=" ", chunk_size=800, chunk_overlap=0)
    chunks = chunker.split_documents(documents)
    print("chunks: " + str(len(chunks)))
    return chunks
def get_qa_chain(vectorstore_index):
    """Build a conversational retrieval chain on top of ``vectorstore_index``.

    The chain uses the module-level ``llm``, retrieves with a similarity
    score threshold of 0.7, returns its source documents, and formats prior
    turns with ``get_chat_history``.
    """
    print(llm)

    search_options = {"score_threshold": .7}
    retriever = vectorstore_index.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs=search_options,
    )
    return ConversationalRetrievalChain.from_llm(
        llm,
        retriever,
        return_source_documents=True,
        verbose=True,
        get_chat_history=get_chat_history,
        combine_docs_chain_kwargs={"prompt": CHAT_PROMPT},
    )
def get_chat_history(inputs) -> str:
    """Render ``(human, ai)`` turn pairs as one transcript string.

    Each pair becomes two lines, ``Human:<text>`` and ``AI:<text>``; the
    pairs are joined with newlines. An empty history yields an empty string.
    """
    return "\n".join(f"Human:{human}\nAI:{ai}" for human, ai in inputs)
def generate_answer(question) -> str:
    """Answer ``question`` with the retrieval chain and append the source URLs.

    The chain is built from the module-level ``vectorstore_index`` and called
    with the current ``chat_history``; afterwards ``chat_history`` is replaced
    by the exchange that just happened.

    Args:
        question: The user's question text.

    Returns:
        The chain's answer, followed by a "SOURCES: " section listing the
        distinct URLs of the retrieved documents in retrieval order.
    """
    global chat_history

    chain = get_qa_chain(vectorstore_index)
    result = chain(
        {"question": question, "chat_history": chat_history, "vectordbkwargs": {"search_distance": 0.6}})
    # NOTE(review): this keeps only the latest exchange rather than appending
    # to the history -- confirm that a one-turn memory is intended.
    chat_history = [(question, result["answer"])]

    print(result)
    sources = []
    for document in result['source_documents']:
        # The URL is None (or absent) for documents whose source path has no
        # entry in file_url_mapping; concatenating it would raise TypeError.
        url = document.metadata.get('url')
        if url:
            sources.append("\n" + url)
    print(sources)

    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # listing is deterministic (a set has arbitrary iteration order).
    source = ',\n'.join(dict.fromkeys(sources))
    return result['answer'] + '\nSOURCES: ' + source