# mkom_ugm_rag / app.py
# Last commit: "Update Streamlit app" (8728fc3) by robitalhazmi
import streamlit as st
import bs4
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_cohere import ChatCohere
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import PromptTemplate
from dotenv import load_dotenv
# Load environment variables from the local .env file
# (presumably the Cohere API key used by ChatCohere -- confirm).
load_dotenv('.env')
st.header("MKOM UGM RAG App")

# Admission-related pages on um.ugm.ac.id and mkom.ugm.ac.id that make up the
# knowledge base for retrieval.
SOURCE_URLS = (
    "https://um.ugm.ac.id/ragam-seleksi-pascasarjana/",
    "https://um.ugm.ac.id/persyaratan-pendaftaran-magister/",
    "https://um.ugm.ac.id/persyaratan-pendaftaran-program-spesialis/",
    "https://um.ugm.ac.id/persyaratan-pendaftaran-subspesialis/",
    "https://um.ugm.ac.id/persyaratan-pendaftaran-doktor/",
    "https://um.ugm.ac.id/prosedur-pendaftaran-magister/",
    "https://um.ugm.ac.id/prosedur-pendaftaran-program-spesialis/",
    "https://um.ugm.ac.id/prosedur-pendaftaran-program-subspesialis/",
    "https://um.ugm.ac.id/prosedur-pendaftaran-doktor-2/",
    "https://um.ugm.ac.id/program-studi-program-magister-2/",
    "https://um.ugm.ac.id/program-studi-dan-daya-tampung-program-spesialis/",
    "https://um.ugm.ac.id/program-studi-program-doktor/",
    "https://um.ugm.ac.id/jadwal-seleksi-magister-dan-doktor/",
    "https://um.ugm.ac.id/jadwal-kegiatan-seleksi-program-spesialis/",
    "https://mkom.ugm.ac.id/alur-pendaftaran-magister/",
    "https://mkom.ugm.ac.id/informasi-pendaftaran-program-pra-s2-ilmu-komputer/",
    "https://mkom.ugm.ac.id/informasi-pendaftaran-program-s2-magister/",
    "https://mkom.ugm.ac.id/program-dual-degree-double-degree-magister-ilmu-komputer/",
    "https://mkom.ugm.ac.id/informasi-pendaftaran-program-s3-doktor/",
)


@st.cache_resource
def build_vectorstore():
    """Scrape the admission pages and index their chunks in a FAISS store.

    Streamlit re-executes this script from top to bottom on every widget
    interaction. Without caching, each question would re-download every page
    in ``SOURCE_URLS`` and re-embed all chunks, so the result is cached with
    ``st.cache_resource`` and built only once per server process.

    Returns:
        The FAISS vector store built from the split documents.
    """
    # Only keep post title, headers, and content from the full HTML.
    bs4_strainer = bs4.SoupStrainer(
        class_=("post-title", "post-header", "post-content")
    )
    loader = WebBaseLoader(
        web_paths=SOURCE_URLS,
        bs_kwargs={"parse_only": bs4_strainer},
    )
    docs = loader.load()

    # Overlapping chunks so text cut at a chunk boundary still appears whole
    # in a neighbouring chunk; add_start_index records each chunk's offset.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=200, add_start_index=True
    )
    all_splits = text_splitter.split_documents(docs)

    embeddings = HuggingFaceEmbeddings(model_name='firqaaa/indo-sentence-bert-base')
    return FAISS.from_documents(all_splits, embeddings)


vectorstore = build_vectorstore()
# Return the 6 most similar chunks for each query.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})
llm = ChatCohere(model="command-r")
def format_docs(docs) -> str:
    """Join the text of the given documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string
            attribute (here, the documents returned by the retriever).

    Returns:
        The ``page_content`` values separated by a blank line; an empty
        string when ``docs`` is empty.
    """
    return "\n\n".join(doc.page_content for doc in docs)
# Indonesian prompt: answer from the supplied context, admit when the answer
# is unknown instead of inventing one, and end every answer with a thank-you.
template = """Gunakan konteks berikut untuk menjawab pertanyaan pada bagian akhir.
Jika kamu tidak tahu jawabannya, katakan saja bahwa kamu tidak tahu, jangan mencoba untuk mengarang jawaban.
Selalu katakan "Terima kasih sudah bertanya!" pada setiap akhir jawaban.
{context}
Pertanyaan: {question}
Jawaban:"""
custom_rag_prompt = PromptTemplate.from_template(template)

# Stage 1: retrieve chunks for the question and flatten them into plain text.
context_pipeline = retriever | format_docs
# Stage 2: feed the same input to both prompt variables -- the retrieved
# context and the untouched question.
prompt_inputs = {"context": context_pipeline, "question": RunnablePassthrough()}
# Stage 3: fill the prompt, call the chat model, and reduce the reply to a string.
rag_chain = prompt_inputs | custom_rag_prompt | llm | StrOutputParser()
question = st.text_input("Tanya ujian masuk Pascasarjana Universitas Gadjah Mada")
# The script reruns on every interaction; query the chain only once the user
# has typed something. Whitespace-only input is ignored so it does not trigger
# a pointless retrieval and LLM call.
if question and question.strip():
    response = rag_chain.invoke(question)
    st.write(response)