robitalhazmi committed on
Commit
8728fc3
·
1 Parent(s): 64cbc9f

Update Streamlit app

Browse files
Files changed (3) hide show
  1. .env +1 -0
  2. app.py +74 -2
  3. rag_notebook.ipynb +0 -2
.env ADDED
@@ -0,0 +1 @@
 
 
1
+ COHERE_API_KEY=<redacted>
app.py CHANGED
@@ -1,4 +1,76 @@
1
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- x = st.slider('Select a value')
4
- st.write(x, 'squared is', x * x)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ import bs4
3
+ from langchain_community.document_loaders import WebBaseLoader
4
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
5
+ from langchain.vectorstores import FAISS
6
+ from langchain.embeddings import HuggingFaceEmbeddings
7
+ from langchain_cohere import ChatCohere
8
+ from langchain_core.output_parsers import StrOutputParser
9
+ from langchain_core.runnables import RunnablePassthrough
10
+ from langchain_core.prompts import PromptTemplate
11
+ from dotenv import load_dotenv
12
+ load_dotenv('.env')
13
 
14
+ st.header("MKOM UGM RAG App")
15
+
16
+ # Only keep post title, headers, and content from the full HTML.
17
+ bs4_strainer = bs4.SoupStrainer(class_=("post-title", "post-header", "post-content"))
18
+ loader = WebBaseLoader(
19
+ web_paths=(
20
+ "https://um.ugm.ac.id/ragam-seleksi-pascasarjana/",
21
+ "https://um.ugm.ac.id/persyaratan-pendaftaran-magister/",
22
+ "https://um.ugm.ac.id/persyaratan-pendaftaran-program-spesialis/",
23
+ "https://um.ugm.ac.id/persyaratan-pendaftaran-subspesialis/",
24
+ "https://um.ugm.ac.id/persyaratan-pendaftaran-doktor/",
25
+ "https://um.ugm.ac.id/prosedur-pendaftaran-magister/",
26
+ "https://um.ugm.ac.id/prosedur-pendaftaran-program-spesialis/",
27
+ "https://um.ugm.ac.id/prosedur-pendaftaran-program-subspesialis/",
28
+ "https://um.ugm.ac.id/prosedur-pendaftaran-doktor-2/",
29
+ "https://um.ugm.ac.id/program-studi-program-magister-2/",
30
+ "https://um.ugm.ac.id/program-studi-dan-daya-tampung-program-spesialis/",
31
+ "https://um.ugm.ac.id/program-studi-program-doktor/",
32
+ "https://um.ugm.ac.id/jadwal-seleksi-magister-dan-doktor/",
33
+ "https://um.ugm.ac.id/jadwal-kegiatan-seleksi-program-spesialis/",
34
+ "https://mkom.ugm.ac.id/alur-pendaftaran-magister/",
35
+ "https://mkom.ugm.ac.id/informasi-pendaftaran-program-pra-s2-ilmu-komputer/",
36
+ "https://mkom.ugm.ac.id/informasi-pendaftaran-program-s2-magister/",
37
+ "https://mkom.ugm.ac.id/program-dual-degree-double-degree-magister-ilmu-komputer/",
38
+ "https://mkom.ugm.ac.id/informasi-pendaftaran-program-s3-doktor/"
39
+ ),
40
+ bs_kwargs={"parse_only": bs4_strainer},
41
+ )
42
+ docs = loader.load()
43
+ text_splitter = RecursiveCharacterTextSplitter(
44
+ chunk_size=1000, chunk_overlap=200, add_start_index=True
45
+ )
46
+ all_splits = text_splitter.split_documents(docs)
47
+ vectorstore = FAISS.from_documents(all_splits, HuggingFaceEmbeddings(model_name='firqaaa/indo-sentence-bert-base'))
48
+ retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})
49
+ llm = ChatCohere(model="command-r")
50
+
51
+ def format_docs(docs):
52
+ return "\n\n".join(doc.page_content for doc in docs)
53
+
54
+ template = """Gunakan konteks berikut untuk menjawab pertanyaan pada bagian akhir.
55
+ Jika kamu tidak tahu jawabannya, katakan saja bahwa kamu tidak tahu, jangan mencoba untuk mengarang jawaban.
56
+ Selalu katakan "Terima kasih sudah bertanya!" pada setiap akhir jawaban.
57
+
58
+ {context}
59
+
60
+ Pertanyaan: {question}
61
+
62
+ Jawaban:"""
63
+
64
+ custom_rag_prompt = PromptTemplate.from_template(template)
65
+
66
+ rag_chain = (
67
+ {"context": retriever | format_docs, "question": RunnablePassthrough()}
68
+ | custom_rag_prompt
69
+ | llm
70
+ | StrOutputParser()
71
+ )
72
+
73
+ question = st.text_input("Tanya ujian masuk Pascasarjana Universitas Gadjah Mada")
74
+ if question:
75
+ response = rag_chain.invoke(question)
76
+ st.write(response)
rag_notebook.ipynb CHANGED
@@ -162,11 +162,9 @@
162
  }
163
  ],
164
  "source": [
165
- "from langchain_chroma import Chroma\n",
166
  "from langchain.vectorstores import FAISS\n",
167
  "from langchain.embeddings import HuggingFaceEmbeddings\n",
168
  "\n",
169
- "# vectorstore = Chroma.from_documents(documents=all_splits, embedding=HuggingFaceEmbeddings(model_name='firqaaa/indo-sentence-bert-base'))\n",
170
  "vectorstore = FAISS.from_documents(all_splits, HuggingFaceEmbeddings(model_name='firqaaa/indo-sentence-bert-base'))"
171
  ]
172
  },
 
162
  }
163
  ],
164
  "source": [
 
165
  "from langchain.vectorstores import FAISS\n",
166
  "from langchain.embeddings import HuggingFaceEmbeddings\n",
167
  "\n",
 
168
  "vectorstore = FAISS.from_documents(all_splits, HuggingFaceEmbeddings(model_name='firqaaa/indo-sentence-bert-base'))"
169
  ]
170
  },