FauziIsyrinApridal commited on
Commit
82031c8
·
1 Parent(s): 498342f

perbaiki placeholder input dan hapus chunking

Browse files
Files changed (2) hide show
  1. app/chat.py +1 -1
  2. app/document_processor.py +18 -10
app/chat.py CHANGED
@@ -50,7 +50,7 @@ def display_chat_history(chain):
50
 
51
  # Input teks biasa
52
  user_input_obj = st.chat_input(
53
- "Masukkan pertanyaan atau Tekan tombol mic untuk berbicara!",
54
  key="chat_input_field"
55
  )
56
 
 
50
 
51
  # Input teks biasa
52
  user_input_obj = st.chat_input(
53
+ "Masukkan pertanyaan",
54
  key="chat_input_field"
55
  )
56
 
app/document_processor.py CHANGED
@@ -3,8 +3,9 @@ from langchain_huggingface import HuggingFaceEmbeddings
3
  from langchain_community.vectorstores import FAISS
4
  import os
5
  import tempfile
6
- import zipfile
7
  import streamlit as st
 
 
8
 
9
  def save_vector_store_to_supabase(vector_store, supabase, bucket_name, file_prefix="vector_store"):
10
  """Save vector store to Supabase storage as separate files."""
@@ -94,18 +95,25 @@ def load_vector_store_from_supabase(supabase, bucket_name, file_prefix="vector_s
94
  st.error(f"Error loading from Supabase: {e}")
95
  return None
96
 
 
97
def process_documents(docs):
    """Embed *docs* and build a FAISS vector store from overlapping chunks.

    The documents are split into chunks of 1500 characters with a 300
    character overlap before being embedded with an Indonesian E5 model
    (CPU, normalized embeddings).
    """
    embeddings = HuggingFaceEmbeddings(
        model_name="LazarusNLP/all-indo-e5-small-v4",
        model_kwargs={"device": "cpu"},
        encode_kwargs={"normalize_embeddings": True},
    )

    splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=300)
    chunks = splitter.split_documents(docs)

    return FAISS.from_documents(chunks, embeddings)
 
 
 
 
 
 
 
3
  from langchain_community.vectorstores import FAISS
4
  import os
5
  import tempfile
 
6
  import streamlit as st
7
+ from langchain.schema import Document
8
+
9
 
10
  def save_vector_store_to_supabase(vector_store, supabase, bucket_name, file_prefix="vector_store"):
11
  """Save vector store to Supabase storage as separate files."""
 
95
  st.error(f"Error loading from Supabase: {e}")
96
  return None
97
 
98
+
99
def process_documents(docs):
    """Build a FAISS vector store from *docs* without any text splitting.

    Each item in ``docs`` may be a langchain ``Document`` or a bare string.
    Strings are wrapped in a ``Document`` with empty metadata; existing
    ``Document`` objects are passed through unchanged (the original code
    re-wrapped every Document into an identical copy, which was needless).
    Every document is embedded whole — no chunking — with an Indonesian E5
    model on CPU, using normalized embeddings.

    Returns the populated ``FAISS`` vector store.
    """
    embeddings = HuggingFaceEmbeddings(
        model_name="LazarusNLP/all-indo-e5-small-v4",
        model_kwargs={"device": "cpu"},
        encode_kwargs={"normalize_embeddings": True},
    )

    # Use page_content directly without splitting; only bare strings need
    # wrapping so FAISS.from_documents always receives Document instances.
    combined_docs = [
        doc if hasattr(doc, "page_content")
        else Document(page_content=doc, metadata={})
        for doc in docs
    ]

    return FAISS.from_documents(combined_docs, embeddings)