ldhldh committed
Commit 2caab98 · 1 Parent(s): bef4b66

Update app.py

Files changed (1)
  1. app.py +24 -0
app.py CHANGED
@@ -1,4 +1,5 @@
 from threading import Thread
+from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
 import torch
 import gradio as gr
@@ -14,11 +15,26 @@ from langchain.chains import RetrievalQA
 from langchain.vectorstores import FAISS
 from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.prompts import PromptTemplate
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.document_loaders import PyPDFLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.vectorstores import FAISS
+from langchain.embeddings import HuggingFaceEmbeddings

 torch_device = "cuda" if torch.cuda.is_available() else "cpu"
 print("Running on device:", torch_device)
 print("CPU threads:", torch.get_num_threads())

+loader = PyPDFLoader("total.pdf")
+pages = loader.load()
+
+# Load the document and split its text into fixed-size chunks
+text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+texts = text_splitter.split_documents(pages)
+
+print(f"The document was split into {len(texts)} chunks.")
+
+hf_hub_download(repo_id='StarFox7/Llama-2-ko-7B-chat-gguf', filename='Llama-2-ko-7B-chat-gguf-q4_0.bin', local_dir='./')

 llm = LlamaCpp(
     model_path='Llama-2-ko-7B-chat-gguf-q4_0.bin',
@@ -34,6 +50,14 @@ llm = LlamaCpp(
 # Load the embedding model
 embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large")

+# Embed the document texts and build a FAISS index from them
+index = FAISS.from_documents(
+    documents=texts,
+    embedding=embeddings,
+)
+
+# Save the index locally as faiss_db
+index.save_local("faiss_db")
 # Load faiss_db back from local storage
 docsearch = FAISS.load_local("faiss_db", embeddings)
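The commit saves the FAISS index and immediately reloads it from disk, presumably so the downstream retrieval code can keep reading from faiss_db unchanged. That downstream wiring is not part of this diff; the sketch below shows one plausible way the reloaded index, the already-imported RetrievalQA and PromptTemplate, and the LlamaCpp model could be combined. The prompt wording, n_ctx, k, and the sample question are illustrative assumptions, not values taken from app.py.

from langchain.llms import LlamaCpp
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

# Recreate the pieces this commit prepares: the embedding model, the saved index,
# and the quantized GGUF model downloaded via hf_hub_download.
embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large")
docsearch = FAISS.load_local("faiss_db", embeddings)
llm = LlamaCpp(model_path="Llama-2-ko-7B-chat-gguf-q4_0.bin", n_ctx=2048)  # n_ctx is an assumed value

# Illustrative prompt; the real template in app.py is not shown in this diff.
prompt = PromptTemplate(
    template=(
        "Answer the question using only the context below.\n\n"
        "Context:\n{context}\n\nQuestion: {question}\nAnswer:"
    ),
    input_variables=["context", "question"],
)

# Wire the FAISS retriever and the llama.cpp model into a RetrievalQA chain.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=docsearch.as_retriever(search_kwargs={"k": 3}),  # k is an assumed value
    chain_type_kwargs={"prompt": prompt},
)

print(qa.run("What is the document about?"))  # illustrative question

Since gradio is imported at the top of app.py, a call like qa.run is likely what the chat interface wraps, but that part of the file is untouched by this commit.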