hewoo committed on
Commit
1dc17cb
·
verified ·
1 Parent(s): 0d5c3f2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -36
app.py CHANGED
@@ -2,68 +2,73 @@ import streamlit as st
2
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
3
  from sentence_transformers import SentenceTransformer
4
  from langchain.vectorstores import Chroma
5
- import os
 
6
 
7
- # Hugging Face 모델 ID
8
- model_id = "hewoo/hehehehe" # 업로드한 모델의 repo_id
9
- token = os.getenv("HF_API_TOKEN") # Hugging Face API 토큰 (필요 시 설정)
10
 
11
- # ๋ชจ๋ธ๊ณผ ํ† ํฌ๋‚˜์ด์ € ๋กœ๋“œ
12
- tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=token)
13
- model = AutoModelForCausalLM.from_pretrained(model_id, use_auth_token=token)
 
14
 
15
- # 텍스트 생성 파이프라인 설정
16
- pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=150, temperature=0.5, top_p=0.85, top_k=40, repetition_penalty=1.2)
 
 
 
 
17
 
18
-
19
-
20
- # 사용자 정의 임베딩 클래스 생성
21
  class CustomEmbedding:
22
  def __init__(self, model):
23
  self.model = model
24
 
25
  def embed_query(self, text):
26
- return self.model.encode(text, convert_to_tensor=False).tolist()
27
 
28
  def embed_documents(self, texts):
29
- return [self.model.encode(text, convert_to_tensor=False).tolist() for text in texts]
30
-
31
- # 임베딩 모델 설정
32
- embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
33
- embedding_function = CustomEmbedding(embedding_model)
34
 
35
- # Chroma 벡터 스토어 설정
36
- persist_directory = "./chroma_batch_vectors"
37
- vectorstore = Chroma(persist_directory=persist_directory, embedding_function=embedding_function)
 
38
 
39
- # 검색 기능 설정
40
- retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
 
 
41
 
42
-
43
- # 검색 결과 요약 함수
44
- def summarize_results(search_results):
45
- combined_text = "\n".join([result.page_content for result in search_results])
46
- summary = summarizer(combined_text, max_length=100, min_length=30, do_sample=False)[0]["summary_text"]
47
- return summary
48
-
49
- # ๊ฒ€์ƒ‰ ๋ฐ ์‘๋‹ต ์ƒ์„ฑ ํ•จ์ˆ˜
50
  def generate_response(user_input):
51
- # ๊ฒ€์ƒ‰ ๋ฐ ๋งฅ๋ฝ ์ƒ์„ฑ
52
  search_results = retriever.get_relevant_documents(user_input)
53
  context = "\n".join([result.page_content for result in search_results])
54
-
55
- # ๋ชจ๋ธ์— ๋งฅ๋ฝ๊ณผ ์งˆ๋ฌธ ์ „๋‹ฌ
56
  input_text = f"๋งฅ๋ฝ: {context}\n์งˆ๋ฌธ: {user_input}"
57
  response = pipe(input_text)[0]["generated_text"]
58
-
59
  return response
60
 
 
 
 
 
 
61
  # Streamlit 앱 UI
62
- st.title("์ฑ—๋ด‡ํ…Œ์ŠคํŠธ")
63
  st.write("Llama 3.2-3B ๋ชจ๋ธ์„ ์‚ฌ์šฉํ•œ ์ฑ—๋ด‡์ž…๋‹ˆ๋‹ค. ์งˆ๋ฌธ์„ ์ž…๋ ฅํ•ด ์ฃผ์„ธ์š”.")
64
 
 
 
65
  # ์‚ฌ์šฉ์ž ์ž…๋ ฅ ๋ฐ›๊ธฐ
66
  user_input = st.text_input("์งˆ๋ฌธ")
67
  if user_input:
68
  response = generate_response(user_input)
69
  st.write("์ฑ—๋ด‡ ์‘๋‹ต:", response)
 
 
 
 
 
 
 
2
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
3
  from sentence_transformers import SentenceTransformer
4
  from langchain.vectorstores import Chroma
5
+ import gc
6
+ import psutil
7
 
8
+ # ๋ชจ๋ธ ID (๊ณต๊ฐœ๋œ ๋ชจ๋ธ์ด์–ด์•ผ ํ•จ)
9
+ model_id = "hewoo/hehehehe"
 
10
 
11
+ # 메모리 모니터링 함수
12
+ def monitor_memory():
13
+ memory_info = psutil.virtual_memory()
14
+ st.write(f"ํ˜„์žฌ ๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ๋Ÿ‰: {memory_info.percent}%")
15
 
16
+ # ์บ์‹œ๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ๋ชจ๋ธ ๋ฐ ํŒŒ์ดํ”„๋ผ์ธ ๋กœ๋“œ
17
+ @st.cache_resource
18
+ def load_model():
19
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
20
+ model = AutoModelForCausalLM.from_pretrained(model_id)
21
+ return pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=150, temperature=0.5, top_p=0.85, top_k=40, repetition_penalty=1.2)
22
 
23
+ # 사용자 정의 임베딩 클래스
 
 
24
  class CustomEmbedding:
25
  def __init__(self, model):
26
  self.model = model
27
 
28
  def embed_query(self, text):
29
+ return self.model.encode(text, convert_to_tensor=True).tolist()
30
 
31
  def embed_documents(self, texts):
32
+ return [self.model.encode(text, convert_to_tensor=True).tolist() for text in texts]
 
 
 
 
33
 
34
+ # 임베딩 모델 및 벡터 스토어 설정
35
+ @st.cache_resource
36
+ def load_embedding_model():
37
+ return SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
38
 
39
+ @st.cache_resource
40
+ def load_vectorstore(embedding_model):
41
+ embedding_function = CustomEmbedding(embedding_model)
42
+ return Chroma(persist_directory="./chroma_batch_vectors", embedding_function=embedding_function)
43
 
44
+ # 질문에 대한 응답 생성 함수
 
 
 
 
 
 
 
45
  def generate_response(user_input):
46
+ retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
47
  search_results = retriever.get_relevant_documents(user_input)
48
  context = "\n".join([result.page_content for result in search_results])
 
 
49
  input_text = f"๋งฅ๋ฝ: {context}\n์งˆ๋ฌธ: {user_input}"
50
  response = pipe(input_text)[0]["generated_text"]
 
51
  return response
52
 
53
+ # ๋ชจ๋ธ ๋ฐ ์ž„๋ฒ ๋”ฉ ๋ชจ๋ธ ๋กœ๋“œ
54
+ pipe = load_model()
55
+ embedding_model = load_embedding_model()
56
+ vectorstore = load_vectorstore(embedding_model)
57
+
58
  # Streamlit 앱 UI
59
+ st.title("์ฑ—๋ด‡ ๋ฐ๋ชจ")
60
  st.write("Llama 3.2-3B ๋ชจ๋ธ์„ ์‚ฌ์šฉํ•œ ์ฑ—๋ด‡์ž…๋‹ˆ๋‹ค. ์งˆ๋ฌธ์„ ์ž…๋ ฅํ•ด ์ฃผ์„ธ์š”.")
61
 
62
+ monitor_memory() # 메모리 사용량 확인
63
+
64
  # ์‚ฌ์šฉ์ž ์ž…๋ ฅ ๋ฐ›๊ธฐ
65
  user_input = st.text_input("์งˆ๋ฌธ")
66
  if user_input:
67
  response = generate_response(user_input)
68
  st.write("์ฑ—๋ด‡ ์‘๋‹ต:", response)
69
+ monitor_memory() # ๋ฉ”๋ชจ๋ฆฌ ์ƒํƒœ ์—…๋ฐ์ดํŠธ
70
+
71
+ # 메모리 해제
72
+ del response
73
+ gc.collect()
74
+