import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from sentence_transformers import SentenceTransformer
from langchain.vectorstores import Chroma
import gc
import psutil
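
# Streamlit RAG chatbot demo: retrieves context from a persisted Chroma vector
# store and answers with a Hugging Face text-generation pipeline.
# Run with: streamlit run app.py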

# ๋ชจ๋ธ ID (๊ณต๊ฐœ๋œ ๋ชจ๋ธ์ด์–ด์•ผ ํ•จ)
model_id = "hewoo/hehehehe"

# Memory-monitoring helper: reports system-wide RAM usage via psutil
def monitor_memory():
    memory_info = psutil.virtual_memory()
    st.write(f"ํ˜„์žฌ ๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ๋Ÿ‰: {memory_info.percent}%")

# ์บ์‹œ๋ฅผ ์‚ฌ์šฉํ•˜์—ฌ ๋ชจ๋ธ ๋ฐ ํŒŒ์ดํ”„๋ผ์ธ ๋กœ๋“œ
@st.cache_resource
def load_model():
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id)
    return pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=150, temperature=0.5, top_p=0.85, top_k=40, repetition_penalty=1.2)

# Custom embedding wrapper exposing the embed_query/embed_documents
# interface that LangChain's Chroma expects
class CustomEmbedding:
    def __init__(self, model):
        self.model = model

    def embed_query(self, text):
        return self.model.encode(text, convert_to_tensor=False).tolist()

    def embed_documents(self, texts):
        return [self.model.encode(text, convert_to_tensor=False).tolist() for text in texts]
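
# Quick sanity check for the wrapper (a sketch; not executed by the app):
#   emb = CustomEmbedding(SentenceTransformer("jhgan/ko-sroberta-multitask"))
#   emb.embed_query("test")  # -> list[float] (768-dim for this model)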

# Korean sentence-embedding model and vector store setup
@st.cache_resource
def load_embedding_model():
    return SentenceTransformer("jhgan/ko-sroberta-multitask")

@st.cache_resource
def load_vectorstore(_embedding_model):  # leading underscore: st.cache_resource skips hashing this argument
    embedding_function = CustomEmbedding(_embedding_model)
    return Chroma(persist_directory="./chroma_batch_vectors", embedding_function=embedding_function)
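
# Note: the app only reads from ./chroma_batch_vectors; the store must have
# been built beforehand with the same embedding model. A minimal indexing
# sketch (hypothetical `chunks` list, not part of this app):
#   chunks = ["..."]  # corpus, pre-split into text chunks
#   Chroma.from_texts(chunks,
#                     embedding=CustomEmbedding(load_embedding_model()),
#                     persist_directory="./chroma_batch_vectors")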

# Generate a response: retrieve the top-k relevant chunks, then prompt the LM with them
def generate_response(user_input):
    retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
    search_results = retriever.get_relevant_documents(user_input)
    context = "\n".join([result.page_content for result in search_results])

    prompt = f"""๋‹ค์Œ์€ ์‚ฌ์šฉ์ž์˜ ์งˆ๋ฌธ์— ๋Œ€ํ•œ ๋‹ต๋ณ€์„ ์ƒ์„ฑํ•˜๋Š” ํ•œ๊ตญ์–ด ์–ด์‹œ์Šคํ„ดํŠธ์ž…๋‹ˆ๋‹ค.
์‚ฌ์šฉ์ž์˜ ์งˆ๋ฌธ์— ๋Œ€ํ•ด ์ฃผ์–ด์ง„ ๋งฅ๋ฝ์„ ๊ธฐ๋ฐ˜์œผ๋กœ ์ •ํ™•ํ•˜๊ณ  ์ž์„ธํ•œ ๋‹ต๋ณ€์„ ํ•œ๊ตญ์–ด๋กœ ์ž‘์„ฑํ•˜์„ธ์š”.
๋งŒ์•ฝ ๋งฅ๋ฝ์— ๊ด€๋ จ ์ •๋ณด๊ฐ€ ์—†์œผ๋ฉด, "์ฃ„์†กํ•˜์ง€๋งŒ ํ•ด๋‹น ์งˆ๋ฌธ์— ๋Œ€ํ•œ ๋‹ต๋ณ€์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."๋ผ๊ณ  ๋‹ต๋ณ€ํ•˜์„ธ์š”.

๋งฅ๋ฝ:
{context}

์งˆ๋ฌธ:
{user_input}

๋‹ต๋ณ€:"""

    # The HF text-generation pipeline returns the prompt plus the completion
    # by default, so strip the prompt and keep only the generated answer.
    response = pipe(prompt)[0]["generated_text"]
    return response[len(prompt):].strip()

# Load the generation pipeline, embedding model, and vector store (all cached)
pipe = load_model()
embedding_model = load_embedding_model()
vectorstore = load_vectorstore(embedding_model)

# Streamlit app UI
st.title("Chatbot Demo")
st.write("A chatbot based on the Llama 3.2-3B model. Please enter a question.")

monitor_memory()  # show memory usage before handling input

# Read the user's question
user_input = st.text_input("Question")
if user_input:
    response = generate_response(user_input)
    st.write("์ฑ—๋ด‡ ์‘๋‹ต:", response)
    monitor_memory()  # ๋ฉ”๋ชจ๋ฆฌ ์ƒํƒœ ์—…๋ฐ์ดํŠธ

    # Free the response string and trigger garbage collection; the heavyweight
    # model objects stay alive in the st.cache_resource cache.
    del response
    gc.collect()