import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from sentence_transformers import SentenceTransformer
from langchain.vectorstores import Chroma
import os
import psutil
import time
# Hugging Face model ID
model_id = "hewoo/hehehehe"
# Load the model and tokenizer (no auth token needed)
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
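# Note: Streamlit re-runs this whole script on every user interaction, so the
# model above is reloaded each time. A minimal sketch of the usual fix,
# assuming Streamlit's st.cache_resource decorator (this helper is not in the
# original code), kept commented out:
#
#   @st.cache_resource
#   def load_model(model_id):
#       return (AutoTokenizer.from_pretrained(model_id),
#               AutoModelForCausalLM.from_pretrained(model_id))
#
#   tokenizer, model = load_model(model_id)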
# Set up the text-generation pipeline
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=150,      # cap on the length of the generated answer
    temperature=0.3,         # low temperature keeps answers focused
    top_p=0.85,
    top_k=40,
    repetition_penalty=1.2,  # discourage repeated phrases
)
# Custom embedding wrapper so the SentenceTransformer model can be used as a
# LangChain/Chroma embedding function
class CustomEmbedding:
    def __init__(self, model):
        self.model = model

    def embed_query(self, text):
        # Chroma expects plain Python lists, so convert the tensor
        return self.model.encode(text, convert_to_tensor=True).tolist()

    def embed_documents(self, texts):
        return [self.model.encode(text, convert_to_tensor=True).tolist() for text in texts]
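# Quick sanity check (a sketch with a hypothetical input, commented out so the
# app itself is unchanged): embed_query should return a flat list of floats
# that Chroma can store.
#
#   vec = CustomEmbedding(SentenceTransformer("jhgan/ko-sroberta-multitask")).embed_query("테스트")
#   assert isinstance(vec, list) and isinstance(vec[0], float)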
# Set up the embedding model and vector store
embedding_model = SentenceTransformer("jhgan/ko-sroberta-multitask")
embedding_function = CustomEmbedding(embedding_model)
# Configure the Chroma vector store (loads vectors persisted in this directory)
persist_directory = "./chroma_batch_vectors"
vectorstore = Chroma(persist_directory=persist_directory, embedding_function=embedding_function)
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
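# The retriever returns up to k=3 LangChain Document objects per query; only
# their page_content is used below. A minimal usage sketch (hypothetical
# query, commented out):
#
#   docs = retriever.get_relevant_documents("질문 예시")
#   print([d.page_content[:50] for d in docs])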
# Generate a response to the user's question
def generate_response(user_input):
    start_time = time.time()  # record start time
    # Retrieve documents and build the context
    search_results = retriever.get_relevant_documents(user_input)
    context = "\n".join([result.page_content for result in search_results])
    # Korean prompt: "Below is an assistant that answers only in Korean.
    # Write an accurate, detailed answer in Korean to the user's question,
    # based on the provided context."
    input_text = f"""아래는 한국어로만 답변하는 어시스턴트입니다.
사용자의 질문에 대해 제공된 맥락을 바탕으로 정확하고 상세한 답변을 한국어로 작성하세요.
맥락: {context}
질문: {user_input}
답변:"""
    # Generate the response; return_full_text=False strips the prompt from the
    # pipeline output so only the model's answer is returned
    response = pipe(input_text, return_full_text=False)[0]["generated_text"]
    end_time = time.time()  # record end time
    response_time = end_time - start_time  # compute response time
    # Monitor memory usage
    memory_info = psutil.virtual_memory()
    memory_usage = memory_info.percent  # system-wide memory usage (%)
    return response, response_time, memory_usage
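# Note: psutil.virtual_memory().percent measures memory for the whole machine,
# not this app alone. For the app's own footprint, one option (a sketch, not
# in the original code) is the process resident set size:
#
#   proc_mb = psutil.Process().memory_info().rss / 1024**2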
# Streamlit app UI
st.title("챗봇 데모")  # "Chatbot Demo"
st.write("Llama 3.2-3B 모델을 사용한 챗봇입니다. 질문을 입력해 주세요.")  # "A chatbot using the Llama 3.2-3B model. Please enter a question."
# Get user input
user_input = st.text_input("질문")  # "Question"
if user_input:
    response, response_time, memory_usage = generate_response(user_input)
    st.write("챗봇 응답:", response)  # "Chatbot response:"
    st.write(f"응답 시간: {response_time:.2f}초")  # "Response time: ... seconds"
    st.write(f"현재 메모리 사용량: {memory_usage}%")  # "Current memory usage: ...%"
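# To run the demo locally (assuming this file is saved as app.py):
#   streamlit run app.py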