Spaces:

LLMproj1
/

mypersona2

Paused

LLMproj1 committed on May 28, 2024

Commit

c138012

verified ·

1 Parent(s): dc9cf2c

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,12 +1,12 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 def respond(
     message,
     history: list[tuple[str, str]],
@@ -27,7 +27,7 @@ def respond(
     response = ""
-    for message in client.chat_completion(
         messages,
         max_tokens=max_tokens,
         stream=True,

 import gradio as gr
 from huggingface_hub import InferenceClient
+from peft import PeftModel, PeftConfig
+from transformers import AutoModelForCausalLM
+config = PeftConfig.from_pretrained("LLMproj1/mypersona-llama3-8b")
+base_model = AutoModelForCausalLM.from_pretrained("unsloth/llama-3-8b-bnb-4bit")
+model = PeftModel.from_pretrained(base_model, "LLMproj1/mypersona-llama3-8b")
 def respond(
     message,
     history: list[tuple[str, str]],
     response = ""
+    for message in model.generate(
         messages,
         max_tokens=max_tokens,
         stream=True,