Spaces:

LLMproj1
/

my_persona

Build error

App Files Files Community

LLMproj1 committed on May 28, 2024

Commit

81aaffe

verified ·

1 Parent(s): df25ad5

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

app.py +77 -0

app.py ADDED Viewed

	@@ -0,0 +1,77 @@

+# -*- coding: utf-8 -*-
+"""Untitled18.ipynb
+Automatically generated by Colab.
+Original file is located at
+    https://colab.research.google.com/drive/1_vTVH3hBX8wVXIgrW1T2Q4N1DSkWoXV8
+"""
+import gradio as gr
+import torch
+from transformers import TextStreamer
+from unsloth import FastLanguageModel
+from google.colab import drive
+import os
+# NOTE: no install step is performed here; the packages imported above are
+# expected to be installed already when this module is imported.
+# Parameters forwarded to FastLanguageModel.from_pretrained below.
+max_seq_length = 2048  # Choose any! We auto support RoPE Scaling internally!
+dtype = None  # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
+load_in_4bit = True  # Use 4bit quantization to reduce memory usage. Can be False.
+# Load the model and tokenizer at import time; both are module-level globals
+# that chat_alpaca reads on every request.
+model, tokenizer = FastLanguageModel.from_pretrained(
+    model_name="lora_model",  # YOUR MODEL YOU USED FOR TRAINING
+    max_seq_length=max_seq_length,
+    dtype=dtype,
+    load_in_4bit=load_in_4bit,
+)
+FastLanguageModel.for_inference(model)  # Enable native 2x faster inference
+# Alpaca-style prompt template with two positional `{}` slots: the first is
+# filled with the user's message and the second with the response text
+# (chat_alpaca passes an empty string so the model writes the response).
+alpaca_prompt = """
+### Input:
+{}
+### Response:
+{}"""
+# Define the function to generate responses
+def chat_alpaca(message: str, history: list, temperature: float, max_new_tokens: int) -> str:
+    prompt = alpaca_prompt.format(message, "")
+    inputs = tokenizer([prompt], return_tensors="pt").to("cuda")
+    # Define the streamer
+    text_streamer = TextStreamer(tokenizer)
+    # Generate the response
+    outputs = model.generate(**inputs, streamer=text_streamer, max_new_tokens=max_new_tokens, temperature=temperature)
+    response = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
+    # Return the response
+    return response
+# Define the response function for the Gradio interface
+def respond(message, history, system_message, max_new_tokens, temperature, top_p):
+    return chat_alpaca(message, history, temperature, max_new_tokens)
+# Create the Gradio interface
+demo = gr.ChatInterface(
+    respond,
+    additional_inputs=[
+        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
+    ],
+)
+if __name__ == "__main__":
+    demo.launch(share=True)