DJStomp committed
Commit f0f6bff · verified · 1 Parent(s): 491841d

Update app.py

Files changed (1)
  1. app.py +12 -21
app.py CHANGED

@@ -4,29 +4,15 @@ import transformers
 import torch
 import spaces
 
-# Load Hugging Face token from environment variables
 hf_token = os.getenv("HF_TOKEN")
 if not hf_token:
     raise ValueError("HF_TOKEN is not set in environment variables!")
 
-# Model ID
 model_id = "huihui-ai/Llama-3.3-70B-Instruct-abliterated"
 
-# Load the model pipeline with ZeroGPU compatibility
-@spaces.GPU
-def load_pipeline():
-    return transformers.pipeline(
-        "text-generation",
-        model=model_id,
-        use_auth_token=hf_token,  # Pass the HF token
-        model_kwargs={"torch_dtype": torch.bfloat16},  # Use optimized dtype
-        device_map="auto",  # Automatically map across GPUs
-    )
+pipeline = None
 
-# Initialize the pipeline once
-pipeline = load_pipeline()
 
-# Define the function for response generation
 @spaces.GPU
 def generate_response(
     message,
@@ -36,7 +22,17 @@ def generate_response(
     temperature,
     top_p,
 ):
-    # Combine system, history, and user messages into a formatted input string
+    global pipeline
+
+    if pipeline is None:
+        pipeline = transformers.pipeline(
+            "text-generation",
+            model=model_id,
+            use_auth_token=hf_token,
+            model_kwargs={"torch_dtype": torch.bfloat16},
+            device_map="auto",
+        )
+
     messages = [{"role": "system", "content": system_message}]
     for user_msg, assistant_msg in history:
         if user_msg:
@@ -44,11 +40,8 @@
         if assistant_msg:
             messages.append({"role": "assistant", "content": assistant_msg})
     messages.append({"role": "user", "content": message})
-
-    # Format the conversation as a single string
     conversation = "\n".join(f"{m['role']}: {m['content']}" for m in messages)
 
-    # Generate a response using the pipeline
     try:
         outputs = pipeline(
             conversation,
@@ -58,13 +51,11 @@
         )
         generated_text = outputs[0]["generated_text"]
 
-        # Extract and return the assistant's response
        response = generated_text.split("\n")[-1].replace("assistant: ", "")
        return response
    except Exception as e:
        return f"Error: {str(e)}"
 
-# Define the Gradio Chat Interface
 demo = gr.ChatInterface(
     generate_response,
     additional_inputs=[
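
The substance of the change: the pipeline is no longer built at import time through a @spaces.GPU-decorated load_pipeline() helper, but lazily on the first request, inside the @spaces.GPU-decorated handler itself. On ZeroGPU Spaces a GPU is attached only while a @spaces.GPU function is executing, so deferring the model load keeps startup light and ensures the load runs in a GPU-backed context. A minimal sketch of the pattern in isolation (names like generate and _pipeline are illustrative; token= is used here because recent transformers releases deprecate use_auth_token=):

import os

import spaces        # Hugging Face ZeroGPU helper package
import torch
import transformers

MODEL_ID = "huihui-ai/Llama-3.3-70B-Instruct-abliterated"

_pipeline = None  # deliberately not created at import time


@spaces.GPU
def generate(prompt: str) -> str:
    # Runs only while ZeroGPU has a GPU attached, so the one-time
    # model load below happens in a GPU-backed context.
    global _pipeline
    if _pipeline is None:
        _pipeline = transformers.pipeline(
            "text-generation",
            model=MODEL_ID,
            token=os.getenv("HF_TOKEN"),  # use_auth_token is deprecated
            model_kwargs={"torch_dtype": torch.bfloat16},
            device_map="auto",
        )
    return _pipeline(prompt, max_new_tokens=256)[0]["generated_text"]

The trade-off is that the first request pays the full load latency; every later call reuses the cached global.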
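One caveat in the unchanged generation path: the prompt is assembled as plain "role: content" lines and the reply is recovered by splitting on the last newline, which breaks whenever the model emits a multi-line answer. Recent transformers releases (roughly 4.34 and later) let a text-generation pipeline consume the messages list directly and apply the model's chat template. A hedged sketch of that variant, written as a drop-in inside generate_response and reusing its messages, temperature, and top_p:

# Hypothetical variant: pass the chat messages straight to the pipeline
# instead of a flattened "role: content" string (assumes transformers >= 4.34).
outputs = pipeline(
    messages,          # list of {"role": ..., "content": ...} dicts
    max_new_tokens=256,
    do_sample=True,
    temperature=temperature,
    top_p=top_p,
)
# For chat input, generated_text is the conversation with the new
# assistant turn appended, so its content can be read off directly.
response = outputs[0]["generated_text"][-1]["content"]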