Akjava committed on
Commit
dc37782
·
1 Parent(s): 940d9a9
Files changed (1)
app.py +22 -75
app.py CHANGED
@@ -6,104 +6,51 @@ import spaces
 
 huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
 if not huggingface_token:
-    pass
-    print("no HUGGINGFACE_TOKEN if you need set secret ")
-    #raise ValueError("HUGGINGFACE_TOKEN environment variable is not set")
+    raise ValueError("HUGGINGFACE_TOKEN environment variable is not set")
 
+model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
 model_id = "microsoft/Phi-3-mini-128k-instruct"
-
-device = "auto" # torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# device_map style value auto not cuda
+device = "auto" #torch.device("cuda" if torch.cuda.is_available() else "cpu")
 dtype = torch.bfloat16
 
-tokenizer = AutoTokenizer.from_pretrained(model_id)#, token=huggingface_token)
-
-
-import time
-time.sleep(10)
-
+tokenizer = AutoTokenizer.from_pretrained(model_id, token=huggingface_token)
 
 print(model_id,device,dtype)
-histories = []
-contents = []
-
-def call_generate_text(prompt, system_message="You are a helpful assistant."):
-
-    print(histories)
-    print(contents)
-
-    if prompt =="":
-        print("empty prompt return")
-        return ""
-    global initialized
-    if not initialized:
-        initialized = True
-        #return
-    try:
-        text = generate_text(prompt,system_message)
-        contents.append(text)
-        return text
-    except RuntimeError as e:
-        print(f"An unexpected error occurred: {e}")
-
-    return ""
-
-
-
-initialized = False
-
-iface = gr.Interface(
-    fn=call_generate_text,
-    inputs=[
-        gr.Textbox(lines=3, label="Input Prompt"),
-        gr.Textbox(lines=2, label="System Message", value="You are a helpful assistant."),
-    ],
-    outputs=gr.Textbox(label="Generated Text"),
-    title="Phi-3-mini-128k-instruct",
-    description="Phi-3-mini-128k-instruct",
-)
-print("Initialized")
-
-# keeping model seems make crash
-
-@spaces.GPU(duration=100)
+@spaces.GPU
 def generate_text(prompt, system_message="You are a helpful assistant."):
-    #print(prompt,system_message)
-
-    global histories
-
     model = AutoModelForCausalLM.from_pretrained(
-        model_id ,torch_dtype=dtype,device_map=device # token=huggingface_token
+        model_id, torch_dtype=dtype,device_map=device, token=huggingface_token
     )
-    #print(system_message)
-    text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer,torch_dtype=dtype,device_map=device) #pipeline has not to(device)
+    text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer, torch_dtype=dtype, device_map=device)
 
     messages = [
         {"role": "system", "content": system_message},
+        {"role": "user", "content": prompt},
     ]
-
-    messages += histories
-
-    user_message = {"role": "user", "content": prompt}
 
-    messages += [user_message]
-
-    #print(messages)
-
     result = text_generator(messages, max_new_tokens=256, do_sample=True, temperature=0.7)
 
     generated_output = result[0]["generated_text"]
     if isinstance(generated_output, list):
         for message in reversed(generated_output):
             if message.get("role") == "assistant":
-                content= message.get("content", "No content found.")
-                histories += [user_message,{"role": "assistant", "content": content}]
-                print(f"history = {len(histories)}")
-                return content
-
+                return message.get("content", "No content found.")
         return "No assistant response found."
     else:
         return "Unexpected output format."
 
+
+iface = gr.Interface(
+    fn=generate_text,
+    inputs=[
+        gr.Textbox(lines=3, label="Input Prompt"),
+        gr.Textbox(lines=2, label="System Message", value="You are a helpful assistant."),
+    ],
+    outputs=gr.Textbox(label="Generated Text"),
+    title="Llama 3.1 8B Instruct Text Generation",
+    description="Enter a prompt and optional system message to generate text using the Llama 3.1 8B Instruct model.",
+)
+
 if __name__ == "__main__":
-    print("Main")
     iface.launch()
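
The refactored generate_text can also be exercised directly, without going through the Gradio interface. The lines below are a minimal sketch, not part of this commit; they assume the same tokenizer, model, token, and ZeroGPU setup as app.py, and the prompt string is only an example.

# Hypothetical smoke test (not part of the commit): call generate_text directly,
# bypassing the Gradio UI. The prompt below is an arbitrary example.
reply = generate_text(
    "Explain in one sentence what device_map='auto' does.",
    system_message="You are a helpful assistant.",
)
print(reply)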