Spaces:
Runtime error
Update app.py
app.py
CHANGED
@@ -6,38 +6,40 @@ import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import gradio as gr
 
-
-huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
-if not huggingface_token:
-    pass
-    print("no HUGGINGFACE_TOKEN if you need set secret ")
-    #raise ValueError("HUGGINGFACE_TOKEN environment variable is not set")
-
-model_id = "google/gemma-2-9b-it"
-
-device = "auto" # torch.device("cuda" if torch.cuda.is_available() else "cpu")
-dtype = torch.bfloat16
-
-tokenizer = AutoTokenizer.from_pretrained(model_id, token=huggingface_token)
-
-print(model_id,device,dtype)
-histories = []
-#model = None
-
-model = AutoModelForCausalLM.from_pretrained(
-    model_id, token=huggingface_token ,torch_dtype=dtype,device_map=device
-)
-text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer,torch_dtype=dtype,device_map=device) #pipeline has not to(device)
-
-if next(model.parameters()).is_cuda:
-    print("The model is on a GPU")
-else:
-    print("The model is on a CPU")
-
-if text_generator.device == 'cuda':
-    print("The pipeline is using a GPU")
-else:
-    print("The pipeline is using a CPU")
+def init():
+    huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
+    if not huggingface_token:
+        pass
+        print("no HUGGINGFACE_TOKEN if you need set secret ")
+        #raise ValueError("HUGGINGFACE_TOKEN environment variable is not set")
+
+    model_id = "google/gemma-2-9b-it"
+
+    device = "auto" # torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    dtype = torch.bfloat16
+
+    tokenizer = AutoTokenizer.from_pretrained(model_id, token=huggingface_token)
+
+    print(model_id,device,dtype)
+    histories = []
+    #model = None
+
+    model = AutoModelForCausalLM.from_pretrained(
+        model_id, token=huggingface_token ,torch_dtype=dtype,device_map=device
+    )
+    text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer,torch_dtype=dtype,device_map=device) #pipeline has not to(device)
+
+    if next(model.parameters()).is_cuda:
+        print("The model is on a GPU")
+    else:
+        print("The model is on a CPU")
+
+    if text_generator.device == 'cuda':
+        print("The pipeline is using a GPU")
+    else:
+        print("The pipeline is using a CPU")
+
+    print("initialized")
 
 @spaces.GPU(duration=120)
 def generate_text(messages):
@@ -78,4 +80,5 @@ def call_generate_text(message, history):
 demo = gr.ChatInterface(call_generate_text,type="messages")
 
 if __name__ == "__main__":
+    init()
     demo.launch(share=True)
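A note on the shape of this change (not part of the commit): `init()` as committed assigns tokenizer, model, and text_generator as function locals, so a module-level generate_text() that reads those names would fail with NameError once the old module-level definitions are gone. Below is a minimal sketch of one way to keep the deferred loading while rebinding the module-level names, assuming generate_text looks them up at module scope; the `global` rebinding is my addition, not something the diff contains.

import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

tokenizer = None
model = None
text_generator = None

def init():
    # Rebind the module-level names instead of creating locals, so that
    # functions defined at module scope (e.g. generate_text) can see them.
    global tokenizer, model, text_generator
    huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
    model_id = "google/gemma-2-9b-it"
    dtype = torch.bfloat16
    tokenizer = AutoTokenizer.from_pretrained(model_id, token=huggingface_token)
    model = AutoModelForCausalLM.from_pretrained(
        model_id, token=huggingface_token, torch_dtype=dtype, device_map="auto"
    )
    # The model is already placed by device_map="auto", so the pipeline
    # does not need device/device_map arguments of its own here.
    text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

An alternative with the same effect would be for init() to return the three objects and for the __main__ block to store them; either way the point is only name visibility, not how the model is loaded.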
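Separately, the committed check `if text_generator.device == 'cuda':` compares a torch.device object against a string; on the PyTorch versions I am aware of that comparison is simply False, so the CPU branch prints even when the pipeline sits on a GPU. Comparing the device's .type attribute is the reliable test. A small sketch follows; device_kind is an illustrative helper, not from the commit.

import torch

def device_kind(device: torch.device) -> str:
    # device == "cuda" is not a dependable test: it compares a torch.device
    # to a str. The .type attribute ("cuda" or "cpu") is what the diff's
    # print statements actually mean to check.
    return "GPU" if device.type == "cuda" else "CPU"

print(torch.device("cuda:0") == "cuda")     # False: device vs. str
print(device_kind(torch.device("cuda:0")))  # GPU
print(device_kind(torch.device("cpu")))     # CPU

The model-side check in the same hunk, `next(model.parameters()).is_cuda`, is already a correct test and needs no change.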