Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -9,37 +9,15 @@ print(zero.device) # <-- 'cpu' 🤔
 @spaces.GPU
 def greet(n):
     print(zero.device) # <-- 'cuda:0' 🤗
-
-
-
-
-
-
-
-
-
-    print(f'status: {m}')
-    return m
-
-def load_model(fp):
-    from llama_cpp import Llama, LlamaGrammar
-
-    print(f'Loading model: {fp}')
-    model_file=fp
-    llm = Llama(
-        model_path=model_file,
-        n_gpu_layers=-1, verbose=True
-    )
-
-    grammar = LlamaGrammar.from_string('''
-    root ::= sentence
-    answer ::= (weather | complaint | yesno | gen)
-    weather ::= ("Sunny." | "Cloudy." | "Rainy.")
-    complaint ::= "I don't like talking about the weather."
-    yesno ::= ("Yes." | "No.")
-    gen ::= "1. " [A-Z] [a-z] [a-z]*
-    sentence ::= [A-Z] [A-Za-z0-9 ,-]* ("." | "!" | "?")
-    ''')
+    grammar = LlamaGrammar.from_string('''
+    root ::= sentence
+    answer ::= (weather | complaint | yesno | gen)
+    weather ::= ("Sunny." | "Cloudy." | "Rainy.")
+    complaint ::= "I don't like talking about the weather."
+    yesno ::= ("Yes." | "No.")
+    gen ::= "1. " [A-Z] [a-z] [a-z]*
+    sentence ::= [A-Z] [A-Za-z0-9 ,-]* ("." | "!" | "?")
+    ''')
 
     prompts = [
         "How's the weather in London?",
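A note on this first hunk: the grammar is GBNF, llama.cpp's BNF dialect for constrained sampling, and it moves from `load_model` into the `@spaces.GPU`-decorated `greet`. As written, only `sentence` is reachable from `root`; the `answer`/`weather`/`complaint`/`yesno`/`gen` rules are defined but unused. A minimal sketch of how such a grammar is applied with llama-cpp-python (the model path and prompt below are illustrative placeholders, not taken from this commit):

# Sketch: grammar-constrained decoding with llama-cpp-python.
# "model.gguf" and the prompt are placeholders, not from the commit.
from llama_cpp import Llama, LlamaGrammar

llm = Llama(model_path="model.gguf", n_gpu_layers=-1)  # -1: offload all layers to GPU

# Every completion must derive from `root`: one capitalized
# sentence ending in ".", "!" or "?".
grammar = LlamaGrammar.from_string('''
root ::= sentence
sentence ::= [A-Z] [A-Za-z0-9 ,-]* ("." | "!" | "?")
''')

output = llm("Q: How's the weather in London?\nA: ",
             grammar=grammar, max_tokens=32)
print(output['choices'][0]['text'])  # e.g. "Cloudy."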
@@ -63,6 +41,27 @@ def load_model(fp):
         print(output['choices'][0]['text'])
         print()
 
+    return f"Hello {zero + n} Tensor"
+
+def download_model():
+
+    REPO_ID = "TheBloke/Llama-2-7B-GGUF"
+    FILENAME = "llama-2-7b.Q5_K_S.gguf"
+
+    print(f'Downloading model {REPO_ID}/{FILENAME}')
+    m = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
+    print(f'status: {m}')
+    return m
+
+def load_model(fp):
+    from llama_cpp import Llama, LlamaGrammar
+
+    print(f'Loading model: {fp}')
+    model_file=fp
+    llm = Llama(
+        model_path=model_file,
+        n_gpu_layers=-1, verbose=True
+    )
 
 load_model(download_model())
 demo = gr.Interface(fn=greet, inputs=gr.Number(), outputs=gr.Text())
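On the new `download_model()`: `hf_hub_download` (presumably imported near the top of app.py, above this diff) resolves a single file from a Hub repo into the local cache and returns its path, so the `status` it prints is really a filesystem path; on restarts with a warm cache, no bytes are re-downloaded. The same call in isolation:

# Sketch: fetch one GGUF file from the Hub into the local cache.
from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="TheBloke/Llama-2-7B-GGUF",  # repo used by the commit
    filename="llama-2-7b.Q5_K_S.gguf",   # quantized 7B weights
)
print(path)  # local path, e.g. under ~/.cache/huggingface/hub/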
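Taken together, the commit moves toward the standard ZeroGPU layout: module-level code runs on CPU at startup (downloading and staging the model), and a GPU is attached only while a function decorated with `@spaces.GPU` executes, which is why `zero.device` reports 'cpu' at import time but 'cuda:0' inside `greet`. Stripped of the llama.cpp details, the skeleton is the stock ZeroGPU example this Space builds on:

# The stock ZeroGPU pattern underlying this Space.
import gradio as gr
import spaces
import torch

zero = torch.Tensor([0]).cuda()
print(zero.device)  # <-- 'cpu': no GPU is attached at import time

@spaces.GPU  # a GPU is attached only for the duration of each call
def greet(n):
    print(zero.device)  # <-- 'cuda:0': the tensor lives on the GPU here
    return f"Hello {zero + n} Tensor"

demo = gr.Interface(fn=greet, inputs=gr.Number(), outputs=gr.Text())
demo.launch()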