bkoz committed
Commit faee068 · unverified · 1 Parent(s): 33760f7
Files changed (1)
  1. app.py +40 -0
app.py CHANGED
@@ -10,6 +10,46 @@ def greet(n):
     print(zero.device) # <-- 'cuda:0' 🤗
     return f"Hello {zero + n} Tensor"
 
+def load_model():
+    from huggingface_hub import hf_hub_download
+    from llama_cpp import Llama, LlamaGrammar
+
+    # Llama() expects a local file, so fetch the GGUF weights from the Hub
+    # first rather than passing the resolve URL directly as model_path.
+    model_path = hf_hub_download(
+        repo_id="TheBloke/Llama-2-7B-GGUF",
+        filename="llama-2-7b.Q5_K_S.gguf",
+    )
+    llm = Llama(
+        model_path=model_path,
+        n_gpu_layers=-1,  # offload all layers to the GPU
+        verbose=False,
+    )
+
+    # GBNF grammar: sampling is constrained to strings derivable from 'root'.
+    # Only 'sentence' is reachable from 'root'; the 'answer' alternatives
+    # (weather, complaint, yesno, gen) are defined but currently unused.
+    grammar = LlamaGrammar.from_string('''
+    root ::= sentence
+    answer ::= (weather | complaint | yesno | gen)
+    weather ::= ("Sunny." | "Cloudy." | "Rainy.")
+    complaint ::= "I don't like talking about the weather."
+    yesno ::= ("Yes." | "No.")
+    gen ::= "1. " [A-Z] [a-z] [a-z]*
+    sentence ::= [A-Z] [A-Za-z0-9 ,-]* ("." | "!" | "?")
+    ''')
+
+    prompts = [
+        "How's the weather in London?",
+        "How's the weather in Munich?",
+        "How's the weather in Barcelona?",
+    ]
+
+    for prompt in prompts:
+        output = llm(
+            prompt,
+            max_tokens=512,
+            temperature=0.4,
+            grammar=grammar,
+        )
+
+        # Each completion matches the grammar's 'sentence' rule.
+        s = output['choices'][0]['text']
+        print(f'{s} , len(s) = {len(s)}')
+        print(output['choices'])
+        print(output['choices'][0]['text'])
+        print()
+
+
+load_model()
 demo = gr.Interface(fn=greet, inputs=gr.Number(), outputs=gr.Text())
 demo.launch(share=False)
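
For reference, the grammar argument above constrains token sampling so that every completion is derivable from the grammar's root rule. A minimal standalone sketch of the same technique, assuming the GGUF file has already been downloaded to a local path (the path and prompt below are placeholders, not part of this commit):

from llama_cpp import Llama, LlamaGrammar

# Restrict decoding so the model can only ever emit "Yes." or "No.".
grammar = LlamaGrammar.from_string('root ::= ("Yes." | "No.")')

llm = Llama(model_path="llama-2-7b.Q5_K_S.gguf", verbose=False)  # placeholder local path
out = llm("Is London in England? Answer yes or no.", max_tokens=8, grammar=grammar)
print(out['choices'][0]['text'])  # prints "Yes." or "No."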