Spaces: Runtime error
| import gradio as gr | |
| import spaces | |
| import torch | |
| from huggingface_hub import hf_hub_download | |
# Module-level CUDA tensor. On a ZeroGPU Space no GPU is attached at import
# time, which is why this prints 'cpu' here even though the tensor was moved
# to the CUDA device.
zero = torch.Tensor([0]).to("cuda")
print(zero.device)  # <-- 'cpu' 🤗
@spaces.GPU
def greet(n):
    """Return a greeting string containing the tensor ``zero + n``.

    BUG FIX: ``import spaces`` was present but the ``@spaces.GPU`` decorator
    was missing. On a ZeroGPU Space the GPU is only attached for the duration
    of a call to a ``@spaces.GPU``-decorated function — without it the
    module-level ``.cuda()`` tensor never gets a device, and the inline
    comment below ('cuda:0' inside the function vs 'cpu' at import time)
    cannot hold.

    Args:
        n: number supplied by the Gradio ``Number`` input.

    Returns:
        A formatted greeting string embedding ``zero + n``.
    """
    print(zero.device)  # <-- 'cuda:0' 🤗  (GPU attached while inside @spaces.GPU)
    return f"Hello {zero + n} Tensor"
def download_model():
    """Download the GGUF model weights into the local Hugging Face cache.

    BUG FIX: the original discarded the return value of ``hf_hub_download``,
    so callers had no way to learn where the weights landed on disk — yet a
    local path is exactly what ``llama_cpp.Llama(model_path=...)`` needs.
    Returning it is backward-compatible (existing callers simply ignore it).

    Returns:
        str: local filesystem path of the cached ``.gguf`` file.
    """
    REPO_ID = "TheBloke/Llama-2-7B-GGUF"
    FILENAME = "llama-2-7b.Q5_K_S.gguf"
    return hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
def load_model():
    """Load the Llama-2 GGUF weights with llama.cpp and run a few
    grammar-constrained sample completions, printing the results.

    BUG FIX (the Space's "Runtime error"): the original passed the
    ``https://huggingface.co/...`` URL as ``model_path``. ``llama_cpp.Llama``
    requires a *local* file path and fails immediately on a URL. We resolve
    the cached local path via ``hf_hub_download`` (imported at the top of
    this file; a no-op re-download when the file is already cached).
    """
    from llama_cpp import Llama, LlamaGrammar

    # Resolve a LOCAL path to the weights instead of a URL.
    model_path = hf_hub_download(
        repo_id="TheBloke/Llama-2-7B-GGUF",
        filename="llama-2-7b.Q5_K_S.gguf",
    )
    llm = Llama(
        model_path=model_path,
        n_gpu_layers=-1,  # offload all layers to the GPU
        verbose=False,
    )
    # GBNF grammar constraining the sampler. NOTE(review): only `root`
    # (-> `sentence`) is reachable; `answer`/`weather`/`complaint`/`yesno`/
    # `gen` are defined but never referenced from `root` — confirm intent.
    grammar = LlamaGrammar.from_string('''
root ::= sentence
answer ::= (weather | complaint | yesno | gen)
weather ::= ("Sunny." | "Cloudy." | "Rainy.")
complaint ::= "I don't like talking about the weather."
yesno ::= ("Yes." | "No.")
gen ::= "1. " [A-Z] [a-z] [a-z]*
sentence ::= [A-Z] [A-Za-z0-9 ,-]* ("." | "!" | "?")
''')
    prompts = [
        "How's the weather in London?",
        "How's the weather in Munich?",
        "How's the weather in Barcelona?",
    ]
    for prompt in prompts:
        output = llm(
            prompt,
            max_tokens=512,
            temperature=0.4,
            grammar=grammar,
        )
        s = output['choices'][0]['text']
        print(f'{s} , len(s) = {len(s)}')
        print(output['choices'])
        print(output['choices'][0]['text'])
        print()
# Script body: warm the HF cache, run the llama.cpp demo once at startup,
# then serve the Gradio UI.
download_model()
load_model()
# NOTE(review): Hugging Face Spaces conventionally looks for a module-level
# variable named `demo` in app.py — keep this name; TODO confirm for this
# deployment.
demo = gr.Interface(fn=greet, inputs=gr.Number(), outputs=gr.Text())
demo.launch(share=False)