bkoz committed
Commit faee068 · unverified · 1 Parent(s): 33760f7
Files changed (1)
  1. app.py +40 -0
app.py CHANGED
@@ -10,6 +10,46 @@ def greet(n):
     print(zero.device) # <-- 'cuda:0' 🤗
     return f"Hello {zero + n} Tensor"
 
+def load_model():
+    from huggingface_hub import hf_hub_download
+    from llama_cpp import Llama, LlamaGrammar
+
+    # Llama() expects a local file, so fetch the GGUF weights from the Hub
+    # first rather than passing the resolve URL directly as model_path.
+    model_path = hf_hub_download(
+        repo_id="TheBloke/Llama-2-7B-GGUF",
+        filename="llama-2-7b.Q5_K_S.gguf",
+    )
+    llm = Llama(
+        model_path=model_path,
+        n_gpu_layers=-1,  # offload all layers to the GPU
+        verbose=False,
+    )
+
+    # GBNF grammar: sampling is constrained to strings derivable from 'root'.
+    # Only 'sentence' is reachable from 'root'; the 'answer' alternatives
+    # (weather, complaint, yesno, gen) are defined but currently unused.
+    grammar = LlamaGrammar.from_string('''
+    root ::= sentence
+    answer ::= (weather | complaint | yesno | gen)
+    weather ::= ("Sunny." | "Cloudy." | "Rainy.")
+    complaint ::= "I don't like talking about the weather."
+    yesno ::= ("Yes." | "No.")
+    gen ::= "1. " [A-Z] [a-z] [a-z]*
+    sentence ::= [A-Z] [A-Za-z0-9 ,-]* ("." | "!" | "?")
+    ''')
+
+    prompts = [
+        "How's the weather in London?",
+        "How's the weather in Munich?",
+        "How's the weather in Barcelona?",
+    ]
+
+    for prompt in prompts:
+        output = llm(
+            prompt,
+            max_tokens=512,
+            temperature=0.4,
+            grammar=grammar,
+        )
+
+        # Each completion matches the grammar's 'sentence' rule.
+        s = output['choices'][0]['text']
+        print(f'{s} , len(s) = {len(s)}')
+        print(output['choices'])
+        print(output['choices'][0]['text'])
+        print()
+
+
+load_model()
 demo = gr.Interface(fn=greet, inputs=gr.Number(), outputs=gr.Text())
 demo.launch(share=False)
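
For reference, the grammar argument above constrains token sampling so that every completion is derivable from the grammar's root rule. A minimal standalone sketch of the same technique, assuming the GGUF file has already been downloaded to a local path (the path and prompt below are placeholders, not part of this commit):

from llama_cpp import Llama, LlamaGrammar

# Restrict decoding so the model can only ever emit "Yes." or "No.".
grammar = LlamaGrammar.from_string('root ::= ("Yes." | "No.")')

llm = Llama(model_path="llama-2-7b.Q5_K_S.gguf", verbose=False)  # placeholder local path
out = llm("Is London in England? Answer yes or no.", max_tokens=8, grammar=grammar)
print(out['choices'][0]['text'])  # prints "Yes." or "No."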