MakcukBobrov committed (verified)
Commit a4f726d · 1 Parent(s): 982194e

Update app.py

Files changed (1): app.py (+19 -80)
app.py CHANGED
@@ -1,87 +1,26 @@
- import os
- import threading
- import time
- import subprocess
-
- print("Expanding user path for Ollama")
- OLLAMA = os.path.expanduser("~/ollama")
-
- print("Checking if Ollama exists at the path")
- if not os.path.exists(OLLAMA):
-     print("Ollama not found, downloading it")
-     subprocess.run("curl -fsSL https://ollama.com/install.sh | sh", shell=True)
-     os.chmod(OLLAMA, 0o755)
-
- def ollama_service_thread():
-     print("Starting Ollama service thread")
-     subprocess.run("~/ollama serve", shell=True)
-
- print("Creating and starting Ollama service thread")
- OLLAMA_SERVICE_THREAD = threading.Thread(target=ollama_service_thread)
- OLLAMA_SERVICE_THREAD.start()
-
- print("Giving Ollama serve a moment to start")
- time.sleep(10)
-
- print("Setting model to 'gemma2'")
- model = "gemma2"
-
- print(f"Pulling model {model}")
- subprocess.run(f"~/ollama pull {model}", shell=True)
-
- ################################################
- ################################################
- import copy
  import gradio as gr
- from ollama import Client
-
- print("Initializing Ollama client")
- client = Client(host='http://localhost:11434', timeout=120)
-
- print("Getting Hugging Face token and model ID from environment variables")
- HF_TOKEN = os.environ.get("HF_TOKEN", None)
- MODEL_ID = os.environ.get("MODEL_ID", "google/gemma-2-9b-it")
- MODEL_NAME = MODEL_ID.split("/")[-1]
-
- print("Setting up title and description for Gradio interface")
- TITLE = "<h1><center>ollama-Chat</center></h1>"
- DESCRIPTION = f"""
- <h3>MODEL: <a href="https://hf.co/{MODEL_ID}">{MODEL_NAME}</a></h3>
- <p>Running on Ollama backend.</p>
- """
-
- CSS = """
- .duplicate-button {
-     margin: auto !important;
-     color: white !important;
-     background: black !important;
-     border-radius: 100vh !important;
- }
- h3 {
-     text-align: center;
- }
- """
- import gradio as gr
- from llama_index.llms.ollama import Ollama
-
- # Initialize the Ollama model
- llm = Ollama(model="llama3", request_timeout=120.0)
-
- # Define the function to get the response from Ollama
- def get_response(question):
-     resp = llm.complete(question)
-     return resp
-
- # Create the Gradio interface
- iface = gr.Interface(
-     fn=get_response,
-     inputs="text",
-     outputs="text",
-     title="Ask Paul Graham",
-     description="Enter a question to learn more about Paul Graham."
- )
-
- # Launch the Gradio app
- if __name__ == "__main__":
-     iface.launch()
+ import huggingface_hub
+ import os
+ import spaces
+ import torch
+
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+
+ @spaces.GPU
+ def sentience_check():
+     huggingface_hub.login(token=os.environ["HUGGINGFACE_TOKEN"])
+     device = torch.device("cuda")
+     tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-2b-it")
+     model = AutoModelForCausalLM.from_pretrained("google/gemma-2-2b-it").to(device)
+
+     inputs = tokenizer("Are you sentient?", return_tensors="pt").to(device)
+
+     with torch.no_grad():
+         outputs = model.generate(
+             **inputs, max_new_tokens=128, pad_token_id = tokenizer.eos_token_id
+         )
+
+     return tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+ demo = gr.Interface(fn=sentience_check, inputs=None, outputs=gr.Text())
+ demo.launch()
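
For reference, a minimal sketch of how the new interface could be queried once the Space is running. The local URL and the "/predict" endpoint name are Gradio defaults assumed here, not values specified by this commit:

# Minimal usage sketch (assumptions: the app runs at Gradio's default
# local URL and keeps the default "/predict" endpoint name; neither
# is set explicitly by this commit).
from gradio_client import Client

client = Client("http://localhost:7860")  # assumed default URL
# sentience_check() takes no inputs, so only the endpoint is named.
result = client.predict(api_name="/predict")
print(result)

Note that because the function is decorated with @spaces.GPU, on ZeroGPU hardware each call requests a GPU allocation for the duration of the call.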