NitinBot001 commited on
Commit
f74fbda
·
verified ·
1 Parent(s): 80e1c62

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -62
app.py CHANGED
@@ -1,97 +1,75 @@
1
  import os
2
  import threading
 
 
3
  import requests
4
  import subprocess
5
  import json
6
- from flask import Flask, jsonify, request
7
- from llama_cpp import Llama
8
 
9
  app = Flask(__name__)
10
 
11
  # Configuration
12
- MODEL_DIR = "/data/model" # Persistent storage directory
13
- MODEL_NAME = "calme-3.3-llamaloi-3b.Q4_K_M.gguf"
14
- MODEL_PATH = os.path.join(MODEL_DIR, MODEL_NAME)
15
- MODEL_URL = "https://huggingface.co/MaziyarPanahi/calme-3.3-llamaloi-3b-GGUF/resolve/main/calme-3.3-llamaloi-3b.Q4_K_M.gguf"
16
  GH_PAT = os.getenv("GH_PAT")
17
  REPO_URL = "https://github.com/NitinBot001/Audio-url-new-js.git"
18
 
19
- # Global state
20
- initialized = False
21
- llm = None
22
-
23
  def background_init():
24
- global initialized, llm
25
  try:
26
- # 1. Ensure model directory exists
27
- os.makedirs(MODEL_DIR, exist_ok=True)
28
-
29
- # 2. Download model if not exists
30
  if not os.path.exists(MODEL_PATH):
31
- print("Downloading model...")
32
- with requests.get(MODEL_URL, stream=True) as r:
 
33
  r.raise_for_status()
34
  with open(MODEL_PATH, "wb") as f:
35
  for chunk in r.iter_content(chunk_size=8192):
36
  f.write(chunk)
37
- print("Model download complete")
38
 
39
- # 3. Initialize LLM
 
40
  llm = Llama(
41
  model_path=MODEL_PATH,
42
- n_ctx=8192,
43
  n_threads=2,
44
  n_gpu_layers=0,
45
  verbose=False
46
  )
47
 
48
- # 4. Start tunnel and update repo
49
  tunnel_url = start_tunnel()
50
- update_repo_with_tunnel(tunnel_url)
51
 
52
- initialized = True
53
- print("Initialization complete")
54
  except Exception as e:
55
- print(f"Initialization failed: {str(e)}")
56
 
57
  def start_tunnel():
58
- """Start nport tunnel and return URL"""
59
  proc = subprocess.Popen(
60
  ["npx", "nport", "-s", "hf-space", "-p", "7860"],
61
  stdout=subprocess.PIPE,
62
- stderr=subprocess.PIPE,
63
- text=True
64
  )
65
-
66
  # Wait for tunnel URL
67
- while True:
68
- line = proc.stdout.readline()
69
- if "your domain is:" in line:
70
- return line.split("your domain is: ")[1].strip()
71
- if proc.poll() is not None:
72
- break
73
- raise RuntimeError("Failed to establish tunnel")
74
 
75
- def update_repo_with_tunnel(url):
76
- """Update GitHub repository with tunnel URL"""
77
- repo_dir = "/data/repo"
78
- instance_path = os.path.join(repo_dir, "instance.json")
 
 
 
 
 
79
 
80
- # Clone or update repository
81
- if os.path.exists(repo_dir):
82
- subprocess.run(["git", "-C", repo_dir, "pull"], check=True)
83
- else:
84
- subprocess.run([
85
- "git", "clone",
86
- f"https://x-access-token:{GH_PAT}@github.com/NitinBot001/Audio-url-new-js.git",
87
- repo_dir
88
- ], check=True)
89
-
90
- # Update instance.json
91
- with open(instance_path, "w") as f:
92
  json.dump({"tunnel_url": url}, f)
93
-
94
- # Commit and push changes
95
  subprocess.run(["git", "-C", repo_dir, "add", "."], check=True)
96
  subprocess.run([
97
  "git", "-C", repo_dir,
@@ -100,9 +78,9 @@ def update_repo_with_tunnel(url):
100
  subprocess.run(["git", "-C", repo_dir, "push"], check=True)
101
 
102
  @app.route("/chat", methods=["GET"])
103
- def chat_endpoint():
104
- if not initialized:
105
- return jsonify({"error": "Service initializing"}), 503
106
  message = request.args.get("message", "")
107
  prompt = f"<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n{message}<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n"
108
  output = llm(prompt, max_tokens=512, stop=["<|eot_id|>"])
@@ -110,13 +88,10 @@ def chat_endpoint():
110
 
111
  @app.route("/health")
112
  def health_check():
113
- return jsonify({
114
- "status": "ready" if initialized else "initializing",
115
- "model_loaded": os.path.exists(MODEL_PATH)
116
- }), 200 if initialized else 503
117
 
118
  if __name__ == "__main__":
119
- # Start initialization in background
120
  threading.Thread(target=background_init, daemon=True).start()
121
  # Start Flask server
122
  app.run(host="0.0.0.0", port=7860)
 
1
  import os
2
  import threading
3
+ from flask import Flask, jsonify, request
4
+ from llama_cpp import Llama
5
  import requests
6
  import subprocess
7
  import json
 
 
8
 
9
  app = Flask(__name__)
10
 
11
  # Configuration
12
+ MODEL_PATH = "/tmp/model/calme-3.3-llamaloi-3b.Q4_K_M.gguf"
 
 
 
13
  GH_PAT = os.getenv("GH_PAT")
14
  REPO_URL = "https://github.com/NitinBot001/Audio-url-new-js.git"
15
 
 
 
 
 
16
def background_init():
    """Run the slow startup work off the main thread.

    Steps: download the GGUF model if it is not already cached, load it
    into the module-global ``llm``, then open a public tunnel to the
    local server and publish the tunnel URL to the GitHub repo.  Any
    failure is logged; the Flask app keeps answering 503 from /chat
    until ``llm`` exists.
    """
    try:
        # 1. Download the model.  Write to a temp file and rename on
        #    success: a partial download left directly at MODEL_PATH
        #    would pass the os.path.exists() check on the next start
        #    and be loaded as a corrupt model.
        os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
        if not os.path.exists(MODEL_PATH):
            print("Starting model download...")
            url = (
                "https://huggingface.co/MaziyarPanahi/"
                "calme-3.3-llamaloi-3b-GGUF/resolve/main/"
                "calme-3.3-llamaloi-3b.Q4_K_M.gguf"
            )
            tmp_path = MODEL_PATH + ".part"
            with requests.get(url, stream=True) as r:
                r.raise_for_status()
                with open(tmp_path, "wb") as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        f.write(chunk)
            os.replace(tmp_path, MODEL_PATH)  # atomic on POSIX

        # 2. Initialize the LLM; /chat checks for this global before use.
        global llm
        llm = Llama(
            model_path=MODEL_PATH,
            n_ctx=8192,       # reduced context for faster startup
            n_threads=2,
            n_gpu_layers=0,   # CPU-only environment
            verbose=False,
        )

        # 3. Tunnel and Git operations.
        tunnel_url = start_tunnel()
        push_tunnel_url_to_repo(tunnel_url)

    except Exception as e:
        # Broad catch is deliberate: this runs in a daemon thread, and
        # an uncaught exception here would otherwise die silently.
        print(f"Background init failed: {str(e)}")
46
 
47
def start_tunnel():
    """Launch an nport tunnel to port 7860 and return its public URL.

    Blocks until the tunnel process prints its domain.  stderr is merged
    into stdout so the URL cannot be missed and the child can never
    deadlock on a full, unread stderr pipe (the original opened a
    stderr PIPE it never drained).

    Returns:
        str: the public tunnel URL.

    Raises:
        RuntimeError: if the process exits without printing a URL.
    """
    proc = subprocess.Popen(
        ["npx", "nport", "-s", "hf-space", "-p", "7860"],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # merge: one stream, no unread pipe
    )
    # Wait for the tunnel URL on the combined output stream.
    for line in iter(proc.stdout.readline, b""):
        if b"your domain is:" in line:
            return line.decode().split("your domain is: ")[1].strip()
    # Stream closed without a URL: reap the child and fail loudly.
    proc.kill()
    raise RuntimeError("Failed to get tunnel URL")
 
 
 
59
 
60
+ def push_tunnel_url_to_repo(url):
61
+ """Update repository with tunnel URL"""
62
+ repo_dir = "/tmp/repo"
63
+ subprocess.run(["rm", "-rf", repo_dir], check=True)
64
+ subprocess.run([
65
+ "git", "clone",
66
+ f"https://x-access-token:{GH_PAT}@github.com/NitinBot001/Audio-url-new-js.git",
67
+ repo_dir
68
+ ], check=True)
69
 
70
+ with open(f"{repo_dir}/instance.json", "w") as f:
 
 
 
 
 
 
 
 
 
 
 
71
  json.dump({"tunnel_url": url}, f)
72
+
 
73
  subprocess.run(["git", "-C", repo_dir, "add", "."], check=True)
74
  subprocess.run([
75
  "git", "-C", repo_dir,
 
78
  subprocess.run(["git", "-C", repo_dir, "push"], check=True)
79
 
80
  @app.route("/chat", methods=["GET"])
81
+ def chat():
82
+ if 'llm' not in globals():
83
+ return jsonify({"error": "Initializing, try again later"}), 503
84
  message = request.args.get("message", "")
85
  prompt = f"<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n{message}<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n"
86
  output = llm(prompt, max_tokens=512, stop=["<|eot_id|>"])
 
88
 
89
@app.route("/health")
def health_check():
    """Liveness probe: report healthy whenever Flask is serving."""
    body = "OK"
    return body, 200
 
 
 
92
 
93
if __name__ == "__main__":
    # Kick off the slow initialization (model download, LLM load,
    # tunnel + repo update) without blocking the web server.
    init_worker = threading.Thread(target=background_init, daemon=True)
    init_worker.start()
    # Serve the Flask app on the expected port.
    app.run(host="0.0.0.0", port=7860)