Spaces:

NitinBot001
/

llama3.2-api

Sleeping

App Files Files Community

NitinBot001 committed 20 days ago

Commit

f74fbda

verified ·

1 Parent(s): 80e1c62

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -62

app.py CHANGED Viewed

@@ -1,97 +1,75 @@
 import os
 import threading
 import requests
 import subprocess
 import json
-from flask import Flask, jsonify, request
-from llama_cpp import Llama
 app = Flask(__name__)
 # Configuration
-MODEL_DIR = "/data/model"  # Persistent storage directory
-MODEL_NAME = "calme-3.3-llamaloi-3b.Q4_K_M.gguf"
-MODEL_PATH = os.path.join(MODEL_DIR, MODEL_NAME)
-MODEL_URL = "https://huggingface.co/MaziyarPanahi/calme-3.3-llamaloi-3b-GGUF/resolve/main/calme-3.3-llamaloi-3b.Q4_K_M.gguf"
 GH_PAT = os.getenv("GH_PAT")
 REPO_URL = "https://github.com/NitinBot001/Audio-url-new-js.git"
-# Global state
-initialized = False
-llm = None
 def background_init():
-    global initialized, llm
     try:
-        # 1. Ensure model directory exists
-        os.makedirs(MODEL_DIR, exist_ok=True)
-        # 2. Download model if not exists
         if not os.path.exists(MODEL_PATH):
-            print("Downloading model...")
-            with requests.get(MODEL_URL, stream=True) as r:
                 r.raise_for_status()
                 with open(MODEL_PATH, "wb") as f:
                     for chunk in r.iter_content(chunk_size=8192):
                         f.write(chunk)
-            print("Model download complete")
-        # 3. Initialize LLM
         llm = Llama(
             model_path=MODEL_PATH,
-            n_ctx=8192,
             n_threads=2,
             n_gpu_layers=0,
             verbose=False
         )
-        # 4. Start tunnel and update repo
         tunnel_url = start_tunnel()
-        update_repo_with_tunnel(tunnel_url)
-        initialized = True
-        print("Initialization complete")
     except Exception as e:
-        print(f"Initialization failed: {str(e)}")
 def start_tunnel():
-    """Start nport tunnel and return URL"""
     proc = subprocess.Popen(
         ["npx", "nport", "-s", "hf-space", "-p", "7860"],
         stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE,
-        text=True
     )
     # Wait for tunnel URL
-    while True:
-        line = proc.stdout.readline()
-        if "your domain is:" in line:
-            return line.split("your domain is: ")[1].strip()
-        if proc.poll() is not None:
-            break
-    raise RuntimeError("Failed to establish tunnel")
-def update_repo_with_tunnel(url):
-    """Update GitHub repository with tunnel URL"""
-    repo_dir = "/data/repo"
-    instance_path = os.path.join(repo_dir, "instance.json")
-    # Clone or update repository
-    if os.path.exists(repo_dir):
-        subprocess.run(["git", "-C", repo_dir, "pull"], check=True)
-    else:
-        subprocess.run([
-            "git", "clone",
-            f"https://x-access-token:{GH_PAT}@github.com/NitinBot001/Audio-url-new-js.git",
-            repo_dir
-        ], check=True)
-    # Update instance.json
-    with open(instance_path, "w") as f:
         json.dump({"tunnel_url": url}, f)
-    # Commit and push changes
     subprocess.run(["git", "-C", repo_dir, "add", "."], check=True)
     subprocess.run([
         "git", "-C", repo_dir,
@@ -100,9 +78,9 @@ def update_repo_with_tunnel(url):
     subprocess.run(["git", "-C", repo_dir, "push"], check=True)
 @app.route("/chat", methods=["GET"])
-def chat_endpoint():
-    if not initialized:
-        return jsonify({"error": "Service initializing"}), 503
     message = request.args.get("message", "")
     prompt = f"<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n{message}<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n"
     output = llm(prompt, max_tokens=512, stop=["<|eot_id|>"])
@@ -110,13 +88,10 @@ def chat_endpoint():
 @app.route("/health")
 def health_check():
-    return jsonify({
-        "status": "ready" if initialized else "initializing",
-        "model_loaded": os.path.exists(MODEL_PATH)
-    }), 200 if initialized else 503
 if __name__ == "__main__":
-    # Start initialization in background
     threading.Thread(target=background_init, daemon=True).start()
     # Start Flask server
     app.run(host="0.0.0.0", port=7860)

 import os
 import threading
+from flask import Flask, jsonify, request
+from llama_cpp import Llama
 import requests
 import subprocess
 import json
 app = Flask(__name__)
 # Configuration
+MODEL_PATH = "/tmp/model/calme-3.3-llamaloi-3b.Q4_K_M.gguf"
 GH_PAT = os.getenv("GH_PAT")
 REPO_URL = "https://github.com/NitinBot001/Audio-url-new-js.git"
 def background_init():
+    """Handle time-consuming operations in background"""
     try:
+        # 1. Download model
+        os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
         if not os.path.exists(MODEL_PATH):
+            print("Starting model download...")
+            url = "https://huggingface.co/MaziyarPanahi/calme-3.3-llamaloi-3b-GGUF/resolve/main/calme-3.3-llamaloi-3b.Q4_K_M.gguf"
+            with requests.get(url, stream=True) as r:
                 r.raise_for_status()
                 with open(MODEL_PATH, "wb") as f:
                     for chunk in r.iter_content(chunk_size=8192):
                         f.write(chunk)
+        # 2. Initialize LLM
+        global llm
         llm = Llama(
             model_path=MODEL_PATH,
+            n_ctx=8192,  # Reduced from 131072 for faster startup
             n_threads=2,
             n_gpu_layers=0,
             verbose=False
         )
+        # 3. Tunnel and Git operations
         tunnel_url = start_tunnel()
+        push_tunnel_url_to_repo(tunnel_url)
     except Exception as e:
+        print(f"Background init failed: {str(e)}")
 def start_tunnel():
+    """Start tunnel and return URL"""
     proc = subprocess.Popen(
         ["npx", "nport", "-s", "hf-space", "-p", "7860"],
         stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE
     )
     # Wait for tunnel URL
+    for line in iter(proc.stdout.readline, b''):
+        if b"your domain is:" in line:
+            return line.decode().split("your domain is: ")[1].strip()
+    raise RuntimeError("Failed to get tunnel URL")
+def push_tunnel_url_to_repo(url):
+    """Update repository with tunnel URL"""
+    repo_dir = "/tmp/repo"
+    subprocess.run(["rm", "-rf", repo_dir], check=True)
+    subprocess.run([
+        "git", "clone",
+        f"https://x-access-token:{GH_PAT}@github.com/NitinBot001/Audio-url-new-js.git",
+        repo_dir
+    ], check=True)
+    with open(f"{repo_dir}/instance.json", "w") as f:
         json.dump({"tunnel_url": url}, f)
     subprocess.run(["git", "-C", repo_dir, "add", "."], check=True)
     subprocess.run([
         "git", "-C", repo_dir,
     subprocess.run(["git", "-C", repo_dir, "push"], check=True)
 @app.route("/chat", methods=["GET"])
+def chat():
+    if 'llm' not in globals():
+        return jsonify({"error": "Initializing, try again later"}), 503
     message = request.args.get("message", "")
     prompt = f"<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n{message}<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n"
     output = llm(prompt, max_tokens=512, stop=["<|eot_id|>"])
 @app.route("/health")
 def health_check():
+    return "OK", 200
 if __name__ == "__main__":
+    # Start background initialization
     threading.Thread(target=background_init, daemon=True).start()
     # Start Flask server
     app.run(host="0.0.0.0", port=7860)