Spaces:

NitinBot001
/

llama3.2-api

Sleeping

App Files Files Community

NitinBot001 committed 18 days ago

Commit

04816bd

verified ·

1 Parent(s): fb6c9af

Update app.py

Browse files

Files changed (1) hide show

app.py +97 -78

app.py CHANGED Viewed

@@ -1,97 +1,116 @@
 import os
-import threading
-from flask import Flask, jsonify, request
-from llama_cpp import Llama
 import requests
 import subprocess
 import json
 app = Flask(__name__)
-# Configuration
-MODEL_PATH = "/tmp/model/calme-3.3-llamaloi-3b.Q4_K_M.gguf"
-GH_PAT = os.getenv("GH_PAT")
 REPO_URL = "https://github.com/NitinBot001/Audio-url-new-js.git"
-def background_init():
-    """Handle time-consuming operations in background"""
-    try:
-        # 1. Download model
-        os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
-        if not os.path.exists(MODEL_PATH):
-            print("Starting model download...")
-            url = "https://huggingface.co/MaziyarPanahi/calme-3.3-llamaloi-3b-GGUF/resolve/main/calme-3.3-llamaloi-3b.Q4_K_M.gguf"
-            with requests.get(url, stream=True) as r:
-                r.raise_for_status()
-                with open(MODEL_PATH, "wb") as f:
-                    for chunk in r.iter_content(chunk_size=8192):
-                        f.write(chunk)
-        # 2. Initialize LLM
-        global llm
-        llm = Llama(
-            model_path=MODEL_PATH,
-            n_ctx=8192,  # Reduced from 131072 for faster startup
-            n_threads=2,
-            n_gpu_layers=0,
-            verbose=False
         )
-        # 3. Tunnel and Git operations
-        tunnel_url = start_tunnel()
-        push_tunnel_url_to_repo(tunnel_url)
-    except Exception as e:
-        print(f"Background init failed: {str(e)}")
 def start_tunnel():
-    """Start tunnel and return URL"""
-    proc = subprocess.Popen(
-        ["npx", "nport", "-s", "hf-space", "-p", "7860"],
         stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE
     )
-    # Wait for tunnel URL
-    for line in iter(proc.stdout.readline, b''):
-        if b"your domain is:" in line:
-            return line.decode().split("your domain is: ")[1].strip()
-    raise RuntimeError("Failed to get tunnel URL")
-def push_tunnel_url_to_repo(url):
-    """Update repository with tunnel URL"""
     repo_dir = "/tmp/repo"
-    subprocess.run(["rm", "-rf", repo_dir], check=True)
-    subprocess.run([
-        "git", "clone",
-        f"https://x-access-token:{GH_PAT}@github.com/NitinBot001/Audio-url-new-js.git",
-        repo_dir
-    ], check=True)
-    with open(f"{repo_dir}/instance.json", "w") as f:
-        json.dump({"tunnel_url": url}, f)
-    subprocess.run(["git", "-C", repo_dir, "add", "."], check=True)
-    subprocess.run([
-        "git", "-C", repo_dir,
-        "commit", "-m", f"Update tunnel URL: {url}"
-    ], check=True)
-    subprocess.run(["git", "-C", repo_dir, "push"], check=True)
-@app.route("/chat", methods=["GET"])
-def chat():
-    if 'llm' not in globals():
-        return jsonify({"error": "Initializing, try again later"}), 503
-    message = request.args.get("message", "")
-    prompt = f"<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n{message}<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n"
-    output = llm(prompt, max_tokens=512, stop=["<|eot_id|>"])
-    return jsonify({"response": output['choices'][0]['text'].strip()})
-@app.route("/health")
-def health_check():
-    return "OK", 200
 if __name__ == "__main__":
-    # Start background initialization
-    threading.Thread(target=background_init, daemon=True).start()
-    # Start Flask server
     app.run(host="0.0.0.0", port=7860)

 import os
 import requests
+from flask import Flask, request, jsonify
+from llama_cpp import Llama
 import subprocess
+import time
 import json
 app = Flask(__name__)
+# Use /tmp directory for storing the model
+MODEL_DIR = "/tmp/model"
+MODEL_PATH = os.path.join(MODEL_DIR, "calme-3.3-llamaloi-3b.Q4_K_M.gguf")
+GH_PAT = os.getenv("GH_PAT")  # GitHub Personal Access Token
 REPO_URL = "https://github.com/NitinBot001/Audio-url-new-js.git"
+def download_model():
+    os.makedirs(MODEL_DIR, exist_ok=True)  # Create the /tmp/model directory
+    if not os.path.exists(MODEL_PATH):
+        print("Downloading model...")
+        r = requests.get(
+            "https://huggingface.co/MaziyarPanahi/calme-3.3-llamaloi-3b-GGUF/resolve/main/calme-3.3-llamaloi-3b.Q4_K_M.gguf",
+            stream=True,
         )
+        with open(MODEL_PATH, "wb") as f:
+            for chunk in r.iter_content(chunk_size=8192):
+                f.write(chunk)
 def start_tunnel():
+    # Start nport tunnel
+    tunnel_process = subprocess.Popen(
+        ["npx", "nport", "-s", "ai-service", "-p", "7860"],  # Use port 7860
         stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
     )
+    time.sleep(10)  # Wait for tunnel to establish
+    # Extract tunnel URL from logs
+    tunnel_url = None
+    for line in iter(tunnel_process.stdout.readline, b""):
+        line = line.decode("utf-8").strip()
+        if "your domain is:" in line:
+            tunnel_url = line.split("your domain is: ")[1]
+            break
+    if not tunnel_url:
+        raise Exception("Failed to extract tunnel URL")
+    return tunnel_url
+def push_tunnel_url_to_repo(tunnel_url):
+    # Create instance.json
+    instance_data = {"tunnel_url": tunnel_url}
+    with open("/tmp/instance.json", "w") as f:
+        json.dump(instance_data, f)
+    # Clone the repository
     repo_dir = "/tmp/repo"
+    repo_url = f"https://x-access-token:{GH_PAT}@github.com/NitinBot001/Audio-url-new-js.git"
+    subprocess.run(
+        ["git", "clone", repo_url, repo_dir],
+        check=True,
+    )
+    os.chdir(repo_dir)
+    # Move instance.json to the repository
+    subprocess.run(["mv", "/tmp/instance.json", "."], check=True)
+    # Configure Git locally (without --global)
+    subprocess.run(["git", "config", "user.email", "[email protected]"], check=True)
+    subprocess.run(["git", "config", "user.name", "github-actions"], check=True)
+    # Commit and push changes
+    subprocess.run(["git", "add", "instance.json"], check=True)
+    subprocess.run(["git", "commit", "-m", f"Update tunnel URL to {tunnel_url}"], check=True)
+    subprocess.run(["git", "push", "origin", "main"], check=True)
+@app.route("/chat", methods=["POST"])
+def chat():
+    data = request.json
+    # Construct the prompt without duplicate special tokens
+    prompt = (
+        f"<|begin_of_text|>"
+        f"<|start_header_id|>user<|end_header_id|>\n"
+        f"{data.get('message', '')}"
+        f"<|eot_id|>\n"
+        f"<|start_header_id|>assistant<|end_header_id|>\n"
+    )
+    output = llm(
+        prompt,
+        max_tokens=2048,
+        stop=["<|eot_id|>"],
+        temperature=0.8,
+        top_p=0.9,
+    )
+    return jsonify({"response": output["choices"][0]["text"].strip()})
 if __name__ == "__main__":
+    # Download the model
+    download_model()
+    # Initialize the LLM
+    llm = Llama(
+        model_path=MODEL_PATH,
+        n_ctx=131072,  # Set to match the training context length
+        n_threads=2,
+        n_gpu_layers=0,
+        verbose=False,
+    )
+    # Start the tunnel and push the URL
+    tunnel_url = start_tunnel()
+    push_tunnel_url_to_repo(tunnel_url)
+    # Run the Flask app (for development only)
     app.run(host="0.0.0.0", port=7860)