import json
import os
import shutil
import subprocess
import threading

import requests
from flask import Flask, jsonify, request
from llama_cpp import Llama

app = Flask(__name__)

# Configuration
MODEL_PATH = "/tmp/model/calme-3.3-llamaloi-3b.Q4_K_M.gguf"
MODEL_URL = (
    "https://huggingface.co/MaziyarPanahi/calme-3.3-llamaloi-3b-GGUF"
    "/resolve/main/calme-3.3-llamaloi-3b.Q4_K_M.gguf"
)
GH_PAT = os.getenv("GH_PAT")
REPO_URL = "https://github.com/NitinBot001/Audio-url-new-js.git"

# Global model handle; stays None until background_init() finishes loading,
# which lets /chat test readiness with a simple `llm is None` check instead
# of the fragile `'llm' not in globals()` probe.
llm = None


def background_init():
    """Download the model, load it, then publish the tunnel URL to GitHub.

    Runs in a daemon thread so the Flask server can begin answering
    /health immediately; /chat returns 503 until `llm` is assigned.
    Any failure is logged and swallowed (best-effort init, server stays up).
    """
    global llm
    try:
        # 1. Download model (skipped when already cached on disk).
        os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
        if not os.path.exists(MODEL_PATH):
            print("Starting model download...")
            # timeout guards against a stalled connection hanging init forever;
            # it bounds connect/read gaps, not total download time.
            with requests.get(MODEL_URL, stream=True, timeout=60) as r:
                r.raise_for_status()
                with open(MODEL_PATH, "wb") as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        f.write(chunk)

        # 2. Initialize LLM
        llm = Llama(
            model_path=MODEL_PATH,
            n_ctx=8192,  # Reduced from 131072 for faster startup
            n_threads=2,
            n_gpu_layers=0,
            verbose=False,
        )

        # 3. Tunnel and Git operations
        tunnel_url = start_tunnel()
        push_tunnel_url_to_repo(tunnel_url)
    except Exception as e:
        print(f"Background init failed: {str(e)}")


def start_tunnel():
    """Start an nport tunnel to port 7860 and return its public URL.

    Blocks reading the tunnel process's stdout until the line announcing
    the assigned domain appears.

    Raises:
        RuntimeError: if the process's stdout closes without ever
            printing a "your domain is:" line.
    """
    proc = subprocess.Popen(
        ["npx", "nport", "-s", "hf-space", "-p", "7860"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    # Wait for tunnel URL on stdout.
    for line in iter(proc.stdout.readline, b""):
        if b"your domain is:" in line:
            return line.decode().split("your domain is: ")[1].strip()
    raise RuntimeError("Failed to get tunnel URL")


def push_tunnel_url_to_repo(url):
    """Commit {"tunnel_url": url} as instance.json and push to the repo.

    Clones a fresh copy under /tmp/repo, authenticating with the GH_PAT
    token injected into REPO_URL (previously the address was duplicated
    as a hard-coded literal here, risking drift from the constant).

    Raises:
        subprocess.CalledProcessError: if any git command fails.
    """
    repo_dir = "/tmp/repo"
    # shutil.rmtree replaces shelling out to `rm -rf`; ignore_errors keeps
    # the original tolerant behavior when the directory doesn't exist.
    shutil.rmtree(repo_dir, ignore_errors=True)
    auth_url = REPO_URL.replace("https://", f"https://x-access-token:{GH_PAT}@")
    subprocess.run(["git", "clone", auth_url, repo_dir], check=True)
    with open(f"{repo_dir}/instance.json", "w") as f:
        json.dump({"tunnel_url": url}, f)
    # A commit needs an identity; fresh containers have none configured,
    # so set a repo-local one (does not touch global git config).
    subprocess.run(
        ["git", "-C", repo_dir, "config", "user.email", "bot@users.noreply.github.com"],
        check=True,
    )
    subprocess.run(
        ["git", "-C", repo_dir, "config", "user.name", "tunnel-bot"],
        check=True,
    )
    subprocess.run(["git", "-C", repo_dir, "add", "."], check=True)
    # --allow-empty: without it, an unchanged URL makes `git commit` exit
    # nonzero and check=True raises, failing every re-run with the same URL.
    subprocess.run(
        ["git", "-C", repo_dir, "commit", "--allow-empty",
         "-m", f"Update tunnel URL: {url}"],
        check=True,
    )
    subprocess.run(["git", "-C", repo_dir, "push"], check=True)


@app.route("/chat", methods=["GET"])
def chat():
    """Generate a completion for the ?message= query parameter.

    Returns 503 while the model is still loading in the background.
    """
    if llm is None:
        return jsonify({"error": "Initializing, try again later"}), 503
    message = request.args.get("message", "")
    prompt = (
        f"<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n"
        f"{message}<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n"
    )
    output = llm(prompt, max_tokens=512, stop=["<|eot_id|>"])
    return jsonify({"response": output["choices"][0]["text"].strip()})


@app.route("/health")
def health_check():
    """Liveness probe: always OK, even before the model finishes loading."""
    return "OK", 200


if __name__ == "__main__":
    # Start background initialization
    threading.Thread(target=background_init, daemon=True).start()
    # Start Flask server
    app.run(host="0.0.0.0", port=7860)