Spaces:
Sleeping
Sleeping
File size: 3,982 Bytes
6cb858c 048224c 6cb858c 048224c 6cb858c 9dc0437 048224c 6cb858c 048224c 6cb858c 048224c 6cb858c 048224c 6cb858c 048224c 6cb858c 048224c 6cb858c 048224c 6cb858c 048224c 6cb858c 048224c 3e949dd 048224c 3e949dd 048224c 6cb858c 048224c 6cb858c 048224c 3e949dd 048224c 3e949dd 048224c 6cb858c 048224c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
import os
import threading
import requests
import subprocess
import json
from flask import Flask, jsonify, request
from llama_cpp import Llama
app = Flask(__name__)
# Configuration
MODEL_DIR = "/data/model" # Persistent storage directory
MODEL_NAME = "calme-3.3-llamaloi-3b.Q4_K_M.gguf"
MODEL_PATH = os.path.join(MODEL_DIR, MODEL_NAME)
# Direct-download URL for the quantized GGUF weights on Hugging Face.
MODEL_URL = "https://huggingface.co/MaziyarPanahi/calme-3.3-llamaloi-3b-GGUF/resolve/main/calme-3.3-llamaloi-3b.Q4_K_M.gguf"
# GitHub personal access token, injected via environment (may be None).
GH_PAT = os.getenv("GH_PAT")
# Repository that receives the published tunnel URL (instance.json).
REPO_URL = "https://github.com/NitinBot001/Audio-url-new-js.git"
# Global state
initialized = False  # flipped to True by background_init on success
llm = None  # Llama instance, assigned by background_init
def background_init():
    """One-time startup: fetch the GGUF model, load it, open a tunnel,
    and publish the tunnel URL; flips the module-level `initialized` flag.

    Runs in a daemon thread so the Flask server can bind immediately.
    Any failure is logged and leaves the service in the "initializing"
    (503) state rather than crashing the process.
    """
    global initialized, llm
    try:
        # 1. Ensure the persistent model directory exists.
        os.makedirs(MODEL_DIR, exist_ok=True)
        # 2. Download the model if not already cached. Stream into a
        #    temporary file and atomically rename on success so an
        #    interrupted download can never leave a truncated file that
        #    the existence check would mistake for a complete model on
        #    the next boot.
        if not os.path.exists(MODEL_PATH):
            print("Downloading model...")
            tmp_path = MODEL_PATH + ".part"
            with requests.get(MODEL_URL, stream=True) as r:
                r.raise_for_status()
                with open(tmp_path, "wb") as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        f.write(chunk)
            os.replace(tmp_path, MODEL_PATH)  # atomic on POSIX
            print("Model download complete")
        # 3. Initialize the LLM (CPU-only inference, 8k context).
        llm = Llama(
            model_path=MODEL_PATH,
            n_ctx=8192,
            n_threads=2,
            n_gpu_layers=0,
            verbose=False,
        )
        # 4. Expose the local server and publish its public URL.
        tunnel_url = start_tunnel()
        update_repo_with_tunnel(tunnel_url)
        initialized = True
        print("Initialization complete")
    except Exception as e:
        # Deliberate broad catch at the thread boundary: log and keep
        # the web server alive; /health keeps reporting 503.
        print(f"Initialization failed: {str(e)}")
def start_tunnel():
    """Start an nport tunnel to port 7860 and return its public URL.

    Returns:
        The public domain string announced by nport.

    Raises:
        RuntimeError: if the nport process exits without announcing a URL.
    """
    proc = subprocess.Popen(
        ["npx", "nport", "-s", "hf-space", "-p", "7860"],
        stdout=subprocess.PIPE,
        # Merge stderr into stdout: an unread stderr PIPE can fill its
        # OS buffer and deadlock the child.
        stderr=subprocess.STDOUT,
        text=True,
    )
    # Scan the child's output for the announcement line.
    while True:
        line = proc.stdout.readline()
        if "your domain is:" in line:
            return line.split("your domain is: ")[1].strip()
        # Break only at EOF *and* exit — checking poll() alone could
        # abandon an announcement line still buffered in the pipe.
        if line == "" and proc.poll() is not None:
            break
    raise RuntimeError("Failed to establish tunnel")
def update_repo_with_tunnel(url):
    """Publish the tunnel URL by committing instance.json to GitHub.

    Args:
        url: Public tunnel URL to record in the repository.

    Raises:
        subprocess.CalledProcessError: if a required git command fails.
    """
    repo_dir = "/data/repo"
    instance_path = os.path.join(repo_dir, "instance.json")
    # Build the authenticated clone URL from the configured REPO_URL
    # instead of duplicating the repository address inline.
    auth_repo_url = REPO_URL.replace(
        "https://", f"https://x-access-token:{GH_PAT}@", 1
    )
    # Clone on first run, otherwise update the existing checkout.
    if os.path.exists(repo_dir):
        subprocess.run(["git", "-C", repo_dir, "pull"], check=True)
    else:
        subprocess.run(["git", "clone", auth_repo_url, repo_dir], check=True)
    # Write the new tunnel URL.
    with open(instance_path, "w") as f:
        json.dump({"tunnel_url": url}, f)
    subprocess.run(["git", "-C", repo_dir, "add", "."], check=True)
    # `git commit` exits non-zero when there is nothing to commit, which
    # would crash initialization whenever a restart yields the same URL.
    # Only commit and push when the staged diff is non-empty.
    staged = subprocess.run(
        ["git", "-C", repo_dir, "diff", "--cached", "--quiet"]
    )
    if staged.returncode != 0:
        subprocess.run([
            "git", "-C", repo_dir,
            "commit", "-m", f"Update tunnel URL: {url}"
        ], check=True)
        subprocess.run(["git", "-C", repo_dir, "push"], check=True)
@app.route("/chat", methods=["GET"])
def chat_endpoint():
    """Answer a single-turn chat message passed via the `message` query arg."""
    # Reject requests until the model has finished loading.
    if not initialized:
        return jsonify({"error": "Service initializing"}), 503
    user_message = request.args.get("message", "")
    # Llama-3-style chat template: one user turn, then the assistant header.
    prompt = (
        "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n"
        f"{user_message}<|eot_id|>\n"
        "<|start_header_id|>assistant<|end_header_id|>\n"
    )
    completion = llm(prompt, max_tokens=512, stop=["<|eot_id|>"])
    reply = completion["choices"][0]["text"].strip()
    return jsonify({"response": reply})
@app.route("/health")
def health_check():
    """Report readiness; answers 503 until background initialization finishes."""
    if initialized:
        status, code = "ready", 200
    else:
        status, code = "initializing", 503
    payload = {
        "status": status,
        "model_loaded": os.path.exists(MODEL_PATH),
    }
    return jsonify(payload), code
if __name__ == "__main__":
    # Kick off the model download / load / tunnel setup without blocking
    # server startup; requests get 503 until it completes.
    init_thread = threading.Thread(target=background_init, daemon=True)
    init_thread.start()
    # Serve on all interfaces at the HF Spaces default port.
    app.run(host="0.0.0.0", port=7860)