Spaces:
Sleeping
Sleeping
File size: 3,330 Bytes
6cb858c 048224c f74fbda 6cb858c 9dc0437 048224c f74fbda 048224c 6cb858c 048224c f74fbda 048224c f74fbda 048224c f74fbda 048224c f74fbda 048224c f74fbda 048224c 6cb858c 048224c f74fbda 048224c f74fbda 048224c f74fbda 6cb858c f74fbda 048224c 6cb858c f74fbda 6cb858c 048224c f74fbda 6cb858c f74fbda 3e949dd f74fbda 048224c f74fbda 048224c 3e949dd f74fbda 3e949dd 048224c f74fbda 6cb858c f74fbda 048224c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
import json
import os
import shutil
import subprocess
import threading

import requests
from flask import Flask, jsonify, request
from llama_cpp import Llama
app = Flask(__name__)

# Configuration
# Local path where background_init() downloads the GGUF model.
MODEL_PATH = "/tmp/model/calme-3.3-llamaloi-3b.Q4_K_M.gguf"
# GitHub personal access token used to push the tunnel URL.
# May be None if the env var is unset — the clone/push in
# push_tunnel_url_to_repo() would then fail; TODO confirm it is always set.
GH_PAT = os.getenv("GH_PAT")
# NOTE(review): REPO_URL appears unused — push_tunnel_url_to_repo()
# hardcodes the same URL with the token inlined.
REPO_URL = "https://github.com/NitinBot001/Audio-url-new-js.git"
def background_init():
    """Download the model, load the LLM, and publish the tunnel URL.

    Runs in a daemon thread so Flask can start serving /health immediately.
    Sets the module-level ``llm`` global once the model is loaded; /chat
    returns 503 until then. Any failure is logged and swallowed so the web
    server keeps running (best-effort background task).
    """
    try:
        # 1. Download model — atomically. Streaming straight to MODEL_PATH
        # would leave a truncated file on an interrupted download, and the
        # exists() check below would then treat it as complete on restart.
        os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
        if not os.path.exists(MODEL_PATH):
            print("Starting model download...")
            url = (
                "https://huggingface.co/MaziyarPanahi/calme-3.3-llamaloi-3b-GGUF"
                "/resolve/main/calme-3.3-llamaloi-3b.Q4_K_M.gguf"
            )
            part_path = MODEL_PATH + ".part"
            # timeout applies per-read on a streamed response, so a stalled
            # connection cannot hang the init thread forever.
            with requests.get(url, stream=True, timeout=60) as r:
                r.raise_for_status()
                with open(part_path, "wb") as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        if chunk:  # skip keep-alive chunks
                            f.write(chunk)
            os.replace(part_path, MODEL_PATH)  # atomic rename
        # 2. Initialize LLM (module-level global read by /chat).
        global llm
        llm = Llama(
            model_path=MODEL_PATH,
            n_ctx=8192,  # reduced from 131072 for faster startup
            n_threads=2,
            n_gpu_layers=0,
            verbose=False,
        )
        # 3. Tunnel and Git operations: expose the server, record its URL.
        tunnel_url = start_tunnel()
        push_tunnel_url_to_repo(tunnel_url)
    except Exception as e:
        # Log and continue — the Flask server must stay up regardless.
        print(f"Background init failed: {e}")
def start_tunnel():
    """Start an nport tunnel to port 7860 and return its public URL.

    stderr is merged into stdout: the original left stderr=PIPE unread,
    which can fill the OS pipe buffer and deadlock the child. The child is
    intentionally left running on success (it IS the tunnel); on failure it
    is killed and reaped before raising.

    Returns:
        str: the public domain reported by nport.

    Raises:
        RuntimeError: if the process's output ends without announcing a URL.
    """
    proc = subprocess.Popen(
        ["npx", "nport", "-s", "hf-space", "-p", "7860"],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # was PIPE (unread — deadlock risk)
    )
    # Scan the child's output until it announces the assigned domain.
    for line in iter(proc.stdout.readline, b""):
        if b"your domain is:" in line:
            return line.decode().split("your domain is: ")[1].strip()
    # Output ended (process died or closed stdout) without a URL:
    # don't leak the child — kill and reap it, then fail loudly.
    proc.kill()
    proc.wait()
    raise RuntimeError("Failed to get tunnel URL")
def push_tunnel_url_to_repo(url):
    """Commit and push the current tunnel URL to the tracker repository.

    Clones a fresh copy into /tmp/repo, rewrites instance.json, and pushes.
    No-op when the URL is unchanged: `git commit` exits non-zero on an empty
    commit, which check=True would otherwise turn into CalledProcessError.

    Args:
        url: public tunnel URL to publish.

    Raises:
        subprocess.CalledProcessError: if clone, add, commit, or push fails.
    """
    repo_dir = "/tmp/repo"
    # stdlib rmtree instead of shelling out to `rm -rf`.
    shutil.rmtree(repo_dir, ignore_errors=True)
    subprocess.run([
        "git", "clone",
        f"https://x-access-token:{GH_PAT}@github.com/NitinBot001/Audio-url-new-js.git",
        repo_dir,
    ], check=True)
    with open(f"{repo_dir}/instance.json", "w") as f:
        json.dump({"tunnel_url": url}, f)
    subprocess.run(["git", "-C", repo_dir, "add", "."], check=True)
    # `git diff --cached --quiet` exits 0 when the index matches HEAD,
    # i.e. there is nothing to commit — skip commit/push in that case.
    staged = subprocess.run(["git", "-C", repo_dir, "diff", "--cached", "--quiet"])
    if staged.returncode == 0:
        return
    subprocess.run([
        "git", "-C", repo_dir,
        "commit", "-m", f"Update tunnel URL: {url}",
    ], check=True)
    subprocess.run(["git", "-C", repo_dir, "push"], check=True)
@app.route("/chat", methods=["GET"])
def chat():
    """Answer a single user message with the local Llama model."""
    # The model loads in a background thread; refuse requests until the
    # `llm` global exists.
    if "llm" not in globals():
        return jsonify({"error": "Initializing, try again later"}), 503
    message = request.args.get("message", "")
    prompt = (
        "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n"
        f"{message}<|eot_id|>\n"
        "<|start_header_id|>assistant<|end_header_id|>\n"
    )
    output = llm(prompt, max_tokens=512, stop=["<|eot_id|>"])
    answer = output["choices"][0]["text"].strip()
    return jsonify({"response": answer})
@app.route("/health")
def health_check():
    """Liveness probe: always reports healthy once Flask is serving."""
    return ("OK", 200)
if __name__ == "__main__":
    # Start background initialization (model download, LLM load, tunnel +
    # git push) in a daemon thread so the HTTP server comes up immediately
    # and /health responds while the model is still downloading.
    threading.Thread(target=background_init, daemon=True).start()
    # Start Flask server on 7860 — the port the tunnel forwards to.
    app.run(host="0.0.0.0", port=7860)