llama3.2-api / app.py
NitinBot001's picture
Update app.py
048224c verified
raw
history blame
3.98 kB
import os
import threading
import requests
import subprocess
import json
from flask import Flask, jsonify, request
from llama_cpp import Llama
app = Flask(__name__)
# Configuration
MODEL_DIR = "/data/model" # Persistent storage directory
MODEL_NAME = "calme-3.3-llamaloi-3b.Q4_K_M.gguf"  # quantized GGUF model file
MODEL_PATH = os.path.join(MODEL_DIR, MODEL_NAME)  # full on-disk path to the model
# Direct download URL for the GGUF weights on the Hugging Face Hub.
MODEL_URL = "https://huggingface.co/MaziyarPanahi/calme-3.3-llamaloi-3b-GGUF/resolve/main/calme-3.3-llamaloi-3b.Q4_K_M.gguf"
GH_PAT = os.getenv("GH_PAT")  # GitHub personal access token for pushing the tunnel URL
REPO_URL = "https://github.com/NitinBot001/Audio-url-new-js.git"  # repo that receives instance.json updates
# Global state
initialized = False  # set to True by background_init() once model + tunnel are ready
llm = None  # Llama instance; populated by background_init()
def background_init():
    """Download the model if missing, load it, then start the tunnel.

    Runs in a daemon thread so the Flask server can start immediately.
    Sets the module-global ``initialized`` flag only after every step
    succeeds; until then /chat and /health report 503.
    """
    global initialized, llm
    try:
        # 1. Ensure the persistent model directory exists.
        os.makedirs(MODEL_DIR, exist_ok=True)

        # 2. Download the model if not already cached.  Stream to a
        #    temporary ".part" file and atomically rename at the end so an
        #    interrupted download never leaves a truncated file at
        #    MODEL_PATH that a later run would mistake for a valid model.
        if not os.path.exists(MODEL_PATH):
            print("Downloading model...")
            tmp_path = MODEL_PATH + ".part"
            # timeout guards against a hung connection stalling init forever
            with requests.get(MODEL_URL, stream=True, timeout=60) as r:
                r.raise_for_status()
                with open(tmp_path, "wb") as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        if chunk:  # filter out keep-alive chunks
                            f.write(chunk)
            os.replace(tmp_path, MODEL_PATH)  # atomic on POSIX
            print("Model download complete")

        # 3. Initialize the LLM (CPU-only: n_gpu_layers=0).
        llm = Llama(
            model_path=MODEL_PATH,
            n_ctx=8192,
            n_threads=2,
            n_gpu_layers=0,
            verbose=False
        )

        # 4. Start the tunnel and publish its public URL to GitHub.
        tunnel_url = start_tunnel()
        update_repo_with_tunnel(tunnel_url)

        initialized = True
        print("Initialization complete")
    except Exception as e:
        # Best-effort init: log the failure; the endpoints keep returning 503.
        print(f"Initialization failed: {str(e)}")
def start_tunnel():
    """Start an nport tunnel exposing local port 7860 and return its URL.

    The tunnel process is intentionally left running after the URL is
    read; only the announcement line is consumed from its stdout.

    Returns:
        The public tunnel URL as a string.

    Raises:
        RuntimeError: if the process exits before printing a URL; the
            process's stderr is included so the cause is visible in logs.
    """
    proc = subprocess.Popen(
        ["npx", "nport", "-s", "hf-space", "-p", "7860"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True
    )
    # Read stdout line by line until the URL announcement appears.
    # NOTE: break only at EOF (readline() returns "").  Checking poll()
    # after every line can abandon the loop while the URL line is still
    # buffered in stdout, because the process may exit (or be reaped)
    # before we have drained its output.
    while True:
        line = proc.stdout.readline()
        if "your domain is:" in line:
            return line.split("your domain is: ")[1].strip()
        if line == "" and proc.poll() is not None:
            break  # EOF and process exited: no URL is coming
    # Surface stderr so the failure is diagnosable from the logs.
    err = proc.stderr.read().strip()
    raise RuntimeError(f"Failed to establish tunnel: {err}")
def update_repo_with_tunnel(url):
    """Write the tunnel URL into instance.json in the GitHub repo and push.

    Clones the repository on first run (authenticated via GH_PAT) or
    pulls if it already exists, then commits and pushes only when the
    URL actually changed — ``git commit`` exits non-zero when nothing is
    staged, which would raise under ``check=True`` and abort init on
    every restart that reuses the same URL.

    Args:
        url: The public tunnel URL to publish.

    Raises:
        subprocess.CalledProcessError: if any git operation fails.
    """
    repo_dir = "/data/repo"
    instance_path = os.path.join(repo_dir, "instance.json")

    # Clone or update the repository.  Derive the authenticated URL from
    # the REPO_URL constant instead of hard-coding the address a second
    # time, so the two cannot drift apart.
    if os.path.exists(repo_dir):
        subprocess.run(["git", "-C", repo_dir, "pull"], check=True)
    else:
        auth_url = REPO_URL.replace(
            "https://", f"https://x-access-token:{GH_PAT}@"
        )
        subprocess.run(["git", "clone", auth_url, repo_dir], check=True)

    # Update instance.json with the new tunnel URL.
    with open(instance_path, "w") as f:
        json.dump({"tunnel_url": url}, f)

    # Stage everything; commit/push only if something actually changed.
    subprocess.run(["git", "-C", repo_dir, "add", "."], check=True)
    # `git diff --cached --quiet` exits 1 when there are staged changes.
    staged = subprocess.run(["git", "-C", repo_dir, "diff", "--cached", "--quiet"])
    if staged.returncode != 0:
        subprocess.run([
            "git", "-C", repo_dir,
            "commit", "-m", f"Update tunnel URL: {url}"
        ], check=True)
        subprocess.run(["git", "-C", repo_dir, "push"], check=True)
@app.route("/chat", methods=["GET"])
def chat_endpoint():
    """Generate a reply to the ``?message=`` query parameter via the LLM.

    Returns 503 with an error payload while the model is still loading.
    """
    if not initialized:
        return jsonify({"error": "Service initializing"}), 503
    user_message = request.args.get("message", "")
    # Llama-3 chat template: user turn followed by an open assistant turn.
    llama_prompt = (
        "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n"
        f"{user_message}<|eot_id|>\n"
        "<|start_header_id|>assistant<|end_header_id|>\n"
    )
    completion = llm(llama_prompt, max_tokens=512, stop=["<|eot_id|>"])
    reply = completion['choices'][0]['text'].strip()
    return jsonify({"response": reply})
@app.route("/health")
def health_check():
    """Report readiness: HTTP 200 once initialization finished, else 503."""
    status_code = 200 if initialized else 503
    payload = {
        "status": "ready" if initialized else "initializing",
        "model_loaded": os.path.exists(MODEL_PATH)
    }
    return jsonify(payload), status_code
if __name__ == "__main__":
    # Kick off model download/load in the background so the web server
    # starts immediately (endpoints return 503 until init completes).
    init_worker = threading.Thread(target=background_init, daemon=True)
    init_worker.start()
    # Serve on all interfaces at the Hugging Face Spaces default port.
    app.run(host="0.0.0.0", port=7860)