Spaces:

NitinBot001
/

llama3.2-api

Sleeping

App Files Files Community

llama3.2-api / app.py

NitinBot001

Update app.py

048224c verified 26 days ago

raw

history blame

3.98 kB

	import os
	import threading
	import requests
	import subprocess
	import json
	from flask import Flask, jsonify, request
	from llama_cpp import Llama

	app = Flask(__name__)

	# Configuration
	MODEL_DIR = "/data/model" # Persistent storage directory
	MODEL_NAME = "calme-3.3-llamaloi-3b.Q4_K_M.gguf"
	MODEL_PATH = os.path.join(MODEL_DIR, MODEL_NAME)
	MODEL_URL = "https://huggingface.co/MaziyarPanahi/calme-3.3-llamaloi-3b-GGUF/resolve/main/calme-3.3-llamaloi-3b.Q4_K_M.gguf"
	GH_PAT = os.getenv("GH_PAT")
	REPO_URL = "https://github.com/NitinBot001/Audio-url-new-js.git"

	# Global state
	initialized = False
	llm = None

	def background_init():
	global initialized, llm
	try:
	# 1. Ensure model directory exists
	os.makedirs(MODEL_DIR, exist_ok=True)

	# 2. Download model if not exists
	if not os.path.exists(MODEL_PATH):
	print("Downloading model...")
	with requests.get(MODEL_URL, stream=True) as r:
	r.raise_for_status()
	with open(MODEL_PATH, "wb") as f:
	for chunk in r.iter_content(chunk_size=8192):
	f.write(chunk)
	print("Model download complete")

	# 3. Initialize LLM
	llm = Llama(
	model_path=MODEL_PATH,
	n_ctx=8192,
	n_threads=2,
	n_gpu_layers=0,
	verbose=False
	)

	# 4. Start tunnel and update repo
	tunnel_url = start_tunnel()
	update_repo_with_tunnel(tunnel_url)

	initialized = True
	print("Initialization complete")
	except Exception as e:
	print(f"Initialization failed: {str(e)}")

	def start_tunnel():
	"""Start nport tunnel and return URL"""
	proc = subprocess.Popen(
	["npx", "nport", "-s", "hf-space", "-p", "7860"],
	stdout=subprocess.PIPE,
	stderr=subprocess.PIPE,
	text=True
	)

	# Wait for tunnel URL
	while True:
	line = proc.stdout.readline()
	if "your domain is:" in line:
	return line.split("your domain is: ")[1].strip()
	if proc.poll() is not None:
	break
	raise RuntimeError("Failed to establish tunnel")

	def update_repo_with_tunnel(url):
	"""Update GitHub repository with tunnel URL"""
	repo_dir = "/data/repo"
	instance_path = os.path.join(repo_dir, "instance.json")

	# Clone or update repository
	if os.path.exists(repo_dir):
	subprocess.run(["git", "-C", repo_dir, "pull"], check=True)
	else:
	subprocess.run([
	"git", "clone",
	f"https://x-access-token:{GH_PAT}@github.com/NitinBot001/Audio-url-new-js.git",
	repo_dir
	], check=True)

	# Update instance.json
	with open(instance_path, "w") as f:
	json.dump({"tunnel_url": url}, f)

	# Commit and push changes
	subprocess.run(["git", "-C", repo_dir, "add", "."], check=True)
	subprocess.run([
	"git", "-C", repo_dir,
	"commit", "-m", f"Update tunnel URL: {url}"
	], check=True)
	subprocess.run(["git", "-C", repo_dir, "push"], check=True)

	@app.route("/chat", methods=["GET"])
	def chat_endpoint():
	if not initialized:
	return jsonify({"error": "Service initializing"}), 503
	message = request.args.get("message", "")
	prompt = f"<\|begin_of_text\|><\|start_header_id\|>user<\|end_header_id\|>\n{message}<\|eot_id\|>\n<\|start_header_id\|>assistant<\|end_header_id\|>\n"
	output = llm(prompt, max_tokens=512, stop=["<\|eot_id\|>"])
	return jsonify({"response": output['choices'][0]['text'].strip()})

	@app.route("/health")
	def health_check():
	return jsonify({
	"status": "ready" if initialized else "initializing",
	"model_loaded": os.path.exists(MODEL_PATH)
	}), 200 if initialized else 503

	if __name__ == "__main__":
	# Start initialization in background
	threading.Thread(target=background_init, daemon=True).start()
	# Start Flask server
	app.run(host="0.0.0.0", port=7860)