import os
import requests
from flask import Flask, request, jsonify
from llama_cpp import Llama
import subprocess
import time
import json

app = Flask(__name__)

# Use /tmp directory for storing the model
MODEL_DIR = "/tmp/model"
MODEL_PATH = os.path.join(MODEL_DIR, "calme-3.3-llamaloi-3b.Q4_K_M.gguf")
GH_PAT = os.getenv("GH_PAT")  # GitHub Personal Access Token
REPO_URL = "https://github.com/NitinBot001/Audio-url-new-js.git"


def download_model():
    os.makedirs(MODEL_DIR, exist_ok=True)  # Create the /tmp/model directory
    if not os.path.exists(MODEL_PATH):
        print("Downloading model...")
        r = requests.get(
            "https://huggingface.co/MaziyarPanahi/calme-3.3-llamaloi-3b-GGUF/resolve/main/calme-3.3-llamaloi-3b.Q4_K_M.gguf",
            stream=True,
        )
        r.raise_for_status()  # Fail early on a bad HTTP response
        with open(MODEL_PATH, "wb") as f:
            for chunk in r.iter_content(chunk_size=8192):
                if chunk:  # Skip keep-alive chunks
                    f.write(chunk)


def start_tunnel():
    # Start nport tunnel on port 7860
    tunnel_process = subprocess.Popen(
        ["npx", "nport", "-s", "ai-service", "-p", "7860"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    time.sleep(10)  # Wait for the tunnel to establish

    # Extract the tunnel URL from the process output
    tunnel_url = None
    for line in iter(tunnel_process.stdout.readline, b""):
        line = line.decode("utf-8").strip()
        if "your domain is:" in line:
            tunnel_url = line.split("your domain is: ")[1]
            break
    if not tunnel_url:
        raise Exception("Failed to extract tunnel URL")
    return tunnel_url


def push_tunnel_url_to_repo(tunnel_url):
    # Create instance.json
    instance_data = {"tunnel_url": tunnel_url}
    with open("/tmp/instance.json", "w") as f:
        json.dump(instance_data, f)

    # Clone the repository using the token for authentication
    repo_dir = "/tmp/repo"
    repo_url = f"https://x-access-token:{GH_PAT}@github.com/NitinBot001/Audio-url-new-js.git"
    subprocess.run(["git", "clone", repo_url, repo_dir], check=True)
    os.chdir(repo_dir)

    # Move instance.json into the repository
    subprocess.run(["mv", "/tmp/instance.json", "."], check=True)

    # Configure Git locally (without --global)
    subprocess.run(["git", "config", "user.email", "github-actions@github.com"], check=True)
    subprocess.run(["git", "config", "user.name", "github-actions"], check=True)

    # Commit and push the changes
    subprocess.run(["git", "add", "instance.json"], check=True)
    subprocess.run(["git", "commit", "-m", f"Update tunnel URL to {tunnel_url}"], check=True)
    subprocess.run(["git", "push", "origin", "main"], check=True)


@app.route("/chat", methods=["POST"])
def chat():
    data = request.json
    # Build a Llama 3 style prompt without duplicate special tokens
    prompt = (
        f"<|begin_of_text|>"
        f"<|start_header_id|>user<|end_header_id|>\n"
        f"{data.get('message', '')}"
        f"<|eot_id|>\n"
        f"<|start_header_id|>assistant<|end_header_id|>\n"
    )
    output = llm(
        prompt,
        max_tokens=2048,
        stop=["<|eot_id|>"],
        temperature=0.8,
        top_p=0.9,
    )
    return jsonify({"response": output["choices"][0]["text"].strip()})


if __name__ == "__main__":
    # Download the model
    download_model()

    # Initialize the LLM
    llm = Llama(
        model_path=MODEL_PATH,
        n_ctx=131072,  # Set to match the training context length
        n_threads=2,
        n_gpu_layers=0,
        verbose=False,
    )

    # Start the tunnel and push the URL
    tunnel_url = start_tunnel()
    push_tunnel_url_to_repo(tunnel_url)

    # Run the Flask app (for development only)
    app.run(host="0.0.0.0", port=7860)
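
# Usage sketch (assumption: querying the server locally on port 7860; the
# public nport URL written to instance.json would be used the same way):
#
#   curl -X POST http://localhost:7860/chat \
#        -H "Content-Type: application/json" \
#        -d '{"message": "Hello, who are you?"}'
#
# The /chat route above returns JSON shaped like {"response": "..."}.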