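"""Flask service for a Hugging Face Space: downloads a GGUF chat model into
persistent storage on first boot, serves it with llama-cpp-python, exposes
the server through an nport tunnel, and publishes the tunnel URL to a
GitHub repository."""
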
import os
import threading
import requests
import subprocess
import json
from flask import Flask, jsonify, request
from llama_cpp import Llama

app = Flask(__name__)

# Configuration
MODEL_DIR = "/data/model"  # Persistent storage directory
MODEL_NAME = "calme-3.3-llamaloi-3b.Q4_K_M.gguf"
MODEL_PATH = os.path.join(MODEL_DIR, MODEL_NAME)
MODEL_URL = "https://huggingface.co/MaziyarPanahi/calme-3.3-llamaloi-3b-GGUF/resolve/main/calme-3.3-llamaloi-3b.Q4_K_M.gguf"
GH_PAT = os.getenv("GH_PAT")
REPO_URL = "https://github.com/NitinBot001/Audio-url-new-js.git"
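# GH_PAT must be provided as a secret/environment variable; without it the
# authenticated clone URL built in update_repo_with_tunnel() is invalid.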

# Global state
initialized = False
llm = None
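# `initialized` and `llm` are written once by the background thread and only
# read afterwards by request handlers, so plain module globals suffice here.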

def background_init():
    global initialized, llm
    try:
        # 1. Ensure model directory exists
        os.makedirs(MODEL_DIR, exist_ok=True)
        
        # 2. Download the model if not already present. Stream to a temp
        #    file and rename atomically so an interrupted download can't be
        #    mistaken for a complete model on the next boot.
        if not os.path.exists(MODEL_PATH):
            print("Downloading model...")
            tmp_path = MODEL_PATH + ".part"
            with requests.get(MODEL_URL, stream=True) as r:
                r.raise_for_status()
                with open(tmp_path, "wb") as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        f.write(chunk)
            os.replace(tmp_path, MODEL_PATH)
            print("Model download complete")

        # 3. Initialize LLM
        llm = Llama(
            model_path=MODEL_PATH,
            n_ctx=8192,
            n_threads=2,
            n_gpu_layers=0,
            verbose=False
        )
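        # n_gpu_layers=0 keeps every layer on the CPU and n_threads=2 assumes
        # a small two-vCPU instance; raise both on larger hardware.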

        # 4. Start tunnel and update repo
        tunnel_url = start_tunnel()
        update_repo_with_tunnel(tunnel_url)
        
        initialized = True
        print("Initialization complete")
    except Exception as e:
        print(f"Initialization failed: {str(e)}")

def start_tunnel():
    """Start an nport tunnel to port 7860 and return its public URL."""
    # Merge stderr into stdout: an unread stderr pipe can fill and block the
    # child, and the URL banner may be printed on either stream.
    proc = subprocess.Popen(
        ["npx", "nport", "-s", "hf-space", "-p", "7860"],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True
    )

    # Scan the output for nport's "your domain is: <url>" banner. readline()
    # returns "" only at EOF, so keep draining buffered output even after the
    # process exits.
    while True:
        line = proc.stdout.readline()
        if "your domain is:" in line:
            return line.split("your domain is: ")[1].strip()
        if not line and proc.poll() is not None:
            break
    raise RuntimeError("Failed to establish tunnel")

def update_repo_with_tunnel(url):
    """Update GitHub repository with tunnel URL"""
    repo_dir = "/data/repo"
    instance_path = os.path.join(repo_dir, "instance.json")
    
    # Clone or update the repository; build the authenticated URL from
    # REPO_URL instead of hardcoding the address a second time.
    if os.path.exists(repo_dir):
        subprocess.run(["git", "-C", repo_dir, "pull"], check=True)
    else:
        auth_url = REPO_URL.replace("https://", f"https://x-access-token:{GH_PAT}@")
        subprocess.run(["git", "clone", auth_url, repo_dir], check=True)

    # Update instance.json
    with open(instance_path, "w") as f:
        json.dump({"tunnel_url": url}, f)

    # Stage and commit only when instance.json actually changed; committing
    # with nothing staged exits non-zero and would raise under check=True.
    # (Note: `git commit` also needs user.name/user.email configured.)
    subprocess.run(["git", "-C", repo_dir, "add", "."], check=True)
    status = subprocess.run(
        ["git", "-C", repo_dir, "status", "--porcelain"],
        capture_output=True, text=True, check=True
    )
    if status.stdout.strip():
        subprocess.run([
            "git", "-C", repo_dir,
            "commit", "-m", f"Update tunnel URL: {url}"
        ], check=True)
        subprocess.run(["git", "-C", repo_dir, "push"], check=True)

@app.route("/chat", methods=["GET"])
def chat_endpoint():
    if not initialized:
        return jsonify({"error": "Service initializing"}), 503
    message = request.args.get("message", "")
    if not message:
        return jsonify({"error": "Missing 'message' parameter"}), 400
    prompt = f"<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n{message}<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n"
    output = llm(prompt, max_tokens=512, stop=["<|eot_id|>"])
    return jsonify({"response": output['choices'][0]['text'].strip()})

@app.route("/health")
def health_check():
    return jsonify({
        "status": "ready" if initialized else "initializing",
        "model_downloaded": os.path.exists(MODEL_PATH),
        "model_loaded": llm is not None
    }), 200 if initialized else 503

if __name__ == "__main__":
    # Start initialization in background
    threading.Thread(target=background_init, daemon=True).start()
    # Start Flask server
    app.run(host="0.0.0.0", port=7860)
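
# Example requests once /health reports "ready" (port as configured above):
#   curl "http://localhost:7860/health"
#   curl "http://localhost:7860/chat?message=Hello"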