import os
import json
import shutil
import subprocess
import time

import requests
from flask import Flask, request, jsonify
from llama_cpp import Llama

app = Flask(__name__)

# Use the /tmp directory for storing the model (ephemeral storage)
MODEL_DIR = "/tmp/model"
MODEL_PATH = os.path.join(MODEL_DIR, "calme-3.3-llamaloi-3b.Q4_K_M.gguf")
# Read the GitHub Personal Access Token from the environment rather than
# hardcoding it; a token committed to source control is a leaked credential.
GH_PAT = os.environ["GH_PAT"]
REPO_URL = "https://github.com/NitinBot001/Audio-url-new-js.git"
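# The token is expected in the environment before launch, e.g. (illustrative):
#   export GH_PAT="<personal access token with repo scope>"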

def download_model():
    os.makedirs(MODEL_DIR, exist_ok=True)  # Create /tmp/model if it does not exist
    if not os.path.exists(MODEL_PATH):
        print("Downloading model...")
        r = requests.get(
            "https://huggingface.co/MaziyarPanahi/calme-3.3-llamaloi-3b-GGUF/resolve/main/calme-3.3-llamaloi-3b.Q4_K_M.gguf",
            stream=True,
        )
        r.raise_for_status()  # Fail fast instead of writing an HTML error page to disk
        with open(MODEL_PATH, "wb") as f:
            for chunk in r.iter_content(chunk_size=8192):
                if chunk:  # Skip keep-alive chunks
                    f.write(chunk)
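
# Illustrative addition (not in the original script): check that the cached
# model file is complete by comparing its size on disk against the
# Content-Length the server reports. Assumes the Hugging Face CDN exposes
# Content-Length for this file after redirects.
def model_looks_complete():
    if not os.path.exists(MODEL_PATH):
        return False
    head = requests.head(
        "https://huggingface.co/MaziyarPanahi/calme-3.3-llamaloi-3b-GGUF/resolve/main/calme-3.3-llamaloi-3b.Q4_K_M.gguf",
        allow_redirects=True,
    )
    expected = int(head.headers.get("Content-Length", 0))
    return expected > 0 and os.path.getsize(MODEL_PATH) == expected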

def start_tunnel():
    # Start an nport tunnel exposing local port 5000 under the "ai-service" name
    tunnel_process = subprocess.Popen(
        ["npx", "nport", "-s", "ai-service", "-p", "5000"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    time.sleep(10)  # Give npx time to fetch and launch nport

    # Extract the tunnel URL from the process logs. Note that readline() blocks,
    # so this loop waits until nport prints the expected line (or exits,
    # closing its stdout).
    tunnel_url = None
    for line in iter(tunnel_process.stdout.readline, b""):
        line = line.decode("utf-8").strip()
        if "your domain is:" in line:
            tunnel_url = line.split("your domain is: ")[1]
            break

    if not tunnel_url:
        raise Exception("Failed to extract tunnel URL")

    return tunnel_url
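
# Illustrative addition (not in the original script): npx cold starts and
# tunnel handshakes can fail transiently, so a small retry loop around
# start_tunnel() makes startup more robust. The attempt count and backoff
# are assumed values.
def start_tunnel_with_retry(attempts=3, backoff_seconds=5):
    last_error = None
    for _ in range(attempts):
        try:
            return start_tunnel()
        except Exception as e:
            last_error = e
            time.sleep(backoff_seconds)
    raise last_error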

def push_tunnel_url_to_repo(tunnel_url):
    # Write the tunnel URL to instance.json
    instance_data = {"tunnel_url": tunnel_url}
    with open("/tmp/instance.json", "w") as f:
        json.dump(instance_data, f)

    # Clone the repository, embedding the token in the remote URL for auth
    repo_dir = "/tmp/repo"
    repo_url = f"https://x-access-token:{GH_PAT}@github.com/NitinBot001/Audio-url-new-js.git"

    # Remove any existing clone so we always start from a fresh checkout
    if os.path.exists(repo_dir):
        shutil.rmtree(repo_dir)

    subprocess.run(["git", "clone", repo_url, repo_dir], check=True)
    os.chdir(repo_dir)

    # Move instance.json into the repository working tree
    shutil.move("/tmp/instance.json", "instance.json")

    # Configure Git locally (without --global) so the identity applies only here
    subprocess.run(["git", "config", "user.email", "[email protected]"], check=True)
    subprocess.run(["git", "config", "user.name", "github-actions"], check=True)

    # Stage the file
    subprocess.run(["git", "add", "instance.json"], check=True)

    # Commit and push only if the file actually changed
    result = subprocess.run(["git", "status", "--porcelain"], stdout=subprocess.PIPE, text=True)
    if result.stdout.strip():
        subprocess.run(["git", "commit", "-m", f"Update tunnel URL to {tunnel_url}"], check=True)
        subprocess.run(["git", "push", "origin", "main"], check=True)
    else:
        print("No changes to commit.")

@app.route("/chat", methods=["GET"])
def chat():
    # Get the message from the query parameter
    message = request.args.get("message", "")
    if not message:
        return jsonify({"error": "message query parameter is required"}), 400

    # Build a Llama 3 style chat prompt without duplicate special tokens; each
    # header marker is followed by a blank line, and generation stops at the
    # end-of-turn token
    prompt = (
        f"<|begin_of_text|>"
        f"<|start_header_id|>user<|end_header_id|>\n\n"
        f"{message}<|eot_id|>"
        f"<|start_header_id|>assistant<|end_header_id|>\n\n"
    )
    output = llm(
        prompt,
        max_tokens=2048,
        stop=["<|eot_id|>"],
        temperature=0.8,
        top_p=0.9,
    )
    return jsonify({"response": output["choices"][0]["text"].strip()})
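
# Example request against a running instance (illustrative):
#   curl "http://localhost:5000/chat?message=Hello"
# which returns JSON of the form {"response": "..."}.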

if __name__ == "__main__":
    # Download the model (skipped if it is already on disk)
    download_model()

    # Initialize the LLM; chat() reads this module-level name at request time.
    # n_ctx matches the model's advertised 131072-token context, but the KV
    # cache for a window that large is memory-hungry; reduce it on small hosts.
    llm = Llama(
        model_path=MODEL_PATH,
        n_ctx=131072,
        n_threads=2,
        n_gpu_layers=0,
        verbose=False,
    )

    # Start the tunnel and publish its public URL to the repository
    tunnel_url = start_tunnel()
    push_tunnel_url_to_repo(tunnel_url)

    # Run the Flask app
    app.run(host="0.0.0.0", port=5000)