Spaces:
Sleeping
Sleeping
File size: 4,150 Bytes
6cb858c 9dc0437 b0629f5 6cb858c 9dc0437 6cb858c 3e949dd 6cb858c 9dc0437 6cb858c 5853ec7 362683e 3e949dd 6cb858c 362683e 6cb858c 5853ec7 6cb858c 9dc0437 6cb858c 5853ec7 3e949dd 6cb858c 3e949dd 6cb858c 3e949dd 362683e 3e949dd 362683e 6cb858c 362683e 6cb858c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 |
import json
import os
import shutil
import subprocess
import time

import requests
from flask import Flask, request, jsonify
from llama_cpp import Llama
app = Flask(__name__)
# Use /tmp directory for storing the model (only writable path on many free hosts).
MODEL_DIR = "/tmp/model"
MODEL_PATH = os.path.join(MODEL_DIR, "calme-3.3-llamaloi-3b.Q4_K_M.gguf")
# SECURITY: the token was previously hardcoded here — a leaked credential that
# must be revoked. Read it from the environment instead; push_tunnel_url_to_repo
# will fail at git-clone time if it is missing, which is the desired behavior.
GH_PAT = os.environ.get("GH_PAT", "")  # GitHub Personal Access Token
REPO_URL = "https://github.com/NitinBot001/Audio-url-new-js.git"
def download_model():
    """Download the GGUF model into MODEL_DIR if it is not already present.

    Streams the file in 8 KiB chunks to keep memory use flat. Raises
    ``requests.HTTPError`` if the server returns an error status, so a
    failed download can no longer leave an HTML error page saved as the
    "model" file.
    """
    os.makedirs(MODEL_DIR, exist_ok=True)  # Create the /tmp/model directory
    if os.path.exists(MODEL_PATH):
        return
    print("Downloading model...")
    url = (
        "https://huggingface.co/MaziyarPanahi/calme-3.3-llamaloi-3b-GGUF"
        "/resolve/main/calme-3.3-llamaloi-3b.Q4_K_M.gguf"
    )
    # timeout guards the connect/read phases; the context manager closes the
    # response even if writing fails partway through.
    with requests.get(url, stream=True, timeout=60) as r:
        r.raise_for_status()
        with open(MODEL_PATH, "wb") as f:
            for chunk in r.iter_content(chunk_size=8192):
                if chunk:  # skip keep-alive chunks
                    f.write(chunk)
def start_tunnel():
    """Start an nport tunnel to local port 5000 and return its public URL.

    Returns:
        str: the tunnel domain parsed from the nport process output.

    Raises:
        RuntimeError: if no tunnel URL can be parsed from the output.
    """
    # Merge stderr into stdout: an unread stderr PIPE could fill the OS pipe
    # buffer and deadlock the child process.
    tunnel_process = subprocess.Popen(
        ["npx", "nport", "-s", "ai-service", "-p", "5000"],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    time.sleep(10)  # Wait for the tunnel to establish
    # Scan the process output for the announced domain.
    tunnel_url = None
    for raw_line in iter(tunnel_process.stdout.readline, b""):
        text = raw_line.decode("utf-8").strip()
        if "your domain is:" in text:
            tunnel_url = text.split("your domain is: ")[1]
            break
    if not tunnel_url:
        # Don't leak the child process when startup failed.
        tunnel_process.terminate()
        raise RuntimeError("Failed to extract tunnel URL")
    return tunnel_url
def push_tunnel_url_to_repo(tunnel_url):
    """Commit and push ``instance.json`` containing *tunnel_url* to the repo.

    Clones the target repository fresh into /tmp/repo, writes the JSON file
    directly inside it, and pushes to ``main`` only if the content changed.

    Args:
        tunnel_url: public URL returned by start_tunnel().
    """
    repo_dir = "/tmp/repo"
    # Token-authenticated clone URL; GH_PAT comes from the environment.
    repo_url = f"https://x-access-token:{GH_PAT}@github.com/NitinBot001/Audio-url-new-js.git"
    # Start from a clean checkout every time (stdlib instead of `rm -rf`).
    if os.path.exists(repo_dir):
        shutil.rmtree(repo_dir)
    subprocess.run(["git", "clone", repo_url, repo_dir], check=True)
    # Write instance.json straight into the repo — no temp file + `mv` needed.
    with open(os.path.join(repo_dir, "instance.json"), "w") as f:
        json.dump({"tunnel_url": tunnel_url}, f)
    # Use cwd= for every git command instead of os.chdir, which would mutate
    # process-global state for the rest of the server's lifetime.
    git = lambda *args: subprocess.run(["git", *args], cwd=repo_dir, check=True)
    # Configure Git locally (without --global)
    git("config", "user.email", "[email protected]")
    git("config", "user.name", "github-actions")
    git("add", "instance.json")
    # Commit/push only when the file actually changed.
    status = subprocess.run(
        ["git", "status", "--porcelain"],
        cwd=repo_dir,
        stdout=subprocess.PIPE,
        text=True,
    )
    if status.stdout.strip():
        git("commit", "-m", f"Update tunnel URL to {tunnel_url}")
        git("push", "origin", "main")
    else:
        print("No changes to commit.")
@app.route("/chat", methods=["GET"])
def chat():
    """Handle GET /chat?message=... and return the model's reply as JSON."""
    user_message = request.args.get("message", "")
    # Llama-3 chat template: one user turn, then the assistant header so the
    # model continues as the assistant. No duplicate special tokens.
    prompt = "".join(
        [
            "<|begin_of_text|>",
            "<|start_header_id|>user<|end_header_id|>\n",
            user_message,
            "<|eot_id|>\n",
            "<|start_header_id|>assistant<|end_header_id|>\n",
        ]
    )
    completion = llm(
        prompt,
        max_tokens=2048,
        stop=["<|eot_id|>"],
        temperature=0.8,
        top_p=0.9,
    )
    reply = completion["choices"][0]["text"].strip()
    return jsonify({"response": reply})
if __name__ == "__main__":
    # Fetch the model weights first; Llama() needs the file on disk.
    download_model()
    # CPU-only inference with a small thread count (free-tier hardware).
    llm = Llama(
        model_path=MODEL_PATH,
        n_ctx=131072,  # Set to match the training context length
        n_threads=2,
        n_gpu_layers=0,
        verbose=False,
    )
    # Expose the local server publicly, then publish the tunnel URL so
    # clients can discover this instance.
    public_url = start_tunnel()
    push_tunnel_url_to_repo(public_url)
    # Serve the /chat endpoint.
    app.run(host="0.0.0.0", port=5000)