Update app.py
app.py CHANGED
@@ -29,7 +29,7 @@ def download_model():
 def start_tunnel():
     # Start nport tunnel
     tunnel_process = subprocess.Popen(
-        ["npx", "nport", "-s", "ai-service", "-p", "5000"],
+        ["npx", "nport", "-s", "ai-service-new", "-p", "5000"],
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,
     )
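This hunk only renames the nport subdomain from ai-service to ai-service-new. For context, a minimal sketch of how start_tunnel() could read the public URL back from the tunnel process — assuming nport prints the assigned URL to stdout, which this diff does not confirm:

import re
import subprocess

def start_tunnel():
    # Start the nport tunnel; text=True decodes stdout to str
    tunnel_process = subprocess.Popen(
        ["npx", "nport", "-s", "ai-service-new", "-p", "5000"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    # Scan output for the first https:// URL the tool prints
    # (nport's exact output format is an assumption here)
    for line in tunnel_process.stdout:
        match = re.search(r"https://\S+", line)
        if match:
            return tunnel_process, match.group(0)
    raise RuntimeError("nport exited without printing a tunnel URL")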
@@ -56,8 +56,9 @@ def push_tunnel_url_to_repo(tunnel_url):
 
     # Clone the repository
     repo_dir = "/tmp/repo"
+    repo_url = f"https://x-access-token:{GH_PAT}@github.com/NitinBot001/Audio-url-new-js.git"
     subprocess.run(
-        ["git", "clone",
+        ["git", "clone", repo_url, repo_dir],
         check=True,
     )
     os.chdir(repo_dir)
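The new repo_url embeds the GH_PAT token in the clone URL; note that the token then persists in plain text in /tmp/repo/.git/config. The rest of push_tunnel_url_to_repo is outside this diff, but a plausible sketch of the commit-and-push half — the file name, author identity, and branch are all assumptions — would be:

import os
import subprocess

def commit_and_push(tunnel_url, repo_dir="/tmp/repo"):
    os.chdir(repo_dir)
    # "tunnel_url.txt" is a hypothetical file name; the real repo may
    # store the URL under a different path or format
    with open("tunnel_url.txt", "w") as f:
        f.write(tunnel_url)
    subprocess.run(["git", "config", "user.email", "bot@example.com"], check=True)
    subprocess.run(["git", "config", "user.name", "tunnel-bot"], check=True)
    subprocess.run(["git", "add", "tunnel_url.txt"], check=True)
    # git commit exits non-zero when nothing changed, so no check=True here
    subprocess.run(["git", "commit", "-m", "Update tunnel URL"])
    subprocess.run(["git", "push"], check=True)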
@@ -77,8 +78,21 @@ def push_tunnel_url_to_repo(tunnel_url):
 @app.route("/chat", methods=["POST"])
 def chat():
     data = request.json
-    prompt
-
+    # Construct the prompt without duplicate special tokens
+    prompt = (
+        f"<|begin_of_text|>"
+        f"<|start_header_id|>user<|end_header_id|>\n"
+        f"{data.get('message', '')}"
+        f"<|eot_id|>\n"
+        f"<|start_header_id|>assistant<|end_header_id|>\n"
+    )
+    output = llm(
+        prompt,
+        max_tokens=2048,
+        stop=["<|eot_id|>"],
+        temperature=0.8,
+        top_p=0.9,
+    )
     return jsonify({"response": output["choices"][0]["text"].strip()})
 
 if __name__ == "__main__":
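The rewritten handler builds a Llama 3-style chat prompt by hand (<|begin_of_text|>, <|start_header_id|>, <|eot_id|>) and stops generation at <|eot_id|>. One hedge: depending on the llama-cpp-python version, the tokenizer may prepend its own BOS token, so it is worth verifying the final token stream really has no duplicated specials. A quick client-side check of the endpoint — the tunnel hostname below is illustrative only:

import requests

resp = requests.post(
    "https://ai-service-new.example.dev/chat",  # hypothetical tunnel URL
    json={"message": "Summarize what this service does."},
    timeout=120,
)
resp.raise_for_status()
print(resp.json()["response"])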
@@ -88,7 +102,7 @@ if __name__ == "__main__":
|
|
88 |
# Initialize the LLM
|
89 |
llm = Llama(
|
90 |
model_path=MODEL_PATH,
|
91 |
-
n_ctx=
|
92 |
n_threads=2,
|
93 |
n_gpu_layers=0,
|
94 |
verbose=False,
|
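Raising n_ctx to the full 131,072-token training context is not free: llama.cpp allocates the KV cache up front, and it grows linearly with context length. A back-of-envelope estimate, with every model dimension below assumed purely for illustration:

# All dimensions are illustrative assumptions (roughly an 8B Llama-class
# model with grouped-query attention); substitute the real model config.
n_ctx      = 131072  # value set in this commit
n_layers   = 32
n_kv_heads = 8
head_dim   = 128
bytes_per  = 2       # fp16 cache entries
kv_bytes = n_ctx * n_layers * n_kv_heads * head_dim * 2 * bytes_per  # x2 for K and V
print(f"KV cache ~ {kv_bytes / 2**30:.1f} GiB")  # ~16 GiB with these numbers

On a CPU-only Space (n_gpu_layers=0, n_threads=2), a smaller n_ctx is likely the safer default unless requests genuinely need very long prompts.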