NitinBot001 committed (verified)
Commit 362683e · 1 Parent(s): b0629f5

Update app.py

Files changed (1):
  1. app.py +19 -5
app.py CHANGED
@@ -29,7 +29,7 @@ def download_model():
 def start_tunnel():
     # Start nport tunnel
     tunnel_process = subprocess.Popen(
-        ["npx", "nport", "-s", "ai-service", "-p", "5000"],
+        ["npx", "nport", "-s", "ai-service-new", "-p", "5000"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
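Note: the rename to ai-service-new only changes the requested subdomain; the service still has to read the public URL back from the child process it just started. A minimal sketch of that step, assuming nport prints its public URL to stdout (the output format and the helper name read_tunnel_url are assumptions, not part of this commit):

import re
import subprocess

def read_tunnel_url(tunnel_process: subprocess.Popen, max_lines: int = 50):
    # Scan the first few stdout lines for an https URL (format assumed).
    for _ in range(max_lines):
        line = tunnel_process.stdout.readline().decode("utf-8", errors="replace")
        if not line:
            break
        match = re.search(r"https://\S+", line)
        if match:
            return match.group(0)
    return None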
@@ -56,8 +56,9 @@ def push_tunnel_url_to_repo(tunnel_url):
 
     # Clone the repository
     repo_dir = "/tmp/repo"
+    repo_url = f"https://x-access-token:{GH_PAT}@github.com/NitinBot001/Audio-url-new-js.git"
     subprocess.run(
-        ["git", "clone", f"https://x-access-token:{GH_PAT}@{REPO_URL.split('https://')[1]}", repo_dir],
+        ["git", "clone", repo_url, repo_dir],
        check=True,
    )
    os.chdir(repo_dir)
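Hardcoding the tokenized remote URL works, but note that git stores it verbatim in the clone's .git/config. The commit-and-push step that presumably follows the clone is sketched below; the file name tunnel_url.txt and the git identity values are assumptions for illustration, not taken from this diff:

import os
import subprocess

def commit_tunnel_url(repo_dir: str, tunnel_url: str) -> None:
    # Write the URL into the clone (hypothetical file name).
    with open(os.path.join(repo_dir, "tunnel_url.txt"), "w") as f:
        f.write(tunnel_url + "\n")
    # A fresh clone has no committer identity; set one (assumed values).
    subprocess.run(["git", "-C", repo_dir, "config", "user.email", "bot@example.com"], check=True)
    subprocess.run(["git", "-C", repo_dir, "config", "user.name", "tunnel-bot"], check=True)
    subprocess.run(["git", "-C", repo_dir, "add", "tunnel_url.txt"], check=True)
    # git commit exits non-zero if the URL is unchanged, so check=True raises then.
    subprocess.run(["git", "-C", repo_dir, "commit", "-m", "Update tunnel URL"], check=True)
    subprocess.run(["git", "-C", repo_dir, "push"], check=True)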
@@ -77,8 +78,21 @@ def push_tunnel_url_to_repo(tunnel_url):
 @app.route("/chat", methods=["POST"])
 def chat():
     data = request.json
-    prompt = f"<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n{data.get('message','')}<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n"
-    output = llm(prompt, max_tokens=2048, stop=["<|eot_id|>"], temperature=0.8, top_p=0.9)
+    # Construct the prompt without duplicate special tokens
+    prompt = (
+        f"<|begin_of_text|>"
+        f"<|start_header_id|>user<|end_header_id|>\n"
+        f"{data.get('message', '')}"
+        f"<|eot_id|>\n"
+        f"<|start_header_id|>assistant<|end_header_id|>\n"
+    )
+    output = llm(
+        prompt,
+        max_tokens=2048,
+        stop=["<|eot_id|>"],
+        temperature=0.8,
+        top_p=0.9,
+    )
     return jsonify({"response": output["choices"][0]["text"].strip()})
 
 if __name__ == "__main__":
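The rewritten prompt spells out the Llama 3 chat markers one per line, which makes accidental duplicates easier to spot. An alternative route, shown only as a variant and not what this commit does, is to let llama-cpp-python apply the GGUF's bundled chat template via create_chat_completion; the helper name chat_via_template is hypothetical:

# Variant sketch (not this commit's approach): create_chat_completion
# formats the special tokens itself, so the handler never hand-assembles
# <|begin_of_text|> / <|eot_id|> markers.
def chat_via_template(llm, message: str) -> str:
    output = llm.create_chat_completion(
        messages=[{"role": "user", "content": message}],
        max_tokens=2048,
        temperature=0.8,
        top_p=0.9,
    )
    return output["choices"][0]["message"]["content"].strip()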
@@ -88,7 +102,7 @@ if __name__ == "__main__":
     # Initialize the LLM
     llm = Llama(
         model_path=MODEL_PATH,
-        n_ctx=8192,
+        n_ctx=131072,  # Set to match the training context length
        n_threads=2,
        n_gpu_layers=0,
        verbose=False,
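Raising n_ctx from 8192 to 131072 matches a 128K training context, but the KV cache grows linearly with n_ctx, and this instance runs CPU-only (n_gpu_layers=0), so the allocation can run to several gigabytes. A hedged sketch of making the window configurable instead; the N_CTX environment variable name is an assumption:

import os
from llama_cpp import Llama

# Fall back to the old 8192-token window unless the host opts into more.
n_ctx = int(os.environ.get("N_CTX", "8192"))
llm = Llama(
    model_path=MODEL_PATH,  # MODEL_PATH as defined earlier in app.py
    n_ctx=n_ctx,
    n_threads=2,
    n_gpu_layers=0,
    verbose=False,
)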