Shreyas094 committed
Commit caecf96 · verified · 1 parent: cf58eb7

Update app.py

Files changed (1): app.py (+75 -23)
app.py CHANGED
@@ -16,17 +16,21 @@ from huggingface_hub import InferenceClient
 import inspect
 import logging
 
+
 # Set up basic configuration for logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
 # Environment variables and configurations
 huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
 llama_cloud_api_key = os.environ.get("LLAMA_CLOUD_API_KEY")
+ACCOUNT_ID = os.environ.get("CLOUDFLARE_ACCOUNT_ID")
+AUTH_TOKEN = os.environ.get("CLOUDFLARE_AUTH_TOKEN")
 
 MODELS = [
     "mistralai/Mistral-7B-Instruct-v0.3",
     "mistralai/Mixtral-8x7B-Instruct-v0.1",
-    "meta-llama/Meta-Llama-3.1-8B-Instruct"
+    "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "@cf/meta/llama-3.1-8b-instruct"
 ]
 
 # Initialize LlamaParse
@@ -81,33 +85,81 @@ def update_vectors(files, parser):
 
     return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}."
 
+import json
+import requests
+
 def generate_chunked_response(prompt, model, max_tokens=1000, num_calls=3, temperature=0.2, should_stop=False):
     print(f"Starting generate_chunked_response with {num_calls} calls")
-    client = InferenceClient(model, token=huggingface_token)
     full_response = ""
     messages = [{"role": "user", "content": prompt}]
 
-    for i in range(num_calls):
-        print(f"Starting API call {i+1}")
-        if should_stop:
-            print("Stop clicked, breaking loop")
-            break
-        try:
-            for message in client.chat_completion(
-                messages=messages,
-                max_tokens=max_tokens,
-                temperature=temperature,
-                stream=True,
-            ):
-                if should_stop:
-                    print("Stop clicked during streaming, breaking")
-                    break
-                if message.choices and message.choices[0].delta and message.choices[0].delta.content:
-                    chunk = message.choices[0].delta.content
-                    full_response += chunk
-            print(f"API call {i+1} completed")
-        except Exception as e:
-            print(f"Error in generating response: {str(e)}")
+    if model == "@cf/meta/llama-3.1-8b-instruct":
+        # Cloudflare Workers AI API
+        for i in range(num_calls):
+            print(f"Starting Cloudflare API call {i+1}")
+            if should_stop:
+                print("Stop clicked, breaking loop")
+                break
+            try:
+                response = requests.post(
+                    f"https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai/run/@cf/meta/llama-3.1-8b-instruct",
+                    headers={"Authorization": f"Bearer {AUTH_TOKEN}"},
+                    json={
+                        "stream": True,
+                        "messages": [
+                            {"role": "system", "content": "You are a friendly assistant"},
+                            {"role": "user", "content": prompt}
+                        ],
+                        "max_tokens": max_tokens,
+                        "temperature": temperature
+                    },
+                    stream=True
+                )
+
+                for line in response.iter_lines():
+                    if should_stop:
+                        print("Stop clicked during streaming, breaking")
+                        break
+                    if not line:
+                        continue
+                    decoded = line.decode('utf-8')
+                    # Skip SSE lines without a JSON payload (e.g. the final "data: [DONE]")
+                    if not decoded.startswith('data: ') or decoded == 'data: [DONE]':
+                        continue
+                    try:
+                        json_data = json.loads(decoded[len('data: '):])
+                        full_response += json_data.get('response', '')
+                    except json.JSONDecodeError:
+                        continue
+                print(f"Cloudflare API call {i+1} completed")
+            except Exception as e:
+                print(f"Error in generating response from Cloudflare: {str(e)}")
+    else:
+        # Original Hugging Face API logic
+        client = InferenceClient(model, token=huggingface_token)
+
+        for i in range(num_calls):
+            print(f"Starting Hugging Face API call {i+1}")
+            if should_stop:
+                print("Stop clicked, breaking loop")
+                break
+            try:
+                for message in client.chat_completion(
+                    messages=messages,
+                    max_tokens=max_tokens,
+                    temperature=temperature,
+                    stream=True,
+                ):
+                    if should_stop:
+                        print("Stop clicked during streaming, breaking")
+                        break
+                    if message.choices and message.choices[0].delta and message.choices[0].delta.content:
+                        chunk = message.choices[0].delta.content
+                        full_response += chunk
+                print(f"Hugging Face API call {i+1} completed")
+            except Exception as e:
+                print(f"Error in generating response from Hugging Face: {str(e)}")
 
     # Clean up the response
     clean_response = re.sub(r'<s>\[INST\].*?\[/INST\]\s*', '', full_response, flags=re.DOTALL)
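
For reference, below is a minimal standalone sketch of the Cloudflare Workers AI call this commit adds, handy for checking credentials and the stream format outside the app. It is an illustration, not part of the commit: it assumes the CLOUDFLARE_ACCOUNT_ID and CLOUDFLARE_AUTH_TOKEN environment variables are set, and that the endpoint streams server-sent events whose payload lines carry a {"response": ...} chunk, which is the shape the diff above parses.

# Standalone sketch (not in the commit); assumes CLOUDFLARE_ACCOUNT_ID and
# CLOUDFLARE_AUTH_TOKEN are exported in the environment.
import json
import os

import requests

ACCOUNT_ID = os.environ["CLOUDFLARE_ACCOUNT_ID"]
AUTH_TOKEN = os.environ["CLOUDFLARE_AUTH_TOKEN"]
URL = f"https://api.cloudflare.com/client/v4/accounts/{ACCOUNT_ID}/ai/run/@cf/meta/llama-3.1-8b-instruct"

def stream_completion(prompt):
    # POST with stream=True so requests exposes the body line by line.
    response = requests.post(
        URL,
        headers={"Authorization": f"Bearer {AUTH_TOKEN}"},
        json={"stream": True, "messages": [{"role": "user", "content": prompt}]},
        stream=True,
    )
    response.raise_for_status()
    text = ""
    for line in response.iter_lines():
        if not line:
            continue
        decoded = line.decode("utf-8")
        # Payload lines look like: data: {"response": "..."}
        if not decoded.startswith("data: ") or decoded == "data: [DONE]":
            continue
        try:
            text += json.loads(decoded[len("data: "):]).get("response", "")
        except json.JSONDecodeError:
            continue
    return text

if __name__ == "__main__":
    print(stream_completion("Say hello in one sentence."))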
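
And a quick usage sketch of the dispatch the new code introduces (again an illustration, not part of the commit): the literal model string "@cf/meta/llama-3.1-8b-instruct" selects the Cloudflare branch, while every other entry in MODELS still goes through the Hugging Face InferenceClient branch. It assumes app.py's imports and tokens are in place and that generate_chunked_response goes on to return the cleaned response.

prompt = "Summarize the uploaded documents in three bullet points."

# Routed to the Cloudflare Workers AI branch:
cf_answer = generate_chunked_response(prompt, "@cf/meta/llama-3.1-8b-instruct", num_calls=1)

# Routed to the original Hugging Face branch:
hf_answer = generate_chunked_response(prompt, "mistralai/Mistral-7B-Instruct-v0.3", num_calls=1)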