Moonfanz committed on
Commit
970e6a2
·
verified ·
1 Parent(s): aa59d15

Upload 4 files

Browse files
Files changed (4) hide show
  1. Dockerfile +10 -0
  2. app.py +659 -0
  3. func.py +112 -0
  4. requirements.txt +6 -0
Dockerfile ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
# Base image: slim Python 3.9.
FROM python:3.9-slim

# All following paths are relative to /app inside the image.
WORKDIR /app

# Install dependencies before copying the sources; presumably so the pip
# layer can be reused when only application code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the build context (app.py, func.py, ...).
COPY . .

# Run app.py as a script; its __main__ block starts the Flask server.
CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,659 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify, Response, stream_with_context
2
+ import json
3
+ import os
4
+ import re
5
+ import logging
6
+ import func
7
+ from datetime import datetime, timedelta
8
+ from apscheduler.schedulers.background import BackgroundScheduler
9
+ import time
10
+ import requests
11
+ from collections import deque
12
+ import random
13
+ from dataclasses import dataclass
14
+ from typing import Optional, Dict, Any
15
+
16
# Pin the process time zone. Changing TZ after interpreter start-up is only
# guaranteed to take effect once time.tzset() has been called, and tzset()
# does not exist on every platform, hence the hasattr() guard.
os.environ['TZ'] = 'Asia/Shanghai'
if hasattr(time, 'tzset'):
    time.tzset()

# Single Flask application object; the route decorators below attach to it.
app = Flask(__name__)

# A fresh random secret on every start: anything signed with it becomes
# invalid after a restart.
app.secret_key = os.urandom(24)
23
+
24
# Module logger that prints bare messages (no timestamp or level prefix).
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(message)s')
handler = logging.StreamHandler()
handler.setFormatter(formatter)
logger.addHandler(handler)


def _int_from_env(name, default):
    """Read integer setting *name* from the environment.

    A missing or whitespace-only value falls back to *default* (a numeric
    string); any other non-numeric value raises ValueError.
    """
    raw = os.environ.get(name, default).strip()
    return int(raw or default)


# Tunables read once at import time.
MAX_RETRIES = _int_from_env('MaxRetries', '3')
MAX_REQUESTS = _int_from_env('MaxRequests', '2')
LIMIT_WINDOW = _int_from_env('LimitWindow', '60')

# Base and cap, in seconds, for the exponential back-off on network errors.
RETRY_DELAY = 1
MAX_RETRY_DELAY = 16

# api key -> deque of request timestamps inside the current window.
request_counts = {}

# Keys temporarily taken out of rotation, and for how long (seconds).
api_key_blacklist = set()
api_key_blacklist_duration = 60

_HARM_CATEGORIES = (
    "HARM_CATEGORY_HARASSMENT",
    "HARM_CATEGORY_HATE_SPEECH",
    "HARM_CATEGORY_SEXUALLY_EXPLICIT",
    "HARM_CATEGORY_DANGEROUS_CONTENT",
    'HARM_CATEGORY_CIVIC_INTEGRITY',
)

# Default payload: every category at the "BLOCK_NONE" threshold.
safety_settings = [
    {"category": category, "threshold": "BLOCK_NONE"}
    for category in _HARM_CATEGORIES
]

# Variant sent for models whose name contains 'gemini-2.0-flash-exp'.
safety_settings_g2 = [
    {"category": category, "threshold": 'OFF'}
    for category in _HARM_CATEGORIES
]
88
@dataclass
class GeneratedText:
    """A piece of generated text plus the optional reason generation stopped."""

    text: str
    finish_reason: Optional[str] = None
92
+
93
+
94
class ResponseWrapper:
    """Read-only view over a decoded, non-streaming generateContent response.

    All values are extracted once in ``__init__`` from the first candidate
    and from ``usageMetadata``. Missing or malformed sections yield ``""``
    for text fields and ``None`` for everything else instead of raising.
    """

    def __init__(self, data: Dict[Any, Any]):
        self._data = data
        self._text = self._extract_text()
        self._finish_reason = self._extract_finish_reason()
        self._prompt_token_count = self._extract_prompt_token_count()
        self._candidates_token_count = self._extract_candidates_token_count()
        self._total_token_count = self._extract_total_token_count()
        self._thoughts = self._extract_thoughts()
        self._json_dumps = json.dumps(self._data, indent=4, ensure_ascii=False)

    def _first_candidate_parts(self) -> list:
        """Return the ``parts`` list of the first candidate, or ``[]``."""
        try:
            parts = self._data['candidates'][0]['content']['parts']
        except (KeyError, IndexError, TypeError):
            # TypeError covers ``"candidates": null`` and similar shapes.
            return []
        return parts if isinstance(parts, list) else []

    def _first_part_text(self, want_thought: bool) -> str:
        """Return the text of the first part whose thought flag matches.

        A part counts as a thought when it has a ``thought`` key. Parts
        without a ``text`` key are skipped rather than aborting the search,
        so a non-text part in front of the answer no longer blanks it.
        """
        for part in self._first_candidate_parts():
            if not isinstance(part, dict):
                continue
            if ('thought' in part) != want_thought:
                continue
            text = part.get('text')
            if text is not None:
                return text
        return ""

    def _usage_field(self, name: str) -> Optional[int]:
        """Return ``usageMetadata[name]`` or ``None`` when unavailable."""
        usage = self._data.get('usageMetadata') if isinstance(self._data, dict) else None
        return usage.get(name) if isinstance(usage, dict) else None

    def _extract_thoughts(self) -> Optional[str]:
        return self._first_part_text(want_thought=True)

    def _extract_text(self) -> str:
        return self._first_part_text(want_thought=False)

    def _extract_finish_reason(self) -> Optional[str]:
        try:
            return self._data['candidates'][0].get('finishReason')
        except (KeyError, IndexError, TypeError, AttributeError):
            return None

    def _extract_prompt_token_count(self) -> Optional[int]:
        return self._usage_field('promptTokenCount')

    def _extract_candidates_token_count(self) -> Optional[int]:
        return self._usage_field('candidatesTokenCount')

    def _extract_total_token_count(self) -> Optional[int]:
        return self._usage_field('totalTokenCount')

    @property
    def text(self) -> str:
        """Text of the first non-thought part, or ``""``."""
        return self._text

    @property
    def finish_reason(self) -> Optional[str]:
        """``finishReason`` of the first candidate, if present."""
        return self._finish_reason

    @property
    def prompt_token_count(self) -> Optional[int]:
        return self._prompt_token_count

    @property
    def candidates_token_count(self) -> Optional[int]:
        return self._candidates_token_count

    @property
    def total_token_count(self) -> Optional[int]:
        return self._total_token_count

    @property
    def thoughts(self) -> Optional[str]:
        """Text of the first thought part, or ``""``."""
        return self._thoughts

    @property
    def json_dumps(self) -> str:
        """The whole payload pretty-printed as JSON (non-ASCII kept as is)."""
        return self._json_dumps
174
+
175
class APIKeyManager:
    """Round-robin pool of API keys parsed from the ``KeyArray`` env var."""

    def __init__(self):
        # A missing variable is treated as "no keys" instead of handing None
        # to re.findall, which would raise TypeError.
        raw_keys = os.environ.get('KeyArray') or ''
        self.api_keys = re.findall(r"AIzaSy[a-zA-Z0-9_-]{33}", raw_keys)
        # Start at a random position so restarts do not always hit the same
        # key first; with no keys there is nothing to randomise over.
        self.current_index = random.randrange(len(self.api_keys)) if self.api_keys else 0

    def get_available_key(self):
        """Return the next key that is not blacklisted, or None if all are."""
        num_keys = len(self.api_keys)
        for _ in range(num_keys):
            if self.current_index >= num_keys:
                self.current_index = 0
            current_key = self.api_keys[self.current_index]
            self.current_index += 1

            if current_key not in api_key_blacklist:
                return current_key

        logger.error("所有API key都已耗尽或被暂时禁用,请重新配置或稍后重试")
        return None

    def show_all_keys(self):
        """Log the number of configured keys and a masked form of each."""
        logger.info(f"当前可用API key个数: {len(self.api_keys)} ")
        for i, api_key in enumerate(self.api_keys):
            logger.info(f"API Key{i}: {api_key[:8]}...{api_key[-3:]}")

    def blacklist_key(self, key):
        """Remove *key* from rotation for ``api_key_blacklist_duration`` seconds."""
        logger.warning(f"{key[:8]} → 暂时禁用 {api_key_blacklist_duration} 秒")
        api_key_blacklist.add(key)

        def _release():
            api_key_blacklist.discard(key)

        # The module-level scheduler only exists when the file is run as a
        # script. Fall back to a daemon timer when the module is imported
        # (e.g. by a WSGI server) so blacklisting cannot raise NameError.
        active_scheduler = globals().get('scheduler')
        if active_scheduler is not None:
            active_scheduler.add_job(
                _release,
                'date',
                run_date=datetime.now() + timedelta(seconds=api_key_blacklist_duration),
            )
        else:
            import threading

            timer = threading.Timer(api_key_blacklist_duration, _release)
            timer.daemon = True
            timer.start()
204
+
205
# Build the key pool once at import time and pick the first usable key.
key_manager = APIKeyManager()
key_manager.show_all_keys()
current_api_key = key_manager.get_available_key()


def switch_api_key():
    """Advance the module-wide ``current_api_key`` to the next usable key.

    When no key is available the current key is left untouched and an error
    is logged.
    """
    global current_api_key
    key = key_manager.get_available_key()
    if key:
        current_api_key = key
        logger.info(f"API key 替换为 → {current_api_key[:8]}...{current_api_key[-3:]}")
    else:
        logger.error("API key 替换失败,所有API key都已耗尽或被暂时禁用,请重新配置或稍后重试")


# get_available_key() returns None when nothing is usable; slicing None
# would abort the import with a TypeError.
if current_api_key:
    logger.info(f"当前 API key: {current_api_key[:8]}...{current_api_key[-3:]}")
else:
    logger.error("未找到可用的 API key,请检查 KeyArray 环境变量")
219
+
220
# Model ids advertised by the /hf/v1/models endpoint, in display order.
_GEMINI_MODEL_IDS = (
    "text-embedding-004",
    "gemini-1.5-flash-8b-latest",
    "gemini-1.5-flash-8b-exp-0924",
    "gemini-1.5-flash-latest",
    "gemini-1.5-flash-exp-0827",
    "gemini-1.5-pro-latest",
    "gemini-1.5-pro-exp-0827",
    "learnlm-1.5-pro-experimental",
    "gemini-exp-1114",
    "gemini-exp-1121",
    "gemini-exp-1206",
    "gemini-2.0-flash-exp",
    "gemini-2.0-flash-thinking-exp-1219",
    "gemini-2.0-flash-thinking-exp-01-21",
    "gemini-2.0-pro-exp-02-05",
)

GEMINI_MODELS = [{"id": model_id} for model_id in _GEMINI_MODEL_IDS]
237
+
238
+
239
def is_within_rate_limit(api_key):
    """Check whether *api_key* may be used for another request right now.

    Timestamps older than ``LIMIT_WINDOW`` seconds are dropped from the
    key's history first.

    Returns:
        ``(True, 0)`` when fewer than ``MAX_REQUESTS`` requests remain in the
        window, otherwise ``(False, seconds)`` where *seconds* is the time
        until the oldest remaining request leaves the window.
    """
    current_time = datetime.now()
    window = timedelta(seconds=LIMIT_WINDOW)
    history = request_counts.setdefault(api_key, deque())

    # Timestamps are appended in order, so expired ones are all at the left.
    while history and history[0] < current_time - window:
        history.popleft()

    if len(history) < MAX_REQUESTS:
        return True, 0

    retry_after = (history[0] + window) - current_time
    return False, retry_after.total_seconds()
253
+
254
def increment_request_count(api_key):
    """Record the current time as one more request made with *api_key*."""
    timestamp = datetime.now()
    request_counts.setdefault(api_key, deque()).append(timestamp)
259
+
260
def handle_api_error(error, attempt, current_api_key):
    """Classify a failed upstream request and decide what the caller does next.

    Args:
        error: The ``requests`` exception raised by the upstream call.
        attempt: 1-based number of the attempt that failed.
        current_api_key: The key that was used for the failed attempt.

    Returns:
        A ``(status, payload)`` tuple. ``status`` 0 asks the caller to retry
        and 2 asks it to give up; ``payload`` is either ``None`` or a Flask
        JSON response describing the error.

    Side effects: may blacklist the key, switch the module-wide current key,
    and sleep with exponential back-off on connection errors and timeouts.
    """
    if attempt > MAX_RETRIES:
        logger.error(f"{MAX_RETRIES} 次尝试后仍然失败,请修改预设或输入")
        return 0, jsonify({
            'error': {
                'message': f"{MAX_RETRIES} 次尝试后仍然失败,请修改预设或输入",
                'type': 'max_retries_exceeded'
            }
        })

    key_label = f"{current_api_key[:8]} ... {current_api_key[-3:]}"

    if isinstance(error, requests.exceptions.HTTPError):
        status_code = error.response.status_code

        if status_code == 400:
            try:
                error_data = error.response.json()
            except ValueError:
                logger.warning("400 错误请求:响应不是有效的JSON格式")
                return 2, jsonify({'error': {'message': '', 'type': 'invalid_request_error'}})

            # A 400 body without a usable "error" object used to leave this
            # function without a proper (status, payload) tuple; treat it as
            # a generic bad request instead.
            error_info = error_data.get('error') if isinstance(error_data, dict) else None
            if not isinstance(error_info, dict):
                error_info = {}

            # NOTE(review): upstream error bodies may carry a numeric "code"
            # plus a separate status string; confirm this comparison can
            # actually match.
            if error_info.get('code') == "invalid_argument":
                logger.error(f"{key_label} → 无效,可能已过期或被删除")
                key_manager.blacklist_key(current_api_key)
                switch_api_key()
                return 0, None

            error_message = error_info.get('message', 'Bad Request')
            error_type = error_info.get('type', 'invalid_request_error')
            logger.warning(f"400 错误请求: {error_message}")
            return 2, jsonify({'error': {'message': error_message, 'type': error_type}})

        if status_code == 429:
            logger.warning(f"{key_label} → 429 官方资源耗尽 → 立即重试...")
            key_manager.blacklist_key(current_api_key)
            switch_api_key()
            return 0, None

        if status_code == 403:
            logger.error(f"{key_label} → 403 权限被拒绝,该 API KEY 可能已经被官方封禁")
            key_manager.blacklist_key(current_api_key)
            switch_api_key()
            return 0, None

        if status_code == 500:
            logger.warning(f"{key_label} → 500 服务器内部错误 → 立即重试...")
            switch_api_key()
            return 0, None

        if status_code == 503:
            logger.warning(f"{key_label} → 503 服务不可用 → 立即重试...")
            switch_api_key()
            return 0, None

        # Any other HTTP status: rotate the key but do not retry.
        logger.warning(f"{key_label} → {status_code} 未知错误/模型不可用 → 不重试...")
        switch_api_key()
        return 2, None

    if isinstance(error, requests.exceptions.ConnectionError):
        delay = min(RETRY_DELAY * (2 ** attempt), MAX_RETRY_DELAY)
        logger.warning("连接错误 → 立即重试...")
        time.sleep(delay)
        return 0, None

    if isinstance(error, requests.exceptions.Timeout):
        delay = min(RETRY_DELAY * (2 ** attempt), MAX_RETRY_DELAY)
        logger.warning("请求超时 → 立即重试...")
        time.sleep(delay)
        return 0, None

    logger.error(f"发生未知错误: {error}")
    return 0, jsonify({
        'error': {
            'message': f"发生未知错误: {error}",
            'type': 'unknown_error'
        }
    })
348
+
349
@app.route('/hf/v1/chat/completions', methods=['POST'])
def chat_completions():
    """OpenAI-style chat completion endpoint backed by the Gemini REST API.

    Authenticates the caller, converts the OpenAI ``messages`` list via
    ``func.process_messages_for_gemini``, and then tries the upstream call up
    to ``MAX_RETRIES`` times, rotating API keys as decided by
    ``handle_api_error``. With ``stream`` set, the upstream SSE stream is
    re-emitted as ``chat.completion.chunk`` events; otherwise one
    ``chat.completion`` JSON object is returned.
    """
    is_authenticated, auth_error, status_code = func.authenticate_request(request)
    if not is_authenticated:
        return auth_error if auth_error else jsonify({'error': '未授权'}), status_code if status_code else 401

    # silent=True turns a missing or malformed body into None so the caller
    # gets a JSON 400 instead of an AttributeError further down.
    request_data = request.get_json(silent=True)
    if not isinstance(request_data, dict):
        return jsonify({
            'error': {
                'message': 'Request body must be a JSON object',
                'type': 'invalid_request_error'
            }
        }), 400

    messages = request_data.get('messages', [])
    model = request_data.get('model', 'gemini-2.0-flash-exp')
    temperature = request_data.get('temperature', 1)
    max_tokens = request_data.get('max_tokens', 8192)
    show_thoughts = request_data.get('show_thoughts', False)
    stream = request_data.get('stream', False)
    use_system_prompt = request_data.get('use_system_prompt', False)
    hint = "流式" if stream else "非流"

    # The key pool can be empty; slicing None below would raise.
    if not current_api_key:
        return jsonify({
            'error': {
                'message': 'No API key is currently available',
                'type': 'internal_server_error'
            }
        }), 503

    logger.info(f"\n{model} [{hint}] → {current_api_key[:8]}...{current_api_key[-3:]}")
    is_thinking = 'thinking' in model
    api_version = 'v1alpha' if is_thinking else 'v1beta'
    response_type = 'streamGenerateContent' if stream else 'generateContent'
    is_SSE = '&alt=sse' if stream else ''

    contents, system_instruction, error_response = func.process_messages_for_gemini(messages, use_system_prompt)

    if error_response:
        logger.error(f"处理输入消息时出错↙\n {error_response}")
        # The helper hands back a ready-made (response, status) tuple;
        # passing that tuple to jsonify() again would raise.
        if isinstance(error_response, tuple):
            return error_response
        return jsonify(error_response), 400

    def do_request(current_api_key, attempt):
        """Perform one upstream call; return ``(status, payload)``.

        Status 1 means success (payload is the raw response when streaming,
        else a ResponseWrapper); 0 and 2 follow handle_api_error.
        """
        isok, time_remaining = is_within_rate_limit(current_api_key)
        if not isok:
            logger.warning(f"暂时超过限额,该API key将在 {time_remaining} 秒后启用...")
            switch_api_key()
            return 0, None

        increment_request_count(current_api_key)

        url = f"https://generativelanguage.googleapis.com/{api_version}/models/{model}:{response_type}?key={current_api_key}{is_SSE}"
        headers = {
            "Content-Type": "application/json",
        }

        data = {
            "contents": contents,
            "generationConfig": {
                "temperature": temperature,
                "maxOutputTokens": max_tokens,
            },
            "safetySettings": safety_settings_g2 if 'gemini-2.0-flash-exp' in model else safety_settings,
        }
        # Only forward a system instruction that actually contains text; the
        # helper always returns a dict, even when the text is empty.
        has_system_text = bool(system_instruction) and any(
            part.get('text') for part in system_instruction.get('parts', [])
        )
        if has_system_text:
            data["system_instruction"] = system_instruction

        try:
            response = requests.post(url, headers=headers, json=data, stream=True)
            response.raise_for_status()

            if stream:
                return 1, response
            else:
                return 1, ResponseWrapper(response.json())
        except requests.exceptions.RequestException as e:
            return handle_api_error(e, attempt, current_api_key)

    def generate_stream(response):
        """Translate the upstream SSE stream into OpenAI-style chunk events."""
        logger.info("流式开始 →")
        buffer = b""
        try:
            for line in response.iter_lines():
                if not line:
                    continue
                try:
                    if line.startswith(b'data: '):
                        line = line[6:]

                    # A JSON document may span several lines; keep appending
                    # until the buffer parses.
                    buffer += line

                    try:
                        data = json.loads(buffer.decode('utf-8'))
                        buffer = b""
                        if 'candidates' in data and data['candidates']:
                            candidate = data['candidates'][0]
                            if 'content' in candidate:
                                content = candidate['content']
                                if 'parts' in content and content['parts']:
                                    parts = content['parts']
                                    if is_thinking and not show_thoughts:
                                        parts = [part for part in parts if not part.get('thought')]
                                    if parts:
                                        text = parts[0].get('text', '')
                                        finish_reason = candidate.get('finishReason')

                                        if text:
                                            data = {
                                                'choices': [{
                                                    'delta': {
                                                        'content': text
                                                    },
                                                    'finish_reason': finish_reason,
                                                    'index': 0
                                                }],
                                                'object': 'chat.completion.chunk'
                                            }
                                            yield f"data: {json.dumps(data)}\n\n"

                            if candidate.get("finishReason") and candidate.get("finishReason") != "STOP":
                                error_message = {
                                    "error": {
                                        "code": "content_filter",
                                        "message": f"模型的响应因违反内容政策而被标记:{candidate.get('finishReason')}",
                                        "status": candidate.get("finishReason"),
                                        "details": []
                                    }
                                }
                                logger.warning(f"模型的响应因违反内容政策而被标记: {candidate.get('finishReason')}")
                                yield f"data: {json.dumps(error_message)}\n\n"
                                break

                            if 'safetyRatings' in candidate:
                                for rating in candidate['safetyRatings']:
                                    if rating['probability'] == 'HIGH':
                                        error_message = {
                                            "error": {
                                                "code": "content_filter",
                                                "message": f"模型的响应因高概率被标记为 {rating['category']}",
                                                "status": "SAFETY_RATING_HIGH",
                                                "details": [rating]
                                            }
                                        }
                                        logger.warning(f"模型的响应因高概率被标记为 {rating['category']}")
                                        yield f"data: {json.dumps(error_message)}\n\n"
                                        break
                                else:
                                    # No HIGH rating: keep reading the stream.
                                    continue
                                # A HIGH rating was reported: stop streaming.
                                break

                    except json.JSONDecodeError:
                        logger.debug(f"JSON解析错误, 当前缓冲区内容: {buffer}")
                        continue

                except Exception as e:
                    logger.error(f"流式处理期间发生错误: {e}, 原始数据行↙\n{line}")
                    yield f"data: {json.dumps({'error': str(e)})}\n\n"

            else:
                # Reached only when the upstream stream ended without a break.
                yield f"data: {json.dumps({'choices': [{'delta': {}, 'finish_reason': 'stop', 'index': 0}]})}\n\n"
                logger.info("流式结束 ←")
                logger.info("200!")
        except Exception as e:
            logger.error(f"流式处理错误↙\n{e}")
            yield f"data: {json.dumps({'error': str(e)})}\n\n"
        finally:
            # With stream=True the connection is only released once the body
            # is fully read or the response is closed; the breaks above leave
            # it unread.
            response.close()

    attempt = 0
    success = 0
    response = None
    for attempt in range(1, MAX_RETRIES + 1):
        logger.info(f"第 {attempt}/{MAX_RETRIES} 次尝试 ...")
        # current_api_key is the module global here, so a key switch made
        # during the previous attempt is picked up.
        success, response = do_request(current_api_key, attempt)

        if success == 0:
            continue
        elif success == 1 and response is None:
            continue
        elif success == 1 and stream:
            return Response(
                stream_with_context(generate_stream(response)),
                mimetype='text/event-stream'
            )
        elif success == 1 and isinstance(response, ResponseWrapper):
            try:
                text_content = response.text
                prompt_tokens = response.prompt_token_count
                completion_tokens = response.candidates_token_count
                total_tokens = response.total_token_count
                finish_reason = response.finish_reason

                if text_content == '':
                    error_message = None
                    if response._data and 'error' in response._data:
                        error_message = response._data['error'].get('message')
                    if error_message:
                        logger.error(f"生成内容失败,API 返回错误: {error_message}")
                    else:
                        logger.error("生成内容失败: text_content 为空")
                    continue

                if is_thinking and show_thoughts:
                    text_content = response.thoughts + '\n' + text_content

            except AttributeError as e:
                logger.error(f"处理响应失败,缺少必要的属性: {e}")
                logger.error(f"原始响应: {response._data}")
                continue

            except Exception as e:
                logger.error(f"处理响应失败: {e}")
                continue

            response_data = {
                'id': 'chatcmpl-xxxxxxxxxxxx',
                'object': 'chat.completion',
                'created': int(datetime.now().timestamp()),
                'model': model,
                'choices': [{
                    'index': 0,
                    'message': {
                        'role': 'assistant',
                        'content': text_content
                    },
                    'finish_reason': finish_reason
                }],
                'usage': {
                    'prompt_tokens': prompt_tokens,
                    'completion_tokens': completion_tokens,
                    'total_tokens': total_tokens
                }
            }
            logger.info("200!")
            return jsonify(response_data)
        elif success == 1 and isinstance(response, tuple):
            return response[1], response[0]
        elif success == 2:
            logger.error(f"{model} 可能暂时不可用,请更换模型或未来一段时间再试")
            response = {
                'error': {
                    'message': f'{model} 可能暂时不可用,请更换模型或未来一段时间再试',
                    'type': 'internal_server_error'
                }
            }
            return jsonify(response), 503
    else:
        logger.error(f"{MAX_RETRIES} 次尝试均失败,请重试或等待官方恢复")
        response = {
            'error': {
                'message': f'{MAX_RETRIES} 次尝试均失败,请重试或等待官方恢复',
                'type': 'internal_server_error'
            }
        }
        return jsonify(response), 500 if response is not None else 503
588
+
589
@app.route('/hf/v1/models', methods=['GET'])
def list_models():
    """Return the static model catalogue as an OpenAI-style list object."""
    return jsonify({"object": "list", "data": GEMINI_MODELS})
593
+
594
@app.route('/hf/v1/embeddings', methods=['POST'])
def embeddings():
    """OpenAI-style embeddings endpoint backed by the Gemini embedding API.

    ``input`` may be a single string or a list of strings. One string is
    sent to ``embedContent``; several are sent to ``batchEmbedContents`` so
    each input gets its own vector instead of all being merged into one.
    """
    # This endpoint spends the operator's API keys, so it requires the same
    # bearer authentication as the chat endpoint.
    is_authenticated, auth_error, status_code = func.authenticate_request(request)
    if not is_authenticated:
        return auth_error if auth_error else jsonify({'error': '未授权'}), status_code if status_code else 401

    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "没有提供输入"}), 400

    model_input = data.get("input")
    model = data.get("model", "text-embedding-004")
    if not model_input:
        return jsonify({"error": "没有提供输入"}), 400

    if isinstance(model_input, str):
        model_input = [model_input]

    base_url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}"
    if len(model_input) == 1:
        gemini_url = f"{base_url}:embedContent?key={current_api_key}"
        gemini_request = {
            "model": f"models/{model}",
            "content": {
                "parts": [{"text": model_input[0]}]
            }
        }
    else:
        gemini_url = f"{base_url}:batchEmbedContents?key={current_api_key}"
        gemini_request = {
            "requests": [
                {
                    "model": f"models/{model}",
                    "content": {"parts": [{"text": text}]}
                }
                for text in model_input
            ]
        }

    headers = {"Content-Type": "application/json"}
    try:
        gemini_response = requests.post(gemini_url, json=gemini_request, headers=headers)
        gemini_response.raise_for_status()

        response_json = gemini_response.json()
        embeddings_data = []
        if 'embedding' in response_json:
            embeddings_data.append({
                "object": "embedding",
                "embedding": response_json['embedding']['values'],
                "index": 0,
            })
        elif 'embeddings' in response_json:
            for i, embedding in enumerate(response_json['embeddings']):
                embeddings_data.append({
                    "object": "embedding",
                    "embedding": embedding['values'],
                    "index": i,
                })

        client_response = {
            "object": "list",
            "data": embeddings_data,
            "model": model,
            "usage": {
                "prompt_tokens": 0,
                "total_tokens": 0,
            },
        }
        # Rotate to the next key after every successful call.
        switch_api_key()
        return jsonify(client_response)

    except requests.exceptions.RequestException as e:
        # The exception text contains the request URL, and the URL contains
        # the API key; redact it before logging or answering the client.
        safe_message = str(e)
        if current_api_key:
            safe_message = safe_message.replace(current_api_key, '***')
        logger.error(f"请求Embeddings失败↙\n{safe_message}")
        return jsonify({"error": safe_message}), 500
649
+
650
if __name__ == '__main__':
    # Module-level scheduler; APIKeyManager.blacklist_key uses it to lift
    # key bans after the blacklist duration.
    scheduler = BackgroundScheduler()

    scheduler.start()
    logger.info("Reminiproxy v2.3.5 启动")
    logger.info(f"最大尝试次数/MaxRetries: {MAX_RETRIES}")
    logger.info(f"最大请求次数/MaxRequests: {MAX_REQUESTS}")
    logger.info(f"请求限额窗口/LimitWindow: {LIMIT_WINDOW} 秒")

    # The server binds to every interface, so the interactive Werkzeug
    # debugger (arbitrary code execution for anyone who can reach it) must
    # be opt-in rather than always on.
    debug_enabled = os.environ.get('Debug', '').strip().lower() in ('1', 'true', 'yes')
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)
func.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import jsonify
2
+ import logging
3
+ import os
4
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# NOTE(review): not referenced by any function visible in this module.
request_counts = {}

# Shared secret that bearer tokens are compared against. Read once at import
# time; raises KeyError if the 'password' environment variable is unset.
password = os.environ['password']
9
+
10
def authenticate_request(request):
    """Validate the ``Authorization: Bearer <password>`` header of *request*.

    Returns:
        ``(True, None, None)`` on success, otherwise
        ``(False, <JSON error response>, 401)``.
    """
    # Local import so this block does not depend on the file's import list.
    import hmac

    auth_header = request.headers.get('Authorization')

    if not auth_header:
        return False, jsonify({'error': '缺少Authorization请求头'}), 401

    try:
        auth_type, pass_word = auth_header.split(' ', 1)
    except ValueError:
        return False, jsonify({'error': 'Authorization请求头格式错误'}), 401

    if auth_type.lower() != 'bearer':
        return False, jsonify({'error': 'Authorization类型必须为Bearer'}), 401

    # Constant-time comparison so response timing does not reveal how much
    # of a guess matched. Both sides are encoded to bytes because
    # compare_digest rejects non-ASCII str arguments.
    if not hmac.compare_digest(pass_word.encode('utf-8'), password.encode('utf-8')):
        return False, jsonify({'error': '未授权'}), 401

    return True, None, None
28
+
29
def _gemini_role(role):
    """Map an OpenAI role to its Gemini role, or None when unsupported."""
    if role in ('user', 'system'):
        return 'user'
    if role == 'assistant':
        return 'model'
    return None


def _inline_data_part(data_uri):
    """Turn a ``data:<mime>;base64,<payload>`` URI into an inline_data part.

    Raises IndexError when the URI has no ``:`` or ``,`` separator.
    """
    mime_type = data_uri.split(';')[0].split(':')[1]
    base64_data = data_uri.split(',')[1]
    return {
        "inline_data": {
            "mime_type": mime_type,
            "data": base64_data
        }
    }


def process_messages_for_gemini(messages, use_system_prompt=False):
    """Convert OpenAI-style chat messages into Gemini ``contents``.

    Args:
        messages: List of dicts with ``role`` and ``content``; content is a
            string or a list of ``text`` / ``image_url`` / ``file_url`` items.
        use_system_prompt: When true, the leading run of string-content
            ``system`` messages is collected into the system instruction
            instead of the history. Every other ``system`` message is sent
            with the ``user`` role.

    Returns:
        ``(history, system_instruction, error)``. ``history`` is a list of
        ``{"role", "parts"}`` dicts in which consecutive messages of the same
        role are merged. ``system_instruction`` is always a
        ``{"parts": [{"text": ...}]}`` dict (the text may be empty).
        ``error`` is ``None`` or a ``(JSON response, 400)`` tuple listing
        every problem found.
    """
    gemini_history = []
    errors = []
    system_instruction_text = ""
    is_system_phase = use_system_prompt

    for message in messages:
        role = message.get('role')
        content = message.get('content')

        if isinstance(content, str):
            if is_system_phase and role == 'system':
                if system_instruction_text:
                    system_instruction_text += "\n" + content
                else:
                    system_instruction_text = content
                continue
            is_system_phase = False
            parts = [{"text": content}]

        elif isinstance(content, list):
            # Any non-system-prompt message ends the system phase, including
            # multi-part ones; previously only string messages did, so a
            # later system message could still leak into the instruction.
            is_system_phase = False
            parts = []
            for item in content:
                item_type = item.get('type')
                if item_type == 'text':
                    parts.append({"text": item.get('text')})
                elif item_type == 'image_url':
                    image_data = item.get('image_url', {}).get('url', '')
                    if image_data.startswith('data:image/'):
                        try:
                            parts.append(_inline_data_part(image_data))
                        except (IndexError, ValueError):
                            errors.append(f"Invalid data URI for image: {image_data}")
                    else:
                        errors.append(f"Invalid image URL format for item: {item}")
                elif item_type == 'file_url':
                    file_data = item.get('file_url', {}).get('url', '')
                    if file_data.startswith('data:'):
                        try:
                            parts.append(_inline_data_part(file_data))
                        except (IndexError, ValueError):
                            errors.append(f"Invalid data URI for file: {file_data}")
                    else:
                        errors.append(f"Invalid file URL format for item: {item}")

            if not parts:
                continue

        else:
            # Content that is neither a string nor a list is ignored.
            continue

        role_to_use = _gemini_role(role)
        if role_to_use is None:
            errors.append(f"Invalid role: {role}")
            continue

        # Consecutive messages of the same role are merged into one entry.
        if gemini_history and gemini_history[-1]['role'] == role_to_use:
            gemini_history[-1]['parts'].extend(parts)
        else:
            gemini_history.append({"role": role_to_use, "parts": parts})

    system_instruction = {"parts": [{"text": system_instruction_text}]}
    if errors:
        return gemini_history, system_instruction, (jsonify({'error': errors}), 400)
    return gemini_history, system_instruction, None
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Flask==2.0.3
2
+ Flask-CORS==3.0.10
3
+ requests==2.26.0
4
+ Werkzeug==2.0.3
5
+ pillow==10.4.0
6
+ APScheduler==3.11.0