Delete jetbrains.py
Browse files- jetbrains.py +0 -258
jetbrains.py
DELETED
@@ -1,258 +0,0 @@
|
|
1 |
-
from typing import Optional, Dict, Any, Tuple
|
2 |
-
import json
|
3 |
-
import time
|
4 |
-
import httpx
|
5 |
-
from dataclasses import dataclass
|
6 |
-
from DrissionPage import Chromium
|
7 |
-
from DrissionPage._configs.chromium_options import ChromiumOptions
|
8 |
-
from DrissionPage._pages.mix_tab import MixTab
|
9 |
-
import os
|
10 |
-
|
11 |
-
@dataclass
|
12 |
-
class CaptchaConfig:
|
13 |
-
"""验证码解决配置"""
|
14 |
-
speech2text_api: str = os.environ.get("SPEECH2TEXT_API", "")
|
15 |
-
auth_key: str = os.environ.get("AUTH_KEY", "")
|
16 |
-
# auth_key: str = "your_auth_key_here" # 替换为你的API密钥
|
17 |
-
max_retries: int = 3
|
18 |
-
retry_delay: float = 1.0
|
19 |
-
page_load_timeout: float = 20.0
|
20 |
-
element_timeout: float = 5.0
|
21 |
-
|
22 |
-
|
23 |
-
class CaptchaSolver:
|
24 |
-
def __init__(self, email: str, firstname, lastname, is_teacher: bool = False, config: Optional[CaptchaConfig] = None, proxy = None):
|
25 |
-
self.email = email
|
26 |
-
self.firstname = firstname
|
27 |
-
self.lastname = lastname
|
28 |
-
self.is_teacher = is_teacher
|
29 |
-
self.config = config or CaptchaConfig()
|
30 |
-
self.tab: Optional[MixTab] = None
|
31 |
-
self.proxy = proxy
|
32 |
-
|
33 |
-
def __enter__(self):
|
34 |
-
self.tab = self._init_browser()
|
35 |
-
return self
|
36 |
-
|
37 |
-
def __exit__(self, exc_type, exc_val, exc_tb):
|
38 |
-
# return False # 允许异常传播
|
39 |
-
if self.tab:
|
40 |
-
self.tab.clear_cache()
|
41 |
-
self.tab.close()
|
42 |
-
self.tab.browser.quit()
|
43 |
-
|
44 |
-
def _init_browser(self) -> MixTab:
|
45 |
-
"""初始化浏览器并返回标签页"""
|
46 |
-
co = ChromiumOptions().set_paths(browser_path=r".\Chrome\chrome.exe")
|
47 |
-
co.incognito(on_off=True) # 无痕隐身模式打开的话。不会记住你的网站账号密码
|
48 |
-
# co.set_local_port("1236")
|
49 |
-
co.auto_port(on_off=True)
|
50 |
-
|
51 |
-
# co.headless(False)
|
52 |
-
co.headless(True)
|
53 |
-
co.set_user_agent(
|
54 |
-
user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTM, like Gecko) chrome/124.0.0.8 safari/537.36')
|
55 |
-
# 阻止“自动保存密码”的提示气泡
|
56 |
-
co.set_pref('credentials_enable_service', False)
|
57 |
-
# 阻止“要恢复页面吗?Chrome未正确关闭”的提示气泡
|
58 |
-
co.set_argument('--hide-crash-restore-bubble')
|
59 |
-
co.set_proxy(proxy=self.proxy)
|
60 |
-
browser = Chromium(co)
|
61 |
-
browser.clear_cache()
|
62 |
-
tab = browser.new_tab()
|
63 |
-
tab.set.timeouts(page_load=self.config.page_load_timeout)
|
64 |
-
return tab
|
65 |
-
def _colese_browser(self):
|
66 |
-
"""关闭浏览器并清理资源"""
|
67 |
-
if self.tab:
|
68 |
-
self.tab.clear_cache()
|
69 |
-
self.tab.close()
|
70 |
-
self.tab.browser.quit()
|
71 |
-
def _wait_for_element(self, selector: str, timeout: Optional[float] = None) -> Any:
|
72 |
-
"""等待元素出现"""
|
73 |
-
timeout = timeout or self.config.element_timeout
|
74 |
-
return self.tab.ele(selector, timeout=timeout)
|
75 |
-
|
76 |
-
def get_audio_data(self) -> Tuple[Optional[str], bool]:
|
77 |
-
"""
|
78 |
-
获取音频验证码数据
|
79 |
-
返回: (audio_data, needs_captcha)
|
80 |
-
"""
|
81 |
-
try:
|
82 |
-
self.tab.get('https://www.jetbrains.com/shop/eform/students')
|
83 |
-
|
84 |
-
# 检查是否需要验证码
|
85 |
-
if "Human Verification" not in self.tab.title:
|
86 |
-
print("页面不需要人机验证")
|
87 |
-
return None, False
|
88 |
-
|
89 |
-
try:
|
90 |
-
# time.sleep(2) # 等待页面加载
|
91 |
-
self.tab.wait.eles_loaded('#amzn-captcha-verify-button')
|
92 |
-
verify_btn = self._wait_for_element('#amzn-captcha-verify-button')
|
93 |
-
verify_btn.click(by_js=True)
|
94 |
-
except Exception:
|
95 |
-
# 如果找不到验证按钮,可能已经通过验证
|
96 |
-
if self._wait_for_element('@name=email', timeout=2):
|
97 |
-
print("已经不需要人机验证")
|
98 |
-
return None, False
|
99 |
-
raise
|
100 |
-
|
101 |
-
# 设置监听并点击音频按钮
|
102 |
-
self.tab.listen.start('problem?kind=audio')
|
103 |
-
audio_btn = self._wait_for_element('#amzn-btn-audio-internal')
|
104 |
-
audio_btn.click(by_js=True)
|
105 |
-
|
106 |
-
# 等待并获取音频数据
|
107 |
-
for _ in range(self.config.max_retries):
|
108 |
-
res = self.tab.listen.wait()
|
109 |
-
try:
|
110 |
-
audio_data = res.response.body
|
111 |
-
return audio_data['assets']['audio'], True
|
112 |
-
except (json.JSONDecodeError, KeyError):
|
113 |
-
print("数据包解析失败,继续等待...")
|
114 |
-
time.sleep(self.config.retry_delay)
|
115 |
-
|
116 |
-
raise TimeoutError("获取音频数据超时")
|
117 |
-
|
118 |
-
except Exception as e:
|
119 |
-
print(f"获取音频数据失败: {str(e)}")
|
120 |
-
return None, False
|
121 |
-
|
122 |
-
def call_whisper_api(self, audio_base64: str) -> Optional[Dict[str, Any]]:
|
123 |
-
"""调用Whisper API进行语音识别"""
|
124 |
-
print("开始识别人机验��码")
|
125 |
-
|
126 |
-
headers = {
|
127 |
-
"Content-Type": "application/json",
|
128 |
-
"Accept": "application/json",
|
129 |
-
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.8 Safari/537.36"
|
130 |
-
}
|
131 |
-
if self.config.auth_key:
|
132 |
-
headers["Authorization"] = f"Bearer {self.config.auth_key}"
|
133 |
-
payload = {
|
134 |
-
"audio": audio_base64,
|
135 |
-
"task": "translate"
|
136 |
-
}
|
137 |
-
|
138 |
-
try:
|
139 |
-
with httpx.Client(timeout=20) as client:
|
140 |
-
response = client.post(
|
141 |
-
self.config.speech2text_api,
|
142 |
-
headers=headers,
|
143 |
-
json=payload
|
144 |
-
)
|
145 |
-
response.raise_for_status()
|
146 |
-
return response.json()
|
147 |
-
except httpx.HTTPError as e:
|
148 |
-
print(f"API请求失败: {str(e)}")
|
149 |
-
return None
|
150 |
-
except json.JSONDecodeError:
|
151 |
-
print("API响应解析失败")
|
152 |
-
return None
|
153 |
-
|
154 |
-
@staticmethod
|
155 |
-
def parse_result(result: Dict[str, Any]) -> str:
|
156 |
-
"""解析语音识别结果"""
|
157 |
-
# if not result or 'segments' not in result or not result['segments']:
|
158 |
-
# raise ValueError("无效的API响应: 缺少segments字段")
|
159 |
-
|
160 |
-
# last_segment = result['segments'][-1].get('text', '').strip()
|
161 |
-
if 'text' not in result:
|
162 |
-
raise ValueError("无效的API响应: 缺少text字段")
|
163 |
-
last_segment = result.get('text', '').strip()
|
164 |
-
if not last_segment:
|
165 |
-
raise ValueError("无法从结果中提取文本")
|
166 |
-
|
167 |
-
# 清理结果: 取最后一个词,移除空格和标点
|
168 |
-
last_word = last_segment.split()[-1]
|
169 |
-
return last_word.replace(' ', '').replace('.', '').replace(',', '')
|
170 |
-
|
171 |
-
def fill_form(self) -> bool:
|
172 |
-
"""填写并提交表单"""
|
173 |
-
try:
|
174 |
-
# 等待表单元素出现
|
175 |
-
self.tab.wait.eles_loaded('@name=email')
|
176 |
-
email_input = self._wait_for_element('@name=email')
|
177 |
-
email_input.input(self.email, clear=True)
|
178 |
-
|
179 |
-
role_value = "TEACHER" if self.is_teacher else "STUDENT"
|
180 |
-
self._wait_for_element(f'@@name=studentType@@value={role_value}').click(by_js=True)
|
181 |
-
self._wait_for_element('@name=name.firstName').input(self.firstname, clear=True)
|
182 |
-
self._wait_for_element('@name=name.lastName').input(self.lastname, clear=True)
|
183 |
-
self._wait_for_element('@name=privacyPolicy').click(by_js=True)
|
184 |
-
|
185 |
-
submit_btn = self._wait_for_element('xpath://button[@type="submit"]')
|
186 |
-
submit_btn.click(by_js=True)
|
187 |
-
|
188 |
-
return True
|
189 |
-
except Exception as e:
|
190 |
-
print(f"表单填写失败: {str(e)}")
|
191 |
-
return False
|
192 |
-
def check_success(self) -> bool:
|
193 |
-
"""检查表单提交是否成功"""
|
194 |
-
try:
|
195 |
-
if "primaryConfirmation?email=" in self.tab.url:
|
196 |
-
print(f"{self.email} 提交成功")
|
197 |
-
return True
|
198 |
-
else:
|
199 |
-
print(f"{self.email} 提交失败")
|
200 |
-
return False
|
201 |
-
except Exception as e:
|
202 |
-
print(f"检查提交状态失败:{self.email} {str(e)}")
|
203 |
-
return False
|
204 |
-
|
205 |
-
def solve_audio_captcha(self) -> bool | None:
|
206 |
-
"""主流程: 解决音频验证码并提交表单"""
|
207 |
-
try:
|
208 |
-
while True:
|
209 |
-
# 1. 获取音频数据
|
210 |
-
print(f"{self.email} 正在获取音频验证码")
|
211 |
-
audio_data, needs_captcha = self.get_audio_data()
|
212 |
-
|
213 |
-
if needs_captcha and audio_data:
|
214 |
-
# 2. 调用API识别
|
215 |
-
result = self.call_whisper_api(audio_data)
|
216 |
-
if not result:
|
217 |
-
raise RuntimeError("语音识别API调用失败")
|
218 |
-
print(result)
|
219 |
-
# 3. 解析结果
|
220 |
-
last_word = self.parse_result(result)
|
221 |
-
print(f"识别结果: {last_word}")
|
222 |
-
|
223 |
-
# 4. 提交验证码
|
224 |
-
input_field = self._wait_for_element('tag:input')
|
225 |
-
input_field.input(last_word)
|
226 |
-
time.sleep(1) # 短暂等待确保输入完成
|
227 |
-
|
228 |
-
verify_btn = self._wait_for_element('#amzn-btn-verify-internal')
|
229 |
-
verify_btn.click(by_js=True)
|
230 |
-
time.sleep(2) # 等待验证结果
|
231 |
-
if "Human Verification" in self.tab.title:
|
232 |
-
print("验证码验证失败,重试")
|
233 |
-
continue
|
234 |
-
# 5. 填写表单
|
235 |
-
if not self.fill_form():
|
236 |
-
raise RuntimeError("表单填写失败")
|
237 |
-
# 6. 检查提交状态
|
238 |
-
time.sleep(1) # 等待页面加载
|
239 |
-
if not self.check_success():
|
240 |
-
raise RuntimeError("表单提交失败")
|
241 |
-
print("验证流程完成")
|
242 |
-
return True
|
243 |
-
|
244 |
-
except Exception as e:
|
245 |
-
print(f"处理过程中发生错误: {str(e)}")
|
246 |
-
return False
|
247 |
-
|
248 |
-
|
249 |
-
if __name__ == "__main__":
|
250 |
-
# 示例用法
|
251 |
-
with CaptchaSolver(email="[email protected]",
|
252 |
-
firstname="John",
|
253 |
-
lastname="Doe",
|
254 |
-
is_teacher=True,
|
255 |
-
) as solver:
|
256 |
-
success = solver.solve_audio_captcha()
|
257 |
-
if not success:
|
258 |
-
exit(1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|