sergey21000 committed on
Commit
89dd8f0
·
verified ·
1 Parent(s): 059024e

Upload 9 files

Browse files
app.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+ load_dotenv()
3
+
4
+ import utils.setup_logging
5
+ from utils.interface import create_interface
6
+
7
+
8
if __name__ == '__main__':
    # Build the Gradio interface and launch it when run as a script.
    create_interface().launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio==5.20.1
2
+ Telethon==1.39.0
3
+ python-dotenv
utils/__init__.py ADDED
File without changes
utils/auth.py ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import os
3
+ import logging
4
+ from dataclasses import dataclass
5
+
6
+ from pathlib import Path
7
+
8
+ from telethon import TelegramClient, errors
9
+ from telethon.sessions import SQLiteSession, MemorySession
10
+ from telethon.sessions.abstract import Session
11
+
12
+ from utils.validation import Validator
13
+
14
+
15
@dataclass
class AuthState:
    """Mutable state of the Telegram authorization flow.

    The first group of fields configures which Telethon session is used; the
    second group tracks the progress of the current authorization attempt and
    is what `reset_state` restores to its defaults.
    """

    # Session configuration (survives `reset_state`).
    session_type: str = 'sqlite'  # either 'sqlite' or 'memory'
    session_name: str = 'telegram_api_session'
    memory_session: MemorySession | None = None
    is_logging: bool = False  # when True, status messages are also logged

    # Progress of the current authorization attempt.
    is_auth: bool = False
    need_send_code: bool = False
    need_verify_code: bool = False
    need_verify_2fa: bool = False
    message: str | None = None
    client: TelegramClient | None = None

    def __post_init__(self):
        """Create the directory in which SQLite session files are stored."""
        self.session_dir = Path('sessions')
        self.session_dir.mkdir(exist_ok=True)

    def check_start_auth_status(self) -> None:
        """Run `check_is_auth` synchronously on a freshly created event loop."""
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        loop.run_until_complete(self.check_is_auth())

    async def check_is_auth(self) -> None:
        """Mark the state as authorized if the env credentials open an authorized session."""
        if Validator.validate_env_vars().is_valid:
            client = ClientConnector.get_client(
                self.get_session(), os.getenv('API_ID'), os.getenv('API_HASH'),
            )
            validation_result = await Validator.validate_auth(client)
            if validation_result.is_valid:
                self.set_auth_success()

    def get_session(self) -> Session:
        """Return the Telethon session object for the configured session type.

        Returns:
            A new `SQLiteSession` pointing at `session_dir / session_name`, or
            the cached `MemorySession` (created on first use).

        Raises:
            ValueError: if `session_type` is neither 'sqlite' nor 'memory'.
        """
        if self.session_type == 'sqlite':
            return SQLiteSession(str(self.session_dir / self.session_name))
        if self.session_type == 'memory':
            if self.memory_session is None:
                self.memory_session = MemorySession()
            return self.memory_session
        # Fail loudly instead of implicitly returning None to the caller.
        raise ValueError(f'Unknown session type: {self.session_type!r}')

    def change_session_type(self, session_type):
        """Switch the session type used by subsequent `get_session` calls."""
        self.session_type = session_type

    def reset_state(self) -> None:
        """Restore the authorization-progress fields to their declared defaults.

        Session configuration (`session_type`, `session_name`,
        `memory_session`, `is_logging`) is left untouched.
        """
        # Dataclass defaults are available as class attributes, so no
        # throwaway instance (and no repeated __post_init__) is needed.
        defaults = type(self)
        for field_name in (
            'is_auth',
            'need_send_code',
            'need_verify_code',
            'need_verify_2fa',
            'message',
            'client',
        ):
            setattr(self, field_name, getattr(defaults, field_name))

    def _log(self) -> None:
        """Log the current message when logging is enabled and a message is set."""
        if self.is_logging and self.message:
            logging.info(self.message)

    def set_auth_failed(self, message: str | None = None) -> None:
        """Record a failure message; flags are left as they are."""
        if message:
            self.message = message
            self._log()

    def set_start_auth(self) -> None:
        """Reset progress and record that authentication has started."""
        self.reset_state()
        self.message = 'Начата процедура аутентификации'

    def set_client(self, client: TelegramClient) -> None:
        """Store the client; for memory sessions also keep its session object."""
        self.client = client
        if self.session_type == 'memory':
            self.memory_session = client.session

    def set_need_send_code(self) -> None:
        """Mark that a verification code has to be requested next."""
        self.need_send_code = True
        self.message = 'Проверка соединения клиента завершена успешно. Отправка проверочного кода'
        self._log()

    def set_need_verify_code(self) -> None:
        """Mark that the user has to enter the verification code."""
        self.need_verify_code = True
        self.message = 'Код отправлен в Telegram. Введите его в поле Проверочный код'
        self._log()

    def set_need_verify_2fa(self) -> None:
        """Mark that the 2FA password is required instead of the code."""
        self.need_verify_2fa = True
        self.need_verify_code = False
        self.message = 'Требуется 2FA-пароль. Введите его в поле Облачный пароль'
        self._log()

    def set_auth_success(self, message: str | None = None) -> None:
        """Mark the client as authorized and clear every pending-step flag."""
        self.is_auth = True
        self.need_send_code = False
        self.need_verify_code = False
        self.need_verify_2fa = False
        self.message = 'Клиент авторизован' if message is None else message
        self._log()

    async def delete_session(self) -> None:
        """Log out the stored client (if any), drop the session and reset progress."""
        if self.client is not None:
            await ClientConnector.log_out(self.client)
        if self.session_type == 'sqlite':
            session_filepath = self.session_dir / f'{self.session_name}.session'
            if session_filepath.is_file():
                session_filepath.unlink(missing_ok=True)
        elif self.session_type == 'memory':
            self.memory_session = None
        self.reset_state()
        self.message = 'Сессия удалена'
        self._log()
122
+
123
+
124
class ClientConnector:
    """Stateless helpers that drive a Telethon client through the sign-in steps.

    Every step receives the shared `AuthState`, updates it and returns it so
    the methods can be used directly as UI event handlers.
    """

    @staticmethod
    def get_client(session: Session, api_id: str, api_hash: str) -> TelegramClient:
        """Create a `TelegramClient` for the given session and API credentials."""
        return TelegramClient(session, api_id, api_hash, system_version='4.16.30-vxCUSTOM')

    @staticmethod
    async def connect(client: TelegramClient) -> None:
        """Connect the client unless it is already connected."""
        if not client.is_connected():
            await client.connect()

    @staticmethod
    async def disconnect(client: TelegramClient) -> None:
        """Disconnect the client if it is currently connected."""
        if client.is_connected():
            await client.disconnect()

    @classmethod
    async def log_out(cls, client: TelegramClient) -> None:
        """Log the client out, always disconnecting afterwards."""
        await cls.connect(client)
        try:
            await client.log_out()
        finally:
            # Do not leave the connection open when log_out() raises.
            await cls.disconnect(client)

    @classmethod
    async def start_auth(cls, state: AuthState, api_id: str, api_hash: str) -> AuthState:
        """Start authorization: validate credentials and decide the next step.

        Depending on the validation result the state becomes authorized,
        failed (connection error) or waiting for a verification code.
        """
        if not api_id or not api_hash:
            state.set_auth_failed(message='Не заданы api_id и/или api_hash')
            return state

        state.set_start_auth()
        client = cls.get_client(state.get_session(), api_id, api_hash)
        validation_result = await Validator.validate_auth(client)
        if validation_result.is_valid:
            state.set_auth_success('Клиент авторизован')
        elif validation_result.is_error:
            state.set_auth_failed(message=validation_result.message)
        else:
            # Reachable but not authorized yet: keep the client for the next steps.
            state.set_client(client)
            state.set_need_send_code()
        return state

    @classmethod
    async def send_code(cls, state: AuthState, phone_number: str) -> AuthState:
        """Request a verification code for `phone_number` if the state asks for it."""
        if not state.need_send_code:
            return state
        try:
            await cls.connect(state.client)
            await state.client.send_code_request(phone_number)
            state.set_need_verify_code()
        except Exception as ex:
            state.set_auth_failed(f'Ошибка при отправке кода подтверждения, код ошибки: {ex}')
        return state

    @classmethod
    async def verify_code(cls, state: AuthState, phone_number: str, code: str) -> AuthState:
        """Sign in with the received code; switch to the 2FA step when required."""
        if not state.need_verify_code:
            return state
        try:
            # Same precaution as in send_code: make sure the client is connected.
            await cls.connect(state.client)
            await state.client.sign_in(phone=phone_number, code=code)
            await cls.disconnect(state.client)
            state.set_auth_success()
        except errors.SessionPasswordNeededError:
            state.set_need_verify_2fa()
        except Exception as ex:
            state.set_auth_failed(f'Ошибка при верификации кода подтверждения, код ошибки: {ex}')
        return state

    @classmethod
    async def verify_2fa(cls, state: AuthState, password_2fa: str) -> AuthState:
        """Complete sign-in with the 2FA (cloud) password."""
        if not state.need_verify_2fa:
            return state
        try:
            await cls.connect(state.client)
            await state.client.sign_in(password=password_2fa)
            state.set_auth_success()
            await cls.disconnect(state.client)
        except Exception as ex:
            state.set_auth_failed(f'Ошибка при верификации облачного пароля, код ошибки: {ex}')
        return state
205
+
utils/components.py ADDED
@@ -0,0 +1,270 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pathlib import Path
3
+ from typing import Callable, Collection
4
+
5
+ import gradio as gr
6
+ from telethon.sessions import SQLiteSession, MemorySession
7
+
8
+ from utils.auth import AuthState
9
+ from utils.parser import Parser
10
+ from utils.validation import Validator
11
+
12
+
13
class Components:
    """Factory methods that build the Gradio components used by the interface."""

    # Header shown at the top of the page (rendered through gr.Markdown).
    welcome_message_markdown = '''
<h5 style='text-align: center'>
Парсер сообщений Telegram
</h5>
<h6 style='text-align: center'>
<a
href="https://github.com/sergey21000/telegram-message-parser"
target='_blank'>GitHub
</a>
проекта с инструкциями по получению `API_ID` и `API_HASH` приложения Telegram
</h6>
'''

    @staticmethod
    def _create_env_var_textbox(
        env_var: str | None = None,
        validator_method: Callable | None = None,
        **kwargs,
    ) -> gr.Textbox:
        """Build a textbox pre-filled from an environment variable.

        The value is taken from `env_var` only when `validator_method()`
        reports a valid result; otherwise it stays `None`. An explicit
        `value` in `kwargs` overrides the environment value.
        """
        value = None
        if env_var and validator_method and validator_method().is_valid:
            value = os.getenv(env_var)
        return gr.Textbox(**{'value': value, **kwargs})

    @classmethod
    def api_id(cls) -> gr.Textbox:
        """Textbox for the Telegram application API_ID."""
        return cls._create_env_var_textbox(
            env_var='API_ID',
            validator_method=Validator.validate_env_id,
            label='API_ID Telegram app',
            placeholder='API_ID приложения Telegram',
            scale=1,
        )

    @classmethod
    def api_hash(cls) -> gr.Textbox:
        """Textbox for the Telegram application API_HASH."""
        return cls._create_env_var_textbox(
            env_var='API_HASH',
            validator_method=Validator.validate_env_hash,
            label='API_HASH Telegram app',
            placeholder='API_HASH приложения Telegram',
            scale=1,
        )

    @classmethod
    def phone_number(cls) -> gr.Textbox:
        """Textbox for the account phone number."""
        return cls._create_env_var_textbox(
            env_var='PHONE_NUMBER',
            validator_method=Validator.validate_env_phone_number,
            label='PHONE_NUMBER Telegram app',
            placeholder='PHONE_NUMBER приложения Telegram',
            scale=1,
        )

    @staticmethod
    def code(visible: bool = False, render: bool = True) -> gr.Textbox:
        """Textbox for the verification code."""
        return gr.Textbox(
            value=None,
            label='Проверочный код',
            visible=visible,
            render=render,
            placeholder=None,
            scale=1,
        )

    @staticmethod
    def password_2fa(visible: bool = False, render: bool = True) -> gr.Textbox:
        """Password textbox for the 2FA (cloud) password."""
        return gr.Textbox(
            type='password',
            value=None,
            label='Облачный пароль',
            visible=visible,
            render=render,
            placeholder=None,
            scale=1,
        )

    @staticmethod
    def auth_status(value: str | None = None) -> gr.Textbox:
        """Read-only textbox showing the authorization status message."""
        return gr.Textbox(
            value=value,
            label='Статус авторизации',
            placeholder=None,
            interactive=False,
            scale=1,
        )

    @staticmethod
    def auth_btn(interactive: bool = True) -> gr.Button:
        """Button that starts authorization."""
        return gr.Button(
            value='Авторизация',
            interactive=interactive,
            scale=1,
        )

    @staticmethod
    def code_btn(visible: bool = False, render: bool = True) -> gr.Button:
        """Button that submits the verification code."""
        return gr.Button(
            value='Подтверждение кода',  # label typo fixed (was 'Подтвержение')
            visible=visible,
            render=render,
            scale=0,
        )

    @staticmethod
    def password_2fa_btn(visible: bool = False, render: bool = True) -> gr.Button:
        """Button that submits the 2FA password."""
        return gr.Button(
            value='Подтверждение облачного пароля',
            visible=visible,
            render=render,
            scale=0,
        )

    @staticmethod
    def delete_session_btn(visible: bool = False, render: bool = True) -> gr.Button:
        """Button that deletes the current session."""
        return gr.Button(
            value='Удаление сессии',
            visible=visible,
            render=render,
            scale=1,
        )

    @classmethod
    def session_type_radio(cls) -> gr.Radio:
        """Radio selector for the session type ('sqlite' or 'memory')."""
        # Pass the option names explicitly rather than a dict whose values are unused.
        return gr.Radio(
            choices=['sqlite', 'memory'],
            value='sqlite',
            label='Тип сессии',
            info=None,
        )

    @staticmethod
    def chats_usernames() -> gr.Textbox:
        """Textbox for entering chat addresses."""
        return gr.Textbox(
            label='Адреса чатов',
            placeholder='Ссылки или ID чатов/каналов через пробел или перенос строки',
            scale=1,
            lines=2,
        )

    @staticmethod
    def add_chat_btn() -> gr.Button:
        """Button that adds the entered chats to the list."""
        return gr.Button(
            value='Добавить чат/чаты',
            scale=0,
        )

    @staticmethod
    def chats_list_status() -> gr.Textbox:
        """Textbox listing the chats added for parsing."""
        return gr.Textbox(
            label='Добавленные чаты',
            placeholder='Здесь будет список чатов для парсинга',
            scale=1,
            lines=3,
        )

    @staticmethod
    def parse_status() -> gr.Textbox:
        """Textbox showing the parsing report."""
        return gr.Textbox(
            label='Статус парсинга',
            placeholder='Здесь будет отчет о результатах парсинга',
            scale=1,
            lines=8,
        )

    @staticmethod
    def start_parse_btn() -> gr.Button:
        """Button that starts parsing."""
        return gr.Button(
            value='Начать парсинг',
            scale=0,
        )

    @staticmethod
    def download_btn(value: str | None = None) -> gr.DownloadButton:
        """Download button for the results; hidden while there is no file."""
        return gr.DownloadButton(
            label='Загрузить csv результаты',
            value=value,
            visible=value is not None,
            scale=0,
        )

    @staticmethod
    def get_parse_args() -> list[gr.components.Component]:
        """Build the parsing-parameter inputs in the order limit, offset_date, reverse."""
        limit = gr.Number(
            value=None,
            label='limit',
            info='Сколько сообщений парсить',
        )
        offset_date = gr.DateTime(
            value=None,
            label='offset_date',
            info='До какой даты парсить',
            timezone='Europe/Moscow',
        )
        reverse = gr.Checkbox(
            value=False,
            label='reverse',
            info='Парсить от сегодняшнего сообщения к самому раннему',
        )
        return [limit, offset_date, reverse]
235
+
236
+
237
class ComponentsFn(Components):
    """Event-handler helpers that translate `AuthState` / results into UI updates."""

    @staticmethod
    def update_status(auth_state: AuthState) -> str | None:
        """Return the current status message of the auth state."""
        return auth_state.message

    @classmethod
    def get_dynamic_visible_components(
        cls,
        auth_state: AuthState,
        render: bool = True,
    ) -> tuple[gr.components.Component, ...]:
        """Build the components whose visibility depends on the auth state.

        Returns:
            `(code, code_btn, password_2fa, password_2fa_btn, delete_session_btn)`.
        """
        show_code = auth_state.need_verify_code
        show_2fa = auth_state.need_verify_2fa
        return (
            cls.code(visible=show_code, render=render),
            cls.code_btn(visible=show_code, render=render),
            cls.password_2fa(visible=show_2fa, render=render),
            cls.password_2fa_btn(visible=show_2fa, render=render),
            cls.delete_session_btn(visible=auth_state.is_auth, render=render),
        )

    @staticmethod
    def update_auth_state_session_type(auth_state: AuthState, session_type: str) -> None:
        """Apply the session type selected in the UI to the auth state."""
        auth_state.change_session_type(session_type)

    @staticmethod
    async def delete_session(auth_state: AuthState) -> None:
        """Delete the session held by the auth state."""
        await auth_state.delete_session()

    @classmethod
    def update_download_btn(cls, csv_paths: Collection[Path]) -> gr.DownloadButton:
        """Build the download button for the produced CSV files.

        No files -> a hidden button (so a button from an earlier run is not
        left pointing at nothing); one file -> that file; several files ->
        a zip archive created by `Parser.zip_files`.
        """
        paths = list(csv_paths)
        if not paths:
            return cls.download_btn(value=None)
        filepath = paths[0] if len(paths) == 1 else Parser.zip_files(paths)
        return cls.download_btn(value=filepath)
utils/interface.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ from utils.auth import AuthState, ClientConnector
4
+ from utils.components import Components, ComponentsFn
5
+ from utils.parser import Parser
6
+
7
+
8
def create_interface() -> gr.Blocks:
    """Assemble the Gradio Blocks UI: authorization section and parsing section.

    NOTE(review): the nesting of the layout containers below was
    reconstructed from the statement order — confirm against the intended
    page layout.
    """
    initial_state = AuthState()
    initial_state.check_start_auth_status()

    css = '''
    .gradio-container {
        width: 70% !important;
        margin: 0 auto !important; /* Центрирование по горизонтали */
    }
    '''

    with gr.Blocks(css=css) as interface:
        gr.Markdown(Components.welcome_message_markdown)

        auth_state = gr.State(initial_state)
        chats_list = gr.State([])
        csv_names = gr.State([])

        # Created unrendered so they can be placed inside the layout below.
        dynamic_visible_components = ComponentsFn.get_dynamic_visible_components(
            auth_state.value, render=False,
        )
        code, code_btn, password_2fa, password_2fa_btn, delete_session_btn = dynamic_visible_components

        with gr.Group():
            gr.Markdown('Авторизация')
            with gr.Row():
                with gr.Column():
                    session_type = Components.session_type_radio()
                    auth_status = Components.auth_status(value=auth_state.value.message)
                    with gr.Row():
                        auth_btn = Components.auth_btn()
                        delete_session_btn.render()
                    code.render()
                    code_btn.render()
                    password_2fa.render()
                    password_2fa_btn.render()

                with gr.Column():
                    api_id = Components.api_id()
                    api_hash = Components.api_hash()
                    phone_number = Components.phone_number()

            def refresh_auth_widgets(event):
                """Chain the two follow-up steps shared by every auth event."""
                return event.then(
                    fn=ComponentsFn.get_dynamic_visible_components,
                    inputs=[auth_state],
                    outputs=dynamic_visible_components,
                ).then(
                    fn=ComponentsFn.update_status,
                    inputs=[auth_state],
                    outputs=[auth_status],
                )

            refresh_auth_widgets(
                auth_btn.click(
                    fn=ClientConnector.start_auth,
                    inputs=[auth_state, api_id, api_hash],
                    outputs=[auth_state],
                ).then(
                    fn=ClientConnector.send_code,
                    inputs=[auth_state, phone_number],
                    outputs=[auth_state],
                )
            )

            refresh_auth_widgets(
                code_btn.click(
                    fn=ClientConnector.verify_code,
                    inputs=[auth_state, phone_number, code],
                    outputs=[auth_state],
                )
            )

            refresh_auth_widgets(
                password_2fa_btn.click(
                    fn=ClientConnector.verify_2fa,
                    inputs=[auth_state, password_2fa],
                    outputs=[auth_state],
                )
            )

            refresh_auth_widgets(
                delete_session_btn.click(
                    fn=ComponentsFn.delete_session,
                    inputs=[auth_state],
                    outputs=None,
                )
            )

            session_type.change(
                fn=ComponentsFn.update_auth_state_session_type,
                inputs=[auth_state, session_type],
                outputs=None,
            )

        with gr.Group():
            gr.Markdown('Парсинг')
            with gr.Row():
                with gr.Column():
                    with gr.Group():
                        gr.Markdown('Чаты для парсинга')
                        chats_usernames = Components.chats_usernames()
                        add_chat_btn = Components.add_chat_btn()
                        chats_list_status = Components.chats_list_status()
                with gr.Column():
                    with gr.Group():
                        gr.Markdown('Параметры парсинга')
                        parse_args = Components.get_parse_args()
                with gr.Column():
                    with gr.Group():
                        gr.Markdown('Результаты парсинга')
                        start_parse_btn = Components.start_parse_btn()
                        parse_status = Components.parse_status()
                        download_btn = Components.download_btn()

            add_chat_btn.click(
                fn=Parser.add_chat_to_chats_list,
                inputs=[auth_state, chats_usernames, chats_list, api_id, api_hash],
                outputs=[chats_list_status],
            )

            start_parse_btn.click(
                fn=Parser.parse_chats,
                inputs=[auth_state, chats_list, api_id, api_hash, *parse_args],
                outputs=[parse_status, csv_names],
            ).then(
                fn=ComponentsFn.update_download_btn,
                inputs=[csv_names],
                outputs=[download_btn],
            )

    return interface
utils/parser.py ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import zipfile
3
+ from dataclasses import dataclass
4
+ from datetime import datetime
5
+ from pathlib import Path
6
+ from typing import Collection
7
+
8
+ import pandas as pd
9
+ import gradio as gr
10
+ from telethon import TelegramClient, types, errors
11
+
12
+ from utils.auth import AuthState, ClientConnector
13
+ from utils.validation import Validator
14
+
15
+
16
+ MESSAGE_DICT = dict[str, str | int | datetime | None]
17
+ DEFAULT_PARSE_KWARGS = dict(
18
+ limit=None,
19
+ offset_date=None,
20
+ reverse=False,
21
+ )
22
+
23
+
24
@dataclass
class Chat:
    """A chat selected for parsing, together with display metadata."""

    chat: types.TLObject  # the entity object returned by Telethon
    chat_name: str | None
    chat_username: str  # the address string the user entered
    chat_type: str  # 'Chat' for users, 'Channel/Group' otherwise
    chat_id: int

    @classmethod
    def from_telethon_chat(cls, chat: types.TLObject, chat_username: str):
        """Build a `Chat` from a Telethon entity.

        For `types.User` the name is the first and last name joined by a
        space, skipping missing parts (so a user without a last name is not
        rendered as 'John None'); for any other entity the `title` is used.
        """
        if isinstance(chat, types.User):
            chat_type = 'Chat'
            name_parts = (chat.first_name, chat.last_name)
            chat_name = ' '.join(part for part in name_parts if part) or None
        else:
            chat_type = 'Channel/Group'
            chat_name = chat.title
        return cls(chat, chat_name, chat_username, chat_type, chat.id)

    def get_chat_info(self) -> str:
        """Return a one-line human-readable description of the chat."""
        return f'Chat name: {self.chat_name}, Chat type: {self.chat_type}, Chat ID: {self.chat_id}'
46
+
47
+
48
class Parser:
    """Collects messages from Telegram chats and exports them to CSV files."""

    # Directory that receives the CSV files and the zip archive.
    parse_results_dir = Path('parse_results_dir')

    @staticmethod
    def _join_name(first_name, last_name) -> str | None:
        """Join the available name parts with a space; None when both are missing."""
        return ' '.join(part for part in (first_name, last_name) if part) or None

    @staticmethod
    def _safe_filename_part(name) -> str:
        """Replace path separators and other filename-hostile characters with '_'."""
        forbidden = set('\\/:*?"<>|')
        return ''.join('_' if ch in forbidden or ord(ch) < 32 else ch for ch in str(name))

    @staticmethod
    def message_to_dict(message: types.Message) -> MESSAGE_DICT | None:
        """Convert a message into a flat row dict, or None if it has no text."""
        text = message.text if message.text else message.message
        if not text:
            return None

        sender = message.sender
        chat = message.chat

        # Anything that carries a title (channels and group chats) is named
        # by it; only entities without a title fall back to a personal name.
        # The original read first_name on every non-Channel entity.
        chat_title = getattr(chat, 'title', None)
        if chat_title is not None:
            chat_name = chat_title
        else:
            chat_name = Parser._join_name(
                getattr(chat, 'first_name', None),
                getattr(chat, 'last_name', None),
            )

        if isinstance(sender, types.User):
            sender_id = sender.id
            username = sender.username
            first_name = sender.first_name
            last_name = sender.last_name
        else:
            sender_id = message.sender_id
            username = getattr(sender, 'username', None)
            first_name = None
            last_name = None

        return {
            'date': message.date,
            'chat_type': type(chat).__name__,
            'chat_name': chat_name,
            'chat_id': chat.id,
            'sender_type': type(sender).__name__,
            'sender_username': username,
            'sender_first_name': first_name,
            'sender_last_name': last_name,
            'sender_id': sender_id,
            'text': text,
        }

    @classmethod
    async def get_messages_from_chat(
        cls,
        client: TelegramClient,
        chat: types.TLObject,
        parse_chats_pb_info: str,
        **parse_kwargs,
    ) -> list[MESSAGE_DICT]:
        """Iterate over a chat's messages and return the non-empty ones as dicts.

        `parse_kwargs` are forwarded to `client.iter_messages`. When
        `reverse` is not set, the collected rows are reversed before being
        returned.
        """
        limit = parse_kwargs.get('limit')
        message_dicts = []
        message_count = 0

        async with client:
            progress = gr.Progress()
            async for message in client.iter_messages(entity=chat, **parse_kwargs):
                message_count += 1
                # Pause once per 1000 messages (the original repeated this
                # check twice, doubling the pause).
                if message_count % 1000 == 0:
                    await asyncio.sleep(1)

                message_dict = cls.message_to_dict(message)
                if message_dict is not None:
                    message_dicts.append(message_dict)

                if limit:
                    progress(
                        message_count / limit,
                        desc=f'{parse_chats_pb_info}, Parsing messages {message_count}/{limit}',
                    )
                else:
                    progress(
                        message_count,
                        desc=f'{parse_chats_pb_info}, Parsing messages {message_count}/?',
                    )

        if not parse_kwargs.get('reverse'):
            message_dicts.reverse()
        return message_dicts

    @classmethod
    async def parse_chats(
        cls,
        auth_state: AuthState,
        chats_list: list[Chat],
        api_id: str,
        api_hash: str,
        *parse_args,
    ) -> tuple[str, list[Path]]:
        """Parse every chat in `chats_list` and write one CSV per chat.

        `parse_args` are positional values for the keys of
        `DEFAULT_PARSE_KWARGS` (in that order); missing trailing values fall
        back to the defaults.

        Returns:
            A text report (one line per chat) and the list of CSV paths.
        """
        csv_paths: list[Path] = []

        if len(chats_list) == 0:
            return 'Список чатов для парсинга пустой', csv_paths

        client = ClientConnector.get_client(auth_state.get_session(), api_id, api_hash)
        validation_result = await Validator.validate_auth(client)
        if not validation_result.is_valid:
            return 'Клиент не авторизован', csv_paths

        parse_kwargs = {**DEFAULT_PARSE_KWARGS, **dict(zip(DEFAULT_PARSE_KWARGS.keys(), parse_args))}
        progress = gr.Progress()
        report_lines = []
        total_chats = len(chats_list)

        for i, chat in enumerate(chats_list, start=1):
            parse_chats_pb_info = f'Parsing chats {i}/{total_chats}'
            try:
                message_dicts = await cls.get_messages_from_chat(
                    client, chat.chat, parse_chats_pb_info, **parse_kwargs,
                )
                if len(message_dicts) == 0:
                    report_lines.append(
                        f'Из чата {chat.chat_username} не было извлечено ни одного сообщения'
                    )
                else:
                    csv_paths.append(cls.messages_to_csv(message_dicts))
                    report_lines.append(
                        f'Успешный парсинг чата {chat.chat_username}, кол-во сообщений: {len(message_dicts)}'
                    )
            except Exception as ex:
                # One failing chat must not abort the remaining ones.
                report_lines.append(
                    f'Ошибка при парсинге чата {chat.chat_username}, код ошибки: {ex}'
                )

            progress(i / total_chats, desc=parse_chats_pb_info)

        return ''.join(f'{line}\n' for line in report_lines), csv_paths

    @classmethod
    def messages_to_csv(cls, message_dicts: Collection[MESSAGE_DICT]) -> Path:
        """Write the rows to `telegram_history_<chat name>.csv` and return its path.

        The chat name is taken from the first row; characters that are not
        safe in file names (path separators etc.) are replaced with '_'.
        """
        rows = list(message_dicts)
        chat_name = cls._safe_filename_part(rows[0].get('chat_name', ''))
        cls.parse_results_dir.mkdir(parents=True, exist_ok=True)
        csv_path = cls.parse_results_dir / f'telegram_history_{chat_name}.csv'
        pd.DataFrame(rows).to_csv(csv_path, index=False)
        return csv_path

    @classmethod
    def zip_files(cls, file_paths: Collection[Path]) -> Path:
        """Pack the given files into `parse_results_csv.zip` and return its path."""
        cls.parse_results_dir.mkdir(parents=True, exist_ok=True)
        zip_filepath = cls.parse_results_dir / 'parse_results_csv.zip'
        # The default ZIP_STORED does not compress; DEFLATED does.
        with zipfile.ZipFile(zip_filepath, 'w', compression=zipfile.ZIP_DEFLATED) as zipf:
            for file_path in file_paths:
                zipf.write(file_path, arcname=file_path)
        return zip_filepath

    @staticmethod
    def get_chats_info(chats_list: list[Chat]) -> str:
        """Return a numbered, newline-terminated listing of the chats."""
        return ''.join(
            f'{i}: {chat.get_chat_info()}\n'
            for i, chat in enumerate(chats_list, start=1)
        )

    @staticmethod
    async def get_chat(client: TelegramClient, chat_username: str) -> types.TLObject:
        """Resolve `chat_username` to a Telethon entity.

        Uses the existing connection when the client is connected, otherwise
        opens one for the duration of the lookup.

        Raises:
            errors.UsernameInvalidError: the username is unknown or malformed.
            Exception: any other failure, wrapped with a descriptive message.
        """
        try:
            if client.is_connected():
                chat = await client.get_entity(chat_username)
            else:
                async with client:
                    chat = await client.get_entity(chat_username)
        except (errors.UsernameNotOccupiedError, errors.UsernameInvalidError) as ex:
            log_msg = f'Чат или канал {chat_username} не найден или введен неверно'
            # NOTE(review): Telethon's generated RPC error classes appear to
            # take the originating request as constructor argument, so this
            # custom text may not end up as str(exception) — confirm.
            raise errors.UsernameInvalidError(log_msg) from ex
        except Exception as ex:
            log_msg = f'Ошибка при получении объекта чата, код ошибки: {ex}'
            raise Exception(log_msg) from ex
        return chat

    @classmethod
    async def add_chat_to_chats_list(
        cls,
        auth_state: AuthState,
        chats_usernames,
        chats_list: list[Chat],
        api_id: str,
        api_hash: str,
    ) -> str:
        """Resolve the whitespace-separated addresses and append new chats to `chats_list`.

        Chats already present (same entity type and id) are skipped with a
        notification; lookup errors are shown as notifications as well.

        Returns:
            The listing of all chats in `chats_list`, or an error text.
        """
        if not chats_usernames or chats_usernames.strip() == '':
            return 'Не заданы адрес/адреса чатов для добавления'

        client = ClientConnector.get_client(auth_state.get_session(), api_id, api_hash)
        validation_result = await Validator.validate_auth(client)
        if not validation_result.is_valid:
            return 'Клиент не авторизован'

        # chats_list holds Chat wrappers, so compare by the wrapped entity's
        # identity; `telethon_chat in chats_list` could never match.
        known = {(type(item.chat), item.chat_id) for item in chats_list}

        for chat_username in chats_usernames.split():
            try:
                telethon_chat = await cls.get_chat(client, chat_username)
                key = (type(telethon_chat), telethon_chat.id)
                if key in known:
                    gr.Info(f'Чат {chat_username} уже есть в списке')
                    continue
                chats_list.append(Chat.from_telethon_chat(telethon_chat, chat_username))
                known.add(key)
            except Exception as ex:
                gr.Info(str(ex))
        return cls.get_chats_info(chats_list)


# Make sure the output directory exists as soon as the module is imported.
Parser.parse_results_dir.mkdir(exist_ok=True)
utils/setup_logging.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import logging
3
+ import pytz
4
+ from pytz.tzinfo import BaseTzInfo
5
+ from datetime import datetime
6
+
7
+
8
+
9
def setup_logging(log_to_file: bool, level: int, timezone: BaseTzInfo) -> None:
    """Configure root logging with timestamps rendered in `timezone`.

    Args:
        log_to_file: also write records to 'bot_log.log' when True.
        level: logging level for the root logger.
        timezone: tzinfo used to render record timestamps.

    Note:
        This replaces `logging.Formatter.converter` on the class and calls
        `logging.basicConfig(force=True)`, so it affects all formatters and
        replaces any existing root handlers.
    """
    def _to_timezone_timetuple(*args):
        # Assigned on the class, so it is invoked as a bound method with
        # (formatter, seconds). Convert the record's own timestamp; the
        # original used the current time at formatting.
        if args:
            return datetime.fromtimestamp(args[-1], tz=timezone).timetuple()
        return datetime.now(tz=timezone).timetuple()

    logging.Formatter.converter = _to_timezone_timetuple

    handlers = [logging.StreamHandler(sys.stdout)]
    if log_to_file:
        # Explicit encoding so non-ASCII messages are written consistently.
        handlers.append(logging.FileHandler('bot_log.log', encoding='utf-8'))

    log_format = '%(asctime)s - %(filename)s:%(lineno)d - %(levelname)s - %(funcName)s: %(message)s'
    logging.basicConfig(
        level=level,
        format=log_format,
        datefmt='%Y-%m-%d %H:%M:%S',
        handlers=handlers,
        force=True,
    )
26
+
27
# Module-level logging settings; logging is configured as a side effect of
# importing this module.
LOG_TO_FILE = False
LEVEL = logging.INFO
TIMEZONE: BaseTzInfo = pytz.timezone('Europe/Moscow')

setup_logging(log_to_file=LOG_TO_FILE, level=LEVEL, timezone=TIMEZONE)
utils/validation.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dataclasses import dataclass
3
+ from pathlib import Path
4
+
5
+ from telethon import TelegramClient
6
+
7
+
8
@dataclass
class ValidationResult:
    """Outcome of a validation step."""

    is_valid: bool
    is_error: bool = False  # True when validation itself failed with an exception
    message: str = ''


class Validator:
    """Validation helpers for environment configuration and client authorization."""

    # Environment variables that must all be set for automatic authorization.
    env_vars = ['API_ID', 'API_HASH', 'PHONE_NUMBER']

    @staticmethod
    def validate_env_file(env_filename: str = '.env') -> ValidationResult:
        """Check that the env file exists."""
        if not Path(env_filename).is_file():
            return ValidationResult(is_valid=False, message=f'Отсутствует файл {env_filename}')
        return ValidationResult(is_valid=True)

    @staticmethod
    def validate_env_var(var_name: str, error_msg: str | None = None) -> ValidationResult:
        """Check that the environment variable is set to a non-empty value.

        An empty value (e.g. `API_ID=` in the env file) is treated as
        missing, because it cannot be used as a credential.
        """
        if not os.getenv(var_name):
            return ValidationResult(is_valid=False, message=error_msg or '')
        return ValidationResult(is_valid=True)

    @classmethod
    def validate_env_id(cls) -> ValidationResult:
        """Validate the API_ID environment variable."""
        return cls.validate_env_var('API_ID', 'Отсутствует переменная API_ID')

    @classmethod
    def validate_env_hash(cls) -> ValidationResult:
        """Validate the API_HASH environment variable."""
        return cls.validate_env_var('API_HASH', 'Отсутствует переменная API_HASH')

    @classmethod
    def validate_env_phone_number(cls) -> ValidationResult:
        """Validate the PHONE_NUMBER environment variable."""
        return cls.validate_env_var('PHONE_NUMBER', 'Отсутствует переменная PHONE_NUMBER')

    @classmethod
    def validate_env_vars(cls) -> ValidationResult:
        """Valid only when every variable in `env_vars` is set."""
        all_set = all(cls.validate_env_var(var).is_valid for var in cls.env_vars)
        return ValidationResult(is_valid=all_set)

    @staticmethod
    async def validate_auth(client: TelegramClient) -> ValidationResult:
        """Connect the client and report whether the user is authorized.

        Returns:
            `is_valid=True` when authorized; `is_valid=False` with a message
            when not authorized; `is_valid=False, is_error=True` when
            connecting or checking raised. The client is disconnected
            afterwards in every case.
        """
        try:
            if not client.is_connected():
                await client.connect()
            if not await client.is_user_authorized():
                return ValidationResult(is_valid=False, message='Клиент не авторизован')
            return ValidationResult(is_valid=True)
        except Exception as ex:
            log_msg = f'Ошибка при подключении клиента, код ошибки: {ex}'
            return ValidationResult(is_valid=False, is_error=True, message=log_msg)
        finally:
            if client.is_connected():
                await client.disconnect()