LiKenun committed on
Commit
92e41ba
·
1 Parent(s): 6cc5c8d

Clean up and restore ability to shut down gracefully

Browse files
pyproject.toml CHANGED
@@ -24,7 +24,6 @@ dependencies = [
24
  "more-itertools>=10.6.0",
25
  "python-dotenv>=1.1.0",
26
  "loguru>=0.7.3",
27
- "fastapi>=0.115.12",
28
  "dependency-injector>=4.46.0",
29
  "pytz>=2025.2",
30
  "apscheduler>=3.11.0",
@@ -36,7 +35,7 @@ dependencies = [
36
  "slack_bolt>=1.23.0",
37
  "pymongo>=4.11.3 ",
38
  "motor>=3.7.0",
39
- "openai>=1.70.0"
40
  "google-api-python-client>=2.167.0",
41
  "google-auth>=2.39.0",
42
  "google-auth-oauthlib>=1.2.1"
@@ -50,7 +49,7 @@ dev = [
50
  "types-pytz>=2025.2",
51
  "black>=25.1.0",
52
  "isort>=6.0.1",
53
- "ruff>=0.11.4",
54
  ]
55
 
56
  [project.urls]
 
24
  "more-itertools>=10.6.0",
25
  "python-dotenv>=1.1.0",
26
  "loguru>=0.7.3",
 
27
  "dependency-injector>=4.46.0",
28
  "pytz>=2025.2",
29
  "apscheduler>=3.11.0",
 
35
  "slack_bolt>=1.23.0",
36
  "pymongo>=4.11.3 ",
37
  "motor>=3.7.0",
38
+ "openai>=1.70.0",
39
  "google-api-python-client>=2.167.0",
40
  "google-auth>=2.39.0",
41
  "google-auth-oauthlib>=1.2.1"
 
49
  "types-pytz>=2025.2",
50
  "black>=25.1.0",
51
  "isort>=6.0.1",
52
+ "ruff>=0.11.4"
53
  ]
54
 
55
  [project.urls]
src/ctp_slack_bot/app.py CHANGED
@@ -1,9 +1,24 @@
1
- from asyncio import run
2
  from loguru import logger
 
 
3
 
4
  from ctp_slack_bot.containers import Container
5
  from ctp_slack_bot.core.logging import setup_logging
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  async def main() -> None:
8
  # Setup logging.
9
  setup_logging()
@@ -16,19 +31,23 @@ async def main() -> None:
16
  # Kick off services which should be active from the start.
17
  container.content_ingestion_service()
18
  container.question_dispatch_service()
 
19
 
20
- # Start the scheduler.
21
- schedule_service = container.schedule_service()
22
- schedule_service.start()
23
-
24
- # Start the Slack socket mode handler in a background thread.
25
  socket_mode_handler = container.socket_mode_handler()
26
- logger.info("Starting Slack Socket Mode handler…")
27
- await socket_mode_handler.start_async()
28
-
29
- # Shutdown. (This will never execute, because the socket mode handler never returns.)
30
- logger.info("Shutting down application…")
31
- schedule_service.stop()
 
 
 
 
 
 
 
32
 
33
  if __name__ == "__main__":
34
  run(main())
 
1
+ from asyncio import all_tasks, CancelledError, create_task, current_task, get_running_loop, run
2
  from loguru import logger
3
+ from signal import SIGINT, SIGTERM
4
+ from typing import Any, Callable
5
 
6
  from ctp_slack_bot.containers import Container
7
  from ctp_slack_bot.core.logging import setup_logging
8
 
9
+ async def handle_shutdown_signal() -> None:
10
+ logger.info("Received shutdown signal.")
11
+ for task in all_tasks():
12
+ if task is not current_task() and not task.done():
13
+ task.cancel()
14
+ logger.trace("Cancelled task {}.", task.get_name())
15
+ logger.info("Cancelled all tasks.")
16
+
17
+ def create_shutdown_signal_handler() -> Callable[[], None]:
18
+ def shutdown_signal_handler() -> None:
19
+ create_task(handle_shutdown_signal())
20
+ return shutdown_signal_handler
21
+
22
  async def main() -> None:
23
  # Setup logging.
24
  setup_logging()
 
31
  # Kick off services which should be active from the start.
32
  container.content_ingestion_service()
33
  container.question_dispatch_service()
34
+ container.schedule_service()
35
 
36
+ # Start the Slack socket mode handler in the background.
 
 
 
 
37
  socket_mode_handler = container.socket_mode_handler()
38
+ slack_bolt_task = create_task(socket_mode_handler.start_async())
39
+ shutdown_signal_handler = create_shutdown_signal_handler()
40
+ loop = get_running_loop()
41
+ loop.add_signal_handler(SIGINT, shutdown_signal_handler)
42
+ loop.add_signal_handler(SIGTERM, shutdown_signal_handler)
43
+ try:
44
+ logger.info("Starting Slack Socket Mode handler…")
45
+ await slack_bolt_task
46
+ except CancelledError:
47
+ logger.info("Shutting down application…")
48
+ finally:
49
+ await socket_mode_handler.close_async()
50
+ await container.shutdown_resources()
51
 
52
  if __name__ == "__main__":
53
  run(main())
src/ctp_slack_bot/containers.py CHANGED
@@ -13,7 +13,7 @@ from ctp_slack_bot.services.embeddings_model_service import EmbeddingsModelServi
13
  from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
14
  from ctp_slack_bot.services.language_model_service import LanguageModelService
15
  from ctp_slack_bot.services.question_dispatch_service import QuestionDispatchService
16
- from ctp_slack_bot.services.schedule_service import ScheduleService
17
  from ctp_slack_bot.services.slack_service import SlackServiceResource
18
  from ctp_slack_bot.services.vector_database_service import VectorDatabaseService
19
  from ctp_slack_bot.services.vectorization_service import VectorizationService
@@ -22,7 +22,7 @@ from ctp_slack_bot.services.vectorization_service import VectorizationService
22
  class Container(DeclarativeContainer):
23
  settings = Singleton(Settings)
24
  event_brokerage_service = Singleton(EventBrokerageService)
25
- schedule_service = Singleton(ScheduleService, settings=settings)
26
  mongo_db = Resource(MongoDBResource, settings=settings) # TODO: generalize to any database.
27
  vectorized_chunk_repository = Singleton(MongoVectorizedChunkRepository, mongo_db=mongo_db)
28
  vector_database_service = Singleton(VectorDatabaseService, settings=settings, mongo_db=mongo_db)
 
13
  from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
14
  from ctp_slack_bot.services.language_model_service import LanguageModelService
15
  from ctp_slack_bot.services.question_dispatch_service import QuestionDispatchService
16
+ from ctp_slack_bot.services.schedule_service import ScheduleServiceResource
17
  from ctp_slack_bot.services.slack_service import SlackServiceResource
18
  from ctp_slack_bot.services.vector_database_service import VectorDatabaseService
19
  from ctp_slack_bot.services.vectorization_service import VectorizationService
 
22
  class Container(DeclarativeContainer):
23
  settings = Singleton(Settings)
24
  event_brokerage_service = Singleton(EventBrokerageService)
25
+ schedule_service = Resource(ScheduleServiceResource, settings=settings)
26
  mongo_db = Resource(MongoDBResource, settings=settings) # TODO: generalize to any database.
27
  vectorized_chunk_repository = Singleton(MongoVectorizedChunkRepository, mongo_db=mongo_db)
28
  vector_database_service = Singleton(VectorDatabaseService, settings=settings, mongo_db=mongo_db)
src/ctp_slack_bot/core/logging.py CHANGED
@@ -1,4 +1,4 @@
1
- from logging import __file__ as logging_file, basicConfig, currentframe, getLogger, Handler, INFO, LogRecord
2
  from loguru import logger
3
  from os import getenv
4
  from sys import stderr
@@ -90,7 +90,9 @@ def setup_logging() -> None:
90
  basicConfig(handlers=[InterceptHandler()], level=0, force=True)
91
 
92
  # Update logging levels for some noisy libraries.
93
- for logger_name in ("uvicorn", "uvicorn.error", "fastapi", "httpx", "apscheduler", "pymongo"):
94
  getLogger(logger_name).setLevel(INFO)
 
 
95
 
96
  logger.info(f"Logging configured with level {log_level}")
 
1
+ from logging import __file__ as logging_file, basicConfig, currentframe, getLogger, Handler, INFO, LogRecord, WARNING
2
  from loguru import logger
3
  from os import getenv
4
  from sys import stderr
 
90
  basicConfig(handlers=[InterceptHandler()], level=0, force=True)
91
 
92
  # Update logging levels for some noisy libraries.
93
+ for logger_name in ("uvicorn", "uvicorn.error", "fastapi", "httpx", "pymongo"):
94
  getLogger(logger_name).setLevel(INFO)
95
+ for logger_name in ("apscheduler"):
96
+ getLogger(logger_name).setLevel(WARNING)
97
 
98
  logger.info(f"Logging configured with level {log_level}")
src/ctp_slack_bot/core/response_rendering.py DELETED
@@ -1,13 +0,0 @@
1
- from json import dumps
2
- from starlette.responses import JSONResponse
3
- from typing import Any, Self
4
-
5
- class PrettyJSONResponse(JSONResponse):
6
- def render(self: Self, content: Any) -> bytes:
7
- return dumps(
8
- content,
9
- ensure_ascii=False,
10
- allow_nan=False,
11
- indent=4,
12
- separators=(", ", ": "),
13
- ).encode()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/ctp_slack_bot/db/mongo_db.py CHANGED
@@ -1,13 +1,14 @@
1
- from dependency_injector.resources import Resource
 
2
  from motor.motor_asyncio import AsyncIOMotorClient
3
  from pymongo.errors import ConnectionFailure, ServerSelectionTimeoutError
4
  from pymongo.operations import SearchIndexModel
5
  from loguru import logger
6
  from pydantic import BaseModel, PrivateAttr
7
  from typing import Any, Dict, Optional, Self
8
- import asyncio
9
 
10
  from ctp_slack_bot.core.config import Settings
 
11
 
12
  class MongoDB(BaseModel):
13
  """
@@ -16,23 +17,20 @@ class MongoDB(BaseModel):
16
  settings: Settings
17
  _client: PrivateAttr = PrivateAttr()
18
  _db: PrivateAttr = PrivateAttr()
19
-
20
  class Config:
21
  arbitrary_types_allowed = True
22
-
23
  def __init__(self: Self, **data: Dict[str, Any]) -> None:
24
  super().__init__(**data)
25
  logger.debug("Created {}", self.__class__.__name__)
26
-
27
  def connect(self: Self) -> None:
28
  """Initialize MongoDB client with settings."""
29
  try:
30
  connection_string = self.settings.MONGODB_URI.get_secret_value()
31
- logger.debug("Connecting to MongoDB using URI: {}", connection_string.replace(
32
- connection_string.split('@')[-1].split('/')[0] if '@' in connection_string else '',
33
- '[REDACTED]'
34
- ))
35
-
36
  # Create client with appropriate settings
37
  self._client = AsyncIOMotorClient(
38
  connection_string,
@@ -43,48 +41,48 @@ class MongoDB(BaseModel):
43
  retryWrites=True,
44
  w="majority"
45
  )
46
-
47
  # Set database
48
  db_name = self.settings.MONGODB_NAME
49
-
50
  self._db = self._client[db_name]
51
  logger.debug("MongoDB client initialized for database: {}", db_name)
52
-
53
  except Exception as e:
54
  logger.error("Failed to initialize MongoDB client: {}", e)
55
  self._client = None
56
  self._db = None
57
  raise
58
-
59
  @property
60
  def client(self: Self) -> AsyncIOMotorClient:
61
  """Get the MongoDB client instance."""
62
  if not hasattr(self, '_client') or self._client is None:
63
- logger.warning("MongoDB client not initialized. Attempting to initialize.")
64
  self.connect()
65
  if not hasattr(self, '_client') or self._client is None:
66
- raise ConnectionError("Failed to initialize MongoDB client")
67
  return self._client
68
-
69
  @property
70
  def db(self: Self) -> Any:
71
  """Get the MongoDB database instance."""
72
  if not hasattr(self, '_db') or self._db is None:
73
- logger.warning("MongoDB database not initialized. Attempting to initialize client.")
74
  self.connect()
75
  if not hasattr(self, '_db') or self._db is None:
76
- raise ConnectionError("Failed to initialize MongoDB database")
77
  return self._db
78
-
79
  async def ping(self: Self) -> bool:
80
  """Check if MongoDB connection is alive."""
81
  try:
82
  # Get client to ensure we're connected
83
  client = self.client
84
-
85
  # Try a simple ping command
86
  await client.admin.command('ping')
87
- logger.debug("MongoDB connection is active")
88
  return True
89
  except (ConnectionFailure, ServerSelectionTimeoutError) as e:
90
  logger.error("MongoDB connection failed: {}", e)
@@ -92,7 +90,7 @@ class MongoDB(BaseModel):
92
  except Exception as e:
93
  logger.error("Unexpected error during MongoDB ping: {}", e)
94
  return False
95
-
96
  async def get_collection(self: Self, name: str) -> Any:
97
  """
98
  Get a collection by name with validation.
@@ -100,29 +98,29 @@ class MongoDB(BaseModel):
100
  """
101
  # First ensure we can connect at all
102
  if not await self.ping():
103
- logger.error("Cannot get collection '{}' - MongoDB connection is not available", name)
104
- raise ConnectionError("MongoDB connection is not available")
105
-
106
  try:
107
  # Get all collection names to check if this one exists
108
- logger.debug("Checking if collection '{}' exists", name)
109
  collection_names = await self.db.list_collection_names()
110
 
111
  if name not in collection_names:
112
- logger.info("Collection '{}' does not exist. Creating it.", name)
113
  # Create the collection
114
  await self.db.create_collection(name)
115
- logger.debug("Successfully created collection '{}'", name)
116
  else:
117
- logger.debug("Collection '{}' already exists", name)
118
-
119
  # Get and return the collection
120
  collection = self.db[name]
121
  return collection
122
  except Exception as e:
123
  logger.error("Error accessing collection '{}': {}", name, e)
124
  raise
125
-
126
  async def create_indexes(self: Self, collection_name: str) -> None:
127
  """
128
  Create a vector search index on a collection.
@@ -131,7 +129,7 @@ class MongoDB(BaseModel):
131
  collection_name: Name of the collection
132
  """
133
  collection = await self.get_collection(collection_name)
134
-
135
  try:
136
  # Create search index model using MongoDB's recommended approach
137
  search_index_model = SearchIndexModel(
@@ -149,41 +147,38 @@ class MongoDB(BaseModel):
149
  name=f"{collection_name}_vector_index",
150
  type="vectorSearch"
151
  )
152
-
153
  # Create the search index using the motor collection
154
  result = await collection.create_search_index(search_index_model)
155
- logger.info("Vector search index '{}' created for collection {}", result, collection_name)
156
-
157
  except Exception as e:
158
  if "command not found" in str(e).lower():
159
  logger.warning("Vector search not supported by this MongoDB instance. Some functionality may be limited.")
160
  # Create a fallback standard index on embedding field
161
  await collection.create_index("embedding")
162
- logger.info("Created standard index on 'embedding' field as fallback")
163
  else:
164
  logger.error("Failed to create vector index: {}", e)
165
  raise
166
-
167
  async def close(self: Self) -> None:
168
  """Close MongoDB connection."""
169
  if self._client:
170
  self._client.close()
171
- logger.info("MongoDB connection closed")
172
  self._client = None
173
  self._db = None
174
 
175
- class MongoDBResource(Resource):
176
- def init(self: Self, settings: Settings) -> MongoDB:
177
  logger.info("Initializing MongoDB connection for database: {}", settings.MONGODB_NAME)
178
  mongo_db = MongoDB(settings=settings)
179
  mongo_db.connect()
180
-
181
- # Test the connection asynchronously - this will run after init returns
182
- asyncio.create_task(self._test_connection(mongo_db))
183
-
184
  return mongo_db
185
-
186
- async def _test_connection(self, mongo_db: MongoDB) -> None:
187
  """Test MongoDB connection and log the result."""
188
  try:
189
  is_connected = await mongo_db.ping()
@@ -193,11 +188,11 @@ class MongoDBResource(Resource):
193
  logger.error("MongoDB connection test failed!")
194
  except Exception as e:
195
  logger.error("Error testing MongoDB connection: {}", e)
196
-
 
197
  async def shutdown(self: Self, mongo_db: MongoDB) -> None:
198
  """Close MongoDB connection on shutdown."""
199
  try:
200
- logger.info("Closing MongoDB connection...")
201
  await mongo_db.close()
202
  except Exception as e:
203
  logger.error("Error closing MongoDB connection: {}", e)
 
1
+ from asyncio import create_task
2
+ from dependency_injector.resources import AsyncResource
3
  from motor.motor_asyncio import AsyncIOMotorClient
4
  from pymongo.errors import ConnectionFailure, ServerSelectionTimeoutError
5
  from pymongo.operations import SearchIndexModel
6
  from loguru import logger
7
  from pydantic import BaseModel, PrivateAttr
8
  from typing import Any, Dict, Optional, Self
 
9
 
10
  from ctp_slack_bot.core.config import Settings
11
+ from ctp_slack_bot.utils import sanitize_mongo_db_uri
12
 
13
  class MongoDB(BaseModel):
14
  """
 
17
  settings: Settings
18
  _client: PrivateAttr = PrivateAttr()
19
  _db: PrivateAttr = PrivateAttr()
20
+
21
  class Config:
22
  arbitrary_types_allowed = True
23
+
24
  def __init__(self: Self, **data: Dict[str, Any]) -> None:
25
  super().__init__(**data)
26
  logger.debug("Created {}", self.__class__.__name__)
27
+
28
  def connect(self: Self) -> None:
29
  """Initialize MongoDB client with settings."""
30
  try:
31
  connection_string = self.settings.MONGODB_URI.get_secret_value()
32
+ logger.debug("Connecting to MongoDB using URI: {}", sanitize_mongo_db_uri(connection_string))
33
+
 
 
 
34
  # Create client with appropriate settings
35
  self._client = AsyncIOMotorClient(
36
  connection_string,
 
41
  retryWrites=True,
42
  w="majority"
43
  )
44
+
45
  # Set database
46
  db_name = self.settings.MONGODB_NAME
47
+
48
  self._db = self._client[db_name]
49
  logger.debug("MongoDB client initialized for database: {}", db_name)
50
+
51
  except Exception as e:
52
  logger.error("Failed to initialize MongoDB client: {}", e)
53
  self._client = None
54
  self._db = None
55
  raise
56
+
57
  @property
58
  def client(self: Self) -> AsyncIOMotorClient:
59
  """Get the MongoDB client instance."""
60
  if not hasattr(self, '_client') or self._client is None:
61
+ logger.warning("MongoDB client not initialized. Attempting to initialize")
62
  self.connect()
63
  if not hasattr(self, '_client') or self._client is None:
64
+ raise ConnectionError("Failed to initialize MongoDB client.")
65
  return self._client
66
+
67
  @property
68
  def db(self: Self) -> Any:
69
  """Get the MongoDB database instance."""
70
  if not hasattr(self, '_db') or self._db is None:
71
+ logger.warning("MongoDB database not initialized. Attempting to initialize client")
72
  self.connect()
73
  if not hasattr(self, '_db') or self._db is None:
74
+ raise ConnectionError("Failed to initialize MongoDB database.")
75
  return self._db
76
+
77
  async def ping(self: Self) -> bool:
78
  """Check if MongoDB connection is alive."""
79
  try:
80
  # Get client to ensure we're connected
81
  client = self.client
82
+
83
  # Try a simple ping command
84
  await client.admin.command('ping')
85
+ logger.debug("MongoDB connection is active!")
86
  return True
87
  except (ConnectionFailure, ServerSelectionTimeoutError) as e:
88
  logger.error("MongoDB connection failed: {}", e)
 
90
  except Exception as e:
91
  logger.error("Unexpected error during MongoDB ping: {}", e)
92
  return False
93
+
94
  async def get_collection(self: Self, name: str) -> Any:
95
  """
96
  Get a collection by name with validation.
 
98
  """
99
  # First ensure we can connect at all
100
  if not await self.ping():
101
+ logger.error("Cannot get collection '{}' because a MongoDB connection is not available.", name)
102
+ raise ConnectionError("MongoDB connection is not available.")
103
+
104
  try:
105
  # Get all collection names to check if this one exists
106
+ logger.debug("Checking if collection '{}' exists", name)
107
  collection_names = await self.db.list_collection_names()
108
 
109
  if name not in collection_names:
110
+ logger.info("Collection '{}' does not exist. Creating it", name)
111
  # Create the collection
112
  await self.db.create_collection(name)
113
+ logger.debug("Successfully created collection: {}", name)
114
  else:
115
+ logger.debug("Collection '{}' already exists!", name)
116
+
117
  # Get and return the collection
118
  collection = self.db[name]
119
  return collection
120
  except Exception as e:
121
  logger.error("Error accessing collection '{}': {}", name, e)
122
  raise
123
+
124
  async def create_indexes(self: Self, collection_name: str) -> None:
125
  """
126
  Create a vector search index on a collection.
 
129
  collection_name: Name of the collection
130
  """
131
  collection = await self.get_collection(collection_name)
132
+
133
  try:
134
  # Create search index model using MongoDB's recommended approach
135
  search_index_model = SearchIndexModel(
 
147
  name=f"{collection_name}_vector_index",
148
  type="vectorSearch"
149
  )
150
+
151
  # Create the search index using the motor collection
152
  result = await collection.create_search_index(search_index_model)
153
+ logger.info("Vector search index '{}' created for collection {}.", result, collection_name)
154
+
155
  except Exception as e:
156
  if "command not found" in str(e).lower():
157
  logger.warning("Vector search not supported by this MongoDB instance. Some functionality may be limited.")
158
  # Create a fallback standard index on embedding field
159
  await collection.create_index("embedding")
160
+ logger.info("Created standard index on 'embedding' field as fallback.")
161
  else:
162
  logger.error("Failed to create vector index: {}", e)
163
  raise
164
+
165
  async def close(self: Self) -> None:
166
  """Close MongoDB connection."""
167
  if self._client:
168
  self._client.close()
169
+ logger.info("Closed MongoDB connection.")
170
  self._client = None
171
  self._db = None
172
 
173
+ class MongoDBResource(AsyncResource):
174
+ async def init(self: Self, settings: Settings) -> MongoDB:
175
  logger.info("Initializing MongoDB connection for database: {}", settings.MONGODB_NAME)
176
  mongo_db = MongoDB(settings=settings)
177
  mongo_db.connect()
178
+ await self._test_connection(mongo_db)
 
 
 
179
  return mongo_db
180
+
181
+ async def _test_connection(self: Self, mongo_db: MongoDB) -> None:
182
  """Test MongoDB connection and log the result."""
183
  try:
184
  is_connected = await mongo_db.ping()
 
188
  logger.error("MongoDB connection test failed!")
189
  except Exception as e:
190
  logger.error("Error testing MongoDB connection: {}", e)
191
+ raise
192
+
193
  async def shutdown(self: Self, mongo_db: MongoDB) -> None:
194
  """Close MongoDB connection on shutdown."""
195
  try:
 
196
  await mongo_db.close()
197
  except Exception as e:
198
  logger.error("Error closing MongoDB connection: {}", e)
src/ctp_slack_bot/services/GOOGLE_DRIVE_README.md DELETED
@@ -1,228 +0,0 @@
1
- # Google Drive Access Module
2
-
3
- This Python module provides a simplified way to interact with Google Drive, focusing on easy access to files in nested folders using path-like syntax. It handles various Google file formats and provides comprehensive metadata for files and folders.
4
-
5
- ## Features
6
-
7
- - **Path-based folder access**: Access files using simple paths like `folder1/folder2/folder3`
8
- - **Efficient caching**: Folder IDs are cached to improve performance
9
- - **Comprehensive metadata**: Get detailed information about files and folders
10
- - **Read various file types**:
11
- - Text files
12
- - Google Docs
13
- - VTT files
14
- - **Robust folder finding**: Works with exact and partial name matching
15
- - **Simple API**: Designed for ease of use with minimal code
16
-
17
- ## Setup Instructions
18
-
19
- ### 1. Create a Google Cloud Project
20
-
21
- 1. Go to the [Google Cloud Console](https://console.cloud.google.com/)
22
- 2. Click on the project dropdown at the top of the page and select "New Project"
23
- 3. Enter a project name and click "Create"
24
- 4. Once created, make sure your new project is selected in the dropdown
25
-
26
- ### 2. Enable the Google Drive API
27
-
28
- 1. In the Google Cloud Console, navigate to "APIs & Services" > "Library" in the left sidebar
29
- 2. Search for "Google Drive API" in the search bar
30
- 3. Click on "Google Drive API" in the results
31
- 4. Click the "Enable" button
32
-
33
- ### 3. Create OAuth Credentials
34
-
35
- 1. In the Google Cloud Console, go to "APIs & Services" > "Credentials" in the left sidebar
36
- 2. Click "Create Credentials" at the top and select "OAuth client ID"
37
- 3. If prompted to configure the OAuth consent screen:
38
- - Choose "External" user type (or "Internal" if you're in a Google Workspace organization)
39
- - Fill in the required information (App name, User support email, Developer contact email)
40
- - Click "Save and Continue"
41
- - Add the following scopes:
42
- - `.../auth/drive` (Full access to Google Drive)
43
- - Click "Save and Continue" and complete the registration
44
- 4. Return to the "Create OAuth client ID" screen
45
- 5. Select "Desktop application" as the Application type
46
- 6. Enter a name for your OAuth client (e.g., "Google Drive Access Desktop")
47
- 7. Click "Create"
48
- 8. Download the JSON file (this is your `client_secret.json`)
49
-
50
- ### 4. Project Setup
51
-
52
- 1. Set up a virtual environment and install dependencies:
53
- ```bash
54
- python -m venv venv
55
- source venv/bin/activate # On Windows: venv\Scripts\activate
56
- pip install -r requirements.txt
57
- ```
58
-
59
- 2. Place your credentials:
60
- - Create a `credentials` directory in your project root
61
- - Move the downloaded OAuth client JSON file to the `credentials` directory
62
- - Rename it to `client_secret.json`
63
-
64
- ### 5. Authentication Process
65
-
66
- When you run the application for the first time:
67
- 1. A browser window will open automatically
68
- 2. You'll be asked to sign in to your Google account
69
- 3. You'll see a consent screen asking for permission to access your Google Drive
70
- 4. After granting permission, the browser will display a success message
71
- 5. The application will save a token file (`token.pickle`) in the credentials directory for future use
72
-
73
- ## Usage Guide
74
-
75
- The `EasyGoogleDrive` class provides several methods to interact with Google Drive. Here's how to use the core functionality:
76
-
77
- ### Basic Usage
78
-
79
- ```python
80
- from google_drive_access import EasyGoogleDrive
81
-
82
- # Initialize the Google Drive client
83
- drive = EasyGoogleDrive()
84
-
85
- # Example folder path - replace with your actual folder path
86
- folder_path = "Spring-2025-BAI"
87
- subfolder_path = "Spring-2025-BAI/transcripts"
88
- ```
89
-
90
- ### Listing Folders
91
-
92
- ```python
93
- # List folders in a directory
94
- folders = drive.get_folders_in_folder(folder_path)
95
-
96
- # Access folder properties
97
- for folder in folders:
98
- print(f"Folder: {folder['name']}")
99
- print(f" Created: {folder.get('createdTimeFormatted', 'Unknown')}")
100
- print(f" Modified: {folder.get('modifiedTimeFormatted', 'Unknown')}")
101
- ```
102
-
103
- ### Listing Files
104
-
105
- ```python
106
- # List files in a directory
107
- files = drive.get_files_in_folder(subfolder_path)
108
-
109
- # Access file properties
110
- for file in files:
111
- print(f"File: {file['name']}")
112
- print(f" Type: {file.get('fileType', 'Unknown')}")
113
- print(f" Created: {file.get('createdTimeFormatted', 'Unknown')}")
114
- print(f" Modified: {file.get('modifiedTimeFormatted', 'Unknown')}")
115
- print(f" Size: {file.get('sizeFormatted', 'Unknown')}")
116
- ```
117
-
118
- ### Getting a Specific File
119
-
120
- ```python
121
- # Get a specific file with metadata
122
- file = drive.get_file("example.txt", subfolder_path, include_metadata=True)
123
-
124
- if file:
125
- print(f"File: {file['name']}")
126
- print(f" Type: {file.get('fileType', 'Unknown')}")
127
- print(f" Created: {file.get('createdTimeFormatted', 'Unknown')}")
128
- print(f" Modified: {file.get('modifiedTimeFormatted', 'Unknown')}")
129
- print(f" Size: {file.get('sizeFormatted', 'Unknown')}")
130
- ```
131
-
132
- ### Getting All Items in a Folder
133
-
134
- ```python
135
- # Get all items (files and folders) in a folder
136
- all_items = drive.get_all_files_in_folder(folder_path)
137
-
138
- # Access item properties
139
- for item in all_items:
140
- item_type = "Folder" if item.get('mimeType') == drive.MIME_TYPES['folder'] else item.get('fileType', 'Unknown')
141
- print(f"Item: {item['name']} ({item_type})")
142
- ```
143
-
144
- ### Checking if a File Exists
145
-
146
- ```python
147
- # Check if a file exists
148
- exists = drive.file_exists("example.txt", subfolder_path)
149
- print(f"File exists: {exists}")
150
- ```
151
-
152
- ### Getting File Modified Time
153
-
154
- ```python
155
- # Get file modified time
156
- modified_time = drive.get_file_modified_time("example.txt", subfolder_path)
157
- if modified_time:
158
- print(f"Last modified: {modified_time}")
159
- ```
160
-
161
- ### Reading File Content
162
-
163
- ```python
164
- # Get file with content
165
- file_with_content = drive.get_file("example.txt", subfolder_path, include_content=True)
166
-
167
- if file_with_content and 'file_content' in file_with_content:
168
- content = file_with_content['file_content']
169
- if content:
170
- print(f"Content: {content[:100]}...") # Print first 100 characters
171
- ```
172
-
173
- ## Complete Example
174
-
175
- For a complete example of how to use the `EasyGoogleDrive` class, see the `basic_usage.py` file included in this package. This file demonstrates all the core functionality with practical examples.
176
-
177
- ## Key Concepts
178
-
179
- ### Path-based Folder Access
180
-
181
- The module uses a simple path-like syntax to access folders:
182
-
183
- ```python
184
- # Access a deeply nested folder
185
- folder_path = "folder1/folder2/folder3"
186
- files = drive.get_files_in_folder(folder_path)
187
- ```
188
-
189
- This makes it much easier to work with nested folder structures compared to using folder IDs.
190
-
191
- ### Metadata Fields
192
-
193
- The module provides comprehensive metadata for files and folders, including:
194
-
195
- - **Creation and modification dates**: Both as datetime objects and formatted strings
196
- - **File size**: Both in bytes and human-readable format (KB, MB, GB)
197
- - **File type**: Simplified type based on MIME type
198
- - **Owner information**: Names and email addresses of file owners
199
- - **Sharing status**: Whether the file is shared
200
- - **Web links**: Direct links to view the file in a browser
201
-
202
- ## Error Handling
203
-
204
- The module includes comprehensive error handling:
205
-
206
- - **Authentication errors**: Clear messages when credentials are missing or invalid
207
- - **Folder not found**: Helpful messages when a folder in the path cannot be found
208
- - **File not found**: Attempts partial name matching before giving up
209
- - **Decoding errors**: Handles issues with file content encoding
210
-
211
- ## Dependencies
212
-
213
- - **Required**:
214
- - google-auth-oauthlib
215
- - google-auth-httplib2
216
- - google-api-python-client
217
- - python-dateutil
218
-
219
- ## Security Notes
220
-
221
- - Never commit your `client_secret.json` or token files to version control
222
- - Add `credentials/` to your `.gitignore` file
223
- - Keep your credentials secure and don't share them
224
- - For production applications, consider using service accounts with the minimum required permissions
225
-
226
- ## Contributing
227
-
228
- Feel free to contribute to this project by submitting issues or pull requests.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/ctp_slack_bot/services/google_drive_access.py DELETED
@@ -1,623 +0,0 @@
1
- """
2
- Easy Google Drive Access
3
-
4
- A simplified module for accessing Google Drive files in nested folders.
5
- Designed to make it as easy as possible to access files using path-like syntax.
6
- """
7
-
8
- import os
9
- import pickle
10
- import io
11
- import datetime
12
- from typing import List, Dict, Optional, Any, Union
13
-
14
- from google.oauth2.credentials import Credentials
15
- from google_auth_oauthlib.flow import InstalledAppFlow
16
- from google.auth.transport.requests import Request
17
- from googleapiclient.discovery import build
18
- from googleapiclient.http import MediaIoBaseDownload
19
- from googleapiclient.errors import HttpError
20
-
21
-
22
class EasyGoogleDrive:
    """
    Simplified Google Drive access focused on accessing files in nested folders.

    Folders are addressed with '/'-separated paths (e.g. 'folder1/folder2').
    Resolved folder IDs are cached per instance in ``folder_id_cache``.
    Progress and error messages are reported with ``print``.
    """

    # Define the scopes needed for the application
    SCOPES = ['https://www.googleapis.com/auth/drive']

    # Define common MIME types
    MIME_TYPES = {
        'folder': 'application/vnd.google-apps.folder',
        'document': 'application/vnd.google-apps.document',
        'spreadsheet': 'application/vnd.google-apps.spreadsheet',
        'text': 'text/plain',
        'pdf': 'application/pdf',
        'image': 'image/jpeg',
        'video': 'video/mp4',
        'audio': 'audio/mpeg',
    }

    # Define metadata fields to retrieve
    FILE_FIELDS = 'id, name, mimeType, createdTime, modifiedTime, size, description, webViewLink, thumbnailLink, owners, shared, sharingUser, lastModifyingUser, capabilities, permissions'
    FOLDER_FIELDS = 'id, name, createdTime, modifiedTime, description, webViewLink, owners, shared, sharingUser, lastModifyingUser, capabilities, permissions'

    # File-name suffixes whose content is never downloaded as text.
    _NON_TEXT_EXTENSIONS = ('.jpg', '.png', '.gif', '.mp3', '.mp4')

    def __init__(self, credentials_dir: str = 'credentials'):
        """
        Initialize the Google Drive access.

        Args:
            credentials_dir: Directory holding 'client_secret.json' and the
                cached 'token.pickle'; created if it does not exist.
        """
        self.credentials_dir = credentials_dir
        self.credentials_path = os.path.join(credentials_dir, 'client_secret.json')
        self.token_path = os.path.join(credentials_dir, 'token.pickle')

        # Ensure credentials directory exists
        os.makedirs(credentials_dir, exist_ok=True)

        # Initialize the Drive API service
        self.service = build('drive', 'v3', credentials=self._get_credentials())

        # Cache for folder IDs to avoid repeated lookups
        self.folder_id_cache = {}

    def _get_credentials(self) -> "Credentials":
        """
        Get and refresh Google Drive API credentials.

        Returns:
            Valid credentials, loaded from the token cache, refreshed, or
            obtained through the interactive local-server OAuth flow.

        Raises:
            FileNotFoundError: If a new login is required but the client
                secrets file is missing.
        """
        creds = None

        # Load existing token if it exists
        if os.path.exists(self.token_path):
            # SECURITY NOTE: unpickling executes arbitrary code if the token
            # file has been tampered with. Only use a credentials directory
            # that is not writable by untrusted parties.
            with open(self.token_path, 'rb') as token:
                creds = pickle.load(token)

        # If credentials need refresh or don't exist
        if not creds or not creds.valid:
            if creds and creds.expired and creds.refresh_token:
                creds.refresh(Request())
            else:
                if not os.path.exists(self.credentials_path):
                    raise FileNotFoundError(
                        f"Client secrets file not found at {self.credentials_path}. "
                        "Please follow the setup instructions in the README."
                    )

                flow = InstalledAppFlow.from_client_secrets_file(
                    self.credentials_path, self.SCOPES)
                creds = flow.run_local_server(port=0)

            # Save the credentials for future use
            with open(self.token_path, 'wb') as token:
                pickle.dump(creds, token)

        return creds

    @staticmethod
    def _escape_query_value(value: str) -> str:
        """Escape backslashes and single quotes so *value* can sit inside a quoted Drive query literal."""
        return value.replace('\\', '\\\\').replace("'", "\\'")

    def _looks_non_text(self, name: str) -> bool:
        """Return True when *name* ends with one of the known binary media extensions."""
        return name.lower().endswith(self._NON_TEXT_EXTENSIONS)

    def _list_all(self, query: str, fields: str, page_size: int = 1000) -> List[Dict[str, Any]]:
        """List every file matching *query*, following ``nextPageToken`` until the listing is exhausted."""
        items: List[Dict[str, Any]] = []
        page_token = None
        while True:
            request_args = {
                'q': query,
                'spaces': 'drive',
                'fields': f'nextPageToken, files({fields})',
                'pageSize': page_size,
            }
            if page_token:
                request_args['pageToken'] = page_token
            response = self.service.files().list(**request_args).execute()
            items.extend(response.get('files', []))
            page_token = response.get('nextPageToken')
            if not page_token:
                return items

    def _search_by_name(self, name: str, clauses: List[str], fields: str,
                        exact_page_size: int) -> List[Dict[str, Any]]:
        """Return matches for an exact name, falling back to a 'name contains' search when there are none."""
        quoted = self._escape_query_value(name)
        attempts = (
            (f"name = '{quoted}'", exact_page_size),
            (f"name contains '{quoted}'", 10),
        )
        for name_clause, page_size in attempts:
            results = self.service.files().list(
                q=' and '.join([*clauses, name_clause]),
                spaces='drive',
                fields=f'files({fields})',
                pageSize=page_size
            ).execute()
            files = results.get('files', [])
            if files:
                return files
        return []

    def _attach_content(self, item: Dict[str, Any], skip_folders: bool = False,
                        announce_skip: bool = True) -> None:
        """Store the item's text content (or None) under 'file_content', reporting the outcome."""
        try:
            is_folder = skip_folders and item.get('mimeType') == self.MIME_TYPES['folder']
            if is_folder or self._looks_non_text(item['name']):
                if announce_skip:
                    print(f"Skipping content for non-text file: {item['name']}")
                item['file_content'] = None
                return

            # Read the file content
            content = self.read_file_from_object(item)
            item['file_content'] = content

            if content is not None:
                print(f"Successfully read content for: {item['name']} ({len(content)} characters)")
            else:
                print(f"Unable to read content for: {item['name']}")
        except Exception as e:
            # Best effort: one unreadable file must not abort a whole listing.
            print(f"Error reading content for {item['name']}: {e}")
            item['file_content'] = None

    def _format_metadata(self, metadata: Dict[str, Any]) -> Dict[str, Any]:
        """
        Format metadata for easier reading and usage.

        Args:
            metadata: Raw metadata from Google Drive API

        Returns:
            Formatted metadata dictionary (a shallow copy of the input with
            parsed dates and extra '...Formatted', owner and 'fileType' keys)
        """
        formatted = metadata.copy()

        # Format dates
        for date_field in ['createdTime', 'modifiedTime']:
            if date_field in formatted:
                try:
                    # Convert ISO 8601 string to datetime object
                    dt = datetime.datetime.fromisoformat(formatted[date_field].replace('Z', '+00:00'))
                    formatted[date_field] = dt
                    # Add a formatted date string for easier reading
                    formatted[f"{date_field}Formatted"] = dt.strftime('%Y-%m-%d %H:%M:%S')
                except (ValueError, AttributeError):
                    # Leave unparseable values untouched.
                    pass

        # Format size
        if 'size' in formatted and formatted['size']:
            try:
                size_bytes = int(formatted['size'])
                # Add human-readable size
                if size_bytes < 1024:
                    formatted['sizeFormatted'] = f"{size_bytes} B"
                elif size_bytes < 1024 * 1024:
                    formatted['sizeFormatted'] = f"{size_bytes / 1024:.1f} KB"
                elif size_bytes < 1024 * 1024 * 1024:
                    formatted['sizeFormatted'] = f"{size_bytes / (1024 * 1024):.1f} MB"
                else:
                    formatted['sizeFormatted'] = f"{size_bytes / (1024 * 1024 * 1024):.1f} GB"
            except (ValueError, TypeError):
                pass

        # Extract owner names
        if 'owners' in formatted and formatted['owners']:
            formatted['ownerNames'] = [owner.get('displayName', 'Unknown') for owner in formatted['owners']]
            formatted['ownerEmails'] = [owner.get('emailAddress', 'Unknown') for owner in formatted['owners']]

        # Add file type description
        if 'mimeType' in formatted:
            mime_type = formatted['mimeType']
            for key, value in self.MIME_TYPES.items():
                if mime_type == value:
                    formatted['fileType'] = key
                    break
            else:
                # If not found in our predefined types
                formatted['fileType'] = mime_type.split('/')[-1]

        return formatted

    def get_folder_id(self, folder_path: str) -> Optional[str]:
        """
        Get a folder ID from a path like 'folder1/folder2/folder3'.

        Args:
            folder_path: Path to the folder, using '/' as separator

        Returns:
            The folder ID if found, None otherwise
        """
        # Check if we've already resolved this path
        if folder_path in self.folder_id_cache:
            return self.folder_id_cache[folder_path]

        # If it looks like an ID already, return it
        if len(folder_path) > 25 and '/' not in folder_path:
            return folder_path

        # Start from the root
        current_folder_id = None
        current_path = ""

        # Traverse the path one folder at a time
        for folder_name in folder_path.split('/'):
            if not folder_name:  # Skip empty parts
                continue

            # Update the current path for caching
            current_path = f"{current_path}/{folder_name}" if current_path else folder_name

            # Check if we've already resolved this subpath
            if current_path in self.folder_id_cache:
                current_folder_id = self.folder_id_cache[current_path]
                continue

            # Search for the folder by name (exact first, then 'contains')
            clauses = [f"mimeType='{self.MIME_TYPES['folder']}'"]
            if current_folder_id:
                clauses.append(f"'{current_folder_id}' in parents")

            try:
                matches = self._search_by_name(folder_name, clauses, 'id, name', 10)
            except HttpError as error:
                print(f"Error finding folder: {error}")
                return None

            if not matches:
                print(f"Could not find folder '{folder_name}' in path '{folder_path}'")
                return None

            # Use the first match and cache this result
            current_folder_id = matches[0]['id']
            self.folder_id_cache[current_path] = current_folder_id

        return current_folder_id

    def get_folders_in_folder(self, folder_path: str, include_metadata: bool = True) -> List[Dict[str, Any]]:
        """
        Get all subfolders in a folder specified by path.

        Args:
            folder_path: Path to the folder, using '/' as separator
            include_metadata: Whether to include detailed metadata (default: True)

        Returns:
            List of folder metadata dictionaries
        """
        # Get the folder ID
        folder_id = self.get_folder_id(folder_path)
        if not folder_id:
            print(f"Could not find folder: '{folder_path}'")
            return []

        # List all folders in this folder
        query = f"'{folder_id}' in parents and mimeType = '{self.MIME_TYPES['folder']}'"
        fields = self.FOLDER_FIELDS if include_metadata else 'id, name'

        try:
            folders = self._list_all(query, fields)
        except HttpError as error:
            print(f"Error listing folders: {error}")
            return []

        # Format metadata if requested
        if include_metadata and folders:
            folders = [self._format_metadata(folder) for folder in folders]

        if folders:
            print(f"Found {len(folders)} subfolders in '{folder_path}':")
            for folder in folders:
                if include_metadata and 'createdTimeFormatted' in folder:
                    print(f" - {folder['name']} (Created: {folder['createdTimeFormatted']})")
                else:
                    print(f" - {folder['name']}")
        else:
            print(f"No subfolders found in '{folder_path}'")

        return folders

    def get_files_in_folder(self, folder_path: str, include_metadata: bool = True, include_content: bool = False) -> List[Dict[str, Any]]:
        """
        Get all files in a folder specified by path.

        Args:
            folder_path: Path to the folder, using '/' as separator
            include_metadata: Whether to include detailed metadata (default: True)
            include_content: Whether to include file content (default: False)

        Returns:
            List of file metadata dictionaries, optionally including file content
        """
        # Get the folder ID
        folder_id = self.get_folder_id(folder_path)
        if not folder_id:
            print(f"Could not find folder: '{folder_path}'")
            return []

        # List all non-folder files in this folder
        query = f"'{folder_id}' in parents and mimeType != '{self.MIME_TYPES['folder']}'"
        fields = self.FILE_FIELDS if include_metadata else 'id, name, mimeType'

        try:
            files = self._list_all(query, fields)
        except HttpError as error:
            print(f"Error listing files: {error}")
            return []

        # Format metadata if requested
        if include_metadata and files:
            files = [self._format_metadata(file) for file in files]

        # Add file content if requested
        if include_content:
            for file in files:
                self._attach_content(file)

        if files:
            print(f"Found {len(files)} files in '{folder_path}':")
            for file in files:
                if include_metadata and 'createdTimeFormatted' in file:
                    print(f" - {file['name']} ({file.get('fileType', 'Unknown')}, Created: {file['createdTimeFormatted']})")
                else:
                    print(f" - {file['name']} ({file.get('mimeType', 'Unknown')})")
        else:
            print(f"No files found in '{folder_path}'")

        return files

    def get_file(self, file_name: str, folder_path: str, include_metadata: bool = True, include_content: bool = False) -> Optional[Dict[str, Any]]:
        """
        Get a specific file by name from a folder.

        Args:
            file_name: Name of the file to get
            folder_path: Path to the folder containing the file
            include_metadata: Whether to include detailed metadata (default: True)
            include_content: Whether to include file content (default: False)

        Returns:
            File metadata dictionary, optionally including content, or None if file not found
        """
        # Get the folder ID
        folder_id = self.get_folder_id(folder_path)
        if not folder_id:
            print(f"Could not find folder: '{folder_path}'")
            return None

        fields = self.FILE_FIELDS if include_metadata else 'id, name, mimeType'

        # Find the file by name in this folder (exact first, then partial)
        try:
            files = self._search_by_name(file_name, [f"'{folder_id}' in parents"], fields, 1)
        except HttpError as error:
            print(f"Error getting file: {error}")
            return None

        if not files:
            print(f"Could not find file '{file_name}' in '{folder_path}'")
            return None

        # Use the first match
        file = files[0]

        # Format metadata if requested
        if include_metadata:
            file = self._format_metadata(file)

        # Add file content if requested
        if include_content:
            self._attach_content(file)

        print(f"Found file: {file['name']}")
        return file

    def get_all_files_in_folder(self, folder_path: str, include_metadata: bool = True, include_content: bool = False) -> List[Dict[str, Any]]:
        """
        Get all items (files and folders) in a folder specified by path.

        Args:
            folder_path: Path to the folder, using '/' as separator
            include_metadata: Whether to include detailed metadata (default: True)
            include_content: Whether to include file content (default: False)

        Returns:
            List of file and folder metadata dictionaries, optionally including file content
        """
        # Get the folder ID
        folder_id = self.get_folder_id(folder_path)
        if not folder_id:
            print(f"Could not find folder: '{folder_path}'")
            return []

        # List all items in this folder
        query = f"'{folder_id}' in parents"
        fields = self.FILE_FIELDS if include_metadata else 'id, name, mimeType'

        try:
            items = self._list_all(query, fields)
        except HttpError as error:
            print(f"Error listing items: {error}")
            return []

        # Format metadata if requested
        if include_metadata and items:
            items = [self._format_metadata(item) for item in items]

        # Add file content if requested; folders and media files are skipped silently
        if include_content:
            for item in items:
                self._attach_content(item, skip_folders=True, announce_skip=False)

        if items:
            print(f"Found {len(items)} items in '{folder_path}':")
            for item in items:
                is_folder = item.get('mimeType') == self.MIME_TYPES['folder']
                if include_metadata and 'createdTimeFormatted' in item:
                    item_type = 'Folder' if is_folder else item.get('fileType', 'Unknown')
                    print(f" - {item['name']} ({item_type}, Created: {item['createdTimeFormatted']})")
                else:
                    item_type = 'Folder' if is_folder else item.get('mimeType', 'Unknown')
                    print(f" - {item['name']} ({item_type})")
        else:
            print(f"No items found in '{folder_path}'")

        return items

    def file_exists(self, file_name: str, folder_path: str) -> bool:
        """
        Check if a file exists at the specified path in Google Drive.

        Args:
            file_name: Name of the file to check
            folder_path: Path to the folder containing the file

        Returns:
            True if the file exists (exact or partial name match), False otherwise
        """
        # Get the folder ID
        folder_id = self.get_folder_id(folder_path)
        if not folder_id:
            print(f"Could not find folder: '{folder_path}'")
            return False

        # Check if the file exists in this folder
        try:
            files = self._search_by_name(file_name, [f"'{folder_id}' in parents"], 'id, name', 1)
        except HttpError as error:
            print(f"Error checking if file exists: {error}")
            return False

        if not files:
            print(f"File '{file_name}' does not exist in '{folder_path}'")
            return False

        # File exists
        print(f"File '{file_name}' exists in '{folder_path}'")
        return True

    def get_file_modified_time(self, file_name: str, folder_path: str) -> Optional[datetime.datetime]:
        """
        Get the last modified time of a file.

        Args:
            file_name: Name of the file
            folder_path: Path to the folder containing the file

        Returns:
            The last modified time as a datetime object, or None if the file doesn't exist
        """
        # Get the file metadata
        file = self.get_file(file_name, folder_path, include_metadata=True)
        if not file:
            return None

        # Return the modified time
        return file.get('modifiedTime')

    def read_file_from_object(self, file_object: Dict[str, Any]) -> Optional[str]:
        """
        Read the contents of a file using a file object.

        Args:
            file_object: A Google file object with at least 'id' and 'mimeType' fields

        Returns:
            The file contents as a string, or None if the file couldn't be read
        """
        file_id = file_object.get('id')
        mime_type = file_object.get('mimeType')

        if not file_id or not mime_type:
            print("File object is missing 'id' or 'mimeType' fields.")
            return None

        try:
            # Read the file based on its type
            if mime_type == self.MIME_TYPES['document']:
                # Export Google Doc as plain text
                response = self.service.files().export(
                    fileId=file_id,
                    mimeType='text/plain'
                ).execute()
                return response.decode('utf-8')

            # Download regular files
            request = self.service.files().get_media(fileId=file_id)
            fh = io.BytesIO()
            downloader = MediaIoBaseDownload(fh, request)

            done = False
            while not done:
                _, done = downloader.next_chunk()

            return fh.getvalue().decode('utf-8')

        except HttpError as error:
            print(f"Error reading file: {error}")
            return None
        except Exception as e:
            print(f"Error decoding file content: {e}")
            return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/ctp_slack_bot/services/google_drive_basic_usage.py DELETED
@@ -1,178 +0,0 @@
1
- """
2
- Basic Usage Examples for EasyGoogleDrive
3
-
4
- This file demonstrates how to use the EasyGoogleDrive class to interact with Google Drive.
5
- It provides examples of the main functionality without printing all results to keep the output clean.
6
- """
7
-
8
- from google_drive_access import EasyGoogleDrive
9
- import datetime
10
-
11
def _print_first_three(entries, noun, describe):
    """Print a count line, up to three entries rendered by *describe*, and a note about the remainder."""
    if not entries:
        print(f"No {noun} found.")
        return

    print(f"Found {len(entries)} {noun}. Showing first 3:")
    for entry in entries[:3]:
        print(describe(entry))
    if len(entries) > 3:
        print(f" ... and {len(entries) - 3} more {noun}")


def main():
    """
    Main function demonstrating the basic usage of EasyGoogleDrive.

    Runs nine read-only examples against a hard-coded folder path and prints
    a short preview of each result.
    """
    # Initialize the Google Drive client
    # This will prompt for authentication the first time it's run
    drive = EasyGoogleDrive()

    # Example folder path - replace with your actual folder path
    folder_path = "Spring-2025-BAI"
    subfolder_path = "Spring-2025-BAI/transcripts"

    print("=== Basic Usage Examples for EasyGoogleDrive ===\n")

    # Example 1: List folders in a directory
    print("Example 1: Listing folders in a directory")
    print("----------------------------------------")
    folders = drive.get_folders_in_folder(folder_path)
    _print_first_three(
        folders,
        "folders",
        lambda folder: f" - {folder['name']} (Created: {folder.get('createdTimeFormatted', 'Unknown')})",
    )
    print()

    # Example 2: List files in a directory
    print("Example 2: Listing files in a directory")
    print("--------------------------------------")
    files = drive.get_files_in_folder(subfolder_path)
    _print_first_three(
        files,
        "files",
        lambda file: (
            f" - {file['name']} ({file.get('fileType', 'Unknown')}, "
            f"Created: {file.get('createdTimeFormatted', 'Unknown')})"
        ),
    )
    print()

    # Example 3: Get a specific file
    print("Example 3: Getting a specific file")
    print("--------------------------------")
    # Use the last file found in the previous example, or a default if none were found
    file_name = files[-1]['name'] if files else "example.txt"

    file = drive.get_file(file_name, subfolder_path, include_metadata=True)
    if file:
        print(f"File found: {file['name']}")
        print(f" Type: {file.get('fileType', 'Unknown')}")
        print(f" Created: {file.get('createdTimeFormatted', 'Unknown')}")
        print(f" Modified: {file.get('modifiedTimeFormatted', 'Unknown')}")
        print(f" Size: {file.get('sizeFormatted', 'Unknown')}")
    else:
        print(f"File '{file_name}' not found.")
    print()

    # Example 4: Get all items in a folder (files and folders)
    print("Example 4: Getting all items in a folder")
    print("--------------------------------------")
    all_items = drive.get_all_files_in_folder(folder_path)

    def describe_item(item):
        item_type = "Folder" if item.get('mimeType') == drive.MIME_TYPES['folder'] else item.get('fileType', 'Unknown')
        created_time = item.get('createdTimeFormatted', 'Unknown')
        return f" - {item['name']} ({item_type}, Created: {created_time})"

    _print_first_three(all_items, "items", describe_item)
    print()

    # Example 5: Check if a file exists
    print("Example 5: Checking if a file exists")
    print("----------------------------------")
    # Use the same file name from Example 3
    file_to_check = file_name

    exists = drive.file_exists(file_to_check, subfolder_path)
    print(f"File '{file_to_check}' {'exists' if exists else 'does not exist'} in '{subfolder_path}'.")
    print()

    # Example 6: Get file modified time
    print("Example 6: Getting file modified time")
    print("-----------------------------------")
    # Use the same file name from Example 3
    file_to_check_time = file_name

    modified_time = drive.get_file_modified_time(file_to_check_time, subfolder_path)
    if modified_time:
        print(f"File '{file_to_check_time}' was last modified on: {modified_time}")
    else:
        print(f"Could not get modified time for '{file_to_check_time}'.")
    print()

    # Example 7: Get file with content
    print("Example 7: Getting file with content")
    print("----------------------------------")
    # Use the same file name from Example 3
    file_with_content = file_name

    file_with_content_obj = drive.get_file(file_with_content, subfolder_path, include_content=True)
    if file_with_content_obj and 'file_content' in file_with_content_obj:
        content = file_with_content_obj['file_content']
        if content:
            print(f"File '{file_with_content}' content (first 100 chars):")
            print(f" {content[:100]}...")
        else:
            print(f"File '{file_with_content}' has no content or content could not be read.")
    else:
        print(f"File '{file_with_content}' not found or content could not be retrieved.")
    print()

    # Example 8: Get contents of all files in a folder
    print("Example 8: Getting contents of all files in a folder")
    print("------------------------------------------------")
    # Get all files with content
    all_files_with_content = drive.get_files_in_folder(subfolder_path, include_content=True)

    if all_files_with_content:
        print(f"Found {len(all_files_with_content)} files. Showing content preview for first 3:")
        for file in all_files_with_content[:3]:
            print(f" File: {file['name']}")
            content = file.get('file_content')
            if content:
                print(f" Content preview: {content[:50]}...")
            else:
                print(" No content available or file is not text-based.")

        if len(all_files_with_content) > 3:
            print(f" ... and {len(all_files_with_content) - 3} more files with content")
    else:
        print("No files found or no content could be retrieved.")
    print()

    # Example 9: Get content from a specific file using read_file_from_object
    print("Example 9: Getting content from a specific file using read_file_from_object")
    print("------------------------------------------------------------------------")
    # Get a file object first
    file_obj = drive.get_file(file_name, subfolder_path)

    if file_obj:
        # Read the content directly from the file object
        content = drive.read_file_from_object(file_obj)
        if content:
            print(f"File '{file_obj['name']}' content (first 100 chars):")
            print(f" {content[:100]}...")
        else:
            print(f"File '{file_obj['name']}' has no content or content could not be read.")
    else:
        print(f"File '{file_name}' not found.")
    print()

    print("=== End of Examples ===")


if __name__ == "__main__":
    main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/ctp_slack_bot/services/schedule_service.py CHANGED
@@ -2,6 +2,7 @@ from apscheduler.schedulers.asyncio import AsyncIOScheduler
2
  from apscheduler.triggers.cron import CronTrigger
3
  from asyncio import create_task, iscoroutinefunction, to_thread
4
  from datetime import datetime
 
5
  from loguru import logger
6
  from pydantic import BaseModel, PrivateAttr
7
  from pytz import timezone
@@ -47,11 +48,21 @@ class ScheduleService(BaseModel):
47
 
48
  def start(self: Self) -> None:
49
  self._scheduler.start()
50
- logger.info("Started scheduler.")
51
 
52
  def stop(self: Self) -> None:
53
  if self._scheduler.running:
54
- self._scheduler.shutdown(wait=False)
55
- logger.info("Shut down scheduler.")
56
  else:
57
  logger.debug("The scheduler is not running. There is no scheduler to shut down.")
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  from apscheduler.triggers.cron import CronTrigger
3
  from asyncio import create_task, iscoroutinefunction, to_thread
4
  from datetime import datetime
5
+ from dependency_injector.resources import Resource
6
  from loguru import logger
7
  from pydantic import BaseModel, PrivateAttr
8
  from pytz import timezone
 
48
 
49
def start(self: Self) -> None:
    """Start the wrapped scheduler held in ``self._scheduler``."""
    scheduler = self._scheduler
    scheduler.start()
 
51
 
52
def stop(self: Self) -> None:
    """Shut the scheduler down if it is running; otherwise only log that there is nothing to stop."""
    if not self._scheduler.running:
        logger.debug("The scheduler is not running. There is no scheduler to shut down.")
        return

    # No arguments: the scheduler's own default shutdown behaviour applies.
    self._scheduler.shutdown()
57
+
58
class ScheduleServiceResource(Resource):
    """Resource wrapper that starts a ScheduleService on init and stops it on shutdown."""

    def init(self: Self, settings: Settings) -> ScheduleService:
        """Build a ScheduleService from *settings*, start it, and return it."""
        logger.info("Starting scheduler…")
        service = ScheduleService(settings=settings)
        service.start()
        return service

    def shutdown(self: Self, schedule_service: ScheduleService) -> None:
        """Stop the given schedule service and log that it was stopped."""
        schedule_service.stop()
        logger.info("Stopped scheduler.")
src/ctp_slack_bot/utils/__init__.py CHANGED
@@ -0,0 +1 @@
 
 
1
+ from ctp_slack_bot.utils.secret_stripper import sanitize_mongo_db_uri
src/ctp_slack_bot/utils/secret_stripper.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ from urllib.parse import urlparse, urlunparse
2
+
3
def sanitize_mongo_db_uri(uri: str) -> str:
    """Return *uri* with any ``user:password@`` credentials removed.

    Intended for making a MongoDB connection string safe to log. Only the
    userinfo part of the network location is dropped; scheme, host(s),
    port(s), path, query and fragment are returned unchanged.

    The host list is taken textually from the netloc rather than from
    ``hostname``/``port``: ``port`` is an ``int`` (so it cannot be passed to
    ``str.join``) and reading it raises ``ValueError`` for multi-host
    replica-set URIs such as ``host1:27017,host2:27017``.
    """
    parts = urlparse(uri)
    # Everything after the last '@' is the host list; with no '@' present,
    # rpartition returns the whole netloc in the final slot.
    sanitized_netloc = parts.netloc.rpartition("@")[2]
    return urlunparse((parts.scheme, sanitized_netloc, parts.path, parts.params, parts.query, parts.fragment))