Spaces:
Runtime error
Runtime error
Clean up and restore ability to shut down gracefully
Browse files
- pyproject.toml +2 -3
- src/ctp_slack_bot/app.py +31 -12
- src/ctp_slack_bot/containers.py +2 -2
- src/ctp_slack_bot/core/logging.py +4 -2
- src/ctp_slack_bot/core/response_rendering.py +0 -13
- src/ctp_slack_bot/db/mongo_db.py +44 -49
- src/ctp_slack_bot/services/GOOGLE_DRIVE_README.md +0 -228
- src/ctp_slack_bot/services/google_drive_access.py +0 -623
- src/ctp_slack_bot/services/google_drive_basic_usage.py +0 -178
- src/ctp_slack_bot/services/schedule_service.py +14 -3
- src/ctp_slack_bot/utils/__init__.py +1 -0
- src/ctp_slack_bot/utils/secret_stripper.py +6 -0
pyproject.toml
CHANGED
|
@@ -24,7 +24,6 @@ dependencies = [
|
|
| 24 |
"more-itertools>=10.6.0",
|
| 25 |
"python-dotenv>=1.1.0",
|
| 26 |
"loguru>=0.7.3",
|
| 27 |
-
"fastapi>=0.115.12",
|
| 28 |
"dependency-injector>=4.46.0",
|
| 29 |
"pytz>=2025.2",
|
| 30 |
"apscheduler>=3.11.0",
|
|
@@ -36,7 +35,7 @@ dependencies = [
|
|
| 36 |
"slack_bolt>=1.23.0",
|
| 37 |
"pymongo>=4.11.3 ",
|
| 38 |
"motor>=3.7.0",
|
| 39 |
-
"openai>=1.70.0"
|
| 40 |
"google-api-python-client>=2.167.0",
|
| 41 |
"google-auth>=2.39.0",
|
| 42 |
"google-auth-oauthlib>=1.2.1"
|
|
@@ -50,7 +49,7 @@ dev = [
|
|
| 50 |
"types-pytz>=2025.2",
|
| 51 |
"black>=25.1.0",
|
| 52 |
"isort>=6.0.1",
|
| 53 |
-
"ruff>=0.11.4"
|
| 54 |
]
|
| 55 |
|
| 56 |
[project.urls]
|
|
|
|
| 24 |
"more-itertools>=10.6.0",
|
| 25 |
"python-dotenv>=1.1.0",
|
| 26 |
"loguru>=0.7.3",
|
|
|
|
| 27 |
"dependency-injector>=4.46.0",
|
| 28 |
"pytz>=2025.2",
|
| 29 |
"apscheduler>=3.11.0",
|
|
|
|
| 35 |
"slack_bolt>=1.23.0",
|
| 36 |
"pymongo>=4.11.3 ",
|
| 37 |
"motor>=3.7.0",
|
| 38 |
+
"openai>=1.70.0",
|
| 39 |
"google-api-python-client>=2.167.0",
|
| 40 |
"google-auth>=2.39.0",
|
| 41 |
"google-auth-oauthlib>=1.2.1"
|
|
|
|
| 49 |
"types-pytz>=2025.2",
|
| 50 |
"black>=25.1.0",
|
| 51 |
"isort>=6.0.1",
|
| 52 |
+
"ruff>=0.11.4"
|
| 53 |
]
|
| 54 |
|
| 55 |
[project.urls]
|
src/ctp_slack_bot/app.py
CHANGED
|
@@ -1,9 +1,24 @@
|
|
| 1 |
-
from asyncio import run
|
| 2 |
from loguru import logger
|
|
|
|
|
|
|
| 3 |
|
| 4 |
from ctp_slack_bot.containers import Container
|
| 5 |
from ctp_slack_bot.core.logging import setup_logging
|
| 6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
async def main() -> None:
|
| 8 |
# Setup logging.
|
| 9 |
setup_logging()
|
|
@@ -16,19 +31,23 @@ async def main() -> None:
|
|
| 16 |
# Kick off services which should be active from the start.
|
| 17 |
container.content_ingestion_service()
|
| 18 |
container.question_dispatch_service()
|
|
|
|
| 19 |
|
| 20 |
-
# Start the
|
| 21 |
-
schedule_service = container.schedule_service()
|
| 22 |
-
schedule_service.start()
|
| 23 |
-
|
| 24 |
-
# Start the Slack socket mode handler in a background thread.
|
| 25 |
socket_mode_handler = container.socket_mode_handler()
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
if __name__ == "__main__":
|
| 34 |
run(main())
|
|
|
|
| 1 |
+
from asyncio import all_tasks, CancelledError, create_task, current_task, get_running_loop, run
|
| 2 |
from loguru import logger
|
| 3 |
+
from signal import SIGINT, SIGTERM
|
| 4 |
+
from typing import Any, Callable
|
| 5 |
|
| 6 |
from ctp_slack_bot.containers import Container
|
| 7 |
from ctp_slack_bot.core.logging import setup_logging
|
| 8 |
|
| 9 |
+
async def handle_shutdown_signal() -> None:
    """Cancel every other unfinished task known to the running event loop.

    Logs receipt of the signal, then walks ``all_tasks()`` and calls
    ``cancel()`` on each task that is neither the task running this
    coroutine nor already done, logging each one at trace level.
    """
    logger.info("Received shutdown signal.")
    # The task running this coroutine must survive to finish the sweep.
    this_task = current_task()
    for pending in all_tasks():
        if pending is this_task or pending.done():
            continue
        pending.cancel()
        logger.trace("Cancelled task {}.", pending.get_name())
    logger.info("Cancelled all tasks.")
|
| 16 |
+
|
| 17 |
+
def create_shutdown_signal_handler() -> Callable[[], None]:
    """Build the synchronous callback that reacts to a shutdown signal.

    The returned callable takes no arguments and schedules
    ``handle_shutdown_signal()`` as a task via ``create_task``, so it must
    be invoked while an event loop is running.

    Returns:
        A zero-argument callback that schedules the shutdown coroutine.
    """
    # The event loop keeps only weak references to tasks. Hold a strong
    # reference until the shutdown task completes so it cannot be
    # garbage-collected before it has cancelled the other tasks.
    in_flight = set()

    def shutdown_signal_handler() -> None:
        task = create_task(handle_shutdown_signal())
        in_flight.add(task)
        task.add_done_callback(in_flight.discard)

    return shutdown_signal_handler
|
| 21 |
+
|
| 22 |
async def main() -> None:
|
| 23 |
# Setup logging.
|
| 24 |
setup_logging()
|
|
|
|
| 31 |
# Kick off services which should be active from the start.
|
| 32 |
container.content_ingestion_service()
|
| 33 |
container.question_dispatch_service()
|
| 34 |
+
container.schedule_service()
|
| 35 |
|
| 36 |
+
# Start the Slack socket mode handler in the background.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
socket_mode_handler = container.socket_mode_handler()
|
| 38 |
+
slack_bolt_task = create_task(socket_mode_handler.start_async())
|
| 39 |
+
shutdown_signal_handler = create_shutdown_signal_handler()
|
| 40 |
+
loop = get_running_loop()
|
| 41 |
+
loop.add_signal_handler(SIGINT, shutdown_signal_handler)
|
| 42 |
+
loop.add_signal_handler(SIGTERM, shutdown_signal_handler)
|
| 43 |
+
try:
|
| 44 |
+
logger.info("Starting Slack Socket Mode handler…")
|
| 45 |
+
await slack_bolt_task
|
| 46 |
+
except CancelledError:
|
| 47 |
+
logger.info("Shutting down application…")
|
| 48 |
+
finally:
|
| 49 |
+
await socket_mode_handler.close_async()
|
| 50 |
+
await container.shutdown_resources()
|
| 51 |
|
| 52 |
if __name__ == "__main__":
|
| 53 |
run(main())
|
src/ctp_slack_bot/containers.py
CHANGED
|
@@ -13,7 +13,7 @@ from ctp_slack_bot.services.embeddings_model_service import EmbeddingsModelServi
|
|
| 13 |
from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
|
| 14 |
from ctp_slack_bot.services.language_model_service import LanguageModelService
|
| 15 |
from ctp_slack_bot.services.question_dispatch_service import QuestionDispatchService
|
| 16 |
-
from ctp_slack_bot.services.schedule_service import
|
| 17 |
from ctp_slack_bot.services.slack_service import SlackServiceResource
|
| 18 |
from ctp_slack_bot.services.vector_database_service import VectorDatabaseService
|
| 19 |
from ctp_slack_bot.services.vectorization_service import VectorizationService
|
|
@@ -22,7 +22,7 @@ from ctp_slack_bot.services.vectorization_service import VectorizationService
|
|
| 22 |
class Container(DeclarativeContainer):
|
| 23 |
settings = Singleton(Settings)
|
| 24 |
event_brokerage_service = Singleton(EventBrokerageService)
|
| 25 |
-
schedule_service =
|
| 26 |
mongo_db = Resource(MongoDBResource, settings=settings) # TODO: generalize to any database.
|
| 27 |
vectorized_chunk_repository = Singleton(MongoVectorizedChunkRepository, mongo_db=mongo_db)
|
| 28 |
vector_database_service = Singleton(VectorDatabaseService, settings=settings, mongo_db=mongo_db)
|
|
|
|
| 13 |
from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
|
| 14 |
from ctp_slack_bot.services.language_model_service import LanguageModelService
|
| 15 |
from ctp_slack_bot.services.question_dispatch_service import QuestionDispatchService
|
| 16 |
+
from ctp_slack_bot.services.schedule_service import ScheduleServiceResource
|
| 17 |
from ctp_slack_bot.services.slack_service import SlackServiceResource
|
| 18 |
from ctp_slack_bot.services.vector_database_service import VectorDatabaseService
|
| 19 |
from ctp_slack_bot.services.vectorization_service import VectorizationService
|
|
|
|
| 22 |
class Container(DeclarativeContainer):
|
| 23 |
settings = Singleton(Settings)
|
| 24 |
event_brokerage_service = Singleton(EventBrokerageService)
|
| 25 |
+
schedule_service = Resource(ScheduleServiceResource, settings=settings)
|
| 26 |
mongo_db = Resource(MongoDBResource, settings=settings) # TODO: generalize to any database.
|
| 27 |
vectorized_chunk_repository = Singleton(MongoVectorizedChunkRepository, mongo_db=mongo_db)
|
| 28 |
vector_database_service = Singleton(VectorDatabaseService, settings=settings, mongo_db=mongo_db)
|
src/ctp_slack_bot/core/logging.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
from logging import __file__ as logging_file, basicConfig, currentframe, getLogger, Handler, INFO, LogRecord
|
| 2 |
from loguru import logger
|
| 3 |
from os import getenv
|
| 4 |
from sys import stderr
|
|
@@ -90,7 +90,9 @@ def setup_logging() -> None:
|
|
| 90 |
basicConfig(handlers=[InterceptHandler()], level=0, force=True)
|
| 91 |
|
| 92 |
# Update logging levels for some noisy libraries.
|
| 93 |
-
for logger_name in ("uvicorn", "uvicorn.error", "fastapi", "httpx", "
|
| 94 |
getLogger(logger_name).setLevel(INFO)
|
|
|
|
|
|
|
| 95 |
|
| 96 |
logger.info(f"Logging configured with level {log_level}")
|
|
|
|
| 1 |
+
from logging import __file__ as logging_file, basicConfig, currentframe, getLogger, Handler, INFO, LogRecord, WARNING
|
| 2 |
from loguru import logger
|
| 3 |
from os import getenv
|
| 4 |
from sys import stderr
|
|
|
|
| 90 |
basicConfig(handlers=[InterceptHandler()], level=0, force=True)
|
| 91 |
|
| 92 |
# Update logging levels for some noisy libraries.
|
| 93 |
+
for logger_name in ("uvicorn", "uvicorn.error", "fastapi", "httpx", "pymongo"):
|
| 94 |
getLogger(logger_name).setLevel(INFO)
|
| 95 |
+
# A one-element tuple needs the trailing comma. Without it the parentheses
# only group the string, and the loop iterates over its characters,
# configuring loggers named "a", "p", "s", ... instead of "apscheduler".
for logger_name in ("apscheduler",):
    getLogger(logger_name).setLevel(WARNING)
|
| 97 |
|
| 98 |
logger.info(f"Logging configured with level {log_level}")
|
src/ctp_slack_bot/core/response_rendering.py
DELETED
|
@@ -1,13 +0,0 @@
|
|
| 1 |
-
from json import dumps
|
| 2 |
-
from starlette.responses import JSONResponse
|
| 3 |
-
from typing import Any, Self
|
| 4 |
-
|
| 5 |
-
class PrettyJSONResponse(JSONResponse):
|
| 6 |
-
def render(self: Self, content: Any) -> bytes:
|
| 7 |
-
return dumps(
|
| 8 |
-
content,
|
| 9 |
-
ensure_ascii=False,
|
| 10 |
-
allow_nan=False,
|
| 11 |
-
indent=4,
|
| 12 |
-
separators=(", ", ": "),
|
| 13 |
-
).encode()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/ctp_slack_bot/db/mongo_db.py
CHANGED
|
@@ -1,13 +1,14 @@
|
|
| 1 |
-
from
|
|
|
|
| 2 |
from motor.motor_asyncio import AsyncIOMotorClient
|
| 3 |
from pymongo.errors import ConnectionFailure, ServerSelectionTimeoutError
|
| 4 |
from pymongo.operations import SearchIndexModel
|
| 5 |
from loguru import logger
|
| 6 |
from pydantic import BaseModel, PrivateAttr
|
| 7 |
from typing import Any, Dict, Optional, Self
|
| 8 |
-
import asyncio
|
| 9 |
|
| 10 |
from ctp_slack_bot.core.config import Settings
|
|
|
|
| 11 |
|
| 12 |
class MongoDB(BaseModel):
|
| 13 |
"""
|
|
@@ -16,23 +17,20 @@ class MongoDB(BaseModel):
|
|
| 16 |
settings: Settings
|
| 17 |
_client: PrivateAttr = PrivateAttr()
|
| 18 |
_db: PrivateAttr = PrivateAttr()
|
| 19 |
-
|
| 20 |
class Config:
|
| 21 |
arbitrary_types_allowed = True
|
| 22 |
-
|
| 23 |
def __init__(self: Self, **data: Dict[str, Any]) -> None:
|
| 24 |
super().__init__(**data)
|
| 25 |
logger.debug("Created {}", self.__class__.__name__)
|
| 26 |
-
|
| 27 |
def connect(self: Self) -> None:
|
| 28 |
"""Initialize MongoDB client with settings."""
|
| 29 |
try:
|
| 30 |
connection_string = self.settings.MONGODB_URI.get_secret_value()
|
| 31 |
-
logger.debug("Connecting to MongoDB using URI: {}", connection_string
|
| 32 |
-
|
| 33 |
-
'[REDACTED]'
|
| 34 |
-
))
|
| 35 |
-
|
| 36 |
# Create client with appropriate settings
|
| 37 |
self._client = AsyncIOMotorClient(
|
| 38 |
connection_string,
|
|
@@ -43,48 +41,48 @@ class MongoDB(BaseModel):
|
|
| 43 |
retryWrites=True,
|
| 44 |
w="majority"
|
| 45 |
)
|
| 46 |
-
|
| 47 |
# Set database
|
| 48 |
db_name = self.settings.MONGODB_NAME
|
| 49 |
-
|
| 50 |
self._db = self._client[db_name]
|
| 51 |
logger.debug("MongoDB client initialized for database: {}", db_name)
|
| 52 |
-
|
| 53 |
except Exception as e:
|
| 54 |
logger.error("Failed to initialize MongoDB client: {}", e)
|
| 55 |
self._client = None
|
| 56 |
self._db = None
|
| 57 |
raise
|
| 58 |
-
|
| 59 |
@property
|
| 60 |
def client(self: Self) -> AsyncIOMotorClient:
|
| 61 |
"""Get the MongoDB client instance."""
|
| 62 |
if not hasattr(self, '_client') or self._client is None:
|
| 63 |
-
logger.warning("MongoDB client not initialized. Attempting to initialize
|
| 64 |
self.connect()
|
| 65 |
if not hasattr(self, '_client') or self._client is None:
|
| 66 |
-
raise ConnectionError("Failed to initialize MongoDB client")
|
| 67 |
return self._client
|
| 68 |
-
|
| 69 |
@property
|
| 70 |
def db(self: Self) -> Any:
|
| 71 |
"""Get the MongoDB database instance."""
|
| 72 |
if not hasattr(self, '_db') or self._db is None:
|
| 73 |
-
logger.warning("MongoDB database not initialized. Attempting to initialize client
|
| 74 |
self.connect()
|
| 75 |
if not hasattr(self, '_db') or self._db is None:
|
| 76 |
-
raise ConnectionError("Failed to initialize MongoDB database")
|
| 77 |
return self._db
|
| 78 |
-
|
| 79 |
async def ping(self: Self) -> bool:
|
| 80 |
"""Check if MongoDB connection is alive."""
|
| 81 |
try:
|
| 82 |
# Get client to ensure we're connected
|
| 83 |
client = self.client
|
| 84 |
-
|
| 85 |
# Try a simple ping command
|
| 86 |
await client.admin.command('ping')
|
| 87 |
-
logger.debug("MongoDB connection is active")
|
| 88 |
return True
|
| 89 |
except (ConnectionFailure, ServerSelectionTimeoutError) as e:
|
| 90 |
logger.error("MongoDB connection failed: {}", e)
|
|
@@ -92,7 +90,7 @@ class MongoDB(BaseModel):
|
|
| 92 |
except Exception as e:
|
| 93 |
logger.error("Unexpected error during MongoDB ping: {}", e)
|
| 94 |
return False
|
| 95 |
-
|
| 96 |
async def get_collection(self: Self, name: str) -> Any:
|
| 97 |
"""
|
| 98 |
Get a collection by name with validation.
|
|
@@ -100,29 +98,29 @@ class MongoDB(BaseModel):
|
|
| 100 |
"""
|
| 101 |
# First ensure we can connect at all
|
| 102 |
if not await self.ping():
|
| 103 |
-
logger.error("Cannot get collection '{}'
|
| 104 |
-
raise ConnectionError("MongoDB connection is not available")
|
| 105 |
-
|
| 106 |
try:
|
| 107 |
# Get all collection names to check if this one exists
|
| 108 |
-
logger.debug("Checking if collection '{}' exists", name)
|
| 109 |
collection_names = await self.db.list_collection_names()
|
| 110 |
|
| 111 |
if name not in collection_names:
|
| 112 |
-
logger.info("Collection '{}' does not exist. Creating it
|
| 113 |
# Create the collection
|
| 114 |
await self.db.create_collection(name)
|
| 115 |
-
logger.debug("Successfully created collection
|
| 116 |
else:
|
| 117 |
-
logger.debug("Collection '{}' already exists", name)
|
| 118 |
-
|
| 119 |
# Get and return the collection
|
| 120 |
collection = self.db[name]
|
| 121 |
return collection
|
| 122 |
except Exception as e:
|
| 123 |
logger.error("Error accessing collection '{}': {}", name, e)
|
| 124 |
raise
|
| 125 |
-
|
| 126 |
async def create_indexes(self: Self, collection_name: str) -> None:
|
| 127 |
"""
|
| 128 |
Create a vector search index on a collection.
|
|
@@ -131,7 +129,7 @@ class MongoDB(BaseModel):
|
|
| 131 |
collection_name: Name of the collection
|
| 132 |
"""
|
| 133 |
collection = await self.get_collection(collection_name)
|
| 134 |
-
|
| 135 |
try:
|
| 136 |
# Create search index model using MongoDB's recommended approach
|
| 137 |
search_index_model = SearchIndexModel(
|
|
@@ -149,41 +147,38 @@ class MongoDB(BaseModel):
|
|
| 149 |
name=f"{collection_name}_vector_index",
|
| 150 |
type="vectorSearch"
|
| 151 |
)
|
| 152 |
-
|
| 153 |
# Create the search index using the motor collection
|
| 154 |
result = await collection.create_search_index(search_index_model)
|
| 155 |
-
logger.info("Vector search index '{}' created for collection {}", result, collection_name)
|
| 156 |
-
|
| 157 |
except Exception as e:
|
| 158 |
if "command not found" in str(e).lower():
|
| 159 |
logger.warning("Vector search not supported by this MongoDB instance. Some functionality may be limited.")
|
| 160 |
# Create a fallback standard index on embedding field
|
| 161 |
await collection.create_index("embedding")
|
| 162 |
-
logger.info("Created standard index on 'embedding' field as fallback")
|
| 163 |
else:
|
| 164 |
logger.error("Failed to create vector index: {}", e)
|
| 165 |
raise
|
| 166 |
-
|
| 167 |
async def close(self: Self) -> None:
|
| 168 |
"""Close MongoDB connection."""
|
| 169 |
if self._client:
|
| 170 |
self._client.close()
|
| 171 |
-
logger.info("MongoDB connection
|
| 172 |
self._client = None
|
| 173 |
self._db = None
|
| 174 |
|
| 175 |
-
class MongoDBResource(
|
| 176 |
-
def init(self: Self, settings: Settings) -> MongoDB:
|
| 177 |
logger.info("Initializing MongoDB connection for database: {}", settings.MONGODB_NAME)
|
| 178 |
mongo_db = MongoDB(settings=settings)
|
| 179 |
mongo_db.connect()
|
| 180 |
-
|
| 181 |
-
# Test the connection asynchronously - this will run after init returns
|
| 182 |
-
asyncio.create_task(self._test_connection(mongo_db))
|
| 183 |
-
|
| 184 |
return mongo_db
|
| 185 |
-
|
| 186 |
-
async def _test_connection(self, mongo_db: MongoDB) -> None:
|
| 187 |
"""Test MongoDB connection and log the result."""
|
| 188 |
try:
|
| 189 |
is_connected = await mongo_db.ping()
|
|
@@ -193,11 +188,11 @@ class MongoDBResource(Resource):
|
|
| 193 |
logger.error("MongoDB connection test failed!")
|
| 194 |
except Exception as e:
|
| 195 |
logger.error("Error testing MongoDB connection: {}", e)
|
| 196 |
-
|
|
|
|
| 197 |
async def shutdown(self: Self, mongo_db: MongoDB) -> None:
|
| 198 |
"""Close MongoDB connection on shutdown."""
|
| 199 |
try:
|
| 200 |
-
logger.info("Closing MongoDB connection...")
|
| 201 |
await mongo_db.close()
|
| 202 |
except Exception as e:
|
| 203 |
logger.error("Error closing MongoDB connection: {}", e)
|
|
|
|
| 1 |
+
from asyncio import create_task
|
| 2 |
+
from dependency_injector.resources import AsyncResource
|
| 3 |
from motor.motor_asyncio import AsyncIOMotorClient
|
| 4 |
from pymongo.errors import ConnectionFailure, ServerSelectionTimeoutError
|
| 5 |
from pymongo.operations import SearchIndexModel
|
| 6 |
from loguru import logger
|
| 7 |
from pydantic import BaseModel, PrivateAttr
|
| 8 |
from typing import Any, Dict, Optional, Self
|
|
|
|
| 9 |
|
| 10 |
from ctp_slack_bot.core.config import Settings
|
| 11 |
+
from ctp_slack_bot.utils import sanitize_mongo_db_uri
|
| 12 |
|
| 13 |
class MongoDB(BaseModel):
|
| 14 |
"""
|
|
|
|
| 17 |
settings: Settings
|
| 18 |
_client: PrivateAttr = PrivateAttr()
|
| 19 |
_db: PrivateAttr = PrivateAttr()
|
| 20 |
+
|
| 21 |
class Config:
|
| 22 |
arbitrary_types_allowed = True
|
| 23 |
+
|
| 24 |
def __init__(self: Self, **data: Dict[str, Any]) -> None:
    """Initialise the model from keyword data and log its creation.

    Args:
        data: Field values forwarded unchanged to the base-class initialiser.
    """
    super().__init__(**data)
    class_name = self.__class__.__name__
    logger.debug("Created {}", class_name)
|
| 27 |
+
|
| 28 |
def connect(self: Self) -> None:
|
| 29 |
"""Initialize MongoDB client with settings."""
|
| 30 |
try:
|
| 31 |
connection_string = self.settings.MONGODB_URI.get_secret_value()
|
| 32 |
+
logger.debug("Connecting to MongoDB using URI: {}", sanitize_mongo_db_uri(connection_string))
|
| 33 |
+
|
|
|
|
|
|
|
|
|
|
| 34 |
# Create client with appropriate settings
|
| 35 |
self._client = AsyncIOMotorClient(
|
| 36 |
connection_string,
|
|
|
|
| 41 |
retryWrites=True,
|
| 42 |
w="majority"
|
| 43 |
)
|
| 44 |
+
|
| 45 |
# Set database
|
| 46 |
db_name = self.settings.MONGODB_NAME
|
| 47 |
+
|
| 48 |
self._db = self._client[db_name]
|
| 49 |
logger.debug("MongoDB client initialized for database: {}", db_name)
|
| 50 |
+
|
| 51 |
except Exception as e:
|
| 52 |
logger.error("Failed to initialize MongoDB client: {}", e)
|
| 53 |
self._client = None
|
| 54 |
self._db = None
|
| 55 |
raise
|
| 56 |
+
|
| 57 |
@property
|
| 58 |
def client(self: Self) -> AsyncIOMotorClient:
    """Return the MongoDB client, connecting first if none is set.

    If ``_client`` is missing or ``None``, a warning is logged and
    ``connect()`` is called once.

    Raises:
        ConnectionError: If ``_client`` is still unset after ``connect()``.
    """
    if getattr(self, '_client', None) is not None:
        return self._client

    logger.warning("MongoDB client not initialized. Attempting to initialize…")
    self.connect()
    if getattr(self, '_client', None) is None:
        raise ConnectionError("Failed to initialize MongoDB client.")
    return self._client
|
| 66 |
+
|
| 67 |
@property
|
| 68 |
def db(self: Self) -> Any:
    """Return the MongoDB database handle, connecting first if none is set.

    If ``_db`` is missing or ``None``, a warning is logged and
    ``connect()`` is called once.

    Raises:
        ConnectionError: If ``_db`` is still unset after ``connect()``.
    """
    if getattr(self, '_db', None) is not None:
        return self._db

    logger.warning("MongoDB database not initialized. Attempting to initialize client…")
    self.connect()
    if getattr(self, '_db', None) is None:
        raise ConnectionError("Failed to initialize MongoDB database.")
    return self._db
|
| 76 |
+
|
| 77 |
async def ping(self: Self) -> bool:
|
| 78 |
"""Check if MongoDB connection is alive."""
|
| 79 |
try:
|
| 80 |
# Get client to ensure we're connected
|
| 81 |
client = self.client
|
| 82 |
+
|
| 83 |
# Try a simple ping command
|
| 84 |
await client.admin.command('ping')
|
| 85 |
+
logger.debug("MongoDB connection is active!")
|
| 86 |
return True
|
| 87 |
except (ConnectionFailure, ServerSelectionTimeoutError) as e:
|
| 88 |
logger.error("MongoDB connection failed: {}", e)
|
|
|
|
| 90 |
except Exception as e:
|
| 91 |
logger.error("Unexpected error during MongoDB ping: {}", e)
|
| 92 |
return False
|
| 93 |
+
|
| 94 |
async def get_collection(self: Self, name: str) -> Any:
|
| 95 |
"""
|
| 96 |
Get a collection by name with validation.
|
|
|
|
| 98 |
"""
|
| 99 |
# First ensure we can connect at all
|
| 100 |
if not await self.ping():
|
| 101 |
+
logger.error("Cannot get collection '{}' because a MongoDB connection is not available.", name)
|
| 102 |
+
raise ConnectionError("MongoDB connection is not available.")
|
| 103 |
+
|
| 104 |
try:
|
| 105 |
# Get all collection names to check if this one exists
|
| 106 |
+
logger.debug("Checking if collection '{}' exists…", name)
|
| 107 |
collection_names = await self.db.list_collection_names()
|
| 108 |
|
| 109 |
if name not in collection_names:
|
| 110 |
+
logger.info("Collection '{}' does not exist. Creating it…", name)
|
| 111 |
# Create the collection
|
| 112 |
await self.db.create_collection(name)
|
| 113 |
+
logger.debug("Successfully created collection: {}", name)
|
| 114 |
else:
|
| 115 |
+
logger.debug("Collection '{}' already exists!", name)
|
| 116 |
+
|
| 117 |
# Get and return the collection
|
| 118 |
collection = self.db[name]
|
| 119 |
return collection
|
| 120 |
except Exception as e:
|
| 121 |
logger.error("Error accessing collection '{}': {}", name, e)
|
| 122 |
raise
|
| 123 |
+
|
| 124 |
async def create_indexes(self: Self, collection_name: str) -> None:
|
| 125 |
"""
|
| 126 |
Create a vector search index on a collection.
|
|
|
|
| 129 |
collection_name: Name of the collection
|
| 130 |
"""
|
| 131 |
collection = await self.get_collection(collection_name)
|
| 132 |
+
|
| 133 |
try:
|
| 134 |
# Create search index model using MongoDB's recommended approach
|
| 135 |
search_index_model = SearchIndexModel(
|
|
|
|
| 147 |
name=f"{collection_name}_vector_index",
|
| 148 |
type="vectorSearch"
|
| 149 |
)
|
| 150 |
+
|
| 151 |
# Create the search index using the motor collection
|
| 152 |
result = await collection.create_search_index(search_index_model)
|
| 153 |
+
logger.info("Vector search index '{}' created for collection {}.", result, collection_name)
|
| 154 |
+
|
| 155 |
except Exception as e:
|
| 156 |
if "command not found" in str(e).lower():
|
| 157 |
logger.warning("Vector search not supported by this MongoDB instance. Some functionality may be limited.")
|
| 158 |
# Create a fallback standard index on embedding field
|
| 159 |
await collection.create_index("embedding")
|
| 160 |
+
logger.info("Created standard index on 'embedding' field as fallback.")
|
| 161 |
else:
|
| 162 |
logger.error("Failed to create vector index: {}", e)
|
| 163 |
raise
|
| 164 |
+
|
| 165 |
async def close(self: Self) -> None:
    """Close the MongoDB client, if one is held, and clear the handles.

    Does nothing when ``_client`` is falsy. Otherwise closes the client,
    logs the closure, and resets both ``_client`` and ``_db`` to ``None``.
    """
    if not self._client:
        return

    self._client.close()
    logger.info("Closed MongoDB connection.")
    self._client = None
    self._db = None
|
| 172 |
|
| 173 |
+
class MongoDBResource(AsyncResource):
|
| 174 |
+
async def init(self: Self, settings: Settings) -> MongoDB:
    """Create a ``MongoDB`` wrapper, connect it, and test the connection.

    Args:
        settings: Application settings; ``MONGODB_NAME`` is logged and the
            whole object is passed to ``MongoDB``.

    Returns:
        The connected ``MongoDB`` instance, returned once
        ``_test_connection`` has been awaited.
    """
    database_name = settings.MONGODB_NAME
    logger.info("Initializing MongoDB connection for database: {}", database_name)

    database = MongoDB(settings=settings)
    database.connect()
    # Awaited inline so the connection test finishes before the resource is returned.
    await self._test_connection(database)
    return database
|
| 180 |
+
|
| 181 |
+
async def _test_connection(self: Self, mongo_db: MongoDB) -> None:
|
| 182 |
"""Test MongoDB connection and log the result."""
|
| 183 |
try:
|
| 184 |
is_connected = await mongo_db.ping()
|
|
|
|
| 188 |
logger.error("MongoDB connection test failed!")
|
| 189 |
except Exception as e:
|
| 190 |
logger.error("Error testing MongoDB connection: {}", e)
|
| 191 |
+
raise
|
| 192 |
+
|
| 193 |
async def shutdown(self: Self, mongo_db: MongoDB) -> None:
    """Close the given MongoDB wrapper, logging rather than raising on failure.

    Args:
        mongo_db: The instance whose ``close()`` coroutine is awaited.
    """
    try:
        await mongo_db.close()
    except Exception as error:
        # Best-effort cleanup: the error is logged and not re-raised.
        logger.error("Error closing MongoDB connection: {}", error)
|
src/ctp_slack_bot/services/GOOGLE_DRIVE_README.md
DELETED
|
@@ -1,228 +0,0 @@
|
|
| 1 |
-
# Google Drive Access Module
|
| 2 |
-
|
| 3 |
-
This Python module provides a simplified way to interact with Google Drive, focusing on easy access to files in nested folders using path-like syntax. It handles various Google file formats and provides comprehensive metadata for files and folders.
|
| 4 |
-
|
| 5 |
-
## Features
|
| 6 |
-
|
| 7 |
-
- **Path-based folder access**: Access files using simple paths like `folder1/folder2/folder3`
|
| 8 |
-
- **Efficient caching**: Folder IDs are cached to improve performance
|
| 9 |
-
- **Comprehensive metadata**: Get detailed information about files and folders
|
| 10 |
-
- **Read various file types**:
|
| 11 |
-
- Text files
|
| 12 |
-
- Google Docs
|
| 13 |
-
- VTT files
|
| 14 |
-
- **Robust folder finding**: Works with exact and partial name matching
|
| 15 |
-
- **Simple API**: Designed for ease of use with minimal code
|
| 16 |
-
|
| 17 |
-
## Setup Instructions
|
| 18 |
-
|
| 19 |
-
### 1. Create a Google Cloud Project
|
| 20 |
-
|
| 21 |
-
1. Go to the [Google Cloud Console](https://console.cloud.google.com/)
|
| 22 |
-
2. Click on the project dropdown at the top of the page and select "New Project"
|
| 23 |
-
3. Enter a project name and click "Create"
|
| 24 |
-
4. Once created, make sure your new project is selected in the dropdown
|
| 25 |
-
|
| 26 |
-
### 2. Enable the Google Drive API
|
| 27 |
-
|
| 28 |
-
1. In the Google Cloud Console, navigate to "APIs & Services" > "Library" in the left sidebar
|
| 29 |
-
2. Search for "Google Drive API" in the search bar
|
| 30 |
-
3. Click on "Google Drive API" in the results
|
| 31 |
-
4. Click the "Enable" button
|
| 32 |
-
|
| 33 |
-
### 3. Create OAuth Credentials
|
| 34 |
-
|
| 35 |
-
1. In the Google Cloud Console, go to "APIs & Services" > "Credentials" in the left sidebar
|
| 36 |
-
2. Click "Create Credentials" at the top and select "OAuth client ID"
|
| 37 |
-
3. If prompted to configure the OAuth consent screen:
|
| 38 |
-
- Choose "External" user type (or "Internal" if you're in a Google Workspace organization)
|
| 39 |
-
- Fill in the required information (App name, User support email, Developer contact email)
|
| 40 |
-
- Click "Save and Continue"
|
| 41 |
-
- Add the following scopes:
|
| 42 |
-
- `.../auth/drive` (Full access to Google Drive)
|
| 43 |
-
- Click "Save and Continue" and complete the registration
|
| 44 |
-
4. Return to the "Create OAuth client ID" screen
|
| 45 |
-
5. Select "Desktop application" as the Application type
|
| 46 |
-
6. Enter a name for your OAuth client (e.g., "Google Drive Access Desktop")
|
| 47 |
-
7. Click "Create"
|
| 48 |
-
8. Download the JSON file (this is your `client_secret.json`)
|
| 49 |
-
|
| 50 |
-
### 4. Project Setup
|
| 51 |
-
|
| 52 |
-
1. Set up a virtual environment and install dependencies:
|
| 53 |
-
```bash
|
| 54 |
-
python -m venv venv
|
| 55 |
-
source venv/bin/activate # On Windows: venv\Scripts\activate
|
| 56 |
-
pip install -r requirements.txt
|
| 57 |
-
```
|
| 58 |
-
|
| 59 |
-
2. Place your credentials:
|
| 60 |
-
- Create a `credentials` directory in your project root
|
| 61 |
-
- Move the downloaded OAuth client JSON file to the `credentials` directory
|
| 62 |
-
- Rename it to `client_secret.json`
|
| 63 |
-
|
| 64 |
-
### 5. Authentication Process
|
| 65 |
-
|
| 66 |
-
When you run the application for the first time:
|
| 67 |
-
1. A browser window will open automatically
|
| 68 |
-
2. You'll be asked to sign in to your Google account
|
| 69 |
-
3. You'll see a consent screen asking for permission to access your Google Drive
|
| 70 |
-
4. After granting permission, the browser will display a success message
|
| 71 |
-
5. The application will save a token file (`token.pickle`) in the credentials directory for future use
|
| 72 |
-
|
| 73 |
-
## Usage Guide
|
| 74 |
-
|
| 75 |
-
The `EasyGoogleDrive` class provides several methods to interact with Google Drive. Here's how to use the core functionality:
|
| 76 |
-
|
| 77 |
-
### Basic Usage
|
| 78 |
-
|
| 79 |
-
```python
|
| 80 |
-
from google_drive_access import EasyGoogleDrive
|
| 81 |
-
|
| 82 |
-
# Initialize the Google Drive client
|
| 83 |
-
drive = EasyGoogleDrive()
|
| 84 |
-
|
| 85 |
-
# Example folder path - replace with your actual folder path
|
| 86 |
-
folder_path = "Spring-2025-BAI"
|
| 87 |
-
subfolder_path = "Spring-2025-BAI/transcripts"
|
| 88 |
-
```
|
| 89 |
-
|
| 90 |
-
### Listing Folders
|
| 91 |
-
|
| 92 |
-
```python
|
| 93 |
-
# List folders in a directory
|
| 94 |
-
folders = drive.get_folders_in_folder(folder_path)
|
| 95 |
-
|
| 96 |
-
# Access folder properties
|
| 97 |
-
for folder in folders:
|
| 98 |
-
print(f"Folder: {folder['name']}")
|
| 99 |
-
print(f" Created: {folder.get('createdTimeFormatted', 'Unknown')}")
|
| 100 |
-
print(f" Modified: {folder.get('modifiedTimeFormatted', 'Unknown')}")
|
| 101 |
-
```
|
| 102 |
-
|
| 103 |
-
### Listing Files
|
| 104 |
-
|
| 105 |
-
```python
|
| 106 |
-
# List files in a directory
|
| 107 |
-
files = drive.get_files_in_folder(subfolder_path)
|
| 108 |
-
|
| 109 |
-
# Access file properties
|
| 110 |
-
for file in files:
|
| 111 |
-
print(f"File: {file['name']}")
|
| 112 |
-
print(f" Type: {file.get('fileType', 'Unknown')}")
|
| 113 |
-
print(f" Created: {file.get('createdTimeFormatted', 'Unknown')}")
|
| 114 |
-
print(f" Modified: {file.get('modifiedTimeFormatted', 'Unknown')}")
|
| 115 |
-
print(f" Size: {file.get('sizeFormatted', 'Unknown')}")
|
| 116 |
-
```
|
| 117 |
-
|
| 118 |
-
### Getting a Specific File
|
| 119 |
-
|
| 120 |
-
```python
|
| 121 |
-
# Get a specific file with metadata
|
| 122 |
-
file = drive.get_file("example.txt", subfolder_path, include_metadata=True)
|
| 123 |
-
|
| 124 |
-
if file:
|
| 125 |
-
print(f"File: {file['name']}")
|
| 126 |
-
print(f" Type: {file.get('fileType', 'Unknown')}")
|
| 127 |
-
print(f" Created: {file.get('createdTimeFormatted', 'Unknown')}")
|
| 128 |
-
print(f" Modified: {file.get('modifiedTimeFormatted', 'Unknown')}")
|
| 129 |
-
print(f" Size: {file.get('sizeFormatted', 'Unknown')}")
|
| 130 |
-
```
|
| 131 |
-
|
| 132 |
-
### Getting All Items in a Folder
|
| 133 |
-
|
| 134 |
-
```python
|
| 135 |
-
# Get all items (files and folders) in a folder
|
| 136 |
-
all_items = drive.get_all_files_in_folder(folder_path)
|
| 137 |
-
|
| 138 |
-
# Access item properties
|
| 139 |
-
for item in all_items:
|
| 140 |
-
item_type = "Folder" if item.get('mimeType') == drive.MIME_TYPES['folder'] else item.get('fileType', 'Unknown')
|
| 141 |
-
print(f"Item: {item['name']} ({item_type})")
|
| 142 |
-
```
|
| 143 |
-
|
| 144 |
-
### Checking if a File Exists
|
| 145 |
-
|
| 146 |
-
```python
|
| 147 |
-
# Check if a file exists
|
| 148 |
-
exists = drive.file_exists("example.txt", subfolder_path)
|
| 149 |
-
print(f"File exists: {exists}")
|
| 150 |
-
```
|
| 151 |
-
|
| 152 |
-
### Getting File Modified Time
|
| 153 |
-
|
| 154 |
-
```python
|
| 155 |
-
# Get file modified time
|
| 156 |
-
modified_time = drive.get_file_modified_time("example.txt", subfolder_path)
|
| 157 |
-
if modified_time:
|
| 158 |
-
print(f"Last modified: {modified_time}")
|
| 159 |
-
```
|
| 160 |
-
|
| 161 |
-
### Reading File Content
|
| 162 |
-
|
| 163 |
-
```python
|
| 164 |
-
# Get file with content
|
| 165 |
-
file_with_content = drive.get_file("example.txt", subfolder_path, include_content=True)
|
| 166 |
-
|
| 167 |
-
if file_with_content and 'file_content' in file_with_content:
|
| 168 |
-
content = file_with_content['file_content']
|
| 169 |
-
if content:
|
| 170 |
-
print(f"Content: {content[:100]}...") # Print first 100 characters
|
| 171 |
-
```
|
| 172 |
-
|
| 173 |
-
## Complete Example
|
| 174 |
-
|
| 175 |
-
For a complete example of how to use the `EasyGoogleDrive` class, see the `google_drive_basic_usage.py` file included in this package. This file demonstrates all the core functionality with practical examples.
|
| 176 |
-
|
| 177 |
-
## Key Concepts
|
| 178 |
-
|
| 179 |
-
### Path-based Folder Access
|
| 180 |
-
|
| 181 |
-
The module uses a simple path-like syntax to access folders:
|
| 182 |
-
|
| 183 |
-
```python
|
| 184 |
-
# Access a deeply nested folder
|
| 185 |
-
folder_path = "folder1/folder2/folder3"
|
| 186 |
-
files = drive.get_files_in_folder(folder_path)
|
| 187 |
-
```
|
| 188 |
-
|
| 189 |
-
This makes it much easier to work with nested folder structures compared to using folder IDs.
|
| 190 |
-
|
| 191 |
-
### Metadata Fields
|
| 192 |
-
|
| 193 |
-
The module provides comprehensive metadata for files and folders, including:
|
| 194 |
-
|
| 195 |
-
- **Creation and modification dates**: Both as datetime objects and formatted strings
|
| 196 |
-
- **File size**: Both in bytes and human-readable format (KB, MB, GB)
|
| 197 |
-
- **File type**: Simplified type based on MIME type
|
| 198 |
-
- **Owner information**: Names and email addresses of file owners
|
| 199 |
-
- **Sharing status**: Whether the file is shared
|
| 200 |
-
- **Web links**: Direct links to view the file in a browser
|
| 201 |
-
|
| 202 |
-
## Error Handling
|
| 203 |
-
|
| 204 |
-
The module includes comprehensive error handling:
|
| 205 |
-
|
| 206 |
-
- **Authentication errors**: Clear messages when credentials are missing or invalid
|
| 207 |
-
- **Folder not found**: Helpful messages when a folder in the path cannot be found
|
| 208 |
-
- **File not found**: Attempts partial name matching before giving up
|
| 209 |
-
- **Decoding errors**: Handles issues with file content encoding
|
| 210 |
-
|
| 211 |
-
## Dependencies
|
| 212 |
-
|
| 213 |
-
- **Required**:
|
| 214 |
-
- google-auth-oauthlib
|
| 215 |
-
- google-auth-httplib2
|
| 216 |
-
- google-api-python-client
|
| 217 |
-
- python-dateutil
|
| 218 |
-
|
| 219 |
-
## Security Notes
|
| 220 |
-
|
| 221 |
-
- Never commit your `client_secret.json` or token files to version control
|
| 222 |
-
- Add `credentials/` to your `.gitignore` file
|
| 223 |
-
- Keep your credentials secure and don't share them
|
| 224 |
-
- For production applications, consider using service accounts with the minimum required permissions
|
| 225 |
-
|
| 226 |
-
## Contributing
|
| 227 |
-
|
| 228 |
-
Feel free to contribute to this project by submitting issues or pull requests.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/ctp_slack_bot/services/google_drive_access.py
DELETED
|
@@ -1,623 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Easy Google Drive Access
|
| 3 |
-
|
| 4 |
-
A simplified module for accessing Google Drive files in nested folders.
|
| 5 |
-
Designed to make it as easy as possible to access files using path-like syntax.
|
| 6 |
-
"""
|
| 7 |
-
|
| 8 |
-
import os
|
| 9 |
-
import pickle
|
| 10 |
-
import io
|
| 11 |
-
import datetime
|
| 12 |
-
from typing import List, Dict, Optional, Any, Union
|
| 13 |
-
|
| 14 |
-
from google.oauth2.credentials import Credentials
|
| 15 |
-
from google_auth_oauthlib.flow import InstalledAppFlow
|
| 16 |
-
from google.auth.transport.requests import Request
|
| 17 |
-
from googleapiclient.discovery import build
|
| 18 |
-
from googleapiclient.http import MediaIoBaseDownload
|
| 19 |
-
from googleapiclient.errors import HttpError
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
class EasyGoogleDrive:
|
| 23 |
-
"""
|
| 24 |
-
Simplified Google Drive access focused on accessing files in nested folders.
|
| 25 |
-
"""
|
| 26 |
-
|
| 27 |
-
# Define the scopes needed for the application
|
| 28 |
-
SCOPES = ['https://www.googleapis.com/auth/drive']
|
| 29 |
-
|
| 30 |
-
# Define common MIME types
|
| 31 |
-
MIME_TYPES = {
|
| 32 |
-
'folder': 'application/vnd.google-apps.folder',
|
| 33 |
-
'document': 'application/vnd.google-apps.document',
|
| 34 |
-
'spreadsheet': 'application/vnd.google-apps.spreadsheet',
|
| 35 |
-
'text': 'text/plain',
|
| 36 |
-
'pdf': 'application/pdf',
|
| 37 |
-
'image': 'image/jpeg',
|
| 38 |
-
'video': 'video/mp4',
|
| 39 |
-
'audio': 'audio/mpeg',
|
| 40 |
-
}
|
| 41 |
-
|
| 42 |
-
# Define metadata fields to retrieve
|
| 43 |
-
FILE_FIELDS = 'id, name, mimeType, createdTime, modifiedTime, size, description, webViewLink, thumbnailLink, owners, shared, sharingUser, lastModifyingUser, capabilities, permissions'
|
| 44 |
-
FOLDER_FIELDS = 'id, name, createdTime, modifiedTime, description, webViewLink, owners, shared, sharingUser, lastModifyingUser, capabilities, permissions'
|
| 45 |
-
|
| 46 |
-
def __init__(self, credentials_dir: str = 'credentials'):
|
| 47 |
-
"""Initialize the Google Drive access."""
|
| 48 |
-
self.credentials_dir = credentials_dir
|
| 49 |
-
self.credentials_path = os.path.join(credentials_dir, 'client_secret.json')
|
| 50 |
-
self.token_path = os.path.join(credentials_dir, 'token.pickle')
|
| 51 |
-
|
| 52 |
-
# Ensure credentials directory exists
|
| 53 |
-
os.makedirs(credentials_dir, exist_ok=True)
|
| 54 |
-
|
| 55 |
-
# Initialize the Drive API service
|
| 56 |
-
self.service = build('drive', 'v3', credentials=self._get_credentials())
|
| 57 |
-
|
| 58 |
-
# Cache for folder IDs to avoid repeated lookups
|
| 59 |
-
self.folder_id_cache = {}
|
| 60 |
-
|
| 61 |
-
def _get_credentials(self) -> Credentials:
|
| 62 |
-
"""Get and refresh Google Drive API credentials."""
|
| 63 |
-
creds = None
|
| 64 |
-
|
| 65 |
-
# Load existing token if it exists
|
| 66 |
-
if os.path.exists(self.token_path):
|
| 67 |
-
with open(self.token_path, 'rb') as token:
|
| 68 |
-
creds = pickle.load(token)
|
| 69 |
-
|
| 70 |
-
# If credentials need refresh or don't exist
|
| 71 |
-
if not creds or not creds.valid:
|
| 72 |
-
if creds and creds.expired and creds.refresh_token:
|
| 73 |
-
creds.refresh(Request())
|
| 74 |
-
else:
|
| 75 |
-
if not os.path.exists(self.credentials_path):
|
| 76 |
-
raise FileNotFoundError(
|
| 77 |
-
f"Client secrets file not found at {self.credentials_path}. "
|
| 78 |
-
"Please follow the setup instructions in the README."
|
| 79 |
-
)
|
| 80 |
-
|
| 81 |
-
flow = InstalledAppFlow.from_client_secrets_file(
|
| 82 |
-
self.credentials_path, self.SCOPES)
|
| 83 |
-
creds = flow.run_local_server(port=0)
|
| 84 |
-
|
| 85 |
-
# Save the credentials for future use
|
| 86 |
-
with open(self.token_path, 'wb') as token:
|
| 87 |
-
pickle.dump(creds, token)
|
| 88 |
-
|
| 89 |
-
return creds
|
| 90 |
-
|
| 91 |
-
def _format_metadata(self, metadata: Dict[str, Any]) -> Dict[str, Any]:
|
| 92 |
-
"""
|
| 93 |
-
Format metadata for easier reading and usage.
|
| 94 |
-
|
| 95 |
-
Args:
|
| 96 |
-
metadata: Raw metadata from Google Drive API
|
| 97 |
-
|
| 98 |
-
Returns:
|
| 99 |
-
Formatted metadata dictionary
|
| 100 |
-
"""
|
| 101 |
-
formatted = metadata.copy()
|
| 102 |
-
|
| 103 |
-
# Format dates
|
| 104 |
-
for date_field in ['createdTime', 'modifiedTime']:
|
| 105 |
-
if date_field in formatted:
|
| 106 |
-
try:
|
| 107 |
-
# Convert ISO 8601 string to datetime object
|
| 108 |
-
dt = datetime.datetime.fromisoformat(formatted[date_field].replace('Z', '+00:00'))
|
| 109 |
-
formatted[date_field] = dt
|
| 110 |
-
# Add a formatted date string for easier reading
|
| 111 |
-
formatted[f"{date_field}Formatted"] = dt.strftime('%Y-%m-%d %H:%M:%S')
|
| 112 |
-
except (ValueError, AttributeError):
|
| 113 |
-
pass
|
| 114 |
-
|
| 115 |
-
# Format size
|
| 116 |
-
if 'size' in formatted and formatted['size']:
|
| 117 |
-
try:
|
| 118 |
-
size_bytes = int(formatted['size'])
|
| 119 |
-
# Add human-readable size
|
| 120 |
-
if size_bytes < 1024:
|
| 121 |
-
formatted['sizeFormatted'] = f"{size_bytes} B"
|
| 122 |
-
elif size_bytes < 1024 * 1024:
|
| 123 |
-
formatted['sizeFormatted'] = f"{size_bytes / 1024:.1f} KB"
|
| 124 |
-
elif size_bytes < 1024 * 1024 * 1024:
|
| 125 |
-
formatted['sizeFormatted'] = f"{size_bytes / (1024 * 1024):.1f} MB"
|
| 126 |
-
else:
|
| 127 |
-
formatted['sizeFormatted'] = f"{size_bytes / (1024 * 1024 * 1024):.1f} GB"
|
| 128 |
-
except (ValueError, TypeError):
|
| 129 |
-
pass
|
| 130 |
-
|
| 131 |
-
# Extract owner names
|
| 132 |
-
if 'owners' in formatted and formatted['owners']:
|
| 133 |
-
formatted['ownerNames'] = [owner.get('displayName', 'Unknown') for owner in formatted['owners']]
|
| 134 |
-
formatted['ownerEmails'] = [owner.get('emailAddress', 'Unknown') for owner in formatted['owners']]
|
| 135 |
-
|
| 136 |
-
# Add file type description
|
| 137 |
-
if 'mimeType' in formatted:
|
| 138 |
-
mime_type = formatted['mimeType']
|
| 139 |
-
for key, value in self.MIME_TYPES.items():
|
| 140 |
-
if mime_type == value:
|
| 141 |
-
formatted['fileType'] = key
|
| 142 |
-
break
|
| 143 |
-
else:
|
| 144 |
-
# If not found in our predefined types
|
| 145 |
-
formatted['fileType'] = mime_type.split('/')[-1]
|
| 146 |
-
|
| 147 |
-
return formatted
|
| 148 |
-
|
| 149 |
-
def get_folder_id(self, folder_path: str) -> Optional[str]:
|
| 150 |
-
"""
|
| 151 |
-
Get a folder ID from a path like 'folder1/folder2/folder3'.
|
| 152 |
-
|
| 153 |
-
Args:
|
| 154 |
-
folder_path: Path to the folder, using '/' as separator
|
| 155 |
-
|
| 156 |
-
Returns:
|
| 157 |
-
The folder ID if found, None otherwise
|
| 158 |
-
"""
|
| 159 |
-
# Check if we've already resolved this path
|
| 160 |
-
if folder_path in self.folder_id_cache:
|
| 161 |
-
return self.folder_id_cache[folder_path]
|
| 162 |
-
|
| 163 |
-
# If it looks like an ID already, return it
|
| 164 |
-
if len(folder_path) > 25 and '/' not in folder_path:
|
| 165 |
-
return folder_path
|
| 166 |
-
|
| 167 |
-
# Split the path into components
|
| 168 |
-
parts = folder_path.split('/')
|
| 169 |
-
|
| 170 |
-
# Start from the root
|
| 171 |
-
current_folder_id = None
|
| 172 |
-
current_path = ""
|
| 173 |
-
|
| 174 |
-
# Traverse the path one folder at a time
|
| 175 |
-
for i, folder_name in enumerate(parts):
|
| 176 |
-
if not folder_name: # Skip empty parts
|
| 177 |
-
continue
|
| 178 |
-
|
| 179 |
-
# Update the current path for caching
|
| 180 |
-
if current_path:
|
| 181 |
-
current_path += f"/{folder_name}"
|
| 182 |
-
else:
|
| 183 |
-
current_path = folder_name
|
| 184 |
-
|
| 185 |
-
# Check if we've already resolved this subpath
|
| 186 |
-
if current_path in self.folder_id_cache:
|
| 187 |
-
current_folder_id = self.folder_id_cache[current_path]
|
| 188 |
-
continue
|
| 189 |
-
|
| 190 |
-
# Search for the folder by name
|
| 191 |
-
query = f"mimeType='{self.MIME_TYPES['folder']}' and name='{folder_name}'"
|
| 192 |
-
if current_folder_id:
|
| 193 |
-
query += f" and '{current_folder_id}' in parents"
|
| 194 |
-
|
| 195 |
-
try:
|
| 196 |
-
results = self.service.files().list(
|
| 197 |
-
q=query,
|
| 198 |
-
spaces='drive',
|
| 199 |
-
fields='files(id, name)',
|
| 200 |
-
pageSize=10
|
| 201 |
-
).execute()
|
| 202 |
-
|
| 203 |
-
files = results.get('files', [])
|
| 204 |
-
if not files:
|
| 205 |
-
# Try a more flexible search if exact match fails
|
| 206 |
-
query = query.replace(f"name='{folder_name}'", f"name contains '{folder_name}'")
|
| 207 |
-
results = self.service.files().list(
|
| 208 |
-
q=query,
|
| 209 |
-
spaces='drive',
|
| 210 |
-
fields='files(id, name)',
|
| 211 |
-
pageSize=10
|
| 212 |
-
).execute()
|
| 213 |
-
|
| 214 |
-
files = results.get('files', [])
|
| 215 |
-
if not files:
|
| 216 |
-
print(f"Could not find folder '{folder_name}' in path '{folder_path}'")
|
| 217 |
-
return None
|
| 218 |
-
|
| 219 |
-
# Use the first match
|
| 220 |
-
current_folder_id = files[0]['id']
|
| 221 |
-
|
| 222 |
-
# Cache this result
|
| 223 |
-
self.folder_id_cache[current_path] = current_folder_id
|
| 224 |
-
|
| 225 |
-
except HttpError as error:
|
| 226 |
-
print(f"Error finding folder: {error}")
|
| 227 |
-
return None
|
| 228 |
-
|
| 229 |
-
return current_folder_id
|
| 230 |
-
|
| 231 |
-
def get_folders_in_folder(self, folder_path: str, include_metadata: bool = True) -> List[Dict[str, Any]]:
|
| 232 |
-
"""
|
| 233 |
-
Get all subfolders in a folder specified by path.
|
| 234 |
-
|
| 235 |
-
Args:
|
| 236 |
-
folder_path: Path to the folder, using '/' as separator
|
| 237 |
-
include_metadata: Whether to include detailed metadata (default: True)
|
| 238 |
-
|
| 239 |
-
Returns:
|
| 240 |
-
List of folder metadata dictionaries
|
| 241 |
-
"""
|
| 242 |
-
# Get the folder ID
|
| 243 |
-
folder_id = self.get_folder_id(folder_path)
|
| 244 |
-
if not folder_id:
|
| 245 |
-
print(f"Could not find folder: '{folder_path}'")
|
| 246 |
-
return []
|
| 247 |
-
|
| 248 |
-
# List all folders in this folder
|
| 249 |
-
query = f"'{folder_id}' in parents and mimeType = '{self.MIME_TYPES['folder']}'"
|
| 250 |
-
|
| 251 |
-
try:
|
| 252 |
-
results = self.service.files().list(
|
| 253 |
-
q=query,
|
| 254 |
-
spaces='drive',
|
| 255 |
-
fields=f'files({self.FOLDER_FIELDS})' if include_metadata else 'files(id, name)',
|
| 256 |
-
pageSize=1000
|
| 257 |
-
).execute()
|
| 258 |
-
|
| 259 |
-
folders = results.get('files', [])
|
| 260 |
-
|
| 261 |
-
# Format metadata if requested
|
| 262 |
-
if include_metadata and folders:
|
| 263 |
-
folders = [self._format_metadata(folder) for folder in folders]
|
| 264 |
-
|
| 265 |
-
if folders:
|
| 266 |
-
print(f"Found {len(folders)} subfolders in '{folder_path}':")
|
| 267 |
-
for folder in folders:
|
| 268 |
-
if include_metadata and 'createdTimeFormatted' in folder:
|
| 269 |
-
print(f" - {folder['name']} (Created: {folder['createdTimeFormatted']})")
|
| 270 |
-
else:
|
| 271 |
-
print(f" - {folder['name']}")
|
| 272 |
-
else:
|
| 273 |
-
print(f"No subfolders found in '{folder_path}'")
|
| 274 |
-
|
| 275 |
-
return folders
|
| 276 |
-
|
| 277 |
-
except HttpError as error:
|
| 278 |
-
print(f"Error listing folders: {error}")
|
| 279 |
-
return []
|
| 280 |
-
|
| 281 |
-
def get_files_in_folder(self, folder_path: str, include_metadata: bool = True, include_content: bool = False) -> List[Dict[str, Any]]:
|
| 282 |
-
"""
|
| 283 |
-
Get all files in a folder specified by path.
|
| 284 |
-
|
| 285 |
-
Args:
|
| 286 |
-
folder_path: Path to the folder, using '/' as separator
|
| 287 |
-
include_metadata: Whether to include detailed metadata (default: True)
|
| 288 |
-
include_content: Whether to include file content (default: False)
|
| 289 |
-
|
| 290 |
-
Returns:
|
| 291 |
-
List of file metadata dictionaries, optionally including file content
|
| 292 |
-
"""
|
| 293 |
-
# Get the folder ID
|
| 294 |
-
folder_id = self.get_folder_id(folder_path)
|
| 295 |
-
if not folder_id:
|
| 296 |
-
print(f"Could not find folder: '{folder_path}'")
|
| 297 |
-
return []
|
| 298 |
-
|
| 299 |
-
# List all non-folder files in this folder
|
| 300 |
-
query = f"'{folder_id}' in parents and mimeType != '{self.MIME_TYPES['folder']}'"
|
| 301 |
-
|
| 302 |
-
try:
|
| 303 |
-
results = self.service.files().list(
|
| 304 |
-
q=query,
|
| 305 |
-
spaces='drive',
|
| 306 |
-
fields=f'files({self.FILE_FIELDS})' if include_metadata else 'files(id, name, mimeType)',
|
| 307 |
-
pageSize=1000
|
| 308 |
-
).execute()
|
| 309 |
-
|
| 310 |
-
files = results.get('files', [])
|
| 311 |
-
|
| 312 |
-
# Format metadata if requested
|
| 313 |
-
if include_metadata and files:
|
| 314 |
-
files = [self._format_metadata(file) for file in files]
|
| 315 |
-
|
| 316 |
-
# Add file content if requested
|
| 317 |
-
if include_content and files:
|
| 318 |
-
for file in files:
|
| 319 |
-
try:
|
| 320 |
-
# Skip files that are likely not text-based
|
| 321 |
-
if any(ext in file['name'].lower() for ext in ['.jpg', '.png', '.gif', '.mp3', '.mp4']):
|
| 322 |
-
print(f"Skipping content for non-text file: {file['name']}")
|
| 323 |
-
file['file_content'] = None
|
| 324 |
-
continue
|
| 325 |
-
|
| 326 |
-
# Read the file content
|
| 327 |
-
content = self.read_file_from_object(file)
|
| 328 |
-
file['file_content'] = content
|
| 329 |
-
|
| 330 |
-
if content is not None:
|
| 331 |
-
print(f"Successfully read content for: {file['name']} ({len(content)} characters)")
|
| 332 |
-
else:
|
| 333 |
-
print(f"Unable to read content for: {file['name']}")
|
| 334 |
-
except Exception as e:
|
| 335 |
-
print(f"Error reading content for {file['name']}: {e}")
|
| 336 |
-
file['file_content'] = None
|
| 337 |
-
|
| 338 |
-
if files:
|
| 339 |
-
print(f"Found {len(files)} files in '{folder_path}':")
|
| 340 |
-
for file in files:
|
| 341 |
-
if include_metadata and 'createdTimeFormatted' in file:
|
| 342 |
-
print(f" - {file['name']} ({file.get('fileType', 'Unknown')}, Created: {file['createdTimeFormatted']})")
|
| 343 |
-
else:
|
| 344 |
-
print(f" - {file['name']} ({file.get('mimeType', 'Unknown')})")
|
| 345 |
-
else:
|
| 346 |
-
print(f"No files found in '{folder_path}'")
|
| 347 |
-
|
| 348 |
-
return files
|
| 349 |
-
|
| 350 |
-
except HttpError as error:
|
| 351 |
-
print(f"Error listing files: {error}")
|
| 352 |
-
return []
|
| 353 |
-
|
| 354 |
-
def get_file(self, file_name: str, folder_path: str, include_metadata: bool = True, include_content: bool = False) -> Optional[Dict[str, Any]]:
|
| 355 |
-
"""
|
| 356 |
-
Get a specific file by name from a folder.
|
| 357 |
-
|
| 358 |
-
Args:
|
| 359 |
-
file_name: Name of the file to get
|
| 360 |
-
folder_path: Path to the folder containing the file
|
| 361 |
-
include_metadata: Whether to include detailed metadata (default: True)
|
| 362 |
-
include_content: Whether to include file content (default: False)
|
| 363 |
-
|
| 364 |
-
Returns:
|
| 365 |
-
File metadata dictionary, optionally including content, or None if file not found
|
| 366 |
-
"""
|
| 367 |
-
# Get the folder ID
|
| 368 |
-
folder_id = self.get_folder_id(folder_path)
|
| 369 |
-
if not folder_id:
|
| 370 |
-
print(f"Could not find folder: '{folder_path}'")
|
| 371 |
-
return None
|
| 372 |
-
|
| 373 |
-
# Find the file by name in this folder
|
| 374 |
-
query = f"'{folder_id}' in parents and name = '{file_name}'"
|
| 375 |
-
|
| 376 |
-
try:
|
| 377 |
-
results = self.service.files().list(
|
| 378 |
-
q=query,
|
| 379 |
-
spaces='drive',
|
| 380 |
-
fields=f'files({self.FILE_FIELDS})' if include_metadata else 'files(id, name, mimeType)',
|
| 381 |
-
pageSize=1
|
| 382 |
-
).execute()
|
| 383 |
-
|
| 384 |
-
files = results.get('files', [])
|
| 385 |
-
if not files:
|
| 386 |
-
# Try a more flexible search
|
| 387 |
-
query = query.replace(f"name = '{file_name}'", f"name contains '{file_name}'")
|
| 388 |
-
results = self.service.files().list(
|
| 389 |
-
q=query,
|
| 390 |
-
spaces='drive',
|
| 391 |
-
fields=f'files({self.FILE_FIELDS})' if include_metadata else 'files(id, name, mimeType)',
|
| 392 |
-
pageSize=10
|
| 393 |
-
).execute()
|
| 394 |
-
|
| 395 |
-
files = results.get('files', [])
|
| 396 |
-
if not files:
|
| 397 |
-
print(f"Could not find file '{file_name}' in '{folder_path}'")
|
| 398 |
-
return None
|
| 399 |
-
|
| 400 |
-
# Use the first match
|
| 401 |
-
file = files[0]
|
| 402 |
-
|
| 403 |
-
# Format metadata if requested
|
| 404 |
-
if include_metadata:
|
| 405 |
-
file = self._format_metadata(file)
|
| 406 |
-
|
| 407 |
-
# Add file content if requested
|
| 408 |
-
if include_content:
|
| 409 |
-
try:
|
| 410 |
-
# Skip files that are likely not text-based
|
| 411 |
-
if any(ext in file['name'].lower() for ext in ['.jpg', '.png', '.gif', '.mp3', '.mp4']):
|
| 412 |
-
print(f"Skipping content for non-text file: {file['name']}")
|
| 413 |
-
file['file_content'] = None
|
| 414 |
-
else:
|
| 415 |
-
# Read the file content
|
| 416 |
-
content = self.read_file_from_object(file)
|
| 417 |
-
file['file_content'] = content
|
| 418 |
-
|
| 419 |
-
if content is not None:
|
| 420 |
-
print(f"Successfully read content for: {file['name']} ({len(content)} characters)")
|
| 421 |
-
else:
|
| 422 |
-
print(f"Unable to read content for: {file['name']}")
|
| 423 |
-
except Exception as e:
|
| 424 |
-
print(f"Error reading content for {file['name']}: {e}")
|
| 425 |
-
file['file_content'] = None
|
| 426 |
-
|
| 427 |
-
print(f"Found file: {file['name']}")
|
| 428 |
-
return file
|
| 429 |
-
|
| 430 |
-
except HttpError as error:
|
| 431 |
-
print(f"Error getting file: {error}")
|
| 432 |
-
return None
|
| 433 |
-
|
| 434 |
-
def get_all_files_in_folder(self, folder_path: str, include_metadata: bool = True, include_content: bool = False) -> List[Dict[str, Any]]:
|
| 435 |
-
"""
|
| 436 |
-
Get all items (files and folders) in a folder specified by path.
|
| 437 |
-
|
| 438 |
-
Args:
|
| 439 |
-
folder_path: Path to the folder, using '/' as separator
|
| 440 |
-
include_metadata: Whether to include detailed metadata (default: True)
|
| 441 |
-
include_content: Whether to include file content (default: False)
|
| 442 |
-
|
| 443 |
-
Returns:
|
| 444 |
-
List of file and folder metadata dictionaries, optionally including file content
|
| 445 |
-
"""
|
| 446 |
-
# Get the folder ID
|
| 447 |
-
folder_id = self.get_folder_id(folder_path)
|
| 448 |
-
if not folder_id:
|
| 449 |
-
print(f"Could not find folder: '{folder_path}'")
|
| 450 |
-
return []
|
| 451 |
-
|
| 452 |
-
# List all items in this folder
|
| 453 |
-
query = f"'{folder_id}' in parents"
|
| 454 |
-
|
| 455 |
-
try:
|
| 456 |
-
results = self.service.files().list(
|
| 457 |
-
q=query,
|
| 458 |
-
spaces='drive',
|
| 459 |
-
fields=f'files({self.FILE_FIELDS})' if include_metadata else 'files(id, name, mimeType)',
|
| 460 |
-
pageSize=1000
|
| 461 |
-
).execute()
|
| 462 |
-
|
| 463 |
-
items = results.get('files', [])
|
| 464 |
-
|
| 465 |
-
# Format metadata if requested
|
| 466 |
-
if include_metadata and items:
|
| 467 |
-
items = [self._format_metadata(item) for item in items]
|
| 468 |
-
|
| 469 |
-
# Add file content if requested
|
| 470 |
-
if include_content and items:
|
| 471 |
-
for item in items:
|
| 472 |
-
# Skip folders and non-text files
|
| 473 |
-
if item.get('mimeType') == self.MIME_TYPES['folder'] or any(ext in item['name'].lower() for ext in ['.jpg', '.png', '.gif', '.mp3', '.mp4']):
|
| 474 |
-
item['file_content'] = None
|
| 475 |
-
continue
|
| 476 |
-
|
| 477 |
-
try:
|
| 478 |
-
# Read the file content
|
| 479 |
-
content = self.read_file_from_object(item)
|
| 480 |
-
item['file_content'] = content
|
| 481 |
-
|
| 482 |
-
if content is not None:
|
| 483 |
-
print(f"Successfully read content for: {item['name']} ({len(content)} characters)")
|
| 484 |
-
else:
|
| 485 |
-
print(f"Unable to read content for: {item['name']}")
|
| 486 |
-
except Exception as e:
|
| 487 |
-
print(f"Error reading content for {item['name']}: {e}")
|
| 488 |
-
item['file_content'] = None
|
| 489 |
-
|
| 490 |
-
if items:
|
| 491 |
-
print(f"Found {len(items)} items in '{folder_path}':")
|
| 492 |
-
for item in items:
|
| 493 |
-
if include_metadata and 'createdTimeFormatted' in item:
|
| 494 |
-
item_type = 'Folder' if item.get('mimeType') == self.MIME_TYPES['folder'] else item.get('fileType', 'Unknown')
|
| 495 |
-
print(f" - {item['name']} ({item_type}, Created: {item['createdTimeFormatted']})")
|
| 496 |
-
else:
|
| 497 |
-
item_type = 'Folder' if item.get('mimeType') == self.MIME_TYPES['folder'] else item.get('mimeType', 'Unknown')
|
| 498 |
-
print(f" - {item['name']} ({item_type})")
|
| 499 |
-
else:
|
| 500 |
-
print(f"No items found in '{folder_path}'")
|
| 501 |
-
|
| 502 |
-
return items
|
| 503 |
-
|
| 504 |
-
except HttpError as error:
|
| 505 |
-
print(f"Error listing items: {error}")
|
| 506 |
-
return []
|
| 507 |
-
|
| 508 |
-
def file_exists(self, file_name: str, folder_path: str) -> bool:
|
| 509 |
-
"""
|
| 510 |
-
Check if a file exists at the specified path in Google Drive.
|
| 511 |
-
|
| 512 |
-
Args:
|
| 513 |
-
file_name: Name of the file to check
|
| 514 |
-
folder_path: Path to the folder containing the file
|
| 515 |
-
|
| 516 |
-
Returns:
|
| 517 |
-
True if the file exists, False otherwise
|
| 518 |
-
"""
|
| 519 |
-
# Get the folder ID
|
| 520 |
-
folder_id = self.get_folder_id(folder_path)
|
| 521 |
-
if not folder_id:
|
| 522 |
-
print(f"Could not find folder: '{folder_path}'")
|
| 523 |
-
return False
|
| 524 |
-
|
| 525 |
-
# Check if the file exists in this folder
|
| 526 |
-
query = f"'{folder_id}' in parents and name = '{file_name}'"
|
| 527 |
-
|
| 528 |
-
try:
|
| 529 |
-
results = self.service.files().list(
|
| 530 |
-
q=query,
|
| 531 |
-
spaces='drive',
|
| 532 |
-
fields='files(id, name)',
|
| 533 |
-
pageSize=1
|
| 534 |
-
).execute()
|
| 535 |
-
|
| 536 |
-
files = results.get('files', [])
|
| 537 |
-
if not files:
|
| 538 |
-
# Try a more flexible search
|
| 539 |
-
query = query.replace(f"name = '{file_name}'", f"name contains '{file_name}'")
|
| 540 |
-
results = self.service.files().list(
|
| 541 |
-
q=query,
|
| 542 |
-
spaces='drive',
|
| 543 |
-
fields='files(id, name)',
|
| 544 |
-
pageSize=10
|
| 545 |
-
).execute()
|
| 546 |
-
|
| 547 |
-
files = results.get('files', [])
|
| 548 |
-
if not files:
|
| 549 |
-
print(f"File '{file_name}' does not exist in '{folder_path}'")
|
| 550 |
-
return False
|
| 551 |
-
|
| 552 |
-
# File exists
|
| 553 |
-
print(f"File '{file_name}' exists in '{folder_path}'")
|
| 554 |
-
return True
|
| 555 |
-
|
| 556 |
-
except HttpError as error:
|
| 557 |
-
print(f"Error checking if file exists: {error}")
|
| 558 |
-
return False
|
| 559 |
-
|
| 560 |
-
def get_file_modified_time(self, file_name: str, folder_path: str) -> Optional[datetime.datetime]:
|
| 561 |
-
"""
|
| 562 |
-
Get the last modified time of a file.
|
| 563 |
-
|
| 564 |
-
Args:
|
| 565 |
-
file_name: Name of the file
|
| 566 |
-
folder_path: Path to the folder containing the file
|
| 567 |
-
|
| 568 |
-
Returns:
|
| 569 |
-
The last modified time as a datetime object, or None if the file doesn't exist
|
| 570 |
-
"""
|
| 571 |
-
# Get the file metadata
|
| 572 |
-
file = self.get_file(file_name, folder_path, include_metadata=True)
|
| 573 |
-
if not file:
|
| 574 |
-
return None
|
| 575 |
-
|
| 576 |
-
# Return the modified time
|
| 577 |
-
return file.get('modifiedTime')
|
| 578 |
-
|
| 579 |
-
def read_file_from_object(self, file_object: Dict[str, Any]) -> Optional[str]:
    """
    Fetch the text content of the file described by a file object.

    Args:
        file_object: A Google file object with at least 'id' and 'mimeType' fields

    Returns:
        The file contents decoded as UTF-8, or None if a required field is
        missing or the read/decoding fails
    """
    file_id = file_object.get('id')
    mime_type = file_object.get('mimeType')

    # Both fields are required to decide how to fetch the content.
    if not (file_id and mime_type):
        print("File object is missing 'id' or 'mimeType' fields.")
        return None

    try:
        if mime_type != self.MIME_TYPES['document']:
            # Anything that is not a Google Doc is downloaded chunk by chunk
            # into an in-memory buffer.
            media_request = self.service.files().get_media(fileId=file_id)
            buffer = io.BytesIO()
            downloader = MediaIoBaseDownload(buffer, media_request)

            finished = False
            while not finished:
                _, finished = downloader.next_chunk()

            return buffer.getvalue().decode('utf-8')

        # Google Docs are exported as plain text instead of downloaded.
        exported = self.service.files().export(
            fileId=file_id,
            mimeType='text/plain'
        ).execute()
        return exported.decode('utf-8')

    except HttpError as error:
        print(f"Error reading file: {error}")
        return None
    except Exception as e:
        # Best-effort: any other failure is reported and treated as "no content".
        print(f"Error decoding file content: {e}")
        return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/ctp_slack_bot/services/google_drive_basic_usage.py
DELETED
|
@@ -1,178 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Basic Usage Examples for EasyGoogleDrive
|
| 3 |
-
|
| 4 |
-
This file demonstrates how to use the EasyGoogleDrive class to interact with Google Drive.
|
| 5 |
-
It provides examples of the main functionality without printing all results to keep the output clean.
|
| 6 |
-
"""
|
| 7 |
-
|
| 8 |
-
from google_drive_access import EasyGoogleDrive
|
| 9 |
-
import datetime
|
| 10 |
-
|
| 11 |
-
def main():
    """
    Walk through nine EasyGoogleDrive usage examples, printing a short preview of each result.
    """
    # Creating the client will prompt for authentication the first time it's run
    drive = EasyGoogleDrive()

    # Example folder paths - replace with your actual folder paths
    folder_path = "Spring-2025-BAI"
    subfolder_path = "Spring-2025-BAI/transcripts"

    print("=== Basic Usage Examples for EasyGoogleDrive ===\n")

    # Example 1: List folders in a directory
    print("Example 1: Listing folders in a directory")
    print("----------------------------------------")
    folders = drive.get_folders_in_folder(folder_path)

    if not folders:
        print("No folders found.")
    else:
        # Preview at most three folders
        print(f"Found {len(folders)} folders. Showing first 3:")
        for folder in folders[:3]:
            print(f" - {folder['name']} (Created: {folder.get('createdTimeFormatted', 'Unknown')})")
        if len(folders) > 3:
            print(f" ... and {len(folders) - 3} more folders")
    print()

    # Example 2: List files in a directory
    print("Example 2: Listing files in a directory")
    print("--------------------------------------")
    files = drive.get_files_in_folder(subfolder_path)

    if not files:
        print("No files found.")
    else:
        # Preview at most three files
        print(f"Found {len(files)} files. Showing first 3:")
        for listed in files[:3]:
            file_type = listed.get('fileType', 'Unknown')
            created_time = listed.get('createdTimeFormatted', 'Unknown')
            print(f" - {listed['name']} ({file_type}, Created: {created_time})")
        if len(files) > 3:
            print(f" ... and {len(files) - 3} more files")
    print()

    # Example 3: Get a specific file
    print("Example 3: Getting a specific file")
    print("--------------------------------")
    # Use the last file found in the previous example, or a default if none were found
    file_name = files[-1]['name'] if files else "example.txt"

    file = drive.get_file(file_name, subfolder_path, include_metadata=True)
    if not file:
        print(f"File '{file_name}' not found.")
    else:
        print(f"File found: {file['name']}")
        print(f" Type: {file.get('fileType', 'Unknown')}")
        print(f" Created: {file.get('createdTimeFormatted', 'Unknown')}")
        print(f" Modified: {file.get('modifiedTimeFormatted', 'Unknown')}")
        print(f" Size: {file.get('sizeFormatted', 'Unknown')}")
    print()

    # Example 4: Get all items in a folder (files and folders)
    print("Example 4: Getting all items in a folder")
    print("--------------------------------------")
    all_items = drive.get_all_files_in_folder(folder_path)

    if not all_items:
        print("No items found.")
    else:
        # Preview at most three items
        print(f"Found {len(all_items)} items. Showing first 3:")
        for item in all_items[:3]:
            if item.get('mimeType') == drive.MIME_TYPES['folder']:
                item_type = "Folder"
            else:
                item_type = item.get('fileType', 'Unknown')
            created_time = item.get('createdTimeFormatted', 'Unknown')
            print(f" - {item['name']} ({item_type}, Created: {created_time})")
        if len(all_items) > 3:
            print(f" ... and {len(all_items) - 3} more items")
    print()

    # Example 5: Check if a file exists
    print("Example 5: Checking if a file exists")
    print("----------------------------------")
    # Use the same file name from Example 3
    file_to_check = file_name

    exists = drive.file_exists(file_to_check, subfolder_path)
    print(f"File '{file_to_check}' {'exists' if exists else 'does not exist'} in '{subfolder_path}'.")
    print()

    # Example 6: Get file modified time
    print("Example 6: Getting file modified time")
    print("-----------------------------------")
    # Use the same file name from Example 3
    file_to_check_time = file_name

    modified_time = drive.get_file_modified_time(file_to_check_time, subfolder_path)
    if not modified_time:
        print(f"Could not get modified time for '{file_to_check_time}'.")
    else:
        print(f"File '{file_to_check_time}' was last modified on: {modified_time}")
    print()

    # Example 7: Get file with content
    print("Example 7: Getting file with content")
    print("----------------------------------")
    # Use the same file name from Example 3
    file_with_content = file_name

    file_with_content_obj = drive.get_file(file_with_content, subfolder_path, include_content=True)
    if file_with_content_obj and 'file_content' in file_with_content_obj:
        content = file_with_content_obj['file_content']
        if not content:
            print(f"File '{file_with_content}' has no content or content could not be read.")
        else:
            print(f"File '{file_with_content}' content (first 100 chars):")
            print(f" {content[:100]}...")
    else:
        print(f"File '{file_with_content}' not found or content could not be retrieved.")
    print()

    # Example 8: Get contents of all files in a folder
    print("Example 8: Getting contents of all files in a folder")
    print("------------------------------------------------")
    # Get all files with content
    all_files_with_content = drive.get_files_in_folder(subfolder_path, include_content=True)

    if not all_files_with_content:
        print("No files found or no content could be retrieved.")
    else:
        print(f"Found {len(all_files_with_content)} files. Showing content preview for first 3:")
        for file in all_files_with_content[:3]:
            print(f" File: {file['name']}")
            if 'file_content' in file and file['file_content']:
                content = file['file_content']
                print(f" Content preview: {content[:50]}...")
            else:
                print(" No content available or file is not text-based.")

        if len(all_files_with_content) > 3:
            print(f" ... and {len(all_files_with_content) - 3} more files with content")
    print()

    # Example 9: Get content from a specific file using read_file_from_object
    print("Example 9: Getting content from a specific file using read_file_from_object")
    print("------------------------------------------------------------------------")
    # Get a file object first
    file_obj = drive.get_file(file_name, subfolder_path)

    if not file_obj:
        print(f"File '{file_name}' not found.")
    else:
        # Read the content directly from the file object
        content = drive.read_file_from_object(file_obj)
        if not content:
            print(f"File '{file_obj['name']}' has no content or content could not be read.")
        else:
            print(f"File '{file_obj['name']}' content (first 100 chars):")
            print(f" {content[:100]}...")
    print()

    print("=== End of Examples ===")
|
| 176 |
-
|
| 177 |
-
if __name__ == "__main__":
|
| 178 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/ctp_slack_bot/services/schedule_service.py
CHANGED
|
@@ -2,6 +2,7 @@ from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
|
| 2 |
from apscheduler.triggers.cron import CronTrigger
|
| 3 |
from asyncio import create_task, iscoroutinefunction, to_thread
|
| 4 |
from datetime import datetime
|
|
|
|
| 5 |
from loguru import logger
|
| 6 |
from pydantic import BaseModel, PrivateAttr
|
| 7 |
from pytz import timezone
|
|
@@ -47,11 +48,21 @@ class ScheduleService(BaseModel):
|
|
| 47 |
|
| 48 |
def start(self: Self) -> None:
|
| 49 |
self._scheduler.start()
|
| 50 |
-
logger.info("Started scheduler.")
|
| 51 |
|
| 52 |
def stop(self: Self) -> None:
|
| 53 |
if self._scheduler.running:
|
| 54 |
-
self._scheduler.shutdown(
|
| 55 |
-
logger.info("Shut down scheduler.")
|
| 56 |
else:
|
| 57 |
logger.debug("The scheduler is not running. There is no scheduler to shut down.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
from apscheduler.triggers.cron import CronTrigger
|
| 3 |
from asyncio import create_task, iscoroutinefunction, to_thread
|
| 4 |
from datetime import datetime
|
| 5 |
+
from dependency_injector.resources import Resource
|
| 6 |
from loguru import logger
|
| 7 |
from pydantic import BaseModel, PrivateAttr
|
| 8 |
from pytz import timezone
|
|
|
|
| 48 |
|
| 49 |
def start(self: Self) -> None:
    """Start the wrapped scheduler by delegating to its own start() method."""
    self._scheduler.start()
|
|
|
|
| 51 |
|
| 52 |
def stop(self: Self) -> None:
    """Shut the wrapped scheduler down if it is running; otherwise only log a debug note."""
    if not self._scheduler.running:
        logger.debug("The scheduler is not running. There is no scheduler to shut down.")
        return
    self._scheduler.shutdown()
|
| 57 |
+
|
| 58 |
+
class ScheduleServiceResource(Resource):
    """dependency-injector resource that starts a ScheduleService on init and stops it on shutdown."""

    def init(self: Self, settings: Settings) -> ScheduleService:
        """Build a ScheduleService from the given settings, start it, and return it."""
        logger.info("Starting scheduler…")
        service = ScheduleService(settings=settings)
        service.start()
        return service

    def shutdown(self: Self, schedule_service: ScheduleService) -> None:
        """Stop the given schedule service and log that it has stopped."""
        schedule_service.stop()
        logger.info("Stopped scheduler.")
|
src/ctp_slack_bot/utils/__init__.py
CHANGED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from ctp_slack_bot.utils.secret_stripper import sanitize_mongo_db_uri
|
src/ctp_slack_bot/utils/secret_stripper.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from urllib.parse import urlparse, urlunparse
|
| 2 |
+
|
| 3 |
+
def sanitize_mongo_db_uri(uri: str) -> str:
    """Return *uri* with any ``user:password@`` credentials removed from its authority part.

    Args:
        uri: A MongoDB connection URI (``mongodb://`` or ``mongodb+srv://``).

    Returns:
        The same URI with scheme, host(s), port(s), path, query and fragment
        intact, but without the userinfo component.
    """
    parts = urlparse(uri)
    # Strip the userinfo textually instead of rebuilding from hostname/port:
    # ``parts.port`` is an int (str.join would raise TypeError), accessing it
    # raises ValueError for multi-host seed lists such as "h1:27017,h2:27017",
    # and ``parts.hostname`` drops the brackets of IPv6 literals.
    sanitized_netloc = parts.netloc.rpartition("@")[2]
    return urlunparse((parts.scheme, sanitized_netloc, parts.path, parts.params, parts.query, parts.fragment))
|