Spaces:
Runtime error
Runtime error
Clean up and restore ability to shut down gracefully
Browse files- pyproject.toml +2 -3
- src/ctp_slack_bot/app.py +31 -12
- src/ctp_slack_bot/containers.py +2 -2
- src/ctp_slack_bot/core/logging.py +4 -2
- src/ctp_slack_bot/core/response_rendering.py +0 -13
- src/ctp_slack_bot/db/mongo_db.py +44 -49
- src/ctp_slack_bot/services/GOOGLE_DRIVE_README.md +0 -228
- src/ctp_slack_bot/services/google_drive_access.py +0 -623
- src/ctp_slack_bot/services/google_drive_basic_usage.py +0 -178
- src/ctp_slack_bot/services/schedule_service.py +14 -3
- src/ctp_slack_bot/utils/__init__.py +1 -0
- src/ctp_slack_bot/utils/secret_stripper.py +6 -0
pyproject.toml
CHANGED
@@ -24,7 +24,6 @@ dependencies = [
|
|
24 |
"more-itertools>=10.6.0",
|
25 |
"python-dotenv>=1.1.0",
|
26 |
"loguru>=0.7.3",
|
27 |
-
"fastapi>=0.115.12",
|
28 |
"dependency-injector>=4.46.0",
|
29 |
"pytz>=2025.2",
|
30 |
"apscheduler>=3.11.0",
|
@@ -36,7 +35,7 @@ dependencies = [
|
|
36 |
"slack_bolt>=1.23.0",
|
37 |
"pymongo>=4.11.3 ",
|
38 |
"motor>=3.7.0",
|
39 |
-
"openai>=1.70.0"
|
40 |
"google-api-python-client>=2.167.0",
|
41 |
"google-auth>=2.39.0",
|
42 |
"google-auth-oauthlib>=1.2.1"
|
@@ -50,7 +49,7 @@ dev = [
|
|
50 |
"types-pytz>=2025.2",
|
51 |
"black>=25.1.0",
|
52 |
"isort>=6.0.1",
|
53 |
-
"ruff>=0.11.4"
|
54 |
]
|
55 |
|
56 |
[project.urls]
|
|
|
24 |
"more-itertools>=10.6.0",
|
25 |
"python-dotenv>=1.1.0",
|
26 |
"loguru>=0.7.3",
|
|
|
27 |
"dependency-injector>=4.46.0",
|
28 |
"pytz>=2025.2",
|
29 |
"apscheduler>=3.11.0",
|
|
|
35 |
"slack_bolt>=1.23.0",
|
36 |
"pymongo>=4.11.3 ",
|
37 |
"motor>=3.7.0",
|
38 |
+
"openai>=1.70.0",
|
39 |
"google-api-python-client>=2.167.0",
|
40 |
"google-auth>=2.39.0",
|
41 |
"google-auth-oauthlib>=1.2.1"
|
|
|
49 |
"types-pytz>=2025.2",
|
50 |
"black>=25.1.0",
|
51 |
"isort>=6.0.1",
|
52 |
+
"ruff>=0.11.4"
|
53 |
]
|
54 |
|
55 |
[project.urls]
|
src/ctp_slack_bot/app.py
CHANGED
@@ -1,9 +1,24 @@
|
|
1 |
-
from asyncio import run
|
2 |
from loguru import logger
|
|
|
|
|
3 |
|
4 |
from ctp_slack_bot.containers import Container
|
5 |
from ctp_slack_bot.core.logging import setup_logging
|
6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
async def main() -> None:
|
8 |
# Setup logging.
|
9 |
setup_logging()
|
@@ -16,19 +31,23 @@ async def main() -> None:
|
|
16 |
# Kick off services which should be active from the start.
|
17 |
container.content_ingestion_service()
|
18 |
container.question_dispatch_service()
|
|
|
19 |
|
20 |
-
# Start the
|
21 |
-
schedule_service = container.schedule_service()
|
22 |
-
schedule_service.start()
|
23 |
-
|
24 |
-
# Start the Slack socket mode handler in a background thread.
|
25 |
socket_mode_handler = container.socket_mode_handler()
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
|
33 |
if __name__ == "__main__":
|
34 |
run(main())
|
|
|
1 |
+
from asyncio import all_tasks, CancelledError, create_task, current_task, get_running_loop, run
|
2 |
from loguru import logger
|
3 |
+
from signal import SIGINT, SIGTERM
|
4 |
+
from typing import Any, Callable
|
5 |
|
6 |
from ctp_slack_bot.containers import Container
|
7 |
from ctp_slack_bot.core.logging import setup_logging
|
8 |
|
9 |
+
async def handle_shutdown_signal() -> None:
    """Cancel every unfinished asyncio task except the one running this coroutine.

    Logs receipt of the shutdown signal, requests cancellation of each other
    pending task reported by ``all_tasks()``, and logs once all requests have
    been issued.
    """
    logger.info("Received shutdown signal.")
    # The task executing this coroutine must survive so the loop can finish.
    this_task = current_task()
    for candidate in all_tasks():
        if candidate is this_task or candidate.done():
            continue
        candidate.cancel()
        logger.trace("Cancelled task {}.", candidate.get_name())
    logger.info("Cancelled all tasks.")
|
16 |
+
|
17 |
+
def create_shutdown_signal_handler() -> Callable[[], None]:
    """Build a synchronous callback that schedules the asynchronous shutdown routine.

    Returns:
        A zero-argument callable which, each time it is invoked from within a
        running event loop, schedules ``handle_shutdown_signal()`` as a task.
    """
    # The event loop keeps only weak references to tasks, so hold a strong
    # reference until each shutdown task completes; otherwise it could be
    # garbage-collected before it has run.
    pending_shutdown_tasks: set = set()

    def shutdown_signal_handler() -> None:
        task = create_task(handle_shutdown_signal())
        pending_shutdown_tasks.add(task)
        task.add_done_callback(pending_shutdown_tasks.discard)

    return shutdown_signal_handler
|
21 |
+
|
22 |
async def main() -> None:
|
23 |
# Setup logging.
|
24 |
setup_logging()
|
|
|
31 |
# Kick off services which should be active from the start.
|
32 |
container.content_ingestion_service()
|
33 |
container.question_dispatch_service()
|
34 |
+
container.schedule_service()
|
35 |
|
36 |
+
# Start the Slack socket mode handler in the background.
|
|
|
|
|
|
|
|
|
37 |
socket_mode_handler = container.socket_mode_handler()
|
38 |
+
slack_bolt_task = create_task(socket_mode_handler.start_async())
|
39 |
+
shutdown_signal_handler = create_shutdown_signal_handler()
|
40 |
+
loop = get_running_loop()
|
41 |
+
loop.add_signal_handler(SIGINT, shutdown_signal_handler)
|
42 |
+
loop.add_signal_handler(SIGTERM, shutdown_signal_handler)
|
43 |
+
try:
|
44 |
+
logger.info("Starting Slack Socket Mode handler…")
|
45 |
+
await slack_bolt_task
|
46 |
+
except CancelledError:
|
47 |
+
logger.info("Shutting down application…")
|
48 |
+
finally:
|
49 |
+
await socket_mode_handler.close_async()
|
50 |
+
await container.shutdown_resources()
|
51 |
|
52 |
if __name__ == "__main__":
|
53 |
run(main())
|
src/ctp_slack_bot/containers.py
CHANGED
@@ -13,7 +13,7 @@ from ctp_slack_bot.services.embeddings_model_service import EmbeddingsModelServi
|
|
13 |
from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
|
14 |
from ctp_slack_bot.services.language_model_service import LanguageModelService
|
15 |
from ctp_slack_bot.services.question_dispatch_service import QuestionDispatchService
|
16 |
-
from ctp_slack_bot.services.schedule_service import
|
17 |
from ctp_slack_bot.services.slack_service import SlackServiceResource
|
18 |
from ctp_slack_bot.services.vector_database_service import VectorDatabaseService
|
19 |
from ctp_slack_bot.services.vectorization_service import VectorizationService
|
@@ -22,7 +22,7 @@ from ctp_slack_bot.services.vectorization_service import VectorizationService
|
|
22 |
class Container(DeclarativeContainer):
|
23 |
settings = Singleton(Settings)
|
24 |
event_brokerage_service = Singleton(EventBrokerageService)
|
25 |
-
schedule_service =
|
26 |
mongo_db = Resource(MongoDBResource, settings=settings) # TODO: generalize to any database.
|
27 |
vectorized_chunk_repository = Singleton(MongoVectorizedChunkRepository, mongo_db=mongo_db)
|
28 |
vector_database_service = Singleton(VectorDatabaseService, settings=settings, mongo_db=mongo_db)
|
|
|
13 |
from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService
|
14 |
from ctp_slack_bot.services.language_model_service import LanguageModelService
|
15 |
from ctp_slack_bot.services.question_dispatch_service import QuestionDispatchService
|
16 |
+
from ctp_slack_bot.services.schedule_service import ScheduleServiceResource
|
17 |
from ctp_slack_bot.services.slack_service import SlackServiceResource
|
18 |
from ctp_slack_bot.services.vector_database_service import VectorDatabaseService
|
19 |
from ctp_slack_bot.services.vectorization_service import VectorizationService
|
|
|
22 |
class Container(DeclarativeContainer):
|
23 |
settings = Singleton(Settings)
|
24 |
event_brokerage_service = Singleton(EventBrokerageService)
|
25 |
+
schedule_service = Resource(ScheduleServiceResource, settings=settings)
|
26 |
mongo_db = Resource(MongoDBResource, settings=settings) # TODO: generalize to any database.
|
27 |
vectorized_chunk_repository = Singleton(MongoVectorizedChunkRepository, mongo_db=mongo_db)
|
28 |
vector_database_service = Singleton(VectorDatabaseService, settings=settings, mongo_db=mongo_db)
|
src/ctp_slack_bot/core/logging.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
from logging import __file__ as logging_file, basicConfig, currentframe, getLogger, Handler, INFO, LogRecord
|
2 |
from loguru import logger
|
3 |
from os import getenv
|
4 |
from sys import stderr
|
@@ -90,7 +90,9 @@ def setup_logging() -> None:
|
|
90 |
basicConfig(handlers=[InterceptHandler()], level=0, force=True)
|
91 |
|
92 |
# Update logging levels for some noisy libraries.
|
93 |
-
for logger_name in ("uvicorn", "uvicorn.error", "fastapi", "httpx", "
|
94 |
getLogger(logger_name).setLevel(INFO)
|
|
|
|
|
95 |
|
96 |
logger.info(f"Logging configured with level {log_level}")
|
|
|
1 |
+
from logging import __file__ as logging_file, basicConfig, currentframe, getLogger, Handler, INFO, LogRecord, WARNING
|
2 |
from loguru import logger
|
3 |
from os import getenv
|
4 |
from sys import stderr
|
|
|
90 |
basicConfig(handlers=[InterceptHandler()], level=0, force=True)
|
91 |
|
92 |
# Update logging levels for some noisy libraries.
|
93 |
+
for logger_name in ("uvicorn", "uvicorn.error", "fastapi", "httpx", "pymongo"):
|
94 |
getLogger(logger_name).setLevel(INFO)
|
95 |
+
# Quieten APScheduler. The trailing comma makes this a one-element tuple;
# without it the loop would iterate over the characters of the string.
for logger_name in ("apscheduler",):
    getLogger(logger_name).setLevel(WARNING)
|
97 |
|
98 |
logger.info(f"Logging configured with level {log_level}")
|
src/ctp_slack_bot/core/response_rendering.py
DELETED
@@ -1,13 +0,0 @@
|
|
1 |
-
from json import dumps
|
2 |
-
from starlette.responses import JSONResponse
|
3 |
-
from typing import Any, Self
|
4 |
-
|
5 |
-
class PrettyJSONResponse(JSONResponse):
|
6 |
-
def render(self: Self, content: Any) -> bytes:
|
7 |
-
return dumps(
|
8 |
-
content,
|
9 |
-
ensure_ascii=False,
|
10 |
-
allow_nan=False,
|
11 |
-
indent=4,
|
12 |
-
separators=(", ", ": "),
|
13 |
-
).encode()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/ctp_slack_bot/db/mongo_db.py
CHANGED
@@ -1,13 +1,14 @@
|
|
1 |
-
from
|
|
|
2 |
from motor.motor_asyncio import AsyncIOMotorClient
|
3 |
from pymongo.errors import ConnectionFailure, ServerSelectionTimeoutError
|
4 |
from pymongo.operations import SearchIndexModel
|
5 |
from loguru import logger
|
6 |
from pydantic import BaseModel, PrivateAttr
|
7 |
from typing import Any, Dict, Optional, Self
|
8 |
-
import asyncio
|
9 |
|
10 |
from ctp_slack_bot.core.config import Settings
|
|
|
11 |
|
12 |
class MongoDB(BaseModel):
|
13 |
"""
|
@@ -16,23 +17,20 @@ class MongoDB(BaseModel):
|
|
16 |
settings: Settings
|
17 |
_client: PrivateAttr = PrivateAttr()
|
18 |
_db: PrivateAttr = PrivateAttr()
|
19 |
-
|
20 |
class Config:
|
21 |
arbitrary_types_allowed = True
|
22 |
-
|
23 |
def __init__(self: Self, **data: Dict[str, Any]) -> None:
|
24 |
super().__init__(**data)
|
25 |
logger.debug("Created {}", self.__class__.__name__)
|
26 |
-
|
27 |
def connect(self: Self) -> None:
|
28 |
"""Initialize MongoDB client with settings."""
|
29 |
try:
|
30 |
connection_string = self.settings.MONGODB_URI.get_secret_value()
|
31 |
-
logger.debug("Connecting to MongoDB using URI: {}", connection_string
|
32 |
-
|
33 |
-
'[REDACTED]'
|
34 |
-
))
|
35 |
-
|
36 |
# Create client with appropriate settings
|
37 |
self._client = AsyncIOMotorClient(
|
38 |
connection_string,
|
@@ -43,48 +41,48 @@ class MongoDB(BaseModel):
|
|
43 |
retryWrites=True,
|
44 |
w="majority"
|
45 |
)
|
46 |
-
|
47 |
# Set database
|
48 |
db_name = self.settings.MONGODB_NAME
|
49 |
-
|
50 |
self._db = self._client[db_name]
|
51 |
logger.debug("MongoDB client initialized for database: {}", db_name)
|
52 |
-
|
53 |
except Exception as e:
|
54 |
logger.error("Failed to initialize MongoDB client: {}", e)
|
55 |
self._client = None
|
56 |
self._db = None
|
57 |
raise
|
58 |
-
|
59 |
@property
|
60 |
def client(self: Self) -> AsyncIOMotorClient:
|
61 |
"""Get the MongoDB client instance."""
|
62 |
if not hasattr(self, '_client') or self._client is None:
|
63 |
-
logger.warning("MongoDB client not initialized. Attempting to initialize
|
64 |
self.connect()
|
65 |
if not hasattr(self, '_client') or self._client is None:
|
66 |
-
raise ConnectionError("Failed to initialize MongoDB client")
|
67 |
return self._client
|
68 |
-
|
69 |
@property
|
70 |
def db(self: Self) -> Any:
|
71 |
"""Get the MongoDB database instance."""
|
72 |
if not hasattr(self, '_db') or self._db is None:
|
73 |
-
logger.warning("MongoDB database not initialized. Attempting to initialize client
|
74 |
self.connect()
|
75 |
if not hasattr(self, '_db') or self._db is None:
|
76 |
-
raise ConnectionError("Failed to initialize MongoDB database")
|
77 |
return self._db
|
78 |
-
|
79 |
async def ping(self: Self) -> bool:
|
80 |
"""Check if MongoDB connection is alive."""
|
81 |
try:
|
82 |
# Get client to ensure we're connected
|
83 |
client = self.client
|
84 |
-
|
85 |
# Try a simple ping command
|
86 |
await client.admin.command('ping')
|
87 |
-
logger.debug("MongoDB connection is active")
|
88 |
return True
|
89 |
except (ConnectionFailure, ServerSelectionTimeoutError) as e:
|
90 |
logger.error("MongoDB connection failed: {}", e)
|
@@ -92,7 +90,7 @@ class MongoDB(BaseModel):
|
|
92 |
except Exception as e:
|
93 |
logger.error("Unexpected error during MongoDB ping: {}", e)
|
94 |
return False
|
95 |
-
|
96 |
async def get_collection(self: Self, name: str) -> Any:
|
97 |
"""
|
98 |
Get a collection by name with validation.
|
@@ -100,29 +98,29 @@ class MongoDB(BaseModel):
|
|
100 |
"""
|
101 |
# First ensure we can connect at all
|
102 |
if not await self.ping():
|
103 |
-
logger.error("Cannot get collection '{}'
|
104 |
-
raise ConnectionError("MongoDB connection is not available")
|
105 |
-
|
106 |
try:
|
107 |
# Get all collection names to check if this one exists
|
108 |
-
logger.debug("Checking if collection '{}' exists", name)
|
109 |
collection_names = await self.db.list_collection_names()
|
110 |
|
111 |
if name not in collection_names:
|
112 |
-
logger.info("Collection '{}' does not exist. Creating it
|
113 |
# Create the collection
|
114 |
await self.db.create_collection(name)
|
115 |
-
logger.debug("Successfully created collection
|
116 |
else:
|
117 |
-
logger.debug("Collection '{}' already exists", name)
|
118 |
-
|
119 |
# Get and return the collection
|
120 |
collection = self.db[name]
|
121 |
return collection
|
122 |
except Exception as e:
|
123 |
logger.error("Error accessing collection '{}': {}", name, e)
|
124 |
raise
|
125 |
-
|
126 |
async def create_indexes(self: Self, collection_name: str) -> None:
|
127 |
"""
|
128 |
Create a vector search index on a collection.
|
@@ -131,7 +129,7 @@ class MongoDB(BaseModel):
|
|
131 |
collection_name: Name of the collection
|
132 |
"""
|
133 |
collection = await self.get_collection(collection_name)
|
134 |
-
|
135 |
try:
|
136 |
# Create search index model using MongoDB's recommended approach
|
137 |
search_index_model = SearchIndexModel(
|
@@ -149,41 +147,38 @@ class MongoDB(BaseModel):
|
|
149 |
name=f"{collection_name}_vector_index",
|
150 |
type="vectorSearch"
|
151 |
)
|
152 |
-
|
153 |
# Create the search index using the motor collection
|
154 |
result = await collection.create_search_index(search_index_model)
|
155 |
-
logger.info("Vector search index '{}' created for collection {}", result, collection_name)
|
156 |
-
|
157 |
except Exception as e:
|
158 |
if "command not found" in str(e).lower():
|
159 |
logger.warning("Vector search not supported by this MongoDB instance. Some functionality may be limited.")
|
160 |
# Create a fallback standard index on embedding field
|
161 |
await collection.create_index("embedding")
|
162 |
-
logger.info("Created standard index on 'embedding' field as fallback")
|
163 |
else:
|
164 |
logger.error("Failed to create vector index: {}", e)
|
165 |
raise
|
166 |
-
|
167 |
async def close(self: Self) -> None:
|
168 |
"""Close MongoDB connection."""
|
169 |
if self._client:
|
170 |
self._client.close()
|
171 |
-
logger.info("MongoDB connection
|
172 |
self._client = None
|
173 |
self._db = None
|
174 |
|
175 |
-
class MongoDBResource(
|
176 |
-
def init(self: Self, settings: Settings) -> MongoDB:
|
177 |
logger.info("Initializing MongoDB connection for database: {}", settings.MONGODB_NAME)
|
178 |
mongo_db = MongoDB(settings=settings)
|
179 |
mongo_db.connect()
|
180 |
-
|
181 |
-
# Test the connection asynchronously - this will run after init returns
|
182 |
-
asyncio.create_task(self._test_connection(mongo_db))
|
183 |
-
|
184 |
return mongo_db
|
185 |
-
|
186 |
-
async def _test_connection(self, mongo_db: MongoDB) -> None:
|
187 |
"""Test MongoDB connection and log the result."""
|
188 |
try:
|
189 |
is_connected = await mongo_db.ping()
|
@@ -193,11 +188,11 @@ class MongoDBResource(Resource):
|
|
193 |
logger.error("MongoDB connection test failed!")
|
194 |
except Exception as e:
|
195 |
logger.error("Error testing MongoDB connection: {}", e)
|
196 |
-
|
|
|
197 |
async def shutdown(self: Self, mongo_db: MongoDB) -> None:
|
198 |
"""Close MongoDB connection on shutdown."""
|
199 |
try:
|
200 |
-
logger.info("Closing MongoDB connection...")
|
201 |
await mongo_db.close()
|
202 |
except Exception as e:
|
203 |
logger.error("Error closing MongoDB connection: {}", e)
|
|
|
1 |
+
from asyncio import create_task
|
2 |
+
from dependency_injector.resources import AsyncResource
|
3 |
from motor.motor_asyncio import AsyncIOMotorClient
|
4 |
from pymongo.errors import ConnectionFailure, ServerSelectionTimeoutError
|
5 |
from pymongo.operations import SearchIndexModel
|
6 |
from loguru import logger
|
7 |
from pydantic import BaseModel, PrivateAttr
|
8 |
from typing import Any, Dict, Optional, Self
|
|
|
9 |
|
10 |
from ctp_slack_bot.core.config import Settings
|
11 |
+
from ctp_slack_bot.utils import sanitize_mongo_db_uri
|
12 |
|
13 |
class MongoDB(BaseModel):
|
14 |
"""
|
|
|
17 |
settings: Settings
|
18 |
_client: PrivateAttr = PrivateAttr()
|
19 |
_db: PrivateAttr = PrivateAttr()
|
20 |
+
|
21 |
class Config:
|
22 |
arbitrary_types_allowed = True
|
23 |
+
|
24 |
def __init__(self: Self, **data: Dict[str, Any]) -> None:
|
25 |
super().__init__(**data)
|
26 |
logger.debug("Created {}", self.__class__.__name__)
|
27 |
+
|
28 |
def connect(self: Self) -> None:
|
29 |
"""Initialize MongoDB client with settings."""
|
30 |
try:
|
31 |
connection_string = self.settings.MONGODB_URI.get_secret_value()
|
32 |
+
logger.debug("Connecting to MongoDB using URI: {}", sanitize_mongo_db_uri(connection_string))
|
33 |
+
|
|
|
|
|
|
|
34 |
# Create client with appropriate settings
|
35 |
self._client = AsyncIOMotorClient(
|
36 |
connection_string,
|
|
|
41 |
retryWrites=True,
|
42 |
w="majority"
|
43 |
)
|
44 |
+
|
45 |
# Set database
|
46 |
db_name = self.settings.MONGODB_NAME
|
47 |
+
|
48 |
self._db = self._client[db_name]
|
49 |
logger.debug("MongoDB client initialized for database: {}", db_name)
|
50 |
+
|
51 |
except Exception as e:
|
52 |
logger.error("Failed to initialize MongoDB client: {}", e)
|
53 |
self._client = None
|
54 |
self._db = None
|
55 |
raise
|
56 |
+
|
57 |
@property
|
58 |
def client(self: Self) -> AsyncIOMotorClient:
|
59 |
"""Get the MongoDB client instance."""
|
60 |
if not hasattr(self, '_client') or self._client is None:
|
61 |
+
logger.warning("MongoDB client not initialized. Attempting to initialize…")
|
62 |
self.connect()
|
63 |
if not hasattr(self, '_client') or self._client is None:
|
64 |
+
raise ConnectionError("Failed to initialize MongoDB client.")
|
65 |
return self._client
|
66 |
+
|
67 |
@property
|
68 |
def db(self: Self) -> Any:
|
69 |
"""Get the MongoDB database instance."""
|
70 |
if not hasattr(self, '_db') or self._db is None:
|
71 |
+
logger.warning("MongoDB database not initialized. Attempting to initialize client…")
|
72 |
self.connect()
|
73 |
if not hasattr(self, '_db') or self._db is None:
|
74 |
+
raise ConnectionError("Failed to initialize MongoDB database.")
|
75 |
return self._db
|
76 |
+
|
77 |
async def ping(self: Self) -> bool:
|
78 |
"""Check if MongoDB connection is alive."""
|
79 |
try:
|
80 |
# Get client to ensure we're connected
|
81 |
client = self.client
|
82 |
+
|
83 |
# Try a simple ping command
|
84 |
await client.admin.command('ping')
|
85 |
+
logger.debug("MongoDB connection is active!")
|
86 |
return True
|
87 |
except (ConnectionFailure, ServerSelectionTimeoutError) as e:
|
88 |
logger.error("MongoDB connection failed: {}", e)
|
|
|
90 |
except Exception as e:
|
91 |
logger.error("Unexpected error during MongoDB ping: {}", e)
|
92 |
return False
|
93 |
+
|
94 |
async def get_collection(self: Self, name: str) -> Any:
|
95 |
"""
|
96 |
Get a collection by name with validation.
|
|
|
98 |
"""
|
99 |
# First ensure we can connect at all
|
100 |
if not await self.ping():
|
101 |
+
logger.error("Cannot get collection '{}' because a MongoDB connection is not available.", name)
|
102 |
+
raise ConnectionError("MongoDB connection is not available.")
|
103 |
+
|
104 |
try:
|
105 |
# Get all collection names to check if this one exists
|
106 |
+
logger.debug("Checking if collection '{}' exists…", name)
|
107 |
collection_names = await self.db.list_collection_names()
|
108 |
|
109 |
if name not in collection_names:
|
110 |
+
logger.info("Collection '{}' does not exist. Creating it…", name)
|
111 |
# Create the collection
|
112 |
await self.db.create_collection(name)
|
113 |
+
logger.debug("Successfully created collection: {}", name)
|
114 |
else:
|
115 |
+
logger.debug("Collection '{}' already exists!", name)
|
116 |
+
|
117 |
# Get and return the collection
|
118 |
collection = self.db[name]
|
119 |
return collection
|
120 |
except Exception as e:
|
121 |
logger.error("Error accessing collection '{}': {}", name, e)
|
122 |
raise
|
123 |
+
|
124 |
async def create_indexes(self: Self, collection_name: str) -> None:
|
125 |
"""
|
126 |
Create a vector search index on a collection.
|
|
|
129 |
collection_name: Name of the collection
|
130 |
"""
|
131 |
collection = await self.get_collection(collection_name)
|
132 |
+
|
133 |
try:
|
134 |
# Create search index model using MongoDB's recommended approach
|
135 |
search_index_model = SearchIndexModel(
|
|
|
147 |
name=f"{collection_name}_vector_index",
|
148 |
type="vectorSearch"
|
149 |
)
|
150 |
+
|
151 |
# Create the search index using the motor collection
|
152 |
result = await collection.create_search_index(search_index_model)
|
153 |
+
logger.info("Vector search index '{}' created for collection {}.", result, collection_name)
|
154 |
+
|
155 |
except Exception as e:
|
156 |
if "command not found" in str(e).lower():
|
157 |
logger.warning("Vector search not supported by this MongoDB instance. Some functionality may be limited.")
|
158 |
# Create a fallback standard index on embedding field
|
159 |
await collection.create_index("embedding")
|
160 |
+
logger.info("Created standard index on 'embedding' field as fallback.")
|
161 |
else:
|
162 |
logger.error("Failed to create vector index: {}", e)
|
163 |
raise
|
164 |
+
|
165 |
async def close(self: Self) -> None:
|
166 |
"""Close MongoDB connection."""
|
167 |
if self._client:
|
168 |
self._client.close()
|
169 |
+
logger.info("Closed MongoDB connection.")
|
170 |
self._client = None
|
171 |
self._db = None
|
172 |
|
173 |
+
class MongoDBResource(AsyncResource):
|
174 |
+
async def init(self: Self, settings: Settings) -> MongoDB:
|
175 |
logger.info("Initializing MongoDB connection for database: {}", settings.MONGODB_NAME)
|
176 |
mongo_db = MongoDB(settings=settings)
|
177 |
mongo_db.connect()
|
178 |
+
await self._test_connection(mongo_db)
|
|
|
|
|
|
|
179 |
return mongo_db
|
180 |
+
|
181 |
+
async def _test_connection(self: Self, mongo_db: MongoDB) -> None:
|
182 |
"""Test MongoDB connection and log the result."""
|
183 |
try:
|
184 |
is_connected = await mongo_db.ping()
|
|
|
188 |
logger.error("MongoDB connection test failed!")
|
189 |
except Exception as e:
|
190 |
logger.error("Error testing MongoDB connection: {}", e)
|
191 |
+
raise
|
192 |
+
|
193 |
async def shutdown(self: Self, mongo_db: MongoDB) -> None:
|
194 |
"""Close MongoDB connection on shutdown."""
|
195 |
try:
|
|
|
196 |
await mongo_db.close()
|
197 |
except Exception as e:
|
198 |
logger.error("Error closing MongoDB connection: {}", e)
|
src/ctp_slack_bot/services/GOOGLE_DRIVE_README.md
DELETED
@@ -1,228 +0,0 @@
|
|
1 |
-
# Google Drive Access Module
|
2 |
-
|
3 |
-
This Python module provides a simplified way to interact with Google Drive, focusing on easy access to files in nested folders using path-like syntax. It handles various Google file formats and provides comprehensive metadata for files and folders.
|
4 |
-
|
5 |
-
## Features
|
6 |
-
|
7 |
-
- **Path-based folder access**: Access files using simple paths like `folder1/folder2/folder3`
|
8 |
-
- **Efficient caching**: Folder IDs are cached to improve performance
|
9 |
-
- **Comprehensive metadata**: Get detailed information about files and folders
|
10 |
-
- **Read various file types**:
|
11 |
-
- Text files
|
12 |
-
- Google Docs
|
13 |
-
- VTT files
|
14 |
-
- **Robust folder finding**: Works with exact and partial name matching
|
15 |
-
- **Simple API**: Designed for ease of use with minimal code
|
16 |
-
|
17 |
-
## Setup Instructions
|
18 |
-
|
19 |
-
### 1. Create a Google Cloud Project
|
20 |
-
|
21 |
-
1. Go to the [Google Cloud Console](https://console.cloud.google.com/)
|
22 |
-
2. Click on the project dropdown at the top of the page and select "New Project"
|
23 |
-
3. Enter a project name and click "Create"
|
24 |
-
4. Once created, make sure your new project is selected in the dropdown
|
25 |
-
|
26 |
-
### 2. Enable the Google Drive API
|
27 |
-
|
28 |
-
1. In the Google Cloud Console, navigate to "APIs & Services" > "Library" in the left sidebar
|
29 |
-
2. Search for "Google Drive API" in the search bar
|
30 |
-
3. Click on "Google Drive API" in the results
|
31 |
-
4. Click the "Enable" button
|
32 |
-
|
33 |
-
### 3. Create OAuth Credentials
|
34 |
-
|
35 |
-
1. In the Google Cloud Console, go to "APIs & Services" > "Credentials" in the left sidebar
|
36 |
-
2. Click "Create Credentials" at the top and select "OAuth client ID"
|
37 |
-
3. If prompted to configure the OAuth consent screen:
|
38 |
-
- Choose "External" user type (or "Internal" if you're in a Google Workspace organization)
|
39 |
-
- Fill in the required information (App name, User support email, Developer contact email)
|
40 |
-
- Click "Save and Continue"
|
41 |
-
- Add the following scopes:
|
42 |
-
- `.../auth/drive` (Full access to Google Drive)
|
43 |
-
- Click "Save and Continue" and complete the registration
|
44 |
-
4. Return to the "Create OAuth client ID" screen
|
45 |
-
5. Select "Desktop application" as the Application type
|
46 |
-
6. Enter a name for your OAuth client (e.g., "Google Drive Access Desktop")
|
47 |
-
7. Click "Create"
|
48 |
-
8. Download the JSON file (this is your `client_secret.json`)
|
49 |
-
|
50 |
-
### 4. Project Setup
|
51 |
-
|
52 |
-
1. Setup a virtual environment and install dependencies:
|
53 |
-
```bash
|
54 |
-
python -m venv venv
|
55 |
-
source venv/bin/activate # On Windows: venv\Scripts\activate
|
56 |
-
pip install -r requirements.txt
|
57 |
-
```
|
58 |
-
|
59 |
-
2. Place your credentials:
|
60 |
-
- Create a `credentials` directory in your project root
|
61 |
-
- Move the downloaded OAuth client JSON file to the `credentials` directory
|
62 |
-
- Rename it to `client_secret.json`
|
63 |
-
|
64 |
-
### 5. Authentication Process
|
65 |
-
|
66 |
-
When you run the application for the first time:
|
67 |
-
1. A browser window will open automatically
|
68 |
-
2. You'll be asked to sign in to your Google account
|
69 |
-
3. You'll see a consent screen asking for permission to access your Google Drive
|
70 |
-
4. After granting permission, the browser will display a success message
|
71 |
-
5. The application will save a token file (`token.pickle`) in the credentials directory for future use
|
72 |
-
|
73 |
-
## Usage Guide
|
74 |
-
|
75 |
-
The `EasyGoogleDrive` class provides several methods to interact with Google Drive. Here's how to use the core functionality:
|
76 |
-
|
77 |
-
### Basic Usage
|
78 |
-
|
79 |
-
```python
|
80 |
-
from google_drive_access import EasyGoogleDrive
|
81 |
-
|
82 |
-
# Initialize the Google Drive client
|
83 |
-
drive = EasyGoogleDrive()
|
84 |
-
|
85 |
-
# Example folder path - replace with your actual folder path
|
86 |
-
folder_path = "Spring-2025-BAI"
|
87 |
-
subfolder_path = "Spring-2025-BAI/transcripts"
|
88 |
-
```
|
89 |
-
|
90 |
-
### Listing Folders
|
91 |
-
|
92 |
-
```python
|
93 |
-
# List folders in a directory
|
94 |
-
folders = drive.get_folders_in_folder(folder_path)
|
95 |
-
|
96 |
-
# Access folder properties
|
97 |
-
for folder in folders:
|
98 |
-
print(f"Folder: {folder['name']}")
|
99 |
-
print(f" Created: {folder.get('createdTimeFormatted', 'Unknown')}")
|
100 |
-
print(f" Modified: {folder.get('modifiedTimeFormatted', 'Unknown')}")
|
101 |
-
```
|
102 |
-
|
103 |
-
### Listing Files
|
104 |
-
|
105 |
-
```python
|
106 |
-
# List files in a directory
|
107 |
-
files = drive.get_files_in_folder(subfolder_path)
|
108 |
-
|
109 |
-
# Access file properties
|
110 |
-
for file in files:
|
111 |
-
print(f"File: {file['name']}")
|
112 |
-
print(f" Type: {file.get('fileType', 'Unknown')}")
|
113 |
-
print(f" Created: {file.get('createdTimeFormatted', 'Unknown')}")
|
114 |
-
print(f" Modified: {file.get('modifiedTimeFormatted', 'Unknown')}")
|
115 |
-
print(f" Size: {file.get('sizeFormatted', 'Unknown')}")
|
116 |
-
```
|
117 |
-
|
118 |
-
### Getting a Specific File
|
119 |
-
|
120 |
-
```python
|
121 |
-
# Get a specific file with metadata
|
122 |
-
file = drive.get_file("example.txt", subfolder_path, include_metadata=True)
|
123 |
-
|
124 |
-
if file:
|
125 |
-
print(f"File: {file['name']}")
|
126 |
-
print(f" Type: {file.get('fileType', 'Unknown')}")
|
127 |
-
print(f" Created: {file.get('createdTimeFormatted', 'Unknown')}")
|
128 |
-
print(f" Modified: {file.get('modifiedTimeFormatted', 'Unknown')}")
|
129 |
-
print(f" Size: {file.get('sizeFormatted', 'Unknown')}")
|
130 |
-
```
|
131 |
-
|
132 |
-
### Getting All Items in a Folder
|
133 |
-
|
134 |
-
```python
|
135 |
-
# Get all items (files and folders) in a folder
|
136 |
-
all_items = drive.get_all_files_in_folder(folder_path)
|
137 |
-
|
138 |
-
# Access item properties
|
139 |
-
for item in all_items:
|
140 |
-
item_type = "Folder" if item.get('mimeType') == drive.MIME_TYPES['folder'] else item.get('fileType', 'Unknown')
|
141 |
-
print(f"Item: {item['name']} ({item_type})")
|
142 |
-
```
|
143 |
-
|
144 |
-
### Checking if a File Exists
|
145 |
-
|
146 |
-
```python
|
147 |
-
# Check if a file exists
|
148 |
-
exists = drive.file_exists("example.txt", subfolder_path)
|
149 |
-
print(f"File exists: {exists}")
|
150 |
-
```
|
151 |
-
|
152 |
-
### Getting File Modified Time
|
153 |
-
|
154 |
-
```python
|
155 |
-
# Get file modified time
|
156 |
-
modified_time = drive.get_file_modified_time("example.txt", subfolder_path)
|
157 |
-
if modified_time:
|
158 |
-
print(f"Last modified: {modified_time}")
|
159 |
-
```
|
160 |
-
|
161 |
-
### Reading File Content
|
162 |
-
|
163 |
-
```python
|
164 |
-
# Get file with content
|
165 |
-
file_with_content = drive.get_file("example.txt", subfolder_path, include_content=True)
|
166 |
-
|
167 |
-
if file_with_content and 'file_content' in file_with_content:
|
168 |
-
content = file_with_content['file_content']
|
169 |
-
if content:
|
170 |
-
print(f"Content: {content[:100]}...") # Print first 100 characters
|
171 |
-
```
|
172 |
-
|
173 |
-
## Complete Example
|
174 |
-
|
175 |
-
For a complete example of how to use the `EasyGoogleDrive` class, see the `google_drive_basic_usage.py` file included in this package. This file demonstrates all the core functionality with practical examples.
|
176 |
-
|
177 |
-
## Key Concepts
|
178 |
-
|
179 |
-
### Path-based Folder Access
|
180 |
-
|
181 |
-
The module uses a simple path-like syntax to access folders:
|
182 |
-
|
183 |
-
```python
|
184 |
-
# Access a deeply nested folder
|
185 |
-
folder_path = "folder1/folder2/folder3"
|
186 |
-
files = drive.get_files_in_folder(folder_path)
|
187 |
-
```
|
188 |
-
|
189 |
-
This makes it much easier to work with nested folder structures compared to using folder IDs.
|
190 |
-
|
191 |
-
### Metadata Fields
|
192 |
-
|
193 |
-
The module provides comprehensive metadata for files and folders, including:
|
194 |
-
|
195 |
-
- **Creation and modification dates**: Both as datetime objects and formatted strings
|
196 |
-
- **File size**: Both in bytes and human-readable format (KB, MB, GB)
|
197 |
-
- **File type**: Simplified type based on MIME type
|
198 |
-
- **Owner information**: Names and email addresses of file owners
|
199 |
-
- **Sharing status**: Whether the file is shared
|
200 |
-
- **Web links**: Direct links to view the file in a browser
|
201 |
-
|
202 |
-
## Error Handling
|
203 |
-
|
204 |
-
The module includes comprehensive error handling:
|
205 |
-
|
206 |
-
- **Authentication errors**: Clear messages when credentials are missing or invalid
|
207 |
-
- **Folder not found**: Helpful messages when a folder in the path cannot be found
|
208 |
-
- **File not found**: Attempts partial name matching before giving up
|
209 |
-
- **Decoding errors**: Handles issues with file content encoding
|
210 |
-
|
211 |
-
## Dependencies
|
212 |
-
|
213 |
-
- **Required**:
|
214 |
-
- google-auth-oauthlib
|
215 |
-
- google-auth-httplib2
|
216 |
-
- google-api-python-client
|
217 |
-
- python-dateutil
|
218 |
-
|
219 |
-
## Security Notes
|
220 |
-
|
221 |
-
- Never commit your `client_secret.json` or token files to version control
|
222 |
-
- Add `credentials/` to your `.gitignore` file
|
223 |
-
- Keep your credentials secure and don't share them
|
224 |
-
- For production applications, consider using service accounts with the minimum required permissions
|
225 |
-
|
226 |
-
## Contributing
|
227 |
-
|
228 |
-
Feel free to contribute to this project by submitting issues or pull requests.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/ctp_slack_bot/services/google_drive_access.py
DELETED
@@ -1,623 +0,0 @@
|
|
1 |
-
"""
|
2 |
-
Easy Google Drive Access
|
3 |
-
|
4 |
-
A simplified module for accessing Google Drive files in nested folders.
|
5 |
-
Designed to make it as easy as possible to access files using path-like syntax.
|
6 |
-
"""
|
7 |
-
|
8 |
-
import os
|
9 |
-
import pickle
|
10 |
-
import io
|
11 |
-
import datetime
|
12 |
-
from typing import List, Dict, Optional, Any, Union
|
13 |
-
|
14 |
-
from google.oauth2.credentials import Credentials
|
15 |
-
from google_auth_oauthlib.flow import InstalledAppFlow
|
16 |
-
from google.auth.transport.requests import Request
|
17 |
-
from googleapiclient.discovery import build
|
18 |
-
from googleapiclient.http import MediaIoBaseDownload
|
19 |
-
from googleapiclient.errors import HttpError
|
20 |
-
|
21 |
-
|
22 |
-
class EasyGoogleDrive:
|
23 |
-
"""
|
24 |
-
Simplified Google Drive access focused on accessing files in nested folders.
|
25 |
-
"""
|
26 |
-
|
27 |
-
# Define the scopes needed for the application
|
28 |
-
SCOPES = ['https://www.googleapis.com/auth/drive']
|
29 |
-
|
30 |
-
# Define common MIME types
|
31 |
-
MIME_TYPES = {
|
32 |
-
'folder': 'application/vnd.google-apps.folder',
|
33 |
-
'document': 'application/vnd.google-apps.document',
|
34 |
-
'spreadsheet': 'application/vnd.google-apps.spreadsheet',
|
35 |
-
'text': 'text/plain',
|
36 |
-
'pdf': 'application/pdf',
|
37 |
-
'image': 'image/jpeg',
|
38 |
-
'video': 'video/mp4',
|
39 |
-
'audio': 'audio/mpeg',
|
40 |
-
}
|
41 |
-
|
42 |
-
# Define metadata fields to retrieve
|
43 |
-
FILE_FIELDS = 'id, name, mimeType, createdTime, modifiedTime, size, description, webViewLink, thumbnailLink, owners, shared, sharingUser, lastModifyingUser, capabilities, permissions'
|
44 |
-
FOLDER_FIELDS = 'id, name, createdTime, modifiedTime, description, webViewLink, owners, shared, sharingUser, lastModifyingUser, capabilities, permissions'
|
45 |
-
|
46 |
-
def __init__(self, credentials_dir: str = 'credentials'):
|
47 |
-
"""Initialize the Google Drive access."""
|
48 |
-
self.credentials_dir = credentials_dir
|
49 |
-
self.credentials_path = os.path.join(credentials_dir, 'client_secret.json')
|
50 |
-
self.token_path = os.path.join(credentials_dir, 'token.pickle')
|
51 |
-
|
52 |
-
# Ensure credentials directory exists
|
53 |
-
os.makedirs(credentials_dir, exist_ok=True)
|
54 |
-
|
55 |
-
# Initialize the Drive API service
|
56 |
-
self.service = build('drive', 'v3', credentials=self._get_credentials())
|
57 |
-
|
58 |
-
# Cache for folder IDs to avoid repeated lookups
|
59 |
-
self.folder_id_cache = {}
|
60 |
-
|
61 |
-
def _get_credentials(self) -> Credentials:
|
62 |
-
"""Get and refresh Google Drive API credentials."""
|
63 |
-
creds = None
|
64 |
-
|
65 |
-
# Load existing token if it exists
|
66 |
-
if os.path.exists(self.token_path):
|
67 |
-
with open(self.token_path, 'rb') as token:
|
68 |
-
creds = pickle.load(token)
|
69 |
-
|
70 |
-
# If credentials need refresh or don't exist
|
71 |
-
if not creds or not creds.valid:
|
72 |
-
if creds and creds.expired and creds.refresh_token:
|
73 |
-
creds.refresh(Request())
|
74 |
-
else:
|
75 |
-
if not os.path.exists(self.credentials_path):
|
76 |
-
raise FileNotFoundError(
|
77 |
-
f"Client secrets file not found at {self.credentials_path}. "
|
78 |
-
"Please follow the setup instructions in the README."
|
79 |
-
)
|
80 |
-
|
81 |
-
flow = InstalledAppFlow.from_client_secrets_file(
|
82 |
-
self.credentials_path, self.SCOPES)
|
83 |
-
creds = flow.run_local_server(port=0)
|
84 |
-
|
85 |
-
# Save the credentials for future use
|
86 |
-
with open(self.token_path, 'wb') as token:
|
87 |
-
pickle.dump(creds, token)
|
88 |
-
|
89 |
-
return creds
|
90 |
-
|
91 |
-
def _format_metadata(self, metadata: Dict[str, Any]) -> Dict[str, Any]:
|
92 |
-
"""
|
93 |
-
Format metadata for easier reading and usage.
|
94 |
-
|
95 |
-
Args:
|
96 |
-
metadata: Raw metadata from Google Drive API
|
97 |
-
|
98 |
-
Returns:
|
99 |
-
Formatted metadata dictionary
|
100 |
-
"""
|
101 |
-
formatted = metadata.copy()
|
102 |
-
|
103 |
-
# Format dates
|
104 |
-
for date_field in ['createdTime', 'modifiedTime']:
|
105 |
-
if date_field in formatted:
|
106 |
-
try:
|
107 |
-
# Convert ISO 8601 string to datetime object
|
108 |
-
dt = datetime.datetime.fromisoformat(formatted[date_field].replace('Z', '+00:00'))
|
109 |
-
formatted[date_field] = dt
|
110 |
-
# Add a formatted date string for easier reading
|
111 |
-
formatted[f"{date_field}Formatted"] = dt.strftime('%Y-%m-%d %H:%M:%S')
|
112 |
-
except (ValueError, AttributeError):
|
113 |
-
pass
|
114 |
-
|
115 |
-
# Format size
|
116 |
-
if 'size' in formatted and formatted['size']:
|
117 |
-
try:
|
118 |
-
size_bytes = int(formatted['size'])
|
119 |
-
# Add human-readable size
|
120 |
-
if size_bytes < 1024:
|
121 |
-
formatted['sizeFormatted'] = f"{size_bytes} B"
|
122 |
-
elif size_bytes < 1024 * 1024:
|
123 |
-
formatted['sizeFormatted'] = f"{size_bytes / 1024:.1f} KB"
|
124 |
-
elif size_bytes < 1024 * 1024 * 1024:
|
125 |
-
formatted['sizeFormatted'] = f"{size_bytes / (1024 * 1024):.1f} MB"
|
126 |
-
else:
|
127 |
-
formatted['sizeFormatted'] = f"{size_bytes / (1024 * 1024 * 1024):.1f} GB"
|
128 |
-
except (ValueError, TypeError):
|
129 |
-
pass
|
130 |
-
|
131 |
-
# Extract owner names
|
132 |
-
if 'owners' in formatted and formatted['owners']:
|
133 |
-
formatted['ownerNames'] = [owner.get('displayName', 'Unknown') for owner in formatted['owners']]
|
134 |
-
formatted['ownerEmails'] = [owner.get('emailAddress', 'Unknown') for owner in formatted['owners']]
|
135 |
-
|
136 |
-
# Add file type description
|
137 |
-
if 'mimeType' in formatted:
|
138 |
-
mime_type = formatted['mimeType']
|
139 |
-
for key, value in self.MIME_TYPES.items():
|
140 |
-
if mime_type == value:
|
141 |
-
formatted['fileType'] = key
|
142 |
-
break
|
143 |
-
else:
|
144 |
-
# If not found in our predefined types
|
145 |
-
formatted['fileType'] = mime_type.split('/')[-1]
|
146 |
-
|
147 |
-
return formatted
|
148 |
-
|
149 |
-
def get_folder_id(self, folder_path: str) -> Optional[str]:
|
150 |
-
"""
|
151 |
-
Get a folder ID from a path like 'folder1/folder2/folder3'.
|
152 |
-
|
153 |
-
Args:
|
154 |
-
folder_path: Path to the folder, using '/' as separator
|
155 |
-
|
156 |
-
Returns:
|
157 |
-
The folder ID if found, None otherwise
|
158 |
-
"""
|
159 |
-
# Check if we've already resolved this path
|
160 |
-
if folder_path in self.folder_id_cache:
|
161 |
-
return self.folder_id_cache[folder_path]
|
162 |
-
|
163 |
-
# If it looks like an ID already, return it
|
164 |
-
if len(folder_path) > 25 and '/' not in folder_path:
|
165 |
-
return folder_path
|
166 |
-
|
167 |
-
# Split the path into components
|
168 |
-
parts = folder_path.split('/')
|
169 |
-
|
170 |
-
# Start from the root
|
171 |
-
current_folder_id = None
|
172 |
-
current_path = ""
|
173 |
-
|
174 |
-
# Traverse the path one folder at a time
|
175 |
-
for i, folder_name in enumerate(parts):
|
176 |
-
if not folder_name: # Skip empty parts
|
177 |
-
continue
|
178 |
-
|
179 |
-
# Update the current path for caching
|
180 |
-
if current_path:
|
181 |
-
current_path += f"/{folder_name}"
|
182 |
-
else:
|
183 |
-
current_path = folder_name
|
184 |
-
|
185 |
-
# Check if we've already resolved this subpath
|
186 |
-
if current_path in self.folder_id_cache:
|
187 |
-
current_folder_id = self.folder_id_cache[current_path]
|
188 |
-
continue
|
189 |
-
|
190 |
-
# Search for the folder by name
|
191 |
-
query = f"mimeType='{self.MIME_TYPES['folder']}' and name='{folder_name}'"
|
192 |
-
if current_folder_id:
|
193 |
-
query += f" and '{current_folder_id}' in parents"
|
194 |
-
|
195 |
-
try:
|
196 |
-
results = self.service.files().list(
|
197 |
-
q=query,
|
198 |
-
spaces='drive',
|
199 |
-
fields='files(id, name)',
|
200 |
-
pageSize=10
|
201 |
-
).execute()
|
202 |
-
|
203 |
-
files = results.get('files', [])
|
204 |
-
if not files:
|
205 |
-
# Try a more flexible search if exact match fails
|
206 |
-
query = query.replace(f"name='{folder_name}'", f"name contains '{folder_name}'")
|
207 |
-
results = self.service.files().list(
|
208 |
-
q=query,
|
209 |
-
spaces='drive',
|
210 |
-
fields='files(id, name)',
|
211 |
-
pageSize=10
|
212 |
-
).execute()
|
213 |
-
|
214 |
-
files = results.get('files', [])
|
215 |
-
if not files:
|
216 |
-
print(f"Could not find folder '{folder_name}' in path '{folder_path}'")
|
217 |
-
return None
|
218 |
-
|
219 |
-
# Use the first match
|
220 |
-
current_folder_id = files[0]['id']
|
221 |
-
|
222 |
-
# Cache this result
|
223 |
-
self.folder_id_cache[current_path] = current_folder_id
|
224 |
-
|
225 |
-
except HttpError as error:
|
226 |
-
print(f"Error finding folder: {error}")
|
227 |
-
return None
|
228 |
-
|
229 |
-
return current_folder_id
|
230 |
-
|
231 |
-
def get_folders_in_folder(self, folder_path: str, include_metadata: bool = True) -> List[Dict[str, Any]]:
|
232 |
-
"""
|
233 |
-
Get all subfolders in a folder specified by path.
|
234 |
-
|
235 |
-
Args:
|
236 |
-
folder_path: Path to the folder, using '/' as separator
|
237 |
-
include_metadata: Whether to include detailed metadata (default: True)
|
238 |
-
|
239 |
-
Returns:
|
240 |
-
List of folder metadata dictionaries
|
241 |
-
"""
|
242 |
-
# Get the folder ID
|
243 |
-
folder_id = self.get_folder_id(folder_path)
|
244 |
-
if not folder_id:
|
245 |
-
print(f"Could not find folder: '{folder_path}'")
|
246 |
-
return []
|
247 |
-
|
248 |
-
# List all folders in this folder
|
249 |
-
query = f"'{folder_id}' in parents and mimeType = '{self.MIME_TYPES['folder']}'"
|
250 |
-
|
251 |
-
try:
|
252 |
-
results = self.service.files().list(
|
253 |
-
q=query,
|
254 |
-
spaces='drive',
|
255 |
-
fields=f'files({self.FOLDER_FIELDS})' if include_metadata else 'files(id, name)',
|
256 |
-
pageSize=1000
|
257 |
-
).execute()
|
258 |
-
|
259 |
-
folders = results.get('files', [])
|
260 |
-
|
261 |
-
# Format metadata if requested
|
262 |
-
if include_metadata and folders:
|
263 |
-
folders = [self._format_metadata(folder) for folder in folders]
|
264 |
-
|
265 |
-
if folders:
|
266 |
-
print(f"Found {len(folders)} subfolders in '{folder_path}':")
|
267 |
-
for folder in folders:
|
268 |
-
if include_metadata and 'createdTimeFormatted' in folder:
|
269 |
-
print(f" - {folder['name']} (Created: {folder['createdTimeFormatted']})")
|
270 |
-
else:
|
271 |
-
print(f" - {folder['name']}")
|
272 |
-
else:
|
273 |
-
print(f"No subfolders found in '{folder_path}'")
|
274 |
-
|
275 |
-
return folders
|
276 |
-
|
277 |
-
except HttpError as error:
|
278 |
-
print(f"Error listing folders: {error}")
|
279 |
-
return []
|
280 |
-
|
281 |
-
def get_files_in_folder(self, folder_path: str, include_metadata: bool = True, include_content: bool = False) -> List[Dict[str, Any]]:
|
282 |
-
"""
|
283 |
-
Get all files in a folder specified by path.
|
284 |
-
|
285 |
-
Args:
|
286 |
-
folder_path: Path to the folder, using '/' as separator
|
287 |
-
include_metadata: Whether to include detailed metadata (default: True)
|
288 |
-
include_content: Whether to include file content (default: False)
|
289 |
-
|
290 |
-
Returns:
|
291 |
-
List of file metadata dictionaries, optionally including file content
|
292 |
-
"""
|
293 |
-
# Get the folder ID
|
294 |
-
folder_id = self.get_folder_id(folder_path)
|
295 |
-
if not folder_id:
|
296 |
-
print(f"Could not find folder: '{folder_path}'")
|
297 |
-
return []
|
298 |
-
|
299 |
-
# List all non-folder files in this folder
|
300 |
-
query = f"'{folder_id}' in parents and mimeType != '{self.MIME_TYPES['folder']}'"
|
301 |
-
|
302 |
-
try:
|
303 |
-
results = self.service.files().list(
|
304 |
-
q=query,
|
305 |
-
spaces='drive',
|
306 |
-
fields=f'files({self.FILE_FIELDS})' if include_metadata else 'files(id, name, mimeType)',
|
307 |
-
pageSize=1000
|
308 |
-
).execute()
|
309 |
-
|
310 |
-
files = results.get('files', [])
|
311 |
-
|
312 |
-
# Format metadata if requested
|
313 |
-
if include_metadata and files:
|
314 |
-
files = [self._format_metadata(file) for file in files]
|
315 |
-
|
316 |
-
# Add file content if requested
|
317 |
-
if include_content and files:
|
318 |
-
for file in files:
|
319 |
-
try:
|
320 |
-
# Skip files that are likely not text-based
|
321 |
-
if any(ext in file['name'].lower() for ext in ['.jpg', '.png', '.gif', '.mp3', '.mp4']):
|
322 |
-
print(f"Skipping content for non-text file: {file['name']}")
|
323 |
-
file['file_content'] = None
|
324 |
-
continue
|
325 |
-
|
326 |
-
# Read the file content
|
327 |
-
content = self.read_file_from_object(file)
|
328 |
-
file['file_content'] = content
|
329 |
-
|
330 |
-
if content is not None:
|
331 |
-
print(f"Successfully read content for: {file['name']} ({len(content)} characters)")
|
332 |
-
else:
|
333 |
-
print(f"Unable to read content for: {file['name']}")
|
334 |
-
except Exception as e:
|
335 |
-
print(f"Error reading content for {file['name']}: {e}")
|
336 |
-
file['file_content'] = None
|
337 |
-
|
338 |
-
if files:
|
339 |
-
print(f"Found {len(files)} files in '{folder_path}':")
|
340 |
-
for file in files:
|
341 |
-
if include_metadata and 'createdTimeFormatted' in file:
|
342 |
-
print(f" - {file['name']} ({file.get('fileType', 'Unknown')}, Created: {file['createdTimeFormatted']})")
|
343 |
-
else:
|
344 |
-
print(f" - {file['name']} ({file.get('mimeType', 'Unknown')})")
|
345 |
-
else:
|
346 |
-
print(f"No files found in '{folder_path}'")
|
347 |
-
|
348 |
-
return files
|
349 |
-
|
350 |
-
except HttpError as error:
|
351 |
-
print(f"Error listing files: {error}")
|
352 |
-
return []
|
353 |
-
|
354 |
-
def get_file(self, file_name: str, folder_path: str, include_metadata: bool = True, include_content: bool = False) -> Optional[Dict[str, Any]]:
|
355 |
-
"""
|
356 |
-
Get a specific file by name from a folder.
|
357 |
-
|
358 |
-
Args:
|
359 |
-
file_name: Name of the file to get
|
360 |
-
folder_path: Path to the folder containing the file
|
361 |
-
include_metadata: Whether to include detailed metadata (default: True)
|
362 |
-
include_content: Whether to include file content (default: False)
|
363 |
-
|
364 |
-
Returns:
|
365 |
-
File metadata dictionary, optionally including content, or None if file not found
|
366 |
-
"""
|
367 |
-
# Get the folder ID
|
368 |
-
folder_id = self.get_folder_id(folder_path)
|
369 |
-
if not folder_id:
|
370 |
-
print(f"Could not find folder: '{folder_path}'")
|
371 |
-
return None
|
372 |
-
|
373 |
-
# Find the file by name in this folder
|
374 |
-
query = f"'{folder_id}' in parents and name = '{file_name}'"
|
375 |
-
|
376 |
-
try:
|
377 |
-
results = self.service.files().list(
|
378 |
-
q=query,
|
379 |
-
spaces='drive',
|
380 |
-
fields=f'files({self.FILE_FIELDS})' if include_metadata else 'files(id, name, mimeType)',
|
381 |
-
pageSize=1
|
382 |
-
).execute()
|
383 |
-
|
384 |
-
files = results.get('files', [])
|
385 |
-
if not files:
|
386 |
-
# Try a more flexible search
|
387 |
-
query = query.replace(f"name = '{file_name}'", f"name contains '{file_name}'")
|
388 |
-
results = self.service.files().list(
|
389 |
-
q=query,
|
390 |
-
spaces='drive',
|
391 |
-
fields=f'files({self.FILE_FIELDS})' if include_metadata else 'files(id, name, mimeType)',
|
392 |
-
pageSize=10
|
393 |
-
).execute()
|
394 |
-
|
395 |
-
files = results.get('files', [])
|
396 |
-
if not files:
|
397 |
-
print(f"Could not find file '{file_name}' in '{folder_path}'")
|
398 |
-
return None
|
399 |
-
|
400 |
-
# Use the first match
|
401 |
-
file = files[0]
|
402 |
-
|
403 |
-
# Format metadata if requested
|
404 |
-
if include_metadata:
|
405 |
-
file = self._format_metadata(file)
|
406 |
-
|
407 |
-
# Add file content if requested
|
408 |
-
if include_content:
|
409 |
-
try:
|
410 |
-
# Skip files that are likely not text-based
|
411 |
-
if any(ext in file['name'].lower() for ext in ['.jpg', '.png', '.gif', '.mp3', '.mp4']):
|
412 |
-
print(f"Skipping content for non-text file: {file['name']}")
|
413 |
-
file['file_content'] = None
|
414 |
-
else:
|
415 |
-
# Read the file content
|
416 |
-
content = self.read_file_from_object(file)
|
417 |
-
file['file_content'] = content
|
418 |
-
|
419 |
-
if content is not None:
|
420 |
-
print(f"Successfully read content for: {file['name']} ({len(content)} characters)")
|
421 |
-
else:
|
422 |
-
print(f"Unable to read content for: {file['name']}")
|
423 |
-
except Exception as e:
|
424 |
-
print(f"Error reading content for {file['name']}: {e}")
|
425 |
-
file['file_content'] = None
|
426 |
-
|
427 |
-
print(f"Found file: {file['name']}")
|
428 |
-
return file
|
429 |
-
|
430 |
-
except HttpError as error:
|
431 |
-
print(f"Error getting file: {error}")
|
432 |
-
return None
|
433 |
-
|
434 |
-
def get_all_files_in_folder(self, folder_path: str, include_metadata: bool = True, include_content: bool = False) -> List[Dict[str, Any]]:
|
435 |
-
"""
|
436 |
-
Get all items (files and folders) in a folder specified by path.
|
437 |
-
|
438 |
-
Args:
|
439 |
-
folder_path: Path to the folder, using '/' as separator
|
440 |
-
include_metadata: Whether to include detailed metadata (default: True)
|
441 |
-
include_content: Whether to include file content (default: False)
|
442 |
-
|
443 |
-
Returns:
|
444 |
-
List of file and folder metadata dictionaries, optionally including file content
|
445 |
-
"""
|
446 |
-
# Get the folder ID
|
447 |
-
folder_id = self.get_folder_id(folder_path)
|
448 |
-
if not folder_id:
|
449 |
-
print(f"Could not find folder: '{folder_path}'")
|
450 |
-
return []
|
451 |
-
|
452 |
-
# List all items in this folder
|
453 |
-
query = f"'{folder_id}' in parents"
|
454 |
-
|
455 |
-
try:
|
456 |
-
results = self.service.files().list(
|
457 |
-
q=query,
|
458 |
-
spaces='drive',
|
459 |
-
fields=f'files({self.FILE_FIELDS})' if include_metadata else 'files(id, name, mimeType)',
|
460 |
-
pageSize=1000
|
461 |
-
).execute()
|
462 |
-
|
463 |
-
items = results.get('files', [])
|
464 |
-
|
465 |
-
# Format metadata if requested
|
466 |
-
if include_metadata and items:
|
467 |
-
items = [self._format_metadata(item) for item in items]
|
468 |
-
|
469 |
-
# Add file content if requested
|
470 |
-
if include_content and items:
|
471 |
-
for item in items:
|
472 |
-
# Skip folders and non-text files
|
473 |
-
if item.get('mimeType') == self.MIME_TYPES['folder'] or any(ext in item['name'].lower() for ext in ['.jpg', '.png', '.gif', '.mp3', '.mp4']):
|
474 |
-
item['file_content'] = None
|
475 |
-
continue
|
476 |
-
|
477 |
-
try:
|
478 |
-
# Read the file content
|
479 |
-
content = self.read_file_from_object(item)
|
480 |
-
item['file_content'] = content
|
481 |
-
|
482 |
-
if content is not None:
|
483 |
-
print(f"Successfully read content for: {item['name']} ({len(content)} characters)")
|
484 |
-
else:
|
485 |
-
print(f"Unable to read content for: {item['name']}")
|
486 |
-
except Exception as e:
|
487 |
-
print(f"Error reading content for {item['name']}: {e}")
|
488 |
-
item['file_content'] = None
|
489 |
-
|
490 |
-
if items:
|
491 |
-
print(f"Found {len(items)} items in '{folder_path}':")
|
492 |
-
for item in items:
|
493 |
-
if include_metadata and 'createdTimeFormatted' in item:
|
494 |
-
item_type = 'Folder' if item.get('mimeType') == self.MIME_TYPES['folder'] else item.get('fileType', 'Unknown')
|
495 |
-
print(f" - {item['name']} ({item_type}, Created: {item['createdTimeFormatted']})")
|
496 |
-
else:
|
497 |
-
item_type = 'Folder' if item.get('mimeType') == self.MIME_TYPES['folder'] else item.get('mimeType', 'Unknown')
|
498 |
-
print(f" - {item['name']} ({item_type})")
|
499 |
-
else:
|
500 |
-
print(f"No items found in '{folder_path}'")
|
501 |
-
|
502 |
-
return items
|
503 |
-
|
504 |
-
except HttpError as error:
|
505 |
-
print(f"Error listing items: {error}")
|
506 |
-
return []
|
507 |
-
|
508 |
-
def file_exists(self, file_name: str, folder_path: str) -> bool:
|
509 |
-
"""
|
510 |
-
Check if a file exists at the specified path in Google Drive.
|
511 |
-
|
512 |
-
Args:
|
513 |
-
file_name: Name of the file to check
|
514 |
-
folder_path: Path to the folder containing the file
|
515 |
-
|
516 |
-
Returns:
|
517 |
-
True if the file exists, False otherwise
|
518 |
-
"""
|
519 |
-
# Get the folder ID
|
520 |
-
folder_id = self.get_folder_id(folder_path)
|
521 |
-
if not folder_id:
|
522 |
-
print(f"Could not find folder: '{folder_path}'")
|
523 |
-
return False
|
524 |
-
|
525 |
-
# Check if the file exists in this folder
|
526 |
-
query = f"'{folder_id}' in parents and name = '{file_name}'"
|
527 |
-
|
528 |
-
try:
|
529 |
-
results = self.service.files().list(
|
530 |
-
q=query,
|
531 |
-
spaces='drive',
|
532 |
-
fields='files(id, name)',
|
533 |
-
pageSize=1
|
534 |
-
).execute()
|
535 |
-
|
536 |
-
files = results.get('files', [])
|
537 |
-
if not files:
|
538 |
-
# Try a more flexible search
|
539 |
-
query = query.replace(f"name = '{file_name}'", f"name contains '{file_name}'")
|
540 |
-
results = self.service.files().list(
|
541 |
-
q=query,
|
542 |
-
spaces='drive',
|
543 |
-
fields='files(id, name)',
|
544 |
-
pageSize=10
|
545 |
-
).execute()
|
546 |
-
|
547 |
-
files = results.get('files', [])
|
548 |
-
if not files:
|
549 |
-
print(f"File '{file_name}' does not exist in '{folder_path}'")
|
550 |
-
return False
|
551 |
-
|
552 |
-
# File exists
|
553 |
-
print(f"File '{file_name}' exists in '{folder_path}'")
|
554 |
-
return True
|
555 |
-
|
556 |
-
except HttpError as error:
|
557 |
-
print(f"Error checking if file exists: {error}")
|
558 |
-
return False
|
559 |
-
|
560 |
-
def get_file_modified_time(self, file_name: str, folder_path: str) -> Optional[datetime.datetime]:
|
561 |
-
"""
|
562 |
-
Get the last modified time of a file.
|
563 |
-
|
564 |
-
Args:
|
565 |
-
file_name: Name of the file
|
566 |
-
folder_path: Path to the folder containing the file
|
567 |
-
|
568 |
-
Returns:
|
569 |
-
The last modified time as a datetime object, or None if the file doesn't exist
|
570 |
-
"""
|
571 |
-
# Get the file metadata
|
572 |
-
file = self.get_file(file_name, folder_path, include_metadata=True)
|
573 |
-
if not file:
|
574 |
-
return None
|
575 |
-
|
576 |
-
# Return the modified time
|
577 |
-
return file.get('modifiedTime')
|
578 |
-
|
579 |
-
def read_file_from_object(self, file_object: Dict[str, Any]) -> Optional[str]:
|
580 |
-
"""
|
581 |
-
Read the contents of a file using a file object.
|
582 |
-
|
583 |
-
Args:
|
584 |
-
file_object: A Google file object with at least 'id' and 'mimeType' fields
|
585 |
-
|
586 |
-
Returns:
|
587 |
-
The file contents as a string, or None if the file couldn't be read
|
588 |
-
"""
|
589 |
-
file_id = file_object.get('id')
|
590 |
-
mime_type = file_object.get('mimeType')
|
591 |
-
|
592 |
-
if not file_id or not mime_type:
|
593 |
-
print("File object is missing 'id' or 'mimeType' fields.")
|
594 |
-
return None
|
595 |
-
|
596 |
-
try:
|
597 |
-
# Read the file based on its type
|
598 |
-
if mime_type == self.MIME_TYPES['document']:
|
599 |
-
# Export Google Doc as plain text
|
600 |
-
response = self.service.files().export(
|
601 |
-
fileId=file_id,
|
602 |
-
mimeType='text/plain'
|
603 |
-
).execute()
|
604 |
-
return response.decode('utf-8')
|
605 |
-
|
606 |
-
else:
|
607 |
-
# Download regular files
|
608 |
-
request = self.service.files().get_media(fileId=file_id)
|
609 |
-
fh = io.BytesIO()
|
610 |
-
downloader = MediaIoBaseDownload(fh, request)
|
611 |
-
|
612 |
-
done = False
|
613 |
-
while not done:
|
614 |
-
_, done = downloader.next_chunk()
|
615 |
-
|
616 |
-
return fh.getvalue().decode('utf-8')
|
617 |
-
|
618 |
-
except HttpError as error:
|
619 |
-
print(f"Error reading file: {error}")
|
620 |
-
return None
|
621 |
-
except Exception as e:
|
622 |
-
print(f"Error decoding file content: {e}")
|
623 |
-
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/ctp_slack_bot/services/google_drive_basic_usage.py
DELETED
@@ -1,178 +0,0 @@
|
|
1 |
-
"""
|
2 |
-
Basic Usage Examples for EasyGoogleDrive
|
3 |
-
|
4 |
-
This file demonstrates how to use the EasyGoogleDrive class to interact with Google Drive.
|
5 |
-
It provides examples of the main functionality without printing all results to keep the output clean.
|
6 |
-
"""
|
7 |
-
|
8 |
-
from google_drive_access import EasyGoogleDrive
|
9 |
-
import datetime
|
10 |
-
|
11 |
-
def main():
    """Run the EasyGoogleDrive usage examples, printing a short preview of each.

    Creates an ``EasyGoogleDrive`` client and walks through nine examples:
    listing folders, listing files, fetching one file, listing all items,
    checking existence, reading the modified time, and three ways of
    reading file content. Listings are truncated to the first three
    entries to keep the output short.
    """
    # Initialize the Google Drive client.
    # This will prompt for authentication the first time it's run.
    drive = EasyGoogleDrive()

    # Example folder paths - replace with your actual folder paths.
    folder_path = "Spring-2025-BAI"
    subfolder_path = "Spring-2025-BAI/transcripts"

    print("=== Basic Usage Examples for EasyGoogleDrive ===\n")

    # Example 1: List folders in a directory
    print("Example 1: Listing folders in a directory")
    print("----------------------------------------")
    folders = drive.get_folders_in_folder(folder_path)

    # Print only the first 3 folders (if any exist)
    if folders:
        print(f"Found {len(folders)} folders. Showing first 3:")
        for folder in folders[:3]:
            print(f" - {folder['name']} (Created: {folder.get('createdTimeFormatted', 'Unknown')})")
        if len(folders) > 3:
            print(f" ... and {len(folders) - 3} more folders")
    else:
        print("No folders found.")
    print()

    # Example 2: List files in a directory
    print("Example 2: Listing files in a directory")
    print("--------------------------------------")
    files = drive.get_files_in_folder(subfolder_path)

    # Print only the first 3 files (if any exist)
    if files:
        print(f"Found {len(files)} files. Showing first 3:")
        for file in files[:3]:
            file_type = file.get('fileType', 'Unknown')
            created_time = file.get('createdTimeFormatted', 'Unknown')
            print(f" - {file['name']} ({file_type}, Created: {created_time})")
        if len(files) > 3:
            print(f" ... and {len(files) - 3} more files")
    else:
        print("No files found.")
    print()

    # Example 3: Get a specific file
    print("Example 3: Getting a specific file")
    print("--------------------------------")
    # Reuse the LAST file listed in Example 2 (files[-1]); fall back to a
    # placeholder name when the listing was empty.
    file_name = files[-1]['name'] if files else "example.txt"

    file = drive.get_file(file_name, subfolder_path, include_metadata=True)
    if file:
        print(f"File found: {file['name']}")
        print(f" Type: {file.get('fileType', 'Unknown')}")
        print(f" Created: {file.get('createdTimeFormatted', 'Unknown')}")
        print(f" Modified: {file.get('modifiedTimeFormatted', 'Unknown')}")
        print(f" Size: {file.get('sizeFormatted', 'Unknown')}")
    else:
        print(f"File '{file_name}' not found.")
    print()

    # Example 4: Get all items in a folder (files and folders)
    print("Example 4: Getting all items in a folder")
    print("--------------------------------------")
    all_items = drive.get_all_files_in_folder(folder_path)

    # Print only the first 3 items (if any exist)
    if all_items:
        print(f"Found {len(all_items)} items. Showing first 3:")
        for item in all_items[:3]:
            # Folders are recognised by MIME type; everything else reports its fileType.
            if item.get('mimeType') == drive.MIME_TYPES['folder']:
                item_type = "Folder"
            else:
                item_type = item.get('fileType', 'Unknown')
            created_time = item.get('createdTimeFormatted', 'Unknown')
            print(f" - {item['name']} ({item_type}, Created: {created_time})")
        if len(all_items) > 3:
            print(f" ... and {len(all_items) - 3} more items")
    else:
        print("No items found.")
    print()

    # Example 5: Check if a file exists (same file name as Example 3)
    print("Example 5: Checking if a file exists")
    print("----------------------------------")
    exists = drive.file_exists(file_name, subfolder_path)
    print(f"File '{file_name}' {'exists' if exists else 'does not exist'} in '{subfolder_path}'.")
    print()

    # Example 6: Get file modified time (same file name as Example 3)
    print("Example 6: Getting file modified time")
    print("-----------------------------------")
    modified_time = drive.get_file_modified_time(file_name, subfolder_path)
    if modified_time:
        print(f"File '{file_name}' was last modified on: {modified_time}")
    else:
        print(f"Could not get modified time for '{file_name}'.")
    print()

    # Example 7: Get file with content (same file name as Example 3)
    print("Example 7: Getting file with content")
    print("----------------------------------")
    file_with_content_obj = drive.get_file(file_name, subfolder_path, include_content=True)
    if file_with_content_obj and 'file_content' in file_with_content_obj:
        content = file_with_content_obj['file_content']
        if content:
            print(f"File '{file_name}' content (first 100 chars):")
            print(f" {content[:100]}...")
        else:
            print(f"File '{file_name}' has no content or content could not be read.")
    else:
        print(f"File '{file_name}' not found or content could not be retrieved.")
    print()

    # Example 8: Get contents of all files in a folder
    print("Example 8: Getting contents of all files in a folder")
    print("------------------------------------------------")
    # Get all files with content
    all_files_with_content = drive.get_files_in_folder(subfolder_path, include_content=True)

    if all_files_with_content:
        print(f"Found {len(all_files_with_content)} files. Showing content preview for first 3:")
        for file in all_files_with_content[:3]:
            print(f" File: {file['name']}")
            if 'file_content' in file and file['file_content']:
                content = file['file_content']
                print(f" Content preview: {content[:50]}...")
            else:
                print(" No content available or file is not text-based.")

        if len(all_files_with_content) > 3:
            print(f" ... and {len(all_files_with_content) - 3} more files with content")
    else:
        print("No files found or no content could be retrieved.")
    print()

    # Example 9: Get content from a specific file using read_file_from_object
    print("Example 9: Getting content from a specific file using read_file_from_object")
    print("------------------------------------------------------------------------")
    # Get a file object first
    file_obj = drive.get_file(file_name, subfolder_path)

    if file_obj:
        # Read the content directly from the file object
        content = drive.read_file_from_object(file_obj)
        if content:
            print(f"File '{file_obj['name']}' content (first 100 chars):")
            print(f" {content[:100]}...")
        else:
            print(f"File '{file_obj['name']}' has no content or content could not be read.")
    else:
        print(f"File '{file_name}' not found.")
    print()

    print("=== End of Examples ===")


if __name__ == "__main__":
    main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/ctp_slack_bot/services/schedule_service.py
CHANGED
@@ -2,6 +2,7 @@ from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
|
2 |
from apscheduler.triggers.cron import CronTrigger
|
3 |
from asyncio import create_task, iscoroutinefunction, to_thread
|
4 |
from datetime import datetime
|
|
|
5 |
from loguru import logger
|
6 |
from pydantic import BaseModel, PrivateAttr
|
7 |
from pytz import timezone
|
@@ -47,11 +48,21 @@ class ScheduleService(BaseModel):
|
|
47 |
|
48 |
def start(self: Self) -> None:
|
49 |
self._scheduler.start()
|
50 |
-
logger.info("Started scheduler.")
|
51 |
|
52 |
def stop(self: Self) -> None:
|
53 |
if self._scheduler.running:
|
54 |
-
self._scheduler.shutdown(
|
55 |
-
logger.info("Shut down scheduler.")
|
56 |
else:
|
57 |
logger.debug("The scheduler is not running. There is no scheduler to shut down.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
from apscheduler.triggers.cron import CronTrigger
|
3 |
from asyncio import create_task, iscoroutinefunction, to_thread
|
4 |
from datetime import datetime
|
5 |
+
from dependency_injector.resources import Resource
|
6 |
from loguru import logger
|
7 |
from pydantic import BaseModel, PrivateAttr
|
8 |
from pytz import timezone
|
|
|
48 |
|
49 |
def start(self: Self) -> None:
|
50 |
self._scheduler.start()
|
|
|
51 |
|
52 |
def stop(self: Self) -> None:
|
53 |
if self._scheduler.running:
|
54 |
+
self._scheduler.shutdown()
|
|
|
55 |
else:
|
56 |
logger.debug("The scheduler is not running. There is no scheduler to shut down.")
|
57 |
+
|
58 |
+
class ScheduleServiceResource(Resource):
    """dependency_injector ``Resource`` that owns a ScheduleService's start/stop lifecycle."""

    def init(self: Self, settings: Settings) -> ScheduleService:
        """Create a ScheduleService from ``settings``, start it, and return it."""
        logger.info("Starting scheduler…")
        service = ScheduleService(settings=settings)
        service.start()
        return service

    def shutdown(self: Self, schedule_service: ScheduleService) -> None:
        """Stop the given scheduler service and log that it has stopped."""
        schedule_service.stop()
        logger.info("Stopped scheduler.")
|
src/ctp_slack_bot/utils/__init__.py
CHANGED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
from ctp_slack_bot.utils.secret_stripper import sanitize_mongo_db_uri
|
src/ctp_slack_bot/utils/secret_stripper.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from urllib.parse import urlparse, urlunparse
|
2 |
+
|
3 |
+
def sanitize_mongo_db_uri(uri: str) -> str:
    """Return *uri* with any ``user:password@`` credentials removed.

    Intended for producing a connection string that is safe to log. Only
    the userinfo portion of the network location is dropped; the scheme,
    host list, ports, path, and query string are passed through unchanged.

    The host part is taken verbatim from the parsed ``netloc`` rather than
    rebuilt from ``hostname``/``port``: ``port`` is an ``int`` (which
    ``str.join`` rejects), it raises ``ValueError`` for comma-separated
    multi-host URIs, and ``hostname`` loses IPv6 brackets. As a result the
    host's letter case is preserved instead of being lower-cased.

    Args:
        uri: A MongoDB connection string, e.g. ``mongodb://`` or
            ``mongodb+srv://``.

    Returns:
        The same URI without the username and password.
    """
    parts = urlparse(uri)
    # Everything after the last "@" is host information; when there is no
    # "@", rpartition returns the whole netloc as the final element.
    sanitized_netloc = parts.netloc.rpartition("@")[2]
    return urlunparse((parts.scheme, sanitized_netloc, parts.path, parts.params, parts.query, parts.fragment))
|