Spaces:
Runtime error
Runtime error
`GoogleDriveService`
Browse files- .env.template +7 -6
- pyproject.toml +3 -1
- src/ctp_slack_bot/core/config.py +13 -1
- src/ctp_slack_bot/services/google_drive_service.py +144 -0
.env.template
CHANGED
@@ -3,15 +3,9 @@
|
|
3 |
# APScheduler Configuration
|
4 |
SCHEDULER_TIMEZONE=UTC
|
5 |
|
6 |
-
# API Configuration
|
7 |
-
API_HOST=0.0.0.0
|
8 |
-
API_PORT=8000
|
9 |
-
|
10 |
# Slack Configuration
|
11 |
SLACK_BOT_TOKEN=🪙
|
12 |
-
SLACK_SIGNING_SECRET=🔐
|
13 |
SLACK_APP_TOKEN=🦥
|
14 |
-
SLACK_USER_TOKEN=🦊
|
15 |
|
16 |
# Vectorization Configuration
|
17 |
EMBEDDING_MODEL=🌮
|
@@ -34,3 +28,10 @@ CHAT_MODEL=gpt-3.5-turbo
|
|
34 |
MAX_TOKENS=150
|
35 |
TEMPERATURE=0.8
|
36 |
SYSTEM_PROMPT="You are a helpful teaching assistant for a data science class.\nBased on the student's question, you will be given context retrieved from class transcripts and materials to answer their question.\nYour responses should be:\n\n1. Accurate and based on the class content\n2. Clear and educational\n3. Concise but complete\nIf you're unsure about something, acknowledge it and suggest asking the professor."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
# APScheduler Configuration
|
4 |
SCHEDULER_TIMEZONE=UTC
|
5 |
|
|
|
|
|
|
|
|
|
6 |
# Slack Configuration
|
7 |
SLACK_BOT_TOKEN=🪙
|
|
|
8 |
SLACK_APP_TOKEN=🦥
|
|
|
9 |
|
10 |
# Vectorization Configuration
|
11 |
EMBEDDING_MODEL=🌮
|
|
|
28 |
MAX_TOKENS=150
|
29 |
TEMPERATURE=0.8
|
30 |
SYSTEM_PROMPT="You are a helpful teaching assistant for a data science class.\nBased on the student's question, you will be given context retrieved from class transcripts and materials to answer their question.\nYour responses should be:\n\n1. Accurate and based on the class content\n2. Clear and educational\n3. Concise but complete\nIf you're unsure about something, acknowledge it and suggest asking the professor."
|
31 |
+
|
32 |
+
# Google Drive Configuration
|
33 |
+
GOOGLE_PROJECT_ID=insufferable-slacker-123456
|
34 |
+
GOOGLE_PRIVATE_KEY_ID=1a2b3c4d5e6f748891091d21304e506674829507
|
35 |
+
GOOGLE_PRIVATE_KEY="-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASC...\n-----END PRIVATE KEY-----\n"
|
36 |
+
GOOGLE_CLIENT_EMAIL=botty-bot@insufferable-slacker-123456.iam.gserviceaccount.com
|
37 |
+
GOOGLE_CLIENT_ID=123456789012345678901
|
pyproject.toml
CHANGED
@@ -36,7 +36,9 @@ dependencies = [
|
|
36 |
"slack_bolt>=1.23.0",
|
37 |
"motor>=3.7.0",
|
38 |
"openai>=1.70.0"
|
39 |
-
|
|
|
|
|
40 |
]
|
41 |
|
42 |
[project.optional-dependencies]
|
|
|
36 |
"slack_bolt>=1.23.0",
|
37 |
"motor>=3.7.0",
|
38 |
"openai>=1.70.0",
|
39 |
+
"google-api-python-client>=2.167.0",
|
40 |
+
"google-auth>=2.39.0",
|
41 |
+
"google-auth-oauthlib>=1.2.1"
|
42 |
]
|
43 |
|
44 |
[project.optional-dependencies]
|
src/ctp_slack_bot/core/config.py
CHANGED
@@ -39,7 +39,7 @@ class Settings(BaseSettings):
|
|
39 |
SCORE_THRESHOLD: NonNegativeFloat
|
40 |
|
41 |
# Hugging Face Configuration
|
42 |
-
HF_API_TOKEN: Optional[SecretStr] = None
|
43 |
|
44 |
# OpenAI Configuration
|
45 |
OPENAI_API_KEY: SecretStr
|
@@ -48,6 +48,18 @@ class Settings(BaseSettings):
|
|
48 |
TEMPERATURE: NonNegativeFloat
|
49 |
SYSTEM_PROMPT: str
|
50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
model_config = SettingsConfigDict(
|
52 |
env_file=".env",
|
53 |
env_file_encoding="utf-8",
|
|
|
39 |
SCORE_THRESHOLD: NonNegativeFloat
|
40 |
|
41 |
# Hugging Face Configuration
|
42 |
+
HF_API_TOKEN: Optional[SecretStr] = None # TODO: Currently, this is unused.
|
43 |
|
44 |
# OpenAI Configuration
|
45 |
OPENAI_API_KEY: SecretStr
|
|
|
48 |
TEMPERATURE: NonNegativeFloat
|
49 |
SYSTEM_PROMPT: str
|
50 |
|
51 |
+
# Google Drive Configuration
|
52 |
+
GOOGLE_PROJECT_ID: str
|
53 |
+
GOOGLE_PRIVATE_KEY_ID: SecretStr
|
54 |
+
GOOGLE_PRIVATE_KEY: SecretStr
|
55 |
+
GOOGLE_CLIENT_ID: str
|
56 |
+
GOOGLE_CLIENT_EMAIL: str
|
57 |
+
GOOGLE_AUTH_URI: str = "https://accounts.google.com/o/oauth2/auth"
|
58 |
+
GOOGLE_TOKEN_URI: str = "https://oauth2.googleapis.com/token"
|
59 |
+
GOOGLE_AUTH_PROVIDER_CERT_URL: str = "https://www.googleapis.com/oauth2/v1/certs"
|
60 |
+
GOOGLE_CLIENT_CERT_URL: str = "https://www.googleapis.com/robot/v1/metadata/x509/ctp-slack-bot-714%40voltaic-reducer-294821.iam.gserviceaccount.com"
|
61 |
+
GOOGLE_UNIVERSE_DOMAIN: str = "googleapis.com"
|
62 |
+
|
63 |
model_config = SettingsConfigDict(
|
64 |
env_file=".env",
|
65 |
env_file_encoding="utf-8",
|
src/ctp_slack_bot/services/google_drive_service.py
ADDED
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from datetime import datetime
|
2 |
+
from google.oauth2 import service_account
|
3 |
+
from googleapiclient.discovery import build
|
4 |
+
from googleapiclient.http import MediaIoBaseDownload
|
5 |
+
from googleapiclient.errors import HttpError
|
6 |
+
from io import BytesIO
|
7 |
+
from loguru import logger
|
8 |
+
from pydantic import BaseModel, PrivateAttr
|
9 |
+
from typing import Collection, Dict, List, Optional, Self
|
10 |
+
|
11 |
+
from ctp_slack_bot.core import Settings
|
12 |
+
|
13 |
+
# MIME type value used in Drive queries in this module to match folder items only.
FOLDER_MIME_TYPE: str = "application/vnd.google-apps.folder"
|
14 |
+
# Parent ID that path resolution starts from (and falls back to for an empty path).
# NOTE(review): presumably Drive's alias for the top-level folder — confirm against the Drive API docs.
ROOT_FOLDER_NAME: str = "root"
|
15 |
+
|
16 |
+
class GoogleDriveService(BaseModel):
    """Service for interacting with Google Drive.

    Builds a Drive v3 API client from the service-account fields held in
    ``settings`` and offers path-based helpers on top of it: listing a
    folder, fetching metadata for a single item, and downloading a file's
    bytes by ID.

    All public methods swallow ``HttpError`` (after logging it) and signal
    failure through an empty list or ``None`` instead of raising.
    """

    settings: Settings
    _google_drive_client: PrivateAttr = PrivateAttr()
    # Maps a folder path that was resolved successfully to its Drive ID so
    # repeated lookups of the same path skip the API round trips.
    _folder_cache: PrivateAttr = PrivateAttr(default_factory=dict)

    class Config:
        frozen=True

    def __init__(self: Self, **data) -> None:
        """Validate the model fields, then create the Drive API client.

        Args:
            **data: Field values for the model; ``settings`` supplies the
                service-account credentials.
        """
        super().__init__(**data)
        credentials = service_account.Credentials.from_service_account_info({
            "type": "service_account",
            "project_id": self.settings.GOOGLE_PROJECT_ID,
            # These two settings are declared as SecretStr; the credential
            # loader needs the plain string values, so unwrap them here.
            "private_key_id": self.settings.GOOGLE_PRIVATE_KEY_ID.get_secret_value(),
            "private_key": self.settings.GOOGLE_PRIVATE_KEY.get_secret_value(),
            "client_email": self.settings.GOOGLE_CLIENT_EMAIL,
            "client_id": self.settings.GOOGLE_CLIENT_ID,
            "token_uri": self.settings.GOOGLE_TOKEN_URI,
        }, scopes=['https://www.googleapis.com/auth/drive'])
        # NOTE(review): only read calls are issued by this class; a read-only
        # scope may be sufficient — confirm before narrowing.
        self._google_drive_client = build('drive', 'v3', credentials=credentials)
        logger.debug("Created {}", self.__class__.__name__)

    @staticmethod
    def _escape_query_value(value: str) -> str:
        """Escape backslashes and single quotes for use inside a quoted Drive query value."""
        return value.replace("\\", "\\\\").replace("'", "\\'")

    @staticmethod
    def _to_metadata(item: Dict) -> Dict:
        """Convert a Drive file resource into the metadata dict this service returns."""
        return {
            'id': item['id'],
            'name': item['name'],
            'modified': item['modifiedTime'],
            'mime_type': item['mimeType']
        }

    def _resolve_folder_id(self: Self, folder_path: str) -> Optional[str]:
        """Resolve a folder path to a Google Drive ID.

        The path is split on ``/`` and walked one segment at a time starting
        from ``ROOT_FOLDER_NAME``; empty segments (leading, trailing or
        doubled slashes) are skipped. When several folders share a name
        under the same parent, the first one returned by the API is used.

        Args:
            folder_path: Slash-separated folder path; empty means the root.

        Returns:
            The ID of the final folder, or ``None`` when a segment is not
            found or the API call fails.
        """

        if not folder_path:
            return ROOT_FOLDER_NAME

        if folder_path in self._folder_cache:
            return self._folder_cache[folder_path]

        current_id = ROOT_FOLDER_NAME
        for part in folder_path.split('/'):
            if not part:
                continue
            # Names may contain quotes; unescaped they would break the query.
            name = self._escape_query_value(part)
            try:
                results = self._google_drive_client.files().list(
                    q=f"name='{name}' and mimeType='{FOLDER_MIME_TYPE}' and '{current_id}' in parents",
                    fields='files(id,name)',
                    supportsAllDrives=True,
                    includeItemsFromAllDrives=True
                ).execute()
            except HttpError as e:
                logger.error("Error resolving folder path, {}: {}", folder_path, e)
                return None
            if not results.get('files'):
                return None
            current_id = results['files'][0]['id']

        # Only successful resolutions are cached, so a folder created later
        # is still found on the next call.
        self._folder_cache[folder_path] = current_id
        return current_id

    def list_directory(self: Self, folder_path: str) -> List[Dict]:
        """List contents of a directory with basic metadata.

        Args:
            folder_path: Slash-separated folder path; empty means the root.

        Returns:
            One dict per item with the keys ``id``, ``name``, ``modified``
            and ``mime_type``. Empty when the folder cannot be resolved or
            an API call fails.
        """

        folder_id = self._resolve_folder_id(folder_path)
        if not folder_id:
            return []

        files_api = self._google_drive_client.files()
        entries: List[Dict] = []
        try:
            request = files_api.list(
                q=f"'{folder_id}' in parents",
                # nextPageToken must be requested for list_next() to page.
                fields='nextPageToken,files(id,name,mimeType,modifiedTime)',
                supportsAllDrives=True,
                includeItemsFromAllDrives=True,
                pageSize=1000
            )
            # Follow every page so folders with more items than one page
            # holds are not silently truncated.
            while request is not None:
                results = request.execute()
                entries.extend(self._to_metadata(f) for f in results.get('files', []))
                request = files_api.list_next(request, results)
        except HttpError as e:
            logger.error("Error listing folder by path, {}: {}", folder_path, e)
            return []
        return entries

    def get_metadata(self: Self, item_path: str) -> Optional[Dict]:
        """Get metadata for a specific file/folder by path.

        Args:
            item_path: Slash-separated path; the last segment is the item
                name and everything before it is the containing folder. A
                path without ``/`` is looked up directly under the root.

        Returns:
            A dict with the keys ``id``, ``name``, ``modified`` and
            ``mime_type`` for the first match, or ``None`` when the folder
            or item is not found or the API call fails.
        """

        # An empty folder part (no slash, or a single leading slash)
        # resolves to the root folder.
        folder_path, _, item_name = item_path.rpartition('/')
        folder_id = self._resolve_folder_id(folder_path)
        if not folder_id:
            return None

        name = self._escape_query_value(item_name)
        try:
            results = self._google_drive_client.files().list(
                q=f"name='{name}' and '{folder_id}' in parents",
                fields='files(id,name,mimeType,modifiedTime)',
                supportsAllDrives=True,
                includeItemsFromAllDrives=True,
                pageSize=1
            ).execute()
        except HttpError as e:
            logger.error("Error getting metadata for item by path, {}: {}", item_path, e)
            return None

        if files := results.get('files'):
            return self._to_metadata(files[0])
        return None

    def read_file_by_id(self: Self, file_id: str) -> Optional[bytes]:
        """Read contents of a file by its unique identifier.

        Args:
            file_id: Drive ID of the file to download.

        Returns:
            The file's bytes, or ``None`` when the download fails with an
            ``HttpError``.
        """

        try:
            # The client lives in the private attribute; there is no
            # ``service`` attribute on this model.
            request = self._google_drive_client.files().get_media(fileId=file_id)
            buffer = BytesIO()
            downloader = MediaIoBaseDownload(buffer, request)
            done = False
            while not done:
                _, done = downloader.next_chunk()
            return buffer.getvalue()
        except HttpError as e:
            logger.error("Error reading file by ID, {}: {}", file_id, e)
            return None
|