LiKenun committed on
Commit
6deb275
·
1 Parent(s): b1d0b85

`GoogleDriveService`

Browse files
.env.template CHANGED
@@ -3,15 +3,9 @@
3
  # APScheduler Configuration
4
  SCHEDULER_TIMEZONE=UTC
5
 
6
- # API Configuration
7
- API_HOST=0.0.0.0
8
- API_PORT=8000
9
-
10
  # Slack Configuration
11
  SLACK_BOT_TOKEN=🪙
12
- SLACK_SIGNING_SECRET=🔐
13
  SLACK_APP_TOKEN=🦥
14
- SLACK_USER_TOKEN=🦊
15
 
16
  # Vectorization Configuration
17
  EMBEDDING_MODEL=🌮
@@ -34,3 +28,10 @@ CHAT_MODEL=gpt-3.5-turbo
34
  MAX_TOKENS=150
35
  TEMPERATURE=0.8
36
  SYSTEM_PROMPT="You are a helpful teaching assistant for a data science class.\nBased on the student's question, you will be given context retrieved from class transcripts and materials to answer their question.\nYour responses should be:\n\n1. Accurate and based on the class content\n2. Clear and educational\n3. Concise but complete\nIf you're unsure about something, acknowledge it and suggest asking the professor."
 
 
 
 
 
 
 
 
3
  # APScheduler Configuration
4
  SCHEDULER_TIMEZONE=UTC
5
 
 
 
 
 
6
  # Slack Configuration
7
  SLACK_BOT_TOKEN=🪙
 
8
  SLACK_APP_TOKEN=🦥
 
9
 
10
  # Vectorization Configuration
11
  EMBEDDING_MODEL=🌮
 
28
  MAX_TOKENS=150
29
  TEMPERATURE=0.8
30
  SYSTEM_PROMPT="You are a helpful teaching assistant for a data science class.\nBased on the student's question, you will be given context retrieved from class transcripts and materials to answer their question.\nYour responses should be:\n\n1. Accurate and based on the class content\n2. Clear and educational\n3. Concise but complete\nIf you're unsure about something, acknowledge it and suggest asking the professor."
31
+
32
+ # Google Drive Configuration
33
+ GOOGLE_PROJECT_ID=insufferable-slacker-123456
34
+ GOOGLE_PRIVATE_KEY_ID=1a2b3c4d5e6f748891091d21304e506674829507
35
+ GOOGLE_PRIVATE_KEY="-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASC...\n-----END PRIVATE KEY-----\n"
36
+ GOOGLE_CLIENT_EMAIL=botty-bot@insufferable-slacker-123456.iam.gserviceaccount.com
37
+ GOOGLE_CLIENT_ID=123456789012345678901
pyproject.toml CHANGED
@@ -36,7 +36,9 @@ dependencies = [
36
  "slack_bolt>=1.23.0",
37
  "motor>=3.7.0",
38
  "openai>=1.70.0"
39
-
 
 
40
  ]
41
 
42
  [project.optional-dependencies]
 
36
  "slack_bolt>=1.23.0",
37
  "motor>=3.7.0",
38
  "openai>=1.70.0",
39
+ "google-api-python-client>=2.167.0",
40
+ "google-auth>=2.39.0",
41
+ "google-auth-oauthlib>=1.2.1"
42
  ]
43
 
44
  [project.optional-dependencies]
src/ctp_slack_bot/core/config.py CHANGED
@@ -39,7 +39,7 @@ class Settings(BaseSettings):
39
  SCORE_THRESHOLD: NonNegativeFloat
40
 
41
  # Hugging Face Configuration
42
- HF_API_TOKEN: Optional[SecretStr] = None
43
 
44
  # OpenAI Configuration
45
  OPENAI_API_KEY: SecretStr
@@ -48,6 +48,18 @@ class Settings(BaseSettings):
48
  TEMPERATURE: NonNegativeFloat
49
  SYSTEM_PROMPT: str
50
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  model_config = SettingsConfigDict(
52
  env_file=".env",
53
  env_file_encoding="utf-8",
 
39
  SCORE_THRESHOLD: NonNegativeFloat
40
 
41
  # Hugging Face Configuration
42
+ HF_API_TOKEN: Optional[SecretStr] = None # TODO: Currently, this is unused.
43
 
44
  # OpenAI Configuration
45
  OPENAI_API_KEY: SecretStr
 
48
  TEMPERATURE: NonNegativeFloat
49
  SYSTEM_PROMPT: str
50
 
51
+ # Google Drive Configuration
52
+ GOOGLE_PROJECT_ID: str
53
+ GOOGLE_PRIVATE_KEY_ID: SecretStr
54
+ GOOGLE_PRIVATE_KEY: SecretStr
55
+ GOOGLE_CLIENT_ID: str
56
+ GOOGLE_CLIENT_EMAIL: str
57
+ GOOGLE_AUTH_URI: str = "https://accounts.google.com/o/oauth2/auth"
58
+ GOOGLE_TOKEN_URI: str = "https://oauth2.googleapis.com/token"
59
+ GOOGLE_AUTH_PROVIDER_CERT_URL: str = "https://www.googleapis.com/oauth2/v1/certs"
60
+ GOOGLE_CLIENT_CERT_URL: str = "https://www.googleapis.com/robot/v1/metadata/x509/ctp-slack-bot-714%40voltaic-reducer-294821.iam.gserviceaccount.com"
61
+ GOOGLE_UNIVERSE_DOMAIN: str = "googleapis.com"
62
+
63
  model_config = SettingsConfigDict(
64
  env_file=".env",
65
  env_file_encoding="utf-8",
src/ctp_slack_bot/services/google_drive_service.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime
2
+ from google.oauth2 import service_account
3
+ from googleapiclient.discovery import build
4
+ from googleapiclient.http import MediaIoBaseDownload
5
+ from googleapiclient.errors import HttpError
6
+ from io import BytesIO
7
+ from loguru import logger
8
+ from pydantic import BaseModel, PrivateAttr
9
+ from typing import Collection, Dict, List, Optional, Self
10
+
11
+ from ctp_slack_bot.core import Settings
12
+
13
+ FOLDER_MIME_TYPE: str = "application/vnd.google-apps.folder"
14
+ ROOT_FOLDER_NAME: str = "root"
15
+
16
class GoogleDriveService(BaseModel):
    """Service for interacting with Google Drive.

    A Drive v3 client is built at construction time from the service-account
    values held in ``settings``. Folders are addressed by slash-separated
    paths relative to the Drive root; resolved folder IDs are memoized per
    instance.
    """

    settings: Settings
    _google_drive_client: PrivateAttr = PrivateAttr()
    # Maps a normalized folder path ("a/b/c") to its Drive folder ID.
    _folder_cache: Dict[str, str] = PrivateAttr(default_factory=dict)

    class Config:
        frozen = True

    def __init__(self: Self, **data) -> None:
        """Validate the model fields and build the Drive client.

        Args:
            **data: Field values for the model (``settings``).
        """
        super().__init__(**data)
        # The key and key ID are declared as SecretStr in Settings, so they
        # must be unwrapped before being handed to google-auth, which expects
        # plain strings. Literal "\n" sequences are decoded as well, because a
        # PEM key supplied through a raw environment variable commonly arrives
        # with escaped newlines; this is a no-op for already-decoded keys.
        private_key = self.settings.GOOGLE_PRIVATE_KEY.get_secret_value().replace("\\n", "\n")
        credentials = service_account.Credentials.from_service_account_info({
            "type": "service_account",
            "project_id": self.settings.GOOGLE_PROJECT_ID,
            "private_key_id": self.settings.GOOGLE_PRIVATE_KEY_ID.get_secret_value(),
            "private_key": private_key,
            "client_email": self.settings.GOOGLE_CLIENT_EMAIL,
            "client_id": self.settings.GOOGLE_CLIENT_ID,
            "token_uri": self.settings.GOOGLE_TOKEN_URI,
        }, scopes=['https://www.googleapis.com/auth/drive'])
        self._google_drive_client = build('drive', 'v3', credentials=credentials)
        logger.debug("Created {}", self.__class__.__name__)

    @staticmethod
    def _escape_query_value(value: str) -> str:
        """Escape a string for use inside a single-quoted Drive query literal."""
        # Backslashes first, so the backslash added for quotes is not doubled.
        return value.replace("\\", "\\\\").replace("'", "\\'")

    def _resolve_folder_id(self: Self, folder_path: str) -> Optional[str]:
        """Resolve a folder path to a Google Drive ID.

        Args:
            folder_path: Slash-separated folder path; empty segments (leading,
                trailing or doubled slashes) are ignored.

        Returns:
            The folder ID, ``ROOT_FOLDER_NAME`` for an empty path, or ``None``
            when a segment does not exist or the API call fails.
        """
        if not folder_path:
            return ROOT_FOLDER_NAME

        parts = [part for part in folder_path.split('/') if part]
        if not parts:
            return ROOT_FOLDER_NAME

        # Key the cache on the normalized path so "a/b" and "a/b/" share an entry.
        cache_key = '/'.join(parts)
        if cache_key in self._folder_cache:
            return self._folder_cache[cache_key]

        current_id = ROOT_FOLDER_NAME
        for part in parts:
            query = (
                f"name='{self._escape_query_value(part)}' "
                f"and mimeType='{FOLDER_MIME_TYPE}' "
                f"and '{current_id}' in parents"
            )
            try:
                results = self._google_drive_client.files().list(
                    q=query,
                    fields='files(id,name)',
                    supportsAllDrives=True,
                    includeItemsFromAllDrives=True
                ).execute()
            except HttpError as e:
                logger.error("Error resolving folder path, {}: {}", folder_path, e)
                return None
            files = results.get('files')
            if not files:
                return None
            # If several folders share the name, the first match is used.
            current_id = files[0]['id']

        self._folder_cache[cache_key] = current_id
        return current_id

    def list_directory(self: Self, folder_path: str) -> List[Dict]:
        """List contents of a directory with basic metadata.

        Args:
            folder_path: Slash-separated path of the folder to list.

        Returns:
            One dict per child with keys ``id``, ``name``, ``modified`` and
            ``mime_type``. An empty list is returned when the folder cannot
            be resolved or the API call fails.
        """
        folder_id = self._resolve_folder_id(folder_path)
        if not folder_id:
            return []

        items: List[Dict] = []
        page_token: Optional[str] = None
        try:
            # Follow nextPageToken so large folders are not silently truncated.
            while True:
                results = self._google_drive_client.files().list(
                    q=f"'{folder_id}' in parents",
                    fields='nextPageToken,files(id,name,mimeType,modifiedTime)',
                    supportsAllDrives=True,
                    includeItemsFromAllDrives=True,
                    pageSize=1000,
                    pageToken=page_token
                ).execute()
                items.extend({
                    'id': f['id'],
                    'name': f['name'],
                    'modified': f['modifiedTime'],
                    'mime_type': f['mimeType']
                } for f in results.get('files', []))
                page_token = results.get('nextPageToken')
                if not page_token:
                    break
        except HttpError as e:
            logger.error("Error listing folder by path, {}: {}", folder_path, e)
            return []
        return items

    def get_metadata(self: Self, item_path: str) -> Optional[Dict]:
        """Get metadata for a specific file/folder by path.

        Args:
            item_path: Slash-separated path; the last segment is the item
                name and everything before it is the containing folder.

        Returns:
            A dict with keys ``id``, ``name``, ``modified`` and ``mime_type``
            for the first match, or ``None`` when nothing matches, the parent
            folder cannot be resolved, or the API call fails.
        """
        # A trailing slash would otherwise yield an empty item name.
        folder_path, _, item_name = item_path.rstrip('/').rpartition('/')
        if not item_name:
            return None

        # An empty folder_path resolves to the Drive root.
        folder_id = self._resolve_folder_id(folder_path)
        if not folder_id:
            return None

        try:
            results = self._google_drive_client.files().list(
                q=f"name='{self._escape_query_value(item_name)}' and '{folder_id}' in parents",
                fields='files(id,name,mimeType,modifiedTime)',
                supportsAllDrives=True,
                includeItemsFromAllDrives=True,
                pageSize=1
            ).execute()

            if files := results.get('files'):
                return {
                    'id': files[0]['id'],
                    'name': files[0]['name'],
                    'modified': files[0]['modifiedTime'],
                    'mime_type': files[0]['mimeType']
                }
        except HttpError as e:
            logger.error("Error getting metadata for item by path, {}: {}", item_path, e)

        return None

    def read_file_by_id(self: Self, file_id: str) -> Optional[bytes]:
        """Read contents of a file by its unique identifier.

        Args:
            file_id: Drive ID of the file to download.

        Returns:
            The file's bytes, or ``None`` when the API call fails.
        """
        try:
            # The client lives in the private attribute; there is no
            # ``service`` attribute on this model.
            request = self._google_drive_client.files().get_media(
                fileId=file_id,
                supportsAllDrives=True
            )
            buffer = BytesIO()
            downloader = MediaIoBaseDownload(buffer, request)
            done = False
            while not done:
                _, done = downloader.next_chunk()
            return buffer.getvalue()
        except HttpError as e:
            logger.error("Error reading file by ID, {}: {}", file_id, e)
            return None