LiKenun committed on
Commit
64566ca
·
1 Parent(s): b6ce87e

Partial code clean-up and logging enablement

Browse files
.dockerignore CHANGED
@@ -59,11 +59,11 @@ venv.bak/
59
  # PyCharm
60
  .idea/
61
 
62
- # Jupyter notebooks
63
- notebooks/
64
-
65
  # Documentation
66
  docs/
67
 
68
  # MacOS
69
  .DS_Store
 
 
 
 
59
  # PyCharm
60
  .idea/
61
 
 
 
 
62
  # Documentation
63
  docs/
64
 
65
  # MacOS
66
  .DS_Store
67
+
68
+ # Application logs
69
+ /logs
.env.template CHANGED
@@ -1,25 +1,41 @@
1
  # Copy this file and modify. Do not save or commit the secrets!
2
 
 
 
 
 
 
 
 
 
 
 
3
  # API Configuration
4
  API_HOST=0.0.0.0
5
  API_PORT=8000
6
- DEBUG=false
7
-
8
- # MongoDB Configuration
9
- MONGODB_URI=mongodb+srv://username:password@cluster.mongodb.net/database?retryWrites=true&w=majority
10
- MONGODB_DB_NAME=ctp_slack_bot
11
 
12
  # Slack Configuration
13
  SLACK_BOT_TOKEN=🪙
14
  SLACK_SIGNING_SECRET=🔏
15
  SLACK_APP_TOKEN=🦥
16
 
 
 
 
 
 
 
 
 
 
 
 
17
  # Hugging Face Configuration
18
  HF_API_TOKEN=🤗
19
 
20
- # Logging Configuration
21
- LOG_LEVEL=INFO
22
- LOG_FORMAT=json
23
-
24
- # APScheduler Configuration
25
- SCHEDULER_TIMEZONE=UTC
 
1
  # Copy this file and modify. Do not save or commit the secrets!
2
 
3
+ # Application Configuration
4
+ DEBUG=false
5
+
6
+ # Logging Configuration
7
+ LOG_LEVEL=INFO
8
+ LOG_FORMAT=text
9
+
10
+ # APScheduler Configuration
11
+ SCHEDULER_TIMEZONE=UTC
12
+
13
  # API Configuration
14
  API_HOST=0.0.0.0
15
  API_PORT=8000
 
 
 
 
 
16
 
17
  # Slack Configuration
18
  SLACK_BOT_TOKEN=🪙
19
  SLACK_SIGNING_SECRET=🔏
20
  SLACK_APP_TOKEN=🦥
21
 
22
+ # Vectorization Configuration
23
+ EMBEDDING_MODEL=🌮
24
+ VECTOR_DIMENSION=9001
25
+ CHUNK_SIZE=42
26
+ CHUNK_OVERLAP=37
27
+ TOP_K_MATCHES=1
28
+
29
+ # MongoDB Configuration
30
+ MONGODB_URI=mongodb+srv://username:password@cluster.mongodb.net/database?retryWrites=true&w=majority
31
+ MONGODB_NAME=ctp_slack_bot
32
+
33
  # Hugging Face Configuration
34
  HF_API_TOKEN=🤗
35
 
36
+ # OpenAI Configuration
37
+ OPENAI_API_KEY=😐
38
+ CHAT_MODEL=🙊
39
+ MAX_TOKENS=42
40
+ TEMPERATURE=0.5
41
+ SYSTEM_PROMPT="You are a helpful teaching assistant for a data science class.\nBased on the students question, you will be given context retreived from class transcripts and materials to answer their question.\nYour responses should be:\n\n1. Accurate and based on the class content\n2. Clear and educational\n3. Concise but complete\nIf you're unsure about something, acknowledge it and suggest asking the professor."
.gitignore CHANGED
@@ -91,8 +91,8 @@ dmypy.json
91
  # PyCharm
92
  .idea/
93
 
94
- # Jupyter notebooks
95
- notebooks/
96
-
97
  # MacOS
98
  .DS_Store
 
 
 
 
91
  # PyCharm
92
  .idea/
93
 
 
 
 
94
  # MacOS
95
  .DS_Store
96
+
97
+ # Application logs
98
+ /logs
README.MD CHANGED
@@ -14,6 +14,7 @@
14
  * `src/`
15
  * `ctp_slack_bot/`
16
  * `api/`: FastAPI application structure
 
17
  * `core/`: fundamental components like configuration (using pydantic), logging setup (loguru), and custom exceptions
18
  * `db/`: database connection
19
  * `repositories/`: repository pattern implementation
@@ -23,7 +24,9 @@
23
  * `utils/`: reusable utilities
24
  * `tests/`: unit tests
25
  * `scripts/`: utility scripts for development, deployment, etc.
 
26
  * `notebooks/`: Jupyter notebooks for exploration and model development
 
27
 
28
  ## How to Run the Application
29
 
@@ -41,6 +44,8 @@ First, make sure you are set up with a Python virtual environment created by the
41
  pip3 install -e .
42
  ```
43
 
 
 
44
  If `localhost` port `8000` is free, running the following will make the application available on that port:
45
 
46
  ```sh
@@ -54,4 +59,6 @@ $ curl http://localhost:8000/health
54
  {"status":"healthy"}
55
  ```
56
 
 
 
57
  Uvicorn will restart the application automatically when any source files are changed.
 
14
  * `src/`
15
  * `ctp_slack_bot/`
16
  * `api/`: FastAPI application structure
17
+ * `routes.py`: API endpoint definitions
18
  * `core/`: fundamental components like configuration (using pydantic), logging setup (loguru), and custom exceptions
19
  * `db/`: database connection
20
  * `repositories/`: repository pattern implementation
 
24
  * `utils/`: reusable utilities
25
  * `tests/`: unit tests
26
  * `scripts/`: utility scripts for development, deployment, etc.
27
+ * `run-dev.sh`: script to run the application locally
28
  * `notebooks/`: Jupyter notebooks for exploration and model development
29
+ * `.env`: local environment variables for development purposes
30
 
31
  ## How to Run the Application
32
 
 
44
  pip3 install -e .
45
  ```
46
 
47
+ Make a copy of `.env.template` as `.env` and define the environment variables. (You can also define them by other means, but this has the least friction.) This file should not be committed and is excluded by `.gitignore`!
48
+
49
  If `localhost` port `8000` is free, running the following will make the application available on that port:
50
 
51
  ```sh
 
59
  {"status":"healthy"}
60
  ```
61
 
62
+ In debug mode (`DEBUG=true`), [http://localhost:8000/env](http://localhost:8000/env) will pretty-print the non-sensitive environment variables as JSON.
63
+
64
  Uvicorn will restart the application automatically when any source files are changed.
pyproject.toml CHANGED
@@ -43,6 +43,7 @@ dev = [
43
  "pytest>=7.3.1",
44
  "pytest-cov>=4.1.0",
45
  "mypy>=1.3.0",
 
46
  "black>=23.3.0",
47
  "isort>=5.12.0",
48
  "ruff>=0.0.270",
 
43
  "pytest>=7.3.1",
44
  "pytest-cov>=4.1.0",
45
  "mypy>=1.3.0",
46
+ "types-pytz>=2025.2",
47
  "black>=23.3.0",
48
  "isort>=5.12.0",
49
  "ruff>=0.0.270",
src/ctp_slack_bot/api/main.py CHANGED
@@ -1,23 +1,23 @@
1
- import logging
2
  from contextlib import asynccontextmanager
3
-
4
- from fastapi import FastAPI
5
  from loguru import logger
 
6
 
7
  from ctp_slack_bot.api.routes import router
8
- from ctp_slack_bot.core.config import settings
9
  from ctp_slack_bot.core.logging import setup_logging
 
10
  from ctp_slack_bot.tasks.scheduler import start_scheduler, stop_scheduler
11
 
12
 
13
  @asynccontextmanager
14
- async def lifespan(app: FastAPI):
15
  """
16
  Lifespan context manager for FastAPI application.
17
  Handles startup and shutdown events.
18
  """
19
  # Setup logging
20
- #setup_logging()
21
  logger.info("Starting application")
22
 
23
  # Start scheduler
@@ -42,11 +42,19 @@ app = FastAPI(
42
  # Include routers
43
  app.include_router(router)
44
 
45
-
46
  @app.get("/health")
47
- async def health_check():
48
- """Health check endpoint"""
49
- return {"status": "healthy"}
 
 
 
 
 
 
 
 
 
50
 
51
 
52
  if __name__ == "__main__":
@@ -54,7 +62,7 @@ if __name__ == "__main__":
54
 
55
  uvicorn.run(
56
  "main:app",
57
- host="localhost", #settings.API_HOST,
58
- port=8000, #settings.API_PORT,
59
- reload=True #settings.DEBUG,
60
  )
 
 
1
  from contextlib import asynccontextmanager
2
+ from fastapi import FastAPI, HTTPException
 
3
  from loguru import logger
4
+ from typing import AsyncGenerator, Never
5
 
6
  from ctp_slack_bot.api.routes import router
7
+ from ctp_slack_bot.core.config import Settings, settings
8
  from ctp_slack_bot.core.logging import setup_logging
9
+ from ctp_slack_bot.core.response_rendering import PrettyJSONResponse
10
  from ctp_slack_bot.tasks.scheduler import start_scheduler, stop_scheduler
11
 
12
 
13
  @asynccontextmanager
14
+ async def lifespan(app: FastAPI) -> AsyncGenerator:
15
  """
16
  Lifespan context manager for FastAPI application.
17
  Handles startup and shutdown events.
18
  """
19
  # Setup logging
20
+ setup_logging()
21
  logger.info("Starting application")
22
 
23
  # Start scheduler
 
42
  # Include routers
43
  app.include_router(router)
44
 
 
45
  @app.get("/health")
46
+ async def health() -> dict[str, str]:
47
+ """Health check"""
48
+ return {
49
+ "status": "healthy"
50
+ }
51
+
52
+ @app.get("/env", response_class=PrettyJSONResponse)
53
+ async def env() -> Settings:
54
+ """Server-internal environment variables"""
55
+ if not settings.DEBUG:
56
+ raise HTTPException(status_code=404)
57
+ return settings
58
 
59
 
60
  if __name__ == "__main__":
 
62
 
63
  uvicorn.run(
64
  "main:app",
65
+ host=settings.API_HOST,
66
+ port=settings.API_PORT,
67
+ reload=settings.DEBUG
68
  )
src/ctp_slack_bot/core/config.py CHANGED
@@ -1,69 +1,54 @@
1
  from functools import lru_cache
2
  from typing import Literal, Optional
3
 
4
- from pydantic import Field, SecretStr, validator
5
  from pydantic_settings import BaseSettings, SettingsConfigDict
6
 
7
 
8
- class Settings(BaseSettings):
9
  """
10
  Application settings loaded from environment variables.
11
  """
12
- # API Configuration
13
- API_HOST: str = "0.0.0.0"
14
- API_PORT: int = 8000
15
  DEBUG: bool = False
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  # Vectorization Configuration
18
- EMBEDDING_MODEL: str = "text-embedding-3-small"
19
- VECTOR_DIMENSION: int = 1536
20
- CHUNK_SIZE: int = 1000
21
- CHUNK_OVERLAP: int = 200
22
- TOP_K_MATCHES: int = 5
23
 
24
  # MongoDB Configuration
25
- MONGODB_URI: Optional[SecretStr] = None # TODO: Remove optionality
26
- MONGODB_DB_NAME: str = "ctp_slack_bot"
27
-
28
- # Slack Configuration
29
- SLACK_BOT_TOKEN: Optional[SecretStr] = None # TODO: Remove optionality
30
- SLACK_SIGNING_SECRET: Optional[SecretStr] = None # TODO: Remove optionality
31
- SLACK_APP_TOKEN: Optional[SecretStr] = None
32
-
33
  # Hugging Face Configuration
34
  HF_API_TOKEN: Optional[SecretStr] = None
35
 
36
  # OpenAI Configuration
37
  OPENAI_API_KEY: Optional[SecretStr] = None
 
 
 
 
38
 
39
- # Chat Model Configuration
40
- CHAT_MODEL: str = "gpt-3.5-turbo"
41
- MAX_TOKENS: int = 150
42
- TEMPERATURE: float = 0.8 # Maximum tokens for response generation
43
- SYSTEM_PROMPT: str = """
44
- You are a helpful teaching assistant for a data science class.
45
- Based on the students question, you will be given context retreived from class transcripts and materials to answer their question.
46
- Your responses should be:
47
- 1. Accurate and based on the class content
48
- 2. Clear and educational
49
- 3. Concise but complete
50
- If you're unsure about something, acknowledge it and suggest asking the professor.
51
- """
52
-
53
- # Logging Configuration
54
- LOG_LEVEL: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO"
55
- LOG_FORMAT: Literal["text", "json"] = "json"
56
-
57
- # APScheduler Configuration
58
- SCHEDULER_TIMEZONE: str = "UTC"
59
-
60
- @validator("MONGODB_URI")
61
- def validate_mongodb_uri(cls, v):
62
- """Validate MongoDB URI format"""
63
- #if not v.get_secret_value().startswith("mongodb"):
64
- # raise ValueError("MONGODB_URI must be a valid MongoDB connection string")
65
- return v
66
-
67
  model_config = SettingsConfigDict(
68
  env_file=".env",
69
  env_file_encoding="utf-8",
@@ -76,7 +61,7 @@ def get_settings() -> Settings:
76
  """
77
  Get cached settings instance.
78
  """
79
- return Settings()
80
 
81
 
82
  settings = get_settings()
 
1
  from functools import lru_cache
2
  from typing import Literal, Optional
3
 
4
+ from pydantic import Field, MongoDsn, NonNegativeFloat, NonNegativeInt, PositiveInt, SecretStr
5
  from pydantic_settings import BaseSettings, SettingsConfigDict
6
 
7
 
8
+ class Settings(BaseSettings): # TODO: Strong guarantees of validity, because garbage in = garbage out, and settings flow into all the nooks and crannies
9
  """
10
  Application settings loaded from environment variables.
11
  """
12
+ # Application Configuration
 
 
13
  DEBUG: bool = False
14
 
15
+ # Logging Configuration
16
+ LOG_LEVEL: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = Field(default_factory=lambda data: "DEBUG" if data.get("DEBUG", False) else "INFO")
17
+ LOG_FORMAT: Literal["text", "json"] = "json"
18
+
19
+ # APScheduler Configuration
20
+ SCHEDULER_TIMEZONE: str = "UTC"
21
+
22
+ # API Configuration
23
+ API_HOST: str
24
+ API_PORT: PositiveInt
25
+
26
+ # Slack Configuration
27
+ SLACK_BOT_TOKEN: SecretStr
28
+ SLACK_SIGNING_SECRET: SecretStr
29
+ SLACK_APP_TOKEN: SecretStr
30
+
31
  # Vectorization Configuration
32
+ EMBEDDING_MODEL: str
33
+ VECTOR_DIMENSION: PositiveInt
34
+ CHUNK_SIZE: PositiveInt
35
+ CHUNK_OVERLAP: NonNegativeInt
36
+ TOP_K_MATCHES: PositiveInt
37
 
38
  # MongoDB Configuration
39
+ MONGODB_URI: SecretStr # TODO: Contemplate switching to MongoDsn type for the main URL, and separate out the credentials to SecretStr variables.
40
+ MONGODB_NAME: str
41
+
 
 
 
 
 
42
  # Hugging Face Configuration
43
  HF_API_TOKEN: Optional[SecretStr] = None
44
 
45
  # OpenAI Configuration
46
  OPENAI_API_KEY: Optional[SecretStr] = None
47
+ CHAT_MODEL: str
48
+ MAX_TOKENS: PositiveInt
49
+ TEMPERATURE: NonNegativeFloat
50
+ SYSTEM_PROMPT: str
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  model_config = SettingsConfigDict(
53
  env_file=".env",
54
  env_file_encoding="utf-8",
 
61
  """
62
  Get cached settings instance.
63
  """
64
+ return Settings() # type: ignore
65
 
66
 
67
  settings = get_settings()
src/ctp_slack_bot/core/response_rendering.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from json import dumps
2
+ from starlette.responses import JSONResponse
3
+ from typing import Any, Self
4
+
5
+ class PrettyJSONResponse(JSONResponse):
6
+ def render(self: Self, content: Any) -> bytes:
7
+ return dumps(
8
+ content,
9
+ ensure_ascii=False,
10
+ allow_nan=False,
11
+ indent=4,
12
+ separators=(", ", ": "),
13
+ ).encode("utf-8")