Spaces:
Runtime error
Runtime error
Partial code clean-up and logging enablement
Browse files- .dockerignore +3 -3
- .env.template +27 -11
- .gitignore +3 -3
- README.MD +7 -0
- pyproject.toml +1 -0
- src/ctp_slack_bot/api/main.py +21 -13
- src/ctp_slack_bot/core/config.py +32 -47
- src/ctp_slack_bot/core/response_rendering.py +13 -0
.dockerignore
CHANGED
@@ -59,11 +59,11 @@ venv.bak/
|
|
59 |
# PyCharm
|
60 |
.idea/
|
61 |
|
62 |
-
# Jupyter notebooks
|
63 |
-
notebooks/
|
64 |
-
|
65 |
# Documentation
|
66 |
docs/
|
67 |
|
68 |
# MacOS
|
69 |
.DS_Store
|
|
|
|
|
|
|
|
59 |
# PyCharm
|
60 |
.idea/
|
61 |
|
|
|
|
|
|
|
62 |
# Documentation
|
63 |
docs/
|
64 |
|
65 |
# MacOS
|
66 |
.DS_Store
|
67 |
+
|
68 |
+
# Application logs
|
69 |
+
/logs
|
.env.template
CHANGED
@@ -1,25 +1,41 @@
|
|
1 |
# Copy this file and modify. Do not save or commit the secrets!
|
2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
# API Configuration
|
4 |
API_HOST=0.0.0.0
|
5 |
API_PORT=8000
|
6 |
-
DEBUG=false
|
7 |
-
|
8 |
-
# MongoDB Configuration
|
9 |
-
MONGODB_URI=mongodb+srv://username:password@cluster.mongodb.net/database?retryWrites=true&w=majority
|
10 |
-
MONGODB_DB_NAME=ctp_slack_bot
|
11 |
|
12 |
# Slack Configuration
|
13 |
SLACK_BOT_TOKEN=🪙
|
14 |
SLACK_SIGNING_SECRET=🔏
|
15 |
SLACK_APP_TOKEN=🦥
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
# Hugging Face Configuration
|
18 |
HF_API_TOKEN=🤗
|
19 |
|
20 |
-
#
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
|
|
1 |
# Copy this file and modify. Do not save or commit the secrets!
|
2 |
|
3 |
+
# Application Configuration
|
4 |
+
DEBUG=false
|
5 |
+
|
6 |
+
# Logging Configuration
|
7 |
+
LOG_LEVEL=INFO
|
8 |
+
LOG_FORMAT=text
|
9 |
+
|
10 |
+
# APScheduler Configuration
|
11 |
+
SCHEDULER_TIMEZONE=UTC
|
12 |
+
|
13 |
# API Configuration
|
14 |
API_HOST=0.0.0.0
|
15 |
API_PORT=8000
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
# Slack Configuration
|
18 |
SLACK_BOT_TOKEN=🪙
|
19 |
SLACK_SIGNING_SECRET=🔏
|
20 |
SLACK_APP_TOKEN=🦥
|
21 |
|
22 |
+
# Vectorization Configuration
|
23 |
+
EMBEDDING_MODEL=🌮
|
24 |
+
VECTOR_DIMENSION=9001
|
25 |
+
CHUNK_SIZE=42
|
26 |
+
CHUNK_OVERLAP=37
|
27 |
+
TOP_K_MATCHES=1
|
28 |
+
|
29 |
+
# MongoDB Configuration
|
30 |
+
MONGODB_URI=mongodb+srv://username:password@cluster.mongodb.net/database?retryWrites=true&w=majority
|
31 |
+
MONGODB_NAME=ctp_slack_bot
|
32 |
+
|
33 |
# Hugging Face Configuration
|
34 |
HF_API_TOKEN=🤗
|
35 |
|
36 |
+
# OpenAI Configuration
|
37 |
+
OPENAI_API_KEY=😐
|
38 |
+
CHAT_MODEL=🙊
|
39 |
+
MAX_TOKENS=42
|
40 |
+
TEMPERATURE=0.5
|
41 |
+
SYSTEM_PROMPT="You are a helpful teaching assistant for a data science class.\nBased on the student's question, you will be given context retrieved from class transcripts and materials to answer their question.\nYour responses should be:\n\n1. Accurate and based on the class content\n2. Clear and educational\n3. Concise but complete\nIf you're unsure about something, acknowledge it and suggest asking the professor."
|
.gitignore
CHANGED
@@ -91,8 +91,8 @@ dmypy.json
|
|
91 |
# PyCharm
|
92 |
.idea/
|
93 |
|
94 |
-
# Jupyter notebooks
|
95 |
-
notebooks/
|
96 |
-
|
97 |
# MacOS
|
98 |
.DS_Store
|
|
|
|
|
|
|
|
91 |
# PyCharm
|
92 |
.idea/
|
93 |
|
|
|
|
|
|
|
94 |
# MacOS
|
95 |
.DS_Store
|
96 |
+
|
97 |
+
# Application logs
|
98 |
+
/logs
|
README.MD
CHANGED
@@ -14,6 +14,7 @@
|
|
14 |
* `src/`
|
15 |
* `ctp_slack_bot/`
|
16 |
* `api/`: FastAPI application structure
|
|
|
17 |
* `core/`: fundamental components like configuration (using pydantic), logging setup (loguru), and custom exceptions
|
18 |
* `db/`: database connection
|
19 |
* `repositories/`: repository pattern implementation
|
@@ -23,7 +24,9 @@
|
|
23 |
* `utils/`: reusable utilities
|
24 |
* `tests/`: unit tests
|
25 |
* `scripts/`: utility scripts for development, deployment, etc.
|
|
|
26 |
* `notebooks/`: Jupyter notebooks for exploration and model development
|
|
|
27 |
|
28 |
## How to Run the Application
|
29 |
|
@@ -41,6 +44,8 @@ First, make sure you are set up with a Python virtual environment created by the
|
|
41 |
pip3 install -e .
|
42 |
```
|
43 |
|
|
|
|
|
44 |
If `localhost` port `8000` is free, running the following will make the application available on that port:
|
45 |
|
46 |
```sh
|
@@ -54,4 +59,6 @@ $ curl http://localhost:8000/health
|
|
54 |
{"status":"healthy"}
|
55 |
```
|
56 |
|
|
|
|
|
57 |
Uvicorn will restart the application automatically when any source files are changed.
|
|
|
14 |
* `src/`
|
15 |
* `ctp_slack_bot/`
|
16 |
* `api/`: FastAPI application structure
|
17 |
+
* `routes.py`: API endpoint definitions
|
18 |
* `core/`: fundamental components like configuration (using pydantic), logging setup (loguru), and custom exceptions
|
19 |
* `db/`: database connection
|
20 |
* `repositories/`: repository pattern implementation
|
|
|
24 |
* `utils/`: reusable utilities
|
25 |
* `tests/`: unit tests
|
26 |
* `scripts/`: utility scripts for development, deployment, etc.
|
27 |
+
* `run-dev.sh`: script to run the application locally
|
28 |
* `notebooks/`: Jupyter notebooks for exploration and model development
|
29 |
+
* `.env`: local environment variables for development purposes
|
30 |
|
31 |
## How to Run the Application
|
32 |
|
|
|
44 |
pip3 install -e .
|
45 |
```
|
46 |
|
47 |
+
Make a copy of `.env.template` as `.env` and define the environment variables. (You can also define them by other means, but this has the least friction.) This file should not be committed and is excluded by `.gitignore`!
|
48 |
+
|
49 |
If `localhost` port `8000` is free, running the following will make the application available on that port:
|
50 |
|
51 |
```sh
|
|
|
59 |
{"status":"healthy"}
|
60 |
```
|
61 |
|
62 |
+
In debug mode (`DEBUG=true`), [http://localhost:8000/env](http://localhost:8000/env) will pretty-print the non-sensitive environment variables as JSON.
|
63 |
+
|
64 |
Uvicorn will restart the application automatically when any source files are changed.
|
pyproject.toml
CHANGED
@@ -43,6 +43,7 @@ dev = [
|
|
43 |
"pytest>=7.3.1",
|
44 |
"pytest-cov>=4.1.0",
|
45 |
"mypy>=1.3.0",
|
|
|
46 |
"black>=23.3.0",
|
47 |
"isort>=5.12.0",
|
48 |
"ruff>=0.0.270",
|
|
|
43 |
"pytest>=7.3.1",
|
44 |
"pytest-cov>=4.1.0",
|
45 |
"mypy>=1.3.0",
|
46 |
+
"types-pytz>=2025.2",
|
47 |
"black>=23.3.0",
|
48 |
"isort>=5.12.0",
|
49 |
"ruff>=0.0.270",
|
src/ctp_slack_bot/api/main.py
CHANGED
@@ -1,23 +1,23 @@
|
|
1 |
-
import logging
|
2 |
from contextlib import asynccontextmanager
|
3 |
-
|
4 |
-
from fastapi import FastAPI
|
5 |
from loguru import logger
|
|
|
6 |
|
7 |
from ctp_slack_bot.api.routes import router
|
8 |
-
from ctp_slack_bot.core.config import settings
|
9 |
from ctp_slack_bot.core.logging import setup_logging
|
|
|
10 |
from ctp_slack_bot.tasks.scheduler import start_scheduler, stop_scheduler
|
11 |
|
12 |
|
13 |
@asynccontextmanager
|
14 |
-
async def lifespan(app: FastAPI):
|
15 |
"""
|
16 |
Lifespan context manager for FastAPI application.
|
17 |
Handles startup and shutdown events.
|
18 |
"""
|
19 |
# Setup logging
|
20 |
-
|
21 |
logger.info("Starting application")
|
22 |
|
23 |
# Start scheduler
|
@@ -42,11 +42,19 @@ app = FastAPI(
|
|
42 |
# Include routers
|
43 |
app.include_router(router)
|
44 |
|
45 |
-
|
46 |
@app.get("/health")
|
47 |
-
async def
|
48 |
-
"""Health check
|
49 |
-
return {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
|
51 |
|
52 |
if __name__ == "__main__":
|
@@ -54,7 +62,7 @@ if __name__ == "__main__":
|
|
54 |
|
55 |
uvicorn.run(
|
56 |
"main:app",
|
57 |
-
host=
|
58 |
-
port=
|
59 |
-
reload=
|
60 |
)
|
|
|
|
|
1 |
from contextlib import asynccontextmanager
|
2 |
+
from fastapi import FastAPI, HTTPException
|
|
|
3 |
from loguru import logger
|
4 |
+
from typing import AsyncGenerator, Never
|
5 |
|
6 |
from ctp_slack_bot.api.routes import router
|
7 |
+
from ctp_slack_bot.core.config import Settings, settings
|
8 |
from ctp_slack_bot.core.logging import setup_logging
|
9 |
+
from ctp_slack_bot.core.response_rendering import PrettyJSONResponse
|
10 |
from ctp_slack_bot.tasks.scheduler import start_scheduler, stop_scheduler
|
11 |
|
12 |
|
13 |
@asynccontextmanager
|
14 |
+
async def lifespan(app: FastAPI) -> AsyncGenerator:
|
15 |
"""
|
16 |
Lifespan context manager for FastAPI application.
|
17 |
Handles startup and shutdown events.
|
18 |
"""
|
19 |
# Setup logging
|
20 |
+
setup_logging()
|
21 |
logger.info("Starting application")
|
22 |
|
23 |
# Start scheduler
|
|
|
42 |
# Include routers
|
43 |
app.include_router(router)
|
44 |
|
|
|
45 |
@app.get("/health")
|
46 |
+
async def health() -> dict[str, str]:
|
47 |
+
"""Health check"""
|
48 |
+
return {
|
49 |
+
"status": "healthy"
|
50 |
+
}
|
51 |
+
|
52 |
+
@app.get("/env", response_class=PrettyJSONResponse)
|
53 |
+
async def env() -> Settings:
|
54 |
+
"""Server-internal environment variables"""
|
55 |
+
if not settings.DEBUG:
|
56 |
+
raise HTTPException(status_code=404)
|
57 |
+
return settings
|
58 |
|
59 |
|
60 |
if __name__ == "__main__":
|
|
|
62 |
|
63 |
uvicorn.run(
|
64 |
"main:app",
|
65 |
+
host=settings.API_HOST,
|
66 |
+
port=settings.API_PORT,
|
67 |
+
reload=settings.DEBUG
|
68 |
)
|
src/ctp_slack_bot/core/config.py
CHANGED
@@ -1,69 +1,54 @@
|
|
1 |
from functools import lru_cache
|
2 |
from typing import Literal, Optional
|
3 |
|
4 |
-
from pydantic import Field,
|
5 |
from pydantic_settings import BaseSettings, SettingsConfigDict
|
6 |
|
7 |
|
8 |
-
class Settings(BaseSettings):
|
9 |
"""
|
10 |
Application settings loaded from environment variables.
|
11 |
"""
|
12 |
-
#
|
13 |
-
API_HOST: str = "0.0.0.0"
|
14 |
-
API_PORT: int = 8000
|
15 |
DEBUG: bool = False
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
# Vectorization Configuration
|
18 |
-
EMBEDDING_MODEL: str
|
19 |
-
VECTOR_DIMENSION:
|
20 |
-
CHUNK_SIZE:
|
21 |
-
CHUNK_OVERLAP:
|
22 |
-
TOP_K_MATCHES:
|
23 |
|
24 |
# MongoDB Configuration
|
25 |
-
MONGODB_URI:
|
26 |
-
|
27 |
-
|
28 |
-
# Slack Configuration
|
29 |
-
SLACK_BOT_TOKEN: Optional[SecretStr] = None # TODO: Remove optionality
|
30 |
-
SLACK_SIGNING_SECRET: Optional[SecretStr] = None # TODO: Remove optionality
|
31 |
-
SLACK_APP_TOKEN: Optional[SecretStr] = None
|
32 |
-
|
33 |
# Hugging Face Configuration
|
34 |
HF_API_TOKEN: Optional[SecretStr] = None
|
35 |
|
36 |
# OpenAI Configuration
|
37 |
OPENAI_API_KEY: Optional[SecretStr] = None
|
|
|
|
|
|
|
|
|
38 |
|
39 |
-
# Chat Model Configuration
|
40 |
-
CHAT_MODEL: str = "gpt-3.5-turbo"
|
41 |
-
MAX_TOKENS: int = 150
|
42 |
-
TEMPERATURE: float = 0.8 # Sampling temperature for response generation
|
43 |
-
SYSTEM_PROMPT: str = """
|
44 |
-
You are a helpful teaching assistant for a data science class.
|
45 |
-
Based on the student's question, you will be given context retrieved from class transcripts and materials to answer their question.
|
46 |
-
Your responses should be:
|
47 |
-
1. Accurate and based on the class content
|
48 |
-
2. Clear and educational
|
49 |
-
3. Concise but complete
|
50 |
-
If you're unsure about something, acknowledge it and suggest asking the professor.
|
51 |
-
"""
|
52 |
-
|
53 |
-
# Logging Configuration
|
54 |
-
LOG_LEVEL: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO"
|
55 |
-
LOG_FORMAT: Literal["text", "json"] = "json"
|
56 |
-
|
57 |
-
# APScheduler Configuration
|
58 |
-
SCHEDULER_TIMEZONE: str = "UTC"
|
59 |
-
|
60 |
-
@validator("MONGODB_URI")
|
61 |
-
def validate_mongodb_uri(cls, v):
|
62 |
-
"""Validate MongoDB URI format"""
|
63 |
-
#if not v.get_secret_value().startswith("mongodb"):
|
64 |
-
# raise ValueError("MONGODB_URI must be a valid MongoDB connection string")
|
65 |
-
return v
|
66 |
-
|
67 |
model_config = SettingsConfigDict(
|
68 |
env_file=".env",
|
69 |
env_file_encoding="utf-8",
|
@@ -76,7 +61,7 @@ def get_settings() -> Settings:
|
|
76 |
"""
|
77 |
Get cached settings instance.
|
78 |
"""
|
79 |
-
return Settings()
|
80 |
|
81 |
|
82 |
settings = get_settings()
|
|
|
1 |
from functools import lru_cache
|
2 |
from typing import Literal, Optional
|
3 |
|
4 |
+
from pydantic import Field, MongoDsn, NonNegativeFloat, NonNegativeInt, PositiveInt, SecretStr
|
5 |
from pydantic_settings import BaseSettings, SettingsConfigDict
|
6 |
|
7 |
|
8 |
+
class Settings(BaseSettings): # TODO: Strong guarantees of validity, because garbage in = garbage out, and settings flow into all the nooks and crannies
|
9 |
"""
|
10 |
Application settings loaded from environment variables.
|
11 |
"""
|
12 |
+
# Application Configuration
|
|
|
|
|
13 |
DEBUG: bool = False
|
14 |
|
15 |
+
# Logging Configuration
|
16 |
+
LOG_LEVEL: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = Field(default_factory=lambda data: "DEBUG" if data.get("DEBUG", False) else "INFO")
|
17 |
+
LOG_FORMAT: Literal["text", "json"] = "json"
|
18 |
+
|
19 |
+
# APScheduler Configuration
|
20 |
+
SCHEDULER_TIMEZONE: str = "UTC"
|
21 |
+
|
22 |
+
# API Configuration
|
23 |
+
API_HOST: str
|
24 |
+
API_PORT: PositiveInt
|
25 |
+
|
26 |
+
# Slack Configuration
|
27 |
+
SLACK_BOT_TOKEN: SecretStr
|
28 |
+
SLACK_SIGNING_SECRET: SecretStr
|
29 |
+
SLACK_APP_TOKEN: SecretStr
|
30 |
+
|
31 |
# Vectorization Configuration
|
32 |
+
EMBEDDING_MODEL: str
|
33 |
+
VECTOR_DIMENSION: PositiveInt
|
34 |
+
CHUNK_SIZE: PositiveInt
|
35 |
+
CHUNK_OVERLAP: NonNegativeInt
|
36 |
+
TOP_K_MATCHES: PositiveInt
|
37 |
|
38 |
# MongoDB Configuration
|
39 |
+
MONGODB_URI: SecretStr # TODO: Contemplate switching to MongoDsn type for the main URL, and separate out the credentials to SecretStr variables.
|
40 |
+
MONGODB_NAME: str
|
41 |
+
|
|
|
|
|
|
|
|
|
|
|
42 |
# Hugging Face Configuration
|
43 |
HF_API_TOKEN: Optional[SecretStr] = None
|
44 |
|
45 |
# OpenAI Configuration
|
46 |
OPENAI_API_KEY: Optional[SecretStr] = None
|
47 |
+
CHAT_MODEL: str
|
48 |
+
MAX_TOKENS: PositiveInt
|
49 |
+
TEMPERATURE: NonNegativeFloat
|
50 |
+
SYSTEM_PROMPT: str
|
51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
model_config = SettingsConfigDict(
|
53 |
env_file=".env",
|
54 |
env_file_encoding="utf-8",
|
|
|
61 |
"""
|
62 |
Get cached settings instance.
|
63 |
"""
|
64 |
+
return Settings() # type: ignore
|
65 |
|
66 |
|
67 |
settings = get_settings()
|
src/ctp_slack_bot/core/response_rendering.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from json import dumps
|
2 |
+
from starlette.responses import JSONResponse
|
3 |
+
from typing import Any, Self
|
4 |
+
|
5 |
+
class PrettyJSONResponse(JSONResponse):
|
6 |
+
def render(self: Self, content: Any) -> bytes:
|
7 |
+
return dumps(
|
8 |
+
content,
|
9 |
+
ensure_ascii=False,
|
10 |
+
allow_nan=False,
|
11 |
+
indent=4,
|
12 |
+
separators=(", ", ": "),
|
13 |
+
).encode("utf-8")
|