LiKenun committed on
Commit
005a292
·
1 Parent(s): 616eb03

Provisional project structure

Browse files
.dockerignore ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Git
2
+ .git
3
+ .gitignore
4
+
5
+ # CI
6
+ .github/
7
+
8
+ # Docker
9
+ Dockerfile
10
+ .dockerignore
11
+
12
+ # Python
13
+ __pycache__/
14
+ *.py[cod]
15
+ *$py.class
16
+ *.so
17
+ .Python
18
+ build/
19
+ develop-eggs/
20
+ dist/
21
+ downloads/
22
+ eggs/
23
+ .eggs/
24
+ lib/
25
+ lib64/
26
+ parts/
27
+ sdist/
28
+ var/
29
+ wheels/
30
+ *.egg-info/
31
+ .installed.cfg
32
+ *.egg
33
+
34
+ # Unit test / coverage reports
35
+ htmlcov/
36
+ .tox/
37
+ .nox/
38
+ .coverage
39
+ .coverage.*
40
+ .cache
41
+ nosetests.xml
42
+ coverage.xml
43
+ *.cover
44
+ .hypothesis/
45
+ .pytest_cache/
46
+
47
+ # Environments
48
+ .env
49
+ .venv
50
+ env/
51
+ venv/
52
+ ENV/
53
+ env.bak/
54
+ venv.bak/
55
+
56
+ # VS Code
57
+ .vscode/
58
+
59
+ # PyCharm
60
+ .idea/
61
+
62
+ # Jupyter notebooks
63
+ notebooks/
64
+
65
+ # Documentation
66
+ docs/
67
+
68
+ # MacOS
69
+ .DS_Store
.env.template ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copy this file to .env and fill in the values. Do not commit the real secrets!
2
+
3
+ # API Configuration
4
+ API_HOST=0.0.0.0
5
+ API_PORT=8000
6
+ DEBUG=false
7
+
8
+ # MongoDB Configuration
9
+ MONGODB_URI=mongodb+srv://username:[email protected]/database?retryWrites=true&w=majority
10
+ MONGODB_DB_NAME=ctp_slack_bot
11
+
12
+ # Slack Configuration
13
+ SLACK_BOT_TOKEN=🪙
14
+ SLACK_SIGNING_SECRET=🔏
15
+ SLACK_APP_TOKEN=🦥
16
+
17
+ # Hugging Face Configuration
18
+ HF_API_TOKEN=🤗
19
+
20
+ # Logging Configuration
21
+ LOG_LEVEL=INFO
22
+ LOG_FORMAT=json
23
+
24
+ # APScheduler Configuration
25
+ SCHEDULER_TIMEZONE=UTC
.gitignore ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ *.egg-info/
24
+ .installed.cfg
25
+ *.egg
26
+ MANIFEST
27
+
28
+ # PyInstaller
29
+ *.manifest
30
+ *.spec
31
+
32
+ # Installer logs
33
+ pip-log.txt
34
+ pip-delete-this-directory.txt
35
+
36
+ # Unit test / coverage reports
37
+ htmlcov/
38
+ .tox/
39
+ .nox/
40
+ .coverage
41
+ .coverage.*
42
+ .cache
43
+ nosetests.xml
44
+ coverage.xml
45
+ *.cover
46
+ .hypothesis/
47
+ .pytest_cache/
48
+
49
+ # Sphinx documentation
50
+ docs/_build/
51
+
52
+ # PyBuilder
53
+ target/
54
+
55
+ # Jupyter Notebook
56
+ .ipynb_checkpoints
57
+
58
+ # IPython
59
+ profile_default/
60
+ ipython_config.py
61
+
62
+ # pyenv
63
+ .python-version
64
+
65
+ # celery beat schedule file
66
+ celerybeat-schedule
67
+
68
+ # Environments
69
+ .env
70
+ .venv
71
+ env/
72
+ venv/
73
+ ENV/
74
+ env.bak/
75
+ venv.bak/
76
+
77
+ # mkdocs documentation
78
+ /site
79
+
80
+ # mypy
81
+ .mypy_cache/
82
+ .dmypy.json
83
+ dmypy.json
84
+
85
+ # Pyre type checker
86
+ .pyre/
87
+
88
+ # VS Code
89
+ .vscode/
90
+
91
+ # PyCharm
92
+ .idea/
93
+
94
+ # Jupyter notebooks
95
+ notebooks/
96
+
97
+ # MacOS
98
+ .DS_Store
Dockerfile ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.12-slim

WORKDIR /app

# Set environment variables.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PYTHONPATH=/app

# Install system dependencies.
RUN apt-get update \
    && apt-get install -y --no-install-recommends build-essential \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Copy project files.
# The readme is stored in the repository as README.MD, while pyproject.toml
# declares `readme = "README.md"`; copy it under the name the build expects so
# the image also builds from a case-sensitive build context.
COPY pyproject.toml ./
COPY README.MD ./README.md
COPY src/ ./src/

# Install Python dependencies.
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir .

# Create a non-root user and switch to it.
RUN useradd -m appuser
USER appuser

# Run the application.
# NOTE(review): the API package ships api/app.py (there is no api/main.py);
# confirm that module exposes an ASGI object named `app`.
CMD ["uvicorn", "src.ctp_slack_bot.api.app:app", "--host", "0.0.0.0", "--port", "8000"]
README.MD ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # CTP Slack Bot
2
+
3
+
4
+ ## Dependencies
5
+
6
+ * Python 3.12
7
+
8
+ ### Development-specific
9
+
10
+ * Docker
11
+
12
+ ## General Project Structure
13
+
14
+ * `src/`
15
+ * `ctp_slack_bot/`
16
+ * `api/`: FastAPI application structure
17
+ * `core/`: fundamental components like configuration (using pydantic), logging setup (loguru), and custom exceptions
18
+ * `db/`: database connection
19
+ * `repositories/`: repository pattern implementation
20
+ * `models/`: Pydantic models for data validation and serialization
21
+ * `services/`: business logic
22
+ * `tasks/`: background scheduled jobs
23
+ * `utils/`: reusable utilities
24
+ * `tests/`: unit tests
25
+ * `scripts/`: utility scripts for development, deployment, etc.
26
+ * `notebooks/`: Jupyter notebooks for exploration and model development
README.md DELETED
@@ -1,2 +0,0 @@
1
- # 2025-Spring-BAI
2
- Spring 2025's Building AI Applications
 
 
 
pyproject.toml ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "ctp-slack-bot"
7
+ version = "0.1.0"
8
+ description = "A Slack bot for processing and analyzing Zoom transcripts using AI"
9
+ readme = "README.md"
10
+ requires-python = ">=3.12"
11
+ license = {text = "MIT"}
12
+ authors = [
13
+ {name = "Your Name", email = "[email protected]"}
14
+ ]
15
+ classifiers = [
16
+ "Programming Language :: Python :: 3",
17
+ "Programming Language :: Python :: 3.12",
18
+ "License :: OSI Approved :: MIT License",
19
+ "Operating System :: OS Independent",
20
+ ]
21
+ dependencies = [
22
+ "pydantic>=2.0.0",
23
+ "fastapi>=0.100.0",
24
+ "uvicorn>=0.22.0",
25
+ "loguru>=0.7.0",
26
+ "python-dotenv>=1.0.0",
27
+ "httpx>=0.24.1",
28
+ "tenacity>=8.2.2",
29
+ "pybreaker>=1.0.2",
30
+ "apscheduler>=3.10.1",
31
+ "slack-sdk>=3.21.3",
32
+ "pymongo>=4.4.1",
33
+ "webvtt-py>=0.4.6",
34
+ "langchain>=0.0.200",
35
+ "transformers>=4.30.0",
36
+ "torch>=2.0.0",
37
+ ]
38
+
39
+ [project.optional-dependencies]
40
+ dev = [
41
+ "pytest>=7.3.1",
42
+ "pytest-cov>=4.1.0",
43
+ "mypy>=1.3.0",
44
+ "black>=23.3.0",
45
+ "isort>=5.12.0",
46
+ "ruff>=0.0.270",
47
+ ]
48
+
49
+ [project.urls]
50
+ "Homepage" = "https://github.com/yourusername/ctp-slack-bot"
51
+ "Bug Tracker" = "https://github.com/yourusername/ctp-slack-bot/issues"
52
+
53
+ [tool.setuptools]
54
+ package-dir = {"" = "src"}
55
+
56
+ [tool.mypy]
57
+ python_version = "3.12"
58
+ warn_return_any = true
59
+ warn_unused_configs = true
60
+ disallow_untyped_defs = true
61
+ disallow_incomplete_defs = true
62
+ check_untyped_defs = true
63
+ disallow_untyped_decorators = true
64
+ no_implicit_optional = true
65
+ strict_optional = true
66
+
67
+ [[tool.mypy.overrides]]
68
+ module = ["apscheduler.*", "webvtt.*"]
69
+ ignore_missing_imports = true
70
+
71
+ [tool.pytest.ini_options]
72
+ testpaths = ["tests"]
73
+ python_files = "test_*.py"
74
+ python_classes = "Test*"
75
+ python_functions = "test_*"
76
+
77
+ [tool.black]
78
+ line-length = 88
79
+ target-version = ['py312']
80
+ include = '\.pyi?$'
81
+
82
+ [tool.isort]
83
+ profile = "black"
84
+ line_length = 88
85
+
86
+ [tool.ruff]
87
+ line-length = 88
88
+ target-version = "py312"
89
+ select = ["E", "F", "B", "I"]
90
+ ignore = []
setup.py ADDED
File without changes
src/ctp_slack_bot/__init__.py ADDED
File without changes
src/ctp_slack_bot/api/__init__.py ADDED
File without changes
src/ctp_slack_bot/api/app.py ADDED
File without changes
src/ctp_slack_bot/api/routes/__init__.py ADDED
File without changes
src/ctp_slack_bot/core/__init__.py ADDED
File without changes
src/ctp_slack_bot/db/__init__.py ADDED
File without changes
src/ctp_slack_bot/db/repositories/__init__.py ADDED
File without changes
src/ctp_slack_bot/models/__init__.py ADDED
File without changes
src/ctp_slack_bot/models/base.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC, abstractmethod
2
+ from datetime import datetime
3
+ from pydantic import BaseModel, Field, validator
4
+ from typing import Dict, List, Optional, Union, Any, ClassVar
5
+ import hashlib
6
+ import json
7
+
8
+
9
class Metadata(BaseModel):
    """Identity and change-tracking information about a piece of content."""

    # Identity of the content; stays consistent across modifications.
    id: str
    # Modification time of the content, used to detect alterations.
    modification_time: datetime
    # Hash of the content, used to detect alterations.
    hash: str
15
+
16
+
17
class Content(BaseModel):
    """Ingested content, carried together with its metadata."""

    # Identity and change-tracking details for this content.
    metadata: Metadata
21
+
22
+
23
+
24
class Ingestible(ABC, BaseModel):
    """An abstract base class for ingestible content.

    Subclasses implement ``content``; ``get_chunks`` turns that content into a
    list of strings suitable for vectorization.
    """

    # Maximum number of characters per chunk when ``content`` is one string.
    # Declared as a ClassVar so it is a class-level setting rather than a
    # pydantic field; subclasses may override it.
    # NOTE(review): 1000 is a placeholder default — tune for the embedding model.
    chunk_size: ClassVar[int] = 1000

    metadata: Metadata

    @property
    @abstractmethod
    def content(self) -> Content:
        """
        Return content ready for vectorization.

        This could be:
        - A single string
        - A list of strings (pre-chunked)
        - A more complex structure that can be recursively processed

        NOTE(review): the annotation says ``Content``, but ``get_chunks`` only
        understands ``str`` and ``list`` values — confirm the intended type.
        """

    def get_chunks(self) -> List[str]:
        """
        Split content into chunks suitable for vectorization.
        Override this in subclasses for specialized chunking logic.

        Returns:
            Consecutive slices of at most ``chunk_size`` characters when the
            content is a string, or the content itself when it is a list.

        Raises:
            ValueError: If ``chunk_size`` is not positive, or if the content
                is neither a string nor a list.
        """
        content: Any = self.content
        if isinstance(content, str):
            size = self.chunk_size
            # A zero step makes range() fail and a negative one silently
            # yields no chunks, so reject both with a clear message.
            if size <= 0:
                raise ValueError(f"chunk_size must be positive, got {size}")
            # Simple chunking by character count
            return [content[start:start + size]
                    for start in range(0, len(content), size)]
        if isinstance(content, list):
            # Content is already chunked
            return content
        raise ValueError(f"Unsupported content type: {type(content)}")

    @property
    def key(self) -> str:
        """Convenience accessor for the metadata identity (``metadata.id``)."""
        return self.metadata.id
src/ctp_slack_bot/models/slack.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+ from typing import Optional, List, Dict, Any
3
+
4
class SlackMessage(BaseModel):
    """A Slack message in its adapted form."""

    channel_id: str
    user_id: str
    text: str
    # Optional; defaults to None when not supplied.
    thread_ts: Optional[str] = None
    timestamp: str
    # Defaults to False when not supplied.
    is_question: bool = False

    @property
    def key(self) -> str:
        """Return the identifier ``slack:<channel_id>:<timestamp>`` for this message."""
        return "slack:{}:{}".format(self.channel_id, self.timestamp)
src/ctp_slack_bot/services/__init__.py ADDED
File without changes
src/ctp_slack_bot/tasks/__init__.py ADDED
File without changes
src/ctp_slack_bot/utils/__init__.py ADDED
File without changes
src/ctp_slack_bot/utils/circuit_breaker.py ADDED
File without changes
src/ctp_slack_bot/utils/retry.py ADDED
File without changes
tests/__init__.py ADDED
File without changes
tests/conftest.py ADDED
File without changes