diff --git a/.env.template b/.env.template index e6fee1b0fabb682858efe57c1a6e46991b4b2097..e0439461902d5d9dfa687ffd92af5bb067062d16 100644 --- a/.env.template +++ b/.env.template @@ -1,41 +1,41 @@ # Copy this file and modify. Do not save or commit the secrets! -# Application Configuration -DEBUG=false - -# Logging Configuration -LOG_LEVEL=INFO -LOG_FORMAT=text - # APScheduler Configuration SCHEDULER_TIMEZONE=UTC -# API Configuration -API_HOST=0.0.0.0 -API_PORT=8000 - # Slack Configuration SLACK_BOT_TOKEN=šŸŖ™ -SLACK_SIGNING_SECRET=šŸ” SLACK_APP_TOKEN=🦄 # Vectorization Configuration EMBEDDING_MODEL=🌮 -VECTOR_DIMENSION=9001 -CHUNK_SIZE=42 -CHUNK_OVERLAP=37 -TOP_K_MATCHES=1 +VECTOR_DIMENSION=1536 +CHUNK_SIZE=1000 +CHUNK_OVERLAP=200 +TOP_K_MATCHES=5 # MongoDB Configuration MONGODB_URI=mongodb+srv://username:password@cluster.mongodb.net/database?retryWrites=true&w=majority MONGODB_NAME=ctp_slack_bot +SCORE_THRESHOLD=0.5 # Hugging Face Configuration HF_API_TOKEN=šŸ¤— # OpenAI Configuration OPENAI_API_KEY=😐 -CHAT_MODEL=šŸ™Š -MAX_TOKENS=42 -TEMPERATURE=0.5 +CHAT_MODEL=gpt-3.5-turbo +MAX_TOKENS=150 +TEMPERATURE=0.8 SYSTEM_PROMPT="You are a helpful teaching assistant for a data science class.\nBased on the students question, you will be given context retreived from class transcripts and materials to answer their question.\nYour responses should be:\n\n1. Accurate and based on the class content\n2. Clear and educational\n3. Concise but complete\nIf you're unsure about something, acknowledge it and suggest asking the professor." 
+ +# Google Drive Configuration +GOOGLE_DRIVE_ROOT_ID=1NB91EcIUXbOVcdCkXOAHdmWrDfgoh9fQ +GOOGLE_PROJECT_ID=insufferable-slacker-123456 +GOOGLE_PRIVATE_KEY_ID=1a2b3c4d5e6f748891091d21304e506674829507 +GOOGLE_PRIVATE_KEY="-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASC...\n-----END PRIVATE KEY-----\n" +GOOGLE_CLIENT_EMAIL=botty-bot@insufferable-slacker-123456.iam.gserviceaccount.com +GOOGLE_CLIENT_ID=123456789012345678901 + +# File Monitoring Configuration +FILE_MONITOR_ROOT_PATH=Transcripts/Friday diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000000000000000000000000000000000000..7177bcd0108116be441fda13f9e87df0a077ddfe --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,19 @@ +name: Sync to Hugging Face hub +on: + push: + branches: [main] + # to run this workflow manually from the Actions tab + workflow_dispatch: + +jobs: + sync-to-hub: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + lfs: true + - name: Push to hub + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + run: git push https://KingZack:$HF_TOKEN@huggingface.co/spaces/KingZack/ctp-slack-bot main diff --git a/Dockerfile b/Dockerfile index 6804720399be1e78d2077239b0fac21bd9ea4695..a849deafa1d8dab4637f5c00f144ebefe60f837d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,7 +5,7 @@ WORKDIR /app # Set environment variables. ENV PYTHONDONTWRITEBYTECODE=1 \ PYTHONUNBUFFERED=1 \ - PYTHONPATH=/app + PYTHONPATH=/app/src # Install system dependencies. RUN apt-get update \ @@ -25,5 +25,8 @@ RUN pip install --no-cache-dir . RUN useradd -m appuser USER appuser +# Expose a volume mount for logs ― Hugging Face Spaces requires specifically /data. +VOLUME /data + # Run the application. 
-CMD ["uvicorn", "src.ctp_slack_bot.api.main:app", "--host", "0.0.0.0", "--port", "8000"] +CMD ["python", "-m", "ctp_slack_bot.app"] diff --git a/README.MD b/README.md similarity index 56% rename from README.MD rename to README.md index 19f1b195c1da49165e266f647dfb1f33532ea7af..f1b0972230ddf20c6c9c9bc12e46fd90e2f0835f 100644 --- a/README.MD +++ b/README.md @@ -1,42 +1,22 @@ +--- +title: CTP Slack Bot +emoji: 🦄 +colorFrom: red +colorTo: green +sdk: docker +pinned: false +license: mit +short_description: Spring 2025 CTP Slack Bot RAG system +--- + + # CTP Slack Bot ## _Modus Operandi_ in a Nutshell -* Intelligently responds to Slack messages based on a repository of data. +* Intelligently responds to Slack messages (when mentioned) based on a repository of data. * Periodically checks for new content to add to its repository. -## Tech Stack - -* Hugging Face Spaces for hosting and serverless API -* Google Drive for reference data (i.e., the material to be incorporated into the bot’s knowledge base) -* MongoDB for data persistence -* Docker for containerization -* Python - * FastAPI for serving HTTP requests - * httpx for making HTTP requests - * APScheduler for running periodic tasks in the background - * See `pyproject.toml` for additional Python packages. - -## General Project Structure - -* `src/` - * `ctp_slack_bot/` - * `api/`: FastAPI application structure - * `routes.py`: API endpoint definitions - * `core/`: fundamental components like configuration (using pydantic), logging setup (loguru), and custom exceptions - * `db/`: database connection - * `repositories/`: repository pattern implementation - * `models/`: Pydantic models for data validation and serialization - * `services/`: business logic - * `tasks/`: background scheduled jobs - * `utils/`: reusable utilities -* `tests/`: unit tests -* `scripts/`: utility scripts for development, deployment, etc.
- * `run-dev.sh`: script to run the application locally -* `notebooks/`: Jupyter notebooks for exploration and model development -* `.env`: local environment variables for development purposes (to be created for local use only from `.env.template`) -* `Dockerfile`: Docker container build definition - ## How to Run the Application ### Normally @@ -52,7 +32,7 @@ docker build . -t ctp-slack-bot Run it with: ```sh -docker run --env-file=.env -p 8000:8000 --name my-ctp-slack-bot-instance ctp-slack-bot +docker run --volume ./logs:/app/logs/ --env-file=.env -p 8000:8000 --name my-ctp-slack-bot-instance ctp-slack-bot ``` ### For Development @@ -73,13 +53,45 @@ If `localhost` port `8000` is free, running the following will make the applicat scripts/run-dev.sh ``` -You can check that it’s reachable by visiting [http://localhost:8000/health](http://localhost:8000/health). +## Tech Stack -```text -$ curl http://localhost:8000/health -{"status":"healthy"} -``` +* Hugging Face Spaces for hosting +* OpenAI for embeddings and language models +* Google Drive for reference data (i.e., the material to be incorporated into the bot’s knowledge base) +* MongoDB for data persistence +* Docker for containerization +* Python + * Slack Bolt client for interfacing with Slack + * See `pyproject.toml` for additional Python packages. -In debug mode (`DEBUG=true`), [http://localhost:8000/api/v1/env](http://localhost:8000/api/v1/env) will pretty-print the non-sensitive environment variables as JSON. +## General Project Structure -Uvicorn will restart the application automatically when any source files are changed.
+* `src/` + * `ctp_slack_bot/` + * `core/`: fundamental components like configuration (using pydantic), logging setup (loguru), and custom exceptions + * `db/`: database connection + * `repositories/`: repository pattern implementation + * `models/`: Pydantic models for data validation and serialization + * `services/`: business logic + * `answer_retrieval_service.py`: obtains an answer to a question from a language model using relevant context + * `content_ingestion_service.py`: converts content into chunks and stores them into the database + * `context_retrieval_service.py`: queries for relevant context from the database to answer a question + * `embeddings_model_service.py`: converts text to embeddings + * `event_brokerage_service.py`: brokers events between decoupled components + * `language_model_service.py`: answers questions using relevant context + * `question_dispatch_service.py`: listens for questions and retrieves relevant context to get answers + * `schedule_service.py`: runs background jobs + * `slack_service.py`: handles events from Slack and sends back responses + * `vector_database_service.py`: stores and queries chunks + * `vectorization_service.py`: converts chunks into chunks with embeddings + * `tasks/`: background scheduled jobs + * `utils/`: reusable utilities + * `app.py`: application entry point + * `containers.py`: the dependency injection container +* `tests/`: unit tests +* `scripts/`: utility scripts for development, deployment, etc. 
+ * `run-dev.sh`: script to run the application locally +* `notebooks/`: Jupyter notebooks for exploration and model development +* `.env`: local environment variables for development purposes (to be created for local use only from `.env.template`) +* `Dockerfile`: Docker container build definition +* `pyproject.toml`: project definition and dependencies diff --git a/notebooks/container.ipynb b/notebooks/container.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..a68fe1921a6916f67f8c2ba0f02d25d6954602df --- /dev/null +++ b/notebooks/container.ipynb @@ -0,0 +1,102 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Loading Dependency Injection Container in Jupyter Notebook" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from ctp_slack_bot.containers import Container\n", + "from ctp_slack_bot.services import VectorDatabaseService\n", + "\n", + "container = Container()\n", + "container.wire(packages=['ctp_slack_bot'])" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-19 16:43:46.927\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.core.config\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m14\u001b[0m - \u001b[34m\u001b[1mCreated Settings\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "Settings(LOG_LEVEL='INFO', LOG_FORMAT='json', SCHEDULER_TIMEZONE='America/New_York', SLACK_BOT_TOKEN=SecretStr('**********'), SLACK_APP_TOKEN=SecretStr('**********'), EMBEDDING_MODEL='text-embedding-3-small', VECTOR_DIMENSION=1536, CHUNK_SIZE=1000, CHUNK_OVERLAP=200, TOP_K_MATCHES=5, MONGODB_URI=SecretStr('**********'), MONGODB_NAME='ctp_slack_bot', SCORE_THRESHOLD=0.5, HF_API_TOKEN=SecretStr('**********'), OPENAI_API_KEY=SecretStr('**********'), CHAT_MODEL='gpt-3.5-turbo', MAX_TOKENS=150, TEMPERATURE=0.8, 
SYSTEM_PROMPT=\"You are a helpful teaching assistant for a data science class.\\nBased on the students question, you will be given context retreived from class transcripts and materials to answer their question.\\nYour responses should be:\\n\\n1. Accurate and based on the class content\\n2. Clear and educational\\n3. Concise but complete\\nIf you're unsure about something, acknowledge it and suggest asking the professor.\", GOOGLE_PROJECT_ID='voltaic-reducer-294821', GOOGLE_PRIVATE_KEY_ID=SecretStr('**********'), GOOGLE_PRIVATE_KEY=SecretStr('**********'), GOOGLE_CLIENT_ID='102943207835073856980', GOOGLE_CLIENT_EMAIL='ctp-slack-bot-714@voltaic-reducer-294821.iam.gserviceaccount.com', GOOGLE_AUTH_URI='https://accounts.google.com/o/oauth2/auth', GOOGLE_TOKEN_URI='https://oauth2.googleapis.com/token', GOOGLE_AUTH_PROVIDER_CERT_URL='https://www.googleapis.com/oauth2/v1/certs', GOOGLE_CLIENT_CERT_URL='https://www.googleapis.com/robot/v1/metadata/x509/ctp-slack-bot-714%40voltaic-reducer-294821.iam.gserviceaccount.com', GOOGLE_UNIVERSE_DOMAIN='googleapis.com', FILE_MONITOR_ROOT_PATH='Transcripts/Friday Building AI Applications Session')" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "container.settings()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-19 16:45:25.997\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.core.config\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m14\u001b[0m - \u001b[34m\u001b[1mCreated Settings\u001b[0m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-19 16:45:25.999\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36minit\u001b[0m:\u001b[36m175\u001b[0m - \u001b[1mInitializing MongoDB connection for database: ctp_slack_bot\u001b[0m\n", + 
"\u001b[32m2025-04-19 16:45:25.999\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m26\u001b[0m - \u001b[34m\u001b[1mCreated MongoDB\u001b[0m\n", + "\u001b[32m2025-04-19 16:45:25.999\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mconnect\u001b[0m:\u001b[36m32\u001b[0m - \u001b[34m\u001b[1mConnecting to MongoDB using URI: mongodb+srv://ctp-slack-bot.xkipuvm.mongodb.net/?retryWrites=true&w=majority&appName=ctp-slack-bot\u001b[0m\n", + "\u001b[32m2025-04-19 16:45:26.000\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mconnect\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mMongoDB client initialized for database: ctp_slack_bot\u001b[0m\n", + "\u001b[32m2025-04-19 16:45:26.279\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n", + "\u001b[32m2025-04-19 16:45:26.280\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36m_test_connection\u001b[0m:\u001b[36m186\u001b[0m - \u001b[1mMongoDB connection test successful!\u001b[0m\n", + "\u001b[32m2025-04-19 16:45:26.280\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m21\u001b[0m - \u001b[34m\u001b[1mCreated VectorDatabaseService\u001b[0m\n" + ] + } + ], + "source": [ + "vector_database_service: VectorDatabaseService = container.vector_database_service()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + 
"nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/google_drive.ipynb b/notebooks/google_drive.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..e838e98e241056ab923b5b1a8ed6ea6dc2c2f85a --- /dev/null +++ b/notebooks/google_drive.ipynb @@ -0,0 +1,6095 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# `GoogleDriveService` Testing Notebook" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-19 18:17:19.845\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.core.config\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m14\u001b[0m - \u001b[34m\u001b[1mCreated Settings\u001b[0m\n" + ] + } + ], + "source": [ + "from functools import partial\n", + "from html import escape\n", + "from IPython.display import display_html\n", + "from itertools import chain\n", + "from textwrap import wrap\n", + "\n", + "from ctp_slack_bot.containers import Container\n", + "\n", + "display_html = partial(display_html, raw=True)\n", + "\n", + "container = Container()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Get a `GoogleDriveService` singleton instance from the dependency injection container." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-19 18:17:19.850\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.core.config\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m14\u001b[0m - \u001b[34m\u001b[1mCreated Settings\u001b[0m\n", + "\u001b[32m2025-04-19 18:17:19.853\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.google_drive_service\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m42\u001b[0m - \u001b[34m\u001b[1mCreated GoogleDriveService\u001b[0m\n" + ] + } + ], + "source": [ + "google_drive_service = container.google_drive_service()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "item_metadata = google_drive_service.list_directory(\"\")\n", + "\n", + "display_html(\"\".join(chain(\"\")))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Week-03-Analytics-Friday-2024-09-13.cc.vtt

MIME type: text/vtt

Last modified: 2025-02-07 02:33:49+00:00

" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
WEBVTT\n",
+       "\n",
+       "\n",
+       "00:00:03.615 --> 00:00:09.615\n",
+       "Did the closed captions just turn on for everybody or just me, because I did turn them on. You can turn them off if you want.\n",
+       "\n",
+       "00:00:14.017 --> 00:00:15.017\n",
+       "Now I don't know.\n",
+       "\n",
+       "00:00:15.017 --> 00:00:16.017\n",
+       "This! Hold on.\n",
+       "\n",
+       "00:00:16.460 --> 00:00:17.460\n",
+       "I can't like.\n",
+       "\n",
+       "00:00:19.218 --> 00:00:20.218\n",
+       "Surprising to me.\n",
+       "\n",
+       "00:00:20.766 --> 00:00:22.766\n",
+       "Sometimes zoom still kind of sucks.\n",
+       "\n",
+       "00:00:23.915 --> 00:00:24.915\n",
+       "Alright, share, screen.\n",
+       "\n",
+       "00:00:25.216 --> 00:00:29.216\n",
+       "So yeah, going back to these Andrews is really good and sweet.\n",
+       "\n",
+       "00:00:29.264 --> 00:00:30.264\n",
+       "And like concise.\n",
+       "\n",
+       "00:00:30.863 --> 00:00:32.863\n",
+       "Hashtags, tagging people.\n",
+       "\n",
+       "00:00:33.015 --> 00:00:35.015\n",
+       "Like it popped up on my phone. I was like.\n",
+       "\n",
+       "00:00:34.665 --> 00:00:36.665\n",
+       "This person posted about you.\n",
+       "\n",
+       "00:00:37.315 --> 00:00:39.315\n",
+       "I like a lot of them.\n",
+       "\n",
+       "00:00:40.360 --> 00:00:41.360\n",
+       "Tony's.\n",
+       "\n",
+       "00:00:41.415 --> 00:00:42.415\n",
+       "You can't.\n",
+       "\n",
+       "00:00:42.560 --> 00:00:44.560\n",
+       "Repost a post that doesn't count.\n",
+       "\n",
+       "00:00:45.715 --> 00:00:46.715\n",
+       "I want you slide this time because it.\n",
+       "\n",
+       "00:00:46.416 --> 00:00:50.416\n",
+       "Hopefully should have been clear. But you cannot just like repost the post which.\n",
+       "\n",
+       "00:00:50.515 --> 00:00:52.515\n",
+       "90% sure. This is.\n",
+       "\n",
+       "00:00:53.215 --> 00:00:55.215\n",
+       "So that doesn't count.\n",
+       "\n",
+       "00:00:56.061 --> 00:01:01.061\n",
+       "But the person I like the post I like the most playing favorites is Aiana.\n",
+       "\n",
+       "00:01:02.215 --> 00:01:04.215\n",
+       "Why did I like it so much.\n",
+       "\n",
+       "00:01:05.016 --> 00:01:08.016\n",
+       "Part 2. It was like just real\n",
+       "\n",
+       "00:01:11.416 --> 00:01:15.416\n",
+       "You talked about like your search? You did, and I was like, I relate to this.\n",
+       "\n",
+       "00:01:16.563 --> 00:01:17.563\n",
+       "And then you were like.\n",
+       "\n",
+       "00:01:18.660 --> 00:01:20.660\n",
+       "It was just like real\n",
+       "\n",
+       "00:01:21.463 --> 00:01:25.463\n",
+       "You like looked in the data. And I was like, Oh, like they really looked at this. This is like interesting.\n",
+       "\n",
+       "00:01:26.162 --> 00:01:29.162\n",
+       "I would. The only thing I would have done was like.\n",
+       "\n",
+       "00:01:30.515 --> 00:01:35.515\n",
+       "Tag Kaggle, maybe tag some people. The people that made the data set that would have been great.\n",
+       "\n",
+       "00:01:36.264 --> 00:01:38.264\n",
+       "Who made the data set.\n",
+       "\n",
+       "00:01:39.363 --> 00:01:41.363\n",
+       "Public domain.\n",
+       "\n",
+       "00:01:45.215 --> 00:01:47.215\n",
+       "I guess they don't have. Who made the data set.\n",
+       "\n",
+       "00:01:48.415 --> 00:01:49.415\n",
+       "Call metadata.\n",
+       "\n",
+       "00:01:48.861 --> 00:01:50.861\n",
+       "Expand, all.\n",
+       "\n",
+       "00:01:57.415 --> 00:01:59.415\n",
+       "Okay. Well, I guess I don't have that.\n",
+       "\n",
+       "00:02:00.561 --> 00:02:02.561\n",
+       "But like I I just liked it. It was really.\n",
+       "\n",
+       "00:02:02.062 --> 00:02:04.062\n",
+       "It was really well.\n",
+       "\n",
+       "00:02:04.261 --> 00:02:07.261\n",
+       "Done in like meaningful and like it looked really.\n",
+       "\n",
+       "00:02:07.461 --> 00:02:10.461\n",
+       "Somebody did. They were like.\n",
+       "\n",
+       "00:02:11.316 --> 00:02:15.316\n",
+       "They used. They had their thoughts, and then they used to do it, and I like.\n",
+       "\n",
+       "00:02:14.915 --> 00:02:16.915\n",
+       "Was reading it, and I was like.\n",
+       "\n",
+       "00:02:17.462 --> 00:02:25.462\n",
+       "This is very chat. Gbt to me. And I talk to them. And they're like, Well, yeah, like, I didn't use it just to do it like. I gave it my thoughts, and it gave me this thing, and I was like.\n",
+       "\n",
+       "00:02:25.216 --> 00:02:30.216\n",
+       "Yeah, I noticed it. You guys should not do that like, yes.\n",
+       "\n",
+       "00:02:29.863 --> 00:02:33.863\n",
+       "It is. Gonna make it quote unquote, like.\n",
+       "\n",
+       "00:02:34.115 --> 00:02:37.115\n",
+       "Better, or like more informational, but like.\n",
+       "\n",
+       "00:02:37.163 --> 00:02:38.163\n",
+       "That's not better.\n",
+       "\n",
+       "00:02:38.363 --> 00:02:42.363\n",
+       "Right like it sat. It looked and authentic to me, and I was like.\n",
+       "\n",
+       "00:02:43.316 --> 00:02:48.316\n",
+       "You don't get hired on that like that person's just gonna be like, oh, they're using chat. Gbt, for this like.\n",
+       "\n",
+       "00:02:48.263 --> 00:02:49.263\n",
+       "It doesn't.\n",
+       "\n",
+       "00:02:49.467 --> 00:02:52.467\n",
+       "Give them any any incentive to hire you.\n",
+       "\n",
+       "00:02:51.565 --> 00:02:53.565\n",
+       "So.\n",
+       "\n",
+       "00:02:54.166 --> 00:02:55.166\n",
+       "Cool\n",
+       "\n",
+       "00:02:58.716 --> 00:03:04.716\n",
+       "Who here would had a problem, or would like to go over like wants to say something about the homework.\n",
+       "\n",
+       "00:03:05.617 --> 00:03:11.617\n",
+       "If not, I'm gonna randomly pick somebody so somebody can speak up 1st or.\n",
+       "\n",
+       "00:03:13.217 --> 00:03:14.217\n",
+       "5.\n",
+       "\n",
+       "00:03:15.362 --> 00:03:16.362\n",
+       "4, 3.\n",
+       "\n",
+       "00:03:17.215 --> 00:03:22.215\n",
+       "2 1. Alright! We are going to start with the people.\n",
+       "\n",
+       "00:03:22.463 --> 00:03:25.463\n",
+       "Don't have their cameras on that. Didn't.\n",
+       "\n",
+       "00:03:24.517 --> 00:03:26.517\n",
+       "Request.\n",
+       "\n",
+       "00:03:27.063 --> 00:03:28.063\n",
+       "To\n",
+       "\n",
+       "00:03:28.261 --> 00:03:31.261\n",
+       "Have their camera off. So Robert.\n",
+       "\n",
+       "00:03:34.716 --> 00:03:38.716\n",
+       "What did you have? What problem did you have with the homework? What did you think.\n",
+       "\n",
+       "00:03:39.016 --> 00:03:47.016\n",
+       "I found a homework to be rather tedious in a way, when it comes to renaming all of the column headers.\n",
+       "\n",
+       "00:03:47.066 --> 00:03:48.066\n",
+       "And.\n",
+       "\n",
+       "00:03:47.859 --> 00:03:49.859\n",
+       "Yeah, that was annoying.\n",
+       "\n",
+       "00:03:49.361 --> 00:03:52.361\n",
+       "Yeah, and definitely, there are different approaches to.\n",
+       "\n",
+       "00:03:53.664 --> 00:03:54.664\n",
+       "Solving that, and.\n",
+       "\n",
+       "00:03:54.015 --> 00:03:57.015\n",
+       "Mine was probably way too.\n",
+       "\n",
+       "00:03:56.765 --> 00:04:01.765\n",
+       "Sophisticated and time consuming compared to other ways to solve that.\n",
+       "\n",
+       "00:04:00.816 --> 00:04:04.816\n",
+       "How did you do it? What was sophisticated about what you did.\n",
+       "\n",
+       "00:04:04.117 --> 00:04:09.117\n",
+       "Essentially making a dictionary, instead of making it as a list.\n",
+       "\n",
+       "00:04:10.762 --> 00:04:13.762\n",
+       "This. This is the proper way to do it. This is how we expected you to do it.\n",
+       "\n",
+       "00:04:15.363 --> 00:04:19.363\n",
+       "You could have done it the other way. But the only problem, if you just do it as a list.\n",
+       "\n",
+       "00:04:19.961 --> 00:04:21.961\n",
+       "If your list is like.\n",
+       "\n",
+       "00:04:21.516 --> 00:04:23.516\n",
+       "Any way out of order.\n",
+       "\n",
+       "00:04:23.660 --> 00:04:31.660\n",
+       "It'll just reorder the columns in the wrong way. This you ensure that this column, even if you put the dictionary well, dictionaries are unordered.\n",
+       "\n",
+       "00:04:31.916 --> 00:04:35.916\n",
+       "But, like it'll make sure that this column is renamed. This.\n",
+       "\n",
+       "00:04:35.959 --> 00:04:38.959\n",
+       "And if you do it as a list, if this one.\n",
+       "\n",
+       "00:04:38.867 --> 00:04:44.867\n",
+       "Is swapped, the other one that those will get messed up and everything below it could get messed up, or after it.\n",
+       "\n",
+       "00:04:46.164 --> 00:04:47.164\n",
+       "So.\n",
+       "\n",
+       "00:04:46.562 --> 00:04:58.562\n",
+       "Right. And the other thing I wanted to mention with that section was, I had to include a variable in that dictionary other, since one of the column names was having double quotations.\n",
+       "\n",
+       "00:04:58.459 --> 00:05:00.459\n",
+       "And it was.\n",
+       "\n",
+       "00:05:00.461 --> 00:05:01.461\n",
+       "It was split into 2.\n",
+       "\n",
+       "00:05:01.118 --> 00:05:02.118\n",
+       "Yeah.\n",
+       "\n",
+       "00:05:02.059 --> 00:05:05.059\n",
+       "And because of that, it was.\n",
+       "\n",
+       "00:05:05.816 --> 00:05:10.816\n",
+       "Just requiring a lot of research how to work with double quotations in double quotations.\n",
+       "\n",
+       "00:05:10.615 --> 00:05:14.615\n",
+       "Yeah. There is the quote, character,\n",
+       "\n",
+       "00:05:15.361 --> 00:05:21.361\n",
+       "Parameter that we talked about last class. And and you can add that into your when you read the Csv. Andrew.\n",
+       "\n",
+       "00:05:21.064 --> 00:05:29.064\n",
+       "Yeah, I ran into the same exact issue like with that with that column header, like, if other please, indicate currency here.\n",
+       "\n",
+       "00:05:32.115 --> 00:05:35.115\n",
+       "Okay, so, if other currency.\n",
+       "\n",
+       "00:05:35.659 --> 00:05:39.659\n",
+       "I see they're mostly NGN, that's interesting.\n",
+       "\n",
+       "00:05:42.462 --> 00:05:43.462\n",
+       "If other please.\n",
+       "\n",
+       "00:05:43.059 --> 00:05:46.059\n",
+       "So I guess there was quotes in this column.\n",
+       "\n",
+       "00:05:46.115 --> 00:05:50.115\n",
+       "Yeah, so like, other has like double quotations.\n",
+       "\n",
+       "00:05:49.717 --> 00:05:50.717\n",
+       "Yeah.\n",
+       "\n",
+       "00:05:50.968 --> 00:05:51.968\n",
+       "Around.\n",
+       "\n",
+       "00:05:51.562 --> 00:05:53.562\n",
+       "You add quote, car equals.\n",
+       "\n",
+       "00:05:53.562 --> 00:05:54.562\n",
+       "Double quotes.\n",
+       "\n",
+       "00:05:53.659 --> 00:05:55.659\n",
+       "And it should fix that.\n",
+       "\n",
+       "00:05:59.465 --> 00:06:00.465\n",
+       "Okay.\n",
+       "\n",
+       "00:06:02.015 --> 00:06:05.015\n",
+       "Did you guys request? Oh, Robert.\n",
+       "\n",
+       "00:06:05.216 --> 00:06:12.216\n",
+       "Okay, sorry, Robert. I did call you out, and you did request it. I just didn't. I don't really read the chat. So that's my bad.\n",
+       "\n",
+       "00:06:14.165 --> 00:06:19.165\n",
+       "However. Oh, Akbar! Oh, my God! Yes, okay. Where is Akbar?\n",
+       "\n",
+       "00:06:21.060 --> 00:06:25.060\n",
+       "Act bar every so act bar is an alum.\n",
+       "\n",
+       "00:06:25.216 --> 00:06:28.216\n",
+       "And I was like we were just like chatting the other day.\n",
+       "\n",
+       "00:06:28.160 --> 00:06:34.160\n",
+       "And I was like, I want you to come in and spit some hot fire, and he agreed to take some time out of his day, just to kind of.\n",
+       "\n",
+       "00:06:36.016 --> 00:06:41.016\n",
+       "Tell? Yeah, I did, just to fire you guys up. He's a very successful\n",
+       "\n",
+       "00:06:41.159 --> 00:06:43.159\n",
+       "He was in our 3rd cohort. He worked at Google.\n",
+       "\n",
+       "00:06:42.716 --> 00:06:44.716\n",
+       "And then even.\n",
+       "\n",
+       "00:06:44.559 --> 00:06:48.559\n",
+       "Then, the other day I was like meeting with the head.\n",
+       "\n",
+       "00:06:49.160 --> 00:06:51.160\n",
+       "Of like the people that fund us.\n",
+       "\n",
+       "00:06:50.960 --> 00:06:55.960\n",
+       "And we're just like talking because they're a cool person. He's like, Oh, I have to end our meeting like.\n",
+       "\n",
+       "00:06:56.216 --> 00:06:59.216\n",
+       "Not early, but like on time, because I'm gonna go speak with Akbar.\n",
+       "\n",
+       "00:06:58.862 --> 00:07:00.862\n",
+       "Akbar\n",
+       "\n",
+       "00:07:01.566 --> 00:07:03.566\n",
+       "Well. He pronounced her name Mirza.\n",
+       "\n",
+       "00:07:04.415 --> 00:07:06.415\n",
+       "And I was like, you know, he was like.\n",
+       "\n",
+       "00:07:05.816 --> 00:07:07.816\n",
+       "You're talking about Brendan.\n",
+       "\n",
+       "00:07:06.516 --> 00:07:07.516\n",
+       "I was just like.\n",
+       "\n",
+       "00:07:08.815 --> 00:07:10.815\n",
+       "Yeah. Brendan, this is like a while.\n",
+       "\n",
+       "00:07:09.361 --> 00:07:12.361\n",
+       "Yeah, this. This was a while ago, though. But.\n",
+       "\n",
+       "00:07:10.761 --> 00:07:14.761\n",
+       "This is a while ago. But like, yeah, I don't talk with him that often, but I was just like.\n",
+       "\n",
+       "00:07:15.215 --> 00:07:17.215\n",
+       "Why, and he's like hang out.\n",
+       "\n",
+       "00:07:15.766 --> 00:07:20.766\n",
+       "I I think it was serendipitous that time, like we just both.\n",
+       "\n",
+       "00:07:17.816 --> 00:07:22.816\n",
+       "It will clearly, but what I wanted to emphasize just everywhere.\n",
+       "\n",
+       "00:07:23.462 --> 00:07:26.462\n",
+       "And that's how you gotta be a hustler like he's here right now.\n",
+       "\n",
+       "00:07:26.664 --> 00:07:29.664\n",
+       "He doesn't have to be like this isn't really doing.\n",
+       "\n",
+       "00:07:29.215 --> 00:07:35.215\n",
+       "Anything for him like right now, but, like it kind of is, it keeps them relevant and like.\n",
+       "\n",
+       "00:07:34.805 --> 00:07:37.805\n",
+       "Mentality. You like to go, Akbar.\n",
+       "\n",
+       "00:07:39.462 --> 00:07:43.462\n",
+       "The floor is yours. You got like, however long you want under 15 min, or maybe like.\n",
+       "\n",
+       "00:07:43.164 --> 00:07:51.164\n",
+       "I mean, I'm I'm honestly not really sure what to say. Like, you've kind of like brought this up way way too much. And I was literally just gonna say, Hi.\n",
+       "\n",
+       "00:07:51.115 --> 00:07:56.115\n",
+       "But I mean, I think the thing that Zach is trying to get at is this is a program that's.\n",
+       "\n",
+       "00:07:56.416 --> 00:08:05.416\n",
+       "It's not something that comes across often, and it's like really rare, and shows a lot of effort and initiative on your end that you're continuing to do this right cause. It's.\n",
+       "\n",
+       "00:08:05.467 --> 00:08:09.467\n",
+       "You're doing this outside of your your regular responsibilities. Right? You're you're all going to class.\n",
+       "\n",
+       "00:08:08.715 --> 00:08:11.715\n",
+       "You're probably doing that full time.\n",
+       "\n",
+       "00:08:11.716 --> 00:08:15.716\n",
+       "And may or may or may not be working another job like.\n",
+       "\n",
+       "00:08:15.815 --> 00:08:19.815\n",
+       "When I was in college I was taking a full course doing Cdp.\n",
+       "\n",
+       "00:08:20.415 --> 00:08:25.415\n",
+       "And then in the middle of either working, and then also doing like any additional like.\n",
+       "\n",
+       "00:08:26.315 --> 00:08:28.315\n",
+       "Programs like interview prep programs, or like.\n",
+       "\n",
+       "00:08:28.816 --> 00:08:32.816\n",
+       "Taing. I was a ta for code path. If any of you are familiar with that.\n",
+       "\n",
+       "00:08:33.015 --> 00:08:35.015\n",
+       "Teaching ios\n",
+       "\n",
+       "00:08:34.716 --> 00:08:35.716\n",
+       "And.\n",
+       "\n",
+       "00:08:35.915 --> 00:08:37.915\n",
+       "That kind of.\n",
+       "\n",
+       "00:08:39.215 --> 00:08:41.215\n",
+       "That kind of commitment to be able to.\n",
+       "\n",
+       "00:08:41.560 --> 00:08:46.560\n",
+       "Think about the things that you really like and the the things that you really want to build.\n",
+       "\n",
+       "00:08:47.060 --> 00:08:53.060\n",
+       "Isn't something that comes around that often, but it's also something that's kind of like uniquely to where you are in your life right now.\n",
+       "\n",
+       "00:08:53.215 --> 00:08:55.215\n",
+       "And so like it's.\n",
+       "\n",
+       "00:08:55.615 --> 00:08:58.615\n",
+       "Something where you can really set yourself up for success.\n",
+       "\n",
+       "00:08:59.360 --> 00:09:00.360\n",
+       "But.\n",
+       "\n",
+       "00:09:01.062 --> 00:09:02.062\n",
+       "Daniel.\n",
+       "\n",
+       "00:09:02.961 --> 00:09:04.961\n",
+       "Something. You can really set yourself.\n",
+       "\n",
+       "00:09:05.060 --> 00:09:16.060\n",
+       "For success by kind of taking advantage of the fact of all the resources that you have around you, and the fact that your students, so you can give the excuse that like, oh, I'm still learning, so I can kind of screw up. I can. You know I can mess up.\n",
+       "\n",
+       "00:09:16.215 --> 00:09:18.215\n",
+       "As long as I've taken I'm taking something from it.\n",
+       "\n",
+       "00:09:18.558 --> 00:09:20.558\n",
+       "And like, really, just.\n",
+       "\n",
+       "00:09:21.166 --> 00:09:25.166\n",
+       "Using that excuse as long as you can. Right? Because the the more experimentation and the more.\n",
+       "\n",
+       "00:09:25.615 --> 00:09:29.615\n",
+       "Learning that you do. The more failing that you do the better off you'll be in the long term.\n",
+       "\n",
+       "00:09:29.315 --> 00:09:31.315\n",
+       "And I know that it's like.\n",
+       "\n",
+       "00:09:31.867 --> 00:09:35.867\n",
+       "You might have like a fear of failing, and I certainly still do.\n",
+       "\n",
+       "00:09:36.115 --> 00:09:42.115\n",
+       "But it's also the thing that kind of like every time that I have filled has led me to.\n",
+       "\n",
+       "00:09:41.861 --> 00:09:47.861\n",
+       "Figure out what I could do better, or what I was doing, what I was doing wrong entirely.\n",
+       "\n",
+       "00:09:48.066 --> 00:09:50.066\n",
+       "My heart was just wasn't in the right place.\n",
+       "\n",
+       "00:09:50.715 --> 00:09:54.715\n",
+       "And so it kind of redirected me to the things that I enjoyed more.\n",
+       "\n",
+       "00:09:54.715 --> 00:09:58.715\n",
+       "Part of the reason that Zack and I were talking was because, like one of the things that.\n",
+       "\n",
+       "00:09:58.160 --> 00:10:02.160\n",
+       "I didn't get to have that. You guys are really lucky to have.\n",
+       "\n",
+       "00:10:02.415 --> 00:10:04.415\n",
+       "Is this data science track.\n",
+       "\n",
+       "00:10:04.115 --> 00:10:06.115\n",
+       "And this AI track.\n",
+       "\n",
+       "00:10:06.763 --> 00:10:10.763\n",
+       "Especially with the way that the tech industry is changing so far.\n",
+       "\n",
+       "00:10:10.515 --> 00:10:20.515\n",
+       "And the way so that, like paradigms for software engineering are changing like, it'll be really important to be like, you don't have to become an AI engineer, but like it's important to understand.\n",
+       "\n",
+       "00:10:20.315 --> 00:10:23.315\n",
+       "How it works, and what it does to.\n",
+       "\n",
+       "00:10:24.515 --> 00:10:28.515\n",
+       "How people are thinking about it, because this is one of the the areas of interest where.\n",
+       "\n",
+       "00:10:29.715 --> 00:10:30.715\n",
+       "If you're.\n",
+       "\n",
+       "00:10:29.860 --> 00:10:35.860\n",
+       "Because it's because it's so new. You have a chance to become an expert, and you have a chance to become like somebody who's.\n",
+       "\n",
+       "00:10:36.860 --> 00:10:45.860\n",
+       "Proficient and thinking about this. But it's also something that's gonna carry you forward in your career in terms of like, okay, by having the skill set, you're able to be able to determine.\n",
+       "\n",
+       "00:10:45.166 --> 00:10:50.166\n",
+       "What you want to do, instead of trying to fit into a model of what everybody else wants you to be.\n",
+       "\n",
+       "00:10:50.215 --> 00:10:51.215\n",
+       "Right like.\n",
+       "\n",
+       "00:10:51.016 --> 00:10:58.016\n",
+       "The the thing that makes it really easy for you to pursue your own path and pursue your own careers by being.\n",
+       "\n",
+       "00:10:58.516 --> 00:11:00.516\n",
+       "Able to have a skill set, and the experiences.\n",
+       "\n",
+       "00:11:00.816 --> 00:11:03.816\n",
+       "To do things that are unique, that aren't like traditional.\n",
+       "\n",
+       "00:11:04.467 --> 00:11:05.467\n",
+       "And like, let that push you forward.\n",
+       "\n",
+       "00:11:05.167 --> 00:11:13.167\n",
+       "And that's not a knock on traditional career path, either. Like, if that's something you want to do like all this will only help you further and help you stand apart.\n",
+       "\n",
+       "00:11:13.216 --> 00:11:15.216\n",
+       "And push forward in your career.\n",
+       "\n",
+       "00:11:15.417 --> 00:11:16.417\n",
+       "So like.\n",
+       "\n",
+       "00:11:16.261 --> 00:11:22.261\n",
+       "It's really important to, especially like right now in your life, like it's it's important to like, explore as much as possible.\n",
+       "\n",
+       "00:11:22.715 --> 00:11:26.715\n",
+       "Because these opportunities become a lot rarer and a lot more.\n",
+       "\n",
+       "00:11:25.916 --> 00:11:28.916\n",
+       "It will require a lot more effort from you.\n",
+       "\n",
+       "00:11:29.017 --> 00:11:41.017\n",
+       "And the future. Not because you're getting any lazier, not because you're you don't have the same energy as before, but you're just gonna have so many other things that you want to kind of take care of like your health. Maybe family.\n",
+       "\n",
+       "00:11:41.316 --> 00:11:47.316\n",
+       "Relationships, etc. And so like, you're at a really crucial time right now to take advantage of it as as best as you can.\n",
+       "\n",
+       "00:11:47.616 --> 00:11:50.616\n",
+       "And you have, like a team of people that are here.\n",
+       "\n",
+       "00:11:51.115 --> 00:11:54.115\n",
+       "To support you right like you have the slack channel.\n",
+       "\n",
+       "00:11:54.367 --> 00:11:57.367\n",
+       "That you can always kind of post to ask for advice.\n",
+       "\n",
+       "00:11:57.462 --> 00:12:00.462\n",
+       "You have the career coaching team.\n",
+       "\n",
+       "00:12:00.059 --> 00:12:02.059\n",
+       "And you have Zack and.\n",
+       "\n",
+       "00:12:02.062 --> 00:12:06.062\n",
+       "I mean, like Zack is a pretty awesome instructor, like I don't know.\n",
+       "\n",
+       "00:12:06.861 --> 00:12:08.861\n",
+       "I don't know how many like.\n",
+       "\n",
+       "00:12:09.516 --> 00:12:12.516\n",
+       "How many kind of college courses you all had, but, like.\n",
+       "\n",
+       "00:12:11.860 --> 00:12:15.860\n",
+       "I think you can all agree that not many of your professors are like Zack.\n",
+       "\n",
+       "00:12:15.959 --> 00:12:17.959\n",
+       "Thank you very much. I try.\n",
+       "\n",
+       "00:12:17.760 --> 00:12:20.760\n",
+       "I try to be relatable and like.\n",
+       "\n",
+       "00:12:20.716 --> 00:12:25.716\n",
+       "At least like I've had classes where I've stoked, and the instructor just.\n",
+       "\n",
+       "00:12:25.867 --> 00:12:30.867\n",
+       "Turn me off. And I that's the most. That's what motivates me to do it like the way I do it.\n",
+       "\n",
+       "00:12:31.162 --> 00:12:34.162\n",
+       "But Akbar, like literally was in y'all shoes.\n",
+       "\n",
+       "00:12:34.260 --> 00:12:36.260\n",
+       "A while ago. So Akbar, like.\n",
+       "\n",
+       "00:12:35.864 --> 00:12:37.864\n",
+       "If you could go back.\n",
+       "\n",
+       "00:12:37.915 --> 00:12:38.915\n",
+       "In time, and like.\n",
+       "\n",
+       "00:12:39.615 --> 00:12:41.615\n",
+       "Whisper some things to you like. What.\n",
+       "\n",
+       "00:12:41.115 --> 00:12:43.115\n",
+       "Whispers, I'm like.\n",
+       "\n",
+       "00:12:43.516 --> 00:12:48.516\n",
+       "Things to you to your old self when you were going through the program like, what would they be like? What would you tell yourself like.\n",
+       "\n",
+       "00:12:49.564 --> 00:12:52.564\n",
+       "Like things like tips and things that you would.\n",
+       "\n",
+       "00:12:52.115 --> 00:12:54.115\n",
+       "I think in a lot of ways.\n",
+       "\n",
+       "00:12:54.660 --> 00:12:57.660\n",
+       "I was very focused on a very particular path.\n",
+       "\n",
+       "00:12:58.315 --> 00:13:00.315\n",
+       "When I had started college.\n",
+       "\n",
+       "00:12:59.660 --> 00:13:06.660\n",
+       "And I kind of went like I went through this kind of like phase where I was like, oh, I'm not really sure this is what I want to do. Like, I started off computer science.\n",
+       "\n",
+       "00:13:07.261 --> 00:13:10.261\n",
+       "And like something was missing, and I wasn't sure what it was.\n",
+       "\n",
+       "00:13:09.760 --> 00:13:14.760\n",
+       "And ended up going through this like really weird journey where I was out of school for 2 years.\n",
+       "\n",
+       "00:13:16.516 --> 00:13:19.516\n",
+       "Still trying to figure it out, but also just like, for other personal reasons.\n",
+       "\n",
+       "00:13:19.116 --> 00:13:27.116\n",
+       "And in that period, like was taking on odd jobs. I was like a technologyer at a college. I was working at a at a startup as a web developer like.\n",
+       "\n",
+       "00:13:27.860 --> 00:13:32.860\n",
+       "One who didn't know anything like one of the things that I'm still surprised and shocked about to this day is like.\n",
+       "\n",
+       "00:13:32.516 --> 00:13:34.516\n",
+       "There were a couple of times where.\n",
+       "\n",
+       "00:13:35.066 --> 00:13:39.066\n",
+       "I had no idea what I was doing in terms of like.\n",
+       "\n",
+       "00:13:39.360 --> 00:13:41.360\n",
+       "Web development. Ios, whatever.\n",
+       "\n",
+       "00:13:41.461 --> 00:13:45.461\n",
+       "And was just given like a shot by somebody.\n",
+       "\n",
+       "00:13:45.215 --> 00:13:48.215\n",
+       "And able to kind of like, prove myself just by like.\n",
+       "\n",
+       "00:13:48.415 --> 00:13:49.415\n",
+       "Trying to figure it out.\n",
+       "\n",
+       "00:13:49.016 --> 00:13:53.016\n",
+       "And failing miserably at it, but also like trying. You know that.\n",
+       "\n",
+       "00:13:53.459 --> 00:13:57.459\n",
+       "Being being like being the type of person that really didn't want to let anybody down.\n",
+       "\n",
+       "00:13:56.716 --> 00:13:59.716\n",
+       "And trying to take in.\n",
+       "\n",
+       "00:13:59.916 --> 00:14:01.916\n",
+       "Try to just like, do my best and.\n",
+       "\n",
+       "00:14:02.115 --> 00:14:03.115\n",
+       "Learn something from it.\n",
+       "\n",
+       "00:14:03.217 --> 00:14:04.217\n",
+       "That.\n",
+       "\n",
+       "00:14:05.316 --> 00:14:12.316\n",
+       "Really change the way that I looked at things in school and and like looked at the the way that I used like the Ctp program where.\n",
+       "\n",
+       "00:14:11.915 --> 00:14:13.915\n",
+       "I wasn't so much, and.\n",
+       "\n",
+       "00:14:13.960 --> 00:14:17.960\n",
+       "Don't. Don't say that I said this, but like I wasn't so much focused on like the content.\n",
+       "\n",
+       "00:14:18.017 --> 00:14:23.017\n",
+       "But like the networking and the resources that came from Ctp, right like, I was more interested in.\n",
+       "\n",
+       "00:14:22.916 --> 00:14:25.916\n",
+       "Going to the events I was more interested in doing like the.\n",
+       "\n",
+       "00:14:26.016 --> 00:14:30.016\n",
+       "Technical interview. Prep. Because that was the area that I felt I was the most lacking.\n",
+       "\n",
+       "00:14:30.662 --> 00:14:35.662\n",
+       "And like I would, I would urge, like all of you, to be very, very.\n",
+       "\n",
+       "00:14:36.161 --> 00:14:40.161\n",
+       "Intentional about the way that you approach things in terms of like.\n",
+       "\n",
+       "00:14:40.316 --> 00:14:44.316\n",
+       "Don't just follow a format just because somebody's telling you that's the format to follow.\n",
+       "\n",
+       "00:14:44.015 --> 00:14:46.015\n",
+       "Think about the things that.\n",
+       "\n",
+       "00:14:46.017 --> 00:14:51.017\n",
+       "You need to work on the most like. Think about the things where you struggle the most or the most nervous about.\n",
+       "\n",
+       "00:14:51.460 --> 00:14:54.460\n",
+       "And let that guide you to how you can kind of.\n",
+       "\n",
+       "00:14:54.464 --> 00:14:58.464\n",
+       "How you can kind of shore up your skill, set, and make yourself more well rounded.\n",
+       "\n",
+       "00:14:58.616 --> 00:15:00.616\n",
+       "And then the things that you're really good at.\n",
+       "\n",
+       "00:15:00.615 --> 00:15:04.615\n",
+       "Keep investing in that the things that you really enjoy keep investing in that cause. That's.\n",
+       "\n",
+       "00:15:04.116 --> 00:15:08.116\n",
+       "That's that's the difference between somebody who's like average and somebody who's exceptional.\n",
+       "\n",
+       "00:15:08.566 --> 00:15:13.566\n",
+       "Yeah, I wanna just really hit home like what stands out to me about Akbar is like.\n",
+       "\n",
+       "00:15:14.261 --> 00:15:17.261\n",
+       "Is like, like you said, like the networking and stuff, or just.\n",
+       "\n",
+       "00:15:18.916 --> 00:15:20.916\n",
+       "Just showing up right like.\n",
+       "\n",
+       "00:15:21.660 --> 00:15:24.660\n",
+       "He's got a good job like he didn't need to be a ta, but like.\n",
+       "\n",
+       "00:15:24.559 --> 00:15:29.559\n",
+       "You did it. I feel like as you wanted to. You wanted to be part of this network community like right now.\n",
+       "\n",
+       "00:15:30.415 --> 00:15:34.415\n",
+       "He doesn't have to be here. But he's like, I wanna be here like, yeah, like, I'll come kick it like.\n",
+       "\n",
+       "00:15:33.960 --> 00:15:37.960\n",
+       "Just like going to networks like doing these things like.\n",
+       "\n",
+       "00:15:38.160 --> 00:15:41.160\n",
+       "Just just immersing yourself in it.\n",
+       "\n",
+       "00:15:40.816 --> 00:15:43.816\n",
+       "You can be the best coder in the world.\n",
+       "\n",
+       "00:15:44.117 --> 00:15:48.117\n",
+       "But if and you don't know anyone, nobody knows that. What good is it.\n",
+       "\n",
+       "00:15:48.016 --> 00:15:55.016\n",
+       "So like, that's that was an exceptional skill I kind of want to point out about him. He's just like even like the just the other day with Brandon like.\n",
+       "\n",
+       "00:15:56.016 --> 00:16:02.016\n",
+       "I was just like it didn't surprise me that of all the people he was like, I'm meeting with Akbar, and I was like.\n",
+       "\n",
+       "00:16:02.116 --> 00:16:05.116\n",
+       "That's so random. But like, yeah, it tracks.\n",
+       "\n",
+       "00:16:05.716 --> 00:16:06.716\n",
+       "And\n",
+       "\n",
+       "00:16:07.516 --> 00:16:19.516\n",
+       "He was even, I think, actually he was even featured in our like end of year slide for like just sayings, I think you donated, and you said you quoted something really nice in the donation, and that made it into our like. Our final presentation, so like.\n",
+       "\n",
+       "00:16:19.116 --> 00:16:21.116\n",
+       "Things here and there.\n",
+       "\n",
+       "00:16:21.015 --> 00:16:24.015\n",
+       "You're making it seem like I bought my way here.\n",
+       "\n",
+       "00:16:23.017 --> 00:16:25.017\n",
+       "You did not.\n",
+       "\n",
+       "00:16:26.615 --> 00:16:32.615\n",
+       "No, yeah. I'm so stoked. You're here. Is there any final words of wisdom? Because we gotta get jumping with the lecture.\n",
+       "\n",
+       "00:16:34.859 --> 00:16:35.859\n",
+       "I mean, just.\n",
+       "\n",
+       "00:16:36.559 --> 00:16:40.559\n",
+       "Kind of like. The thing that I would also say is like, don't give yourself too much pressure.\n",
+       "\n",
+       "00:16:40.016 --> 00:16:41.016\n",
+       "Like.\n",
+       "\n",
+       "00:16:41.015 --> 00:16:45.015\n",
+       "That's 1 of the things. Even when I, when I was taing students like.\n",
+       "\n",
+       "00:16:45.615 --> 00:16:46.615\n",
+       "I think a lot of you all.\n",
+       "\n",
+       "00:16:46.365 --> 00:16:48.365\n",
+       "Put yourself under a lot of pressure.\n",
+       "\n",
+       "00:16:48.063 --> 00:16:53.063\n",
+       "And some of you might think that it makes you a better like. It makes you more productive that way.\n",
+       "\n",
+       "00:16:52.959 --> 00:16:56.959\n",
+       "But it only works for a limited amount of time, like.\n",
+       "\n",
+       "00:16:57.159 --> 00:17:01.159\n",
+       "Really think about the things that you want and the why you're doing this.\n",
+       "\n",
+       "00:17:01.364 --> 00:17:03.364\n",
+       "And it helps you tell a better story.\n",
+       "\n",
+       "00:17:02.658 --> 00:17:05.658\n",
+       "When you're talking about the work that you're done.\n",
+       "\n",
+       "00:17:06.816 --> 00:17:10.816\n",
+       "Right like when it comes to. You're kind of far away from this. But when it comes to like Demo Day and stuff like that.\n",
+       "\n",
+       "00:17:10.562 --> 00:17:15.562\n",
+       "Right, like the thing that the things that are gonna set you apart aren't necessarily gonna be your projects.\n",
+       "\n",
+       "00:17:15.316 --> 00:17:21.316\n",
+       "But how you talk about your projects, and how you're able to explain and get somebody to believe in.\n",
+       "\n",
+       "00:17:20.959 --> 00:17:25.959\n",
+       "The work that you're doing because of why you care about it, and what you can share about that.\n",
+       "\n",
+       "00:17:26.715 --> 00:17:28.715\n",
+       "Like your own unique perspective.\n",
+       "\n",
+       "00:17:30.565 --> 00:17:41.565\n",
+       "Epic, all right. So everybody round big round applause bar hopefully, you'll be Akbar in a year or 2 coming back to Spith fire. Give some inspiration. Thank you so much. Akbar.\n",
+       "\n",
+       "00:17:42.715 --> 00:17:45.715\n",
+       "I mean, at least I'll see you at will. You be a demo night? Probably.\n",
+       "\n",
+       "00:17:46.065 --> 00:17:48.065\n",
+       "Is that in December.\n",
+       "\n",
+       "00:17:48.059 --> 00:17:49.059\n",
+       "Yeah.\n",
+       "\n",
+       "00:17:48.715 --> 00:17:53.715\n",
+       "Yeah, may maybe, especially if it's in queens like it should not be too hard for me to get there.\n",
+       "\n",
+       "00:17:54.816 --> 00:17:54.915\n",
+       "That.\n",
+       "\n",
+       "00:17:54.915 --> 00:17:57.915\n",
+       "Or you'll be in Japan or doing something cool. He's always like.\n",
+       "\n",
+       "00:17:55.517 --> 00:31:28.517\n",
+       "Just.\n",
+       "\n",
+       "00:17:57.060 --> 00:18:04.060\n",
+       "Yeah, I I got a crazy schedule right now. So that, like I'm hopefully, I won't be too exhausted and burnt out by the end of the year.\n",
+       "\n",
+       "00:18:04.315 --> 00:18:06.315\n",
+       "But I think by November I should be okay.\n",
+       "\n",
+       "00:18:06.961 --> 00:18:10.961\n",
+       "Alright. Thank you so much, Akvar, and have a great weekend. Thanks again.\n",
+       "\n",
+       "00:18:12.116 --> 00:18:13.116\n",
+       "Base.\n",
+       "\n",
+       "00:18:13.764 --> 00:18:15.764\n",
+       "Yeah.\n",
+       "\n",
+       "00:18:15.566 --> 00:18:19.566\n",
+       "He's a great dude, and again, like he's just continually.\n",
+       "\n",
+       "00:18:21.116 --> 00:18:22.116\n",
+       "Touching.\n",
+       "\n",
+       "00:18:21.815 --> 00:18:24.815\n",
+       "Like staying, relevant, touching, like networking.\n",
+       "\n",
+       "00:18:25.315 --> 00:18:32.315\n",
+       "Volunteering doing stuff like someone said, Oh, yeah, he like I had to interview with him. And like, he's just doing this stuff and like it's an.\n",
+       "\n",
+       "00:18:32.365 --> 00:18:34.365\n",
+       "Unbelievably important.\n",
+       "\n",
+       "00:18:33.815 --> 00:18:38.815\n",
+       "Thing that you can do totally for free. You don't have to pay anyone.\n",
+       "\n",
+       "00:18:39.315 --> 00:18:41.315\n",
+       "I can't tell you how many conferences I went to.\n",
+       "\n",
+       "00:18:40.716 --> 00:18:46.716\n",
+       "I didn't know anyone, and I ended up making like a lot of friends. I go there alone and just be like.\n",
+       "\n",
+       "00:18:47.316 --> 00:18:48.316\n",
+       "I don't know anyone here.\n",
+       "\n",
+       "00:18:48.516 --> 00:18:51.516\n",
+       "You know. Instantly you start me because most people don't, and like.\n",
+       "\n",
+       "00:18:50.816 --> 00:18:55.816\n",
+       "You at least have one thing in common that you like that really nerdy thing. Whatever conference you're going to.\n",
+       "\n",
+       "00:18:56.215 --> 00:19:04.215\n",
+       "Fantastic alright. So we're already behind schedule. Were there any other major hiccups or questions about the homework?\n",
+       "\n",
+       "00:19:05.959 --> 00:19:08.959\n",
+       "Speak now forever! Hold your piece.\n",
+       "\n",
+       "00:19:09.615 --> 00:19:10.615\n",
+       "Oops!\n",
+       "\n",
+       "00:19:10.716 --> 00:19:11.716\n",
+       "Let's go.\n",
+       "\n",
+       "00:19:16.659 --> 00:19:19.659\n",
+       "Alright. So if we go into the readme.\n",
+       "\n",
+       "00:19:20.816 --> 00:19:31.816\n",
+       "So this week, we're doing data analytics and data visits, bi tools. So what we're gonna do is we start a recording, we turn on? Zc, we did this.\n",
+       "\n",
+       "00:19:32.967 --> 00:19:35.967\n",
+       "I updated this.\n",
+       "\n",
+       "00:19:37.260 --> 00:19:40.260\n",
+       "What it. Oh, I update this for next week, but.\n",
+       "\n",
+       "00:19:40.516 --> 00:19:43.516\n",
+       "So obviously, you guys are doing really well.\n",
+       "\n",
+       "00:19:44.215 --> 00:19:47.215\n",
+       "The asking questions in slack.\n",
+       "\n",
+       "00:19:46.816 --> 00:19:48.816\n",
+       "And like, I wanna say, like.\n",
+       "\n",
+       "00:19:49.559 --> 00:19:51.559\n",
+       "When I people, it's it's hard. It's like.\n",
+       "\n",
+       "00:19:51.260 --> 00:19:55.260\n",
+       "Even from the sometimes for me. I'm like, I don't wanna look stupid.\n",
+       "\n",
+       "00:19:56.060 --> 00:20:01.060\n",
+       "But like, and it's bold. So anytime you do it. I'm like F yet, like yes.\n",
+       "\n",
+       "00:20:01.266 --> 00:20:03.266\n",
+       "And like you get good. Answer quick.\n",
+       "\n",
+       "00:20:02.859 --> 00:20:06.859\n",
+       "Like, I read the answers, I'm like, Yeah, they did it really? Well.\n",
+       "\n",
+       "00:20:07.616 --> 00:20:10.616\n",
+       "The only thing that's always a back and forth. I don't know if I mentioned this last time. It's like.\n",
+       "\n",
+       "00:20:10.215 --> 00:20:12.215\n",
+       "The 1st question I.\n",
+       "\n",
+       "00:20:12.259 --> 00:20:13.259\n",
+       "Ask.\n",
+       "\n",
+       "00:20:13.359 --> 00:20:15.359\n",
+       "Is like, what error did you get.\n",
+       "\n",
+       "00:20:14.716 --> 00:20:20.716\n",
+       "The best way to get a quick answer, or get the answer without any back and forth, is to post.\n",
+       "\n",
+       "00:20:21.016 --> 00:20:25.016\n",
+       "A screenshot of the like the last snippet of code that got you there.\n",
+       "\n",
+       "00:20:25.159 --> 00:20:28.159\n",
+       "And the error like it's the easiest thing to do.\n",
+       "\n",
+       "00:20:28.259 --> 00:20:30.259\n",
+       "Does everyone understand that.\n",
+       "\n",
+       "00:20:30.960 --> 00:20:39.960\n",
+       "Because every single time someone's like, Oh, I'm having help, I need trouble doing this. I'm like, I need to see the error like this is the only way I can help you.\n",
+       "\n",
+       "00:20:41.015 --> 00:20:42.015\n",
+       "So\n",
+       "\n",
+       "00:20:42.659 --> 00:20:44.659\n",
+       "That just clears things up right away.\n",
+       "\n",
+       "00:20:44.415 --> 00:20:48.415\n",
+       "The other thing which I'm gonna talk about next week, but is also this week is\n",
+       "\n",
+       "00:20:48.015 --> 00:20:50.015\n",
+       "The.\n",
+       "\n",
+       "00:20:50.558 --> 00:20:51.558\n",
+       "The\n",
+       "\n",
+       "00:20:51.959 --> 00:20:57.959\n",
+       "The job. I don't know if you guys know about this, the job board channel. You guys, are you guys aware of this thing.\n",
+       "\n",
+       "00:20:57.716 --> 00:21:01.716\n",
+       "I think you should be members of it, but like there was a job posted this week.\n",
+       "\n",
+       "00:21:03.515 --> 00:21:06.515\n",
+       "There is a lot of resources like. So there is a lift job.\n",
+       "\n",
+       "00:21:08.058 --> 00:21:14.058\n",
+       "Here there was like another workshop. There was. There was 2 workshops.\n",
+       "\n",
+       "00:21:13.816 --> 00:21:17.816\n",
+       "Posted, of having you write your resume and cover letters.\n",
+       "\n",
+       "00:21:18.319 --> 00:21:20.319\n",
+       "And then this other thing that was like.\n",
+       "\n",
+       "00:21:20.559 --> 00:21:24.559\n",
+       "Oh, it's a single application where you apply to a bunch of things in New York City. So I'm like.\n",
+       "\n",
+       "00:21:25.260 --> 00:21:29.260\n",
+       "I just wanna re put this on your radar like this is a really good.\n",
+       "\n",
+       "00:21:28.916 --> 00:21:29.916\n",
+       "Resource.\n",
+       "\n",
+       "00:21:29.959 --> 00:21:30.959\n",
+       "Like, just.\n",
+       "\n",
+       "00:21:31.158 --> 00:21:33.158\n",
+       "Turn on notifications, star it.\n",
+       "\n",
+       "00:21:32.759 --> 00:21:34.759\n",
+       "You just do this. I.\n",
+       "\n",
+       "00:21:35.415 --> 00:21:38.415\n",
+       "You click on the thing and you click the star button.\n",
+       "\n",
+       "00:21:40.565 --> 00:21:41.565\n",
+       "It'll bring it to the top.\n",
+       "\n",
+       "00:21:40.715 --> 00:21:43.715\n",
+       "Like. I have our classes.\n",
+       "\n",
+       "00:21:43.859 --> 00:21:45.859\n",
+       "I'm gonna unstar it because I don't.\n",
+       "\n",
+       "00:21:46.015 --> 00:21:49.015\n",
+       "I have a job, but it is really good.\n",
+       "\n",
+       "00:21:49.758 --> 00:21:52.758\n",
+       "Cool. So I just want to remind you all about that.\n",
+       "\n",
+       "00:21:52.915 --> 00:21:54.915\n",
+       "Let's go back here.\n",
+       "\n",
+       "00:21:55.558 --> 00:21:56.558\n",
+       "Review homework.\n",
+       "\n",
+       "00:21:56.116 --> 00:21:58.116\n",
+       "This one.\n",
+       "\n",
+       "00:21:58.315 --> 00:22:01.315\n",
+       "Cool. And now we're gonna go to slide lecture.\n",
+       "\n",
+       "00:22:00.816 --> 00:22:07.816\n",
+       "Database analytics. We're gonna take a break. We're gonna do the same thing in python and tableau.\n",
+       "\n",
+       "00:22:08.558 --> 00:22:13.558\n",
+       "And then we're gonna review what's due for next weekly survey.\n",
+       "\n",
+       "00:22:13.559 --> 00:22:14.559\n",
+       "I don't.\n",
+       "\n",
+       "00:22:14.358 --> 00:22:17.358\n",
+       "Think we'll have time for breakout rooms. Hopefully we will. But.\n",
+       "\n",
+       "00:22:16.759 --> 00:22:18.759\n",
+       "That's just the way it is.\n",
+       "\n",
+       "00:22:18.966 --> 00:22:20.966\n",
+       "So let's get started.\n",
+       "\n",
+       "00:22:22.869 --> 00:22:27.869\n",
+       "Okidoki. We are here. So data, analytics and visualization.\n",
+       "\n",
+       "00:22:30.516 --> 00:22:38.516\n",
+       "Just kind of describing the difference between analyst and a data scientist. Again, like analysts, I don't want to say it's below, but it's like.\n",
+       "\n",
+       "00:22:39.260 --> 00:22:42.260\n",
+       "You can't be a data scientist without being a data analyst.\n",
+       "\n",
+       "00:22:41.416 --> 00:22:45.416\n",
+       "But you can be in a data analyst and not be a data scientist.\n",
+       "\n",
+       "00:22:46.116 --> 00:22:49.116\n",
+       "And the main difference between the 2.\n",
+       "\n",
+       "00:22:49.416 --> 00:22:53.416\n",
+       "Is like an analyst like they what they will usually get.\n",
+       "\n",
+       "00:22:53.116 --> 00:22:58.116\n",
+       "Problems, and, like their job responsibilities will be more narrow and specific.\n",
+       "\n",
+       "00:22:57.666 --> 00:22:59.666\n",
+       "It's like.\n",
+       "\n",
+       "00:22:59.560 --> 00:23:04.560\n",
+       "Hey, look at this data. And like, we have this problem here. Can you explain it to us?\n",
+       "\n",
+       "00:23:05.060 --> 00:23:06.060\n",
+       "Whereas.\n",
+       "\n",
+       "00:23:05.858 --> 00:23:10.858\n",
+       "The data. Scientists will just be given a data set and they'll be like, we don't know what's happening like.\n",
+       "\n",
+       "00:23:11.115 --> 00:23:13.115\n",
+       "Find the trends and patterns, and like.\n",
+       "\n",
+       "00:23:13.559 --> 00:23:14.559\n",
+       "Figure it out.\n",
+       "\n",
+       "00:23:14.866 --> 00:23:17.866\n",
+       "And this is like, more like, okay, like.\n",
+       "\n",
+       "00:23:18.459 --> 00:23:20.459\n",
+       "You have data like, you have, like.\n",
+       "\n",
+       "00:23:20.659 --> 00:23:23.659\n",
+       "A team that you're like working with and under.\n",
+       "\n",
+       "00:23:23.516 --> 00:23:29.516\n",
+       "Or 4, and they're like, Oh, we need these metrics in like this dashboard. And we need you to like.\n",
+       "\n",
+       "00:23:29.859 --> 00:23:34.859\n",
+       "Make it the most presentable. And like basically arise anything that's happening bad.\n",
+       "\n",
+       "00:23:36.015 --> 00:23:38.015\n",
+       "So it's like, really and again, like.\n",
+       "\n",
+       "00:23:38.815 --> 00:23:41.815\n",
+       "Engine data engineer, analyst, scientist.\n",
+       "\n",
+       "00:23:42.266 --> 00:23:43.266\n",
+       "So like.\n",
+       "\n",
+       "00:23:43.216 --> 00:23:49.216\n",
+       "Hopefully, you'll have a data engineer working with you. But chances are you're not going to. You're just going to do the engineering yourself.\n",
+       "\n",
+       "00:23:49.959 --> 00:23:52.959\n",
+       "And engineering. I mean cleaning and getting the data.\n",
+       "\n",
+       "00:23:54.615 --> 00:23:55.615\n",
+       "And like.\n",
+       "\n",
+       "00:23:56.359 --> 00:24:00.359\n",
+       "Again, like the most important thing, and the biggest thing to do is like building dashboards.\n",
+       "\n",
+       "00:24:00.916 --> 00:24:01.916\n",
+       "It's not this.\n",
+       "\n",
+       "00:24:01.860 --> 00:24:08.860\n",
+       "It's not the most exciting thing like I love it. But like some people, don't, they feel they? I don't know. Some people don't like it, but like.\n",
+       "\n",
+       "00:24:09.515 --> 00:24:11.515\n",
+       "Like, I said. I think last class it's like.\n",
+       "\n",
+       "00:24:12.759 --> 00:24:15.759\n",
+       "But a really good way of making yourself indispensable at a company.\n",
+       "\n",
+       "00:24:16.515 --> 00:24:20.515\n",
+       "Cause you make a good dashboard that people check every day or like, have it up all the time, like.\n",
+       "\n",
+       "00:24:20.157 --> 00:24:22.157\n",
+       "If it goes wrong, like.\n",
+       "\n",
+       "00:24:23.258 --> 00:24:24.258\n",
+       "They gotta come to like.\n",
+       "\n",
+       "00:24:24.016 --> 00:24:28.016\n",
+       "It's it's not. It's like they come to you. And like, when they need updates, they come to you.\n",
+       "\n",
+       "00:24:28.860 --> 00:24:32.860\n",
+       "Or they need like a new feature, like they come to you and like.\n",
+       "\n",
+       "00:24:35.860 --> 00:24:42.860\n",
+       "Yes, like you should make all your code reusable. But like I've had to take over someone else's dashboard. And that 1st week thing started breaking and like.\n",
+       "\n",
+       "00:24:43.259 --> 00:24:49.259\n",
+       "I looked at their code, and I was like, I have no idea what this problem is, and it took me like a week just to find the problem.\n",
+       "\n",
+       "00:24:49.116 --> 00:24:54.116\n",
+       "I was like, I gotta just rebuild it myself. So like he has write good code and stuff. But like.\n",
+       "\n",
+       "00:24:54.258 --> 00:24:57.258\n",
+       "You'll be indispensable, and people know them.\n",
+       "\n",
+       "00:24:57.864 --> 00:25:06.864\n",
+       "So the skills are like data visualization python for data, cleaning pandas for data cleaning visualization. We can use seabor, but also tableau.\n",
+       "\n",
+       "00:25:07.466 --> 00:25:08.466\n",
+       "Google data, studio.\n",
+       "\n",
+       "00:25:09.115 --> 00:25:11.115\n",
+       "We do some statistical analysis and inference.\n",
+       "\n",
+       "00:25:10.759 --> 00:25:13.759\n",
+       "And business intelligence.\n",
+       "\n",
+       "00:25:14.616 --> 00:25:17.616\n",
+       "That's like the main core things of it. So like, what is analytics like.\n",
+       "\n",
+       "00:25:18.316 --> 00:25:22.316\n",
+       "You analyze trends. This is called Ana model. It's time series, data.\n",
+       "\n",
+       "00:25:23.366 --> 00:25:24.366\n",
+       "But like.\n",
+       "\n",
+       "00:25:25.116 --> 00:25:28.116\n",
+       "Yeah, you're like, okay, this is where we're projected to go.\n",
+       "\n",
+       "00:25:30.616 --> 00:25:35.616\n",
+       "Deliver like building dashboards to deliver Kpis, who, he remembers what Kpi means.\n",
+       "\n",
+       "00:25:35.615 --> 00:25:36.615\n",
+       "Big buzzword.\n",
+       "\n",
+       "00:25:38.658 --> 00:25:43.658\n",
+       "E performance indicator, like sales might be a kpi, like amount of sales.\n",
+       "\n",
+       "00:25:45.359 --> 00:25:48.359\n",
+       "Like there are internal kpis, like.\n",
+       "\n",
+       "00:25:47.859 --> 00:25:55.859\n",
+       "For like, how well you're doing your job like a really bad one is like, how many lines of code have you written or committed to Github, push to Github.\n",
+       "\n",
+       "00:25:48.366 --> 00:25:49.366\n",
+       "Key performance indicator.\n",
+       "\n",
+       "00:25:56.417 --> 00:25:57.417\n",
+       "But like.\n",
+       "\n",
+       "00:25:57.316 --> 00:26:02.316\n",
+       "You know how many projects have you maintained? And Da, but like it's mostly like.\n",
+       "\n",
+       "00:26:03.416 --> 00:26:05.416\n",
+       "The business's kpis like sales.\n",
+       "\n",
+       "00:26:05.215 --> 00:26:09.215\n",
+       "Like incoming customers like conversions, or like.\n",
+       "\n",
+       "00:26:09.258 --> 00:26:10.258\n",
+       "People that have.\n",
+       "\n",
+       "00:26:10.458 --> 00:26:12.458\n",
+       "Unsubscribe or churn, we call them.\n",
+       "\n",
+       "00:26:14.115 --> 00:26:17.115\n",
+       "So the stakeholder is like.\n",
+       "\n",
+       "00:26:18.460 --> 00:26:23.460\n",
+       "It is the state. It's like your boss. I guess you could say it's the best way to describe it, like the person that's like.\n",
+       "\n",
+       "00:26:24.316 --> 00:26:28.316\n",
+       "Has staken like is impacting these metrics like that's who you're doing it for.\n",
+       "\n",
+       "00:26:30.616 --> 00:26:34.616\n",
+       "A B testing is another one. I love it. I think it's really fun.\n",
+       "\n",
+       "00:26:35.016 --> 00:26:39.016\n",
+       "Who here can describe what a B testing is? Does anybody have any experience with it?\n",
+       "\n",
+       "00:26:44.359 --> 00:26:49.359\n",
+       "From what I've heard, it's you're comparing 2 versions of a similar element, like an app.\n",
+       "\n",
+       "00:26:49.015 --> 00:26:54.015\n",
+       "With them 2 different characteristics characteristic A and B, and then, you see, which one performs better.\n",
+       "\n",
+       "00:26:54.416 --> 00:26:59.416\n",
+       "Yeah, in essence. Yes. The only kind of diff like. Yes, it's completely true.\n",
+       "\n",
+       "00:27:00.159 --> 00:27:05.159\n",
+       "The only way I would word. It is like you have your existing thing, which is the control group.\n",
+       "\n",
+       "00:27:05.217 --> 00:27:06.217\n",
+       "Right.\n",
+       "\n",
+       "00:27:06.458 --> 00:27:10.458\n",
+       "And then you would implement like a new thing like, so the red might be the normal one.\n",
+       "\n",
+       "00:27:10.158 --> 00:27:13.158\n",
+       "And then here you're like, we want to test out a new.\n",
+       "\n",
+       "00:27:12.661 --> 00:27:13.661\n",
+       "Design.\n",
+       "\n",
+       "00:27:14.459 --> 00:27:17.459\n",
+       "So you compare it to the normal one.\n",
+       "\n",
+       "00:27:17.216 --> 00:27:22.216\n",
+       "Instead of like doing 2 different ones and comparing those you compared to what's already right.\n",
+       "\n",
+       "00:27:21.815 --> 00:27:24.815\n",
+       "And then there's like this process, this.\n",
+       "\n",
+       "00:27:25.515 --> 00:27:27.515\n",
+       "So it statistical process that you run.\n",
+       "\n",
+       "00:27:27.558 --> 00:27:28.558\n",
+       "That'll.\n",
+       "\n",
+       "00:27:29.166 --> 00:27:30.166\n",
+       "Give you an answer if it's.\n",
+       "\n",
+       "00:27:30.059 --> 00:27:36.059\n",
+       "Different in either direction. If it's worse, better, or worse. This is a whole art.\n",
+       "\n",
+       "00:27:36.216 --> 00:27:39.216\n",
+       "It's actually derived from psychological statistics.\n",
+       "\n",
+       "00:27:39.560 --> 00:27:44.560\n",
+       "So you guys know, like drug studies, right? Like, I don't know if you guys heard of this and like placebo pills.\n",
+       "\n",
+       "00:27:43.858 --> 00:27:48.858\n",
+       "Like my dad was in a so drug study for lyme disease. You know the tick.\n",
+       "\n",
+       "00:27:49.459 --> 00:27:51.459\n",
+       "If you get bit by a tick you can get lime disease.\n",
+       "\n",
+       "00:27:53.316 --> 00:27:56.316\n",
+       "And he was in a study, and like they don't tell you like.\n",
+       "\n",
+       "00:27:56.016 --> 00:27:59.016\n",
+       "Some people will be the control group and they get a pill.\n",
+       "\n",
+       "00:27:59.215 --> 00:28:02.215\n",
+       "But it's a it's just nothing's in it. It's like a sugar pill.\n",
+       "\n",
+       "00:28:02.016 --> 00:28:07.016\n",
+       "And then some people get the actual medicine, and then what they do is they compare the 2 to see if they're different.\n",
+       "\n",
+       "00:28:07.659 --> 00:28:09.659\n",
+       "We're essentially doing the same thing here. But.\n",
+       "\n",
+       "00:28:08.760 --> 00:28:11.760\n",
+       "Instead of trying to make people healthier, we're trying to.\n",
+       "\n",
+       "00:28:11.915 --> 00:28:13.915\n",
+       "Make more money or something, you know.\n",
+       "\n",
+       "00:28:13.760 --> 00:28:19.760\n",
+       "Instead of it being a pill, it's like a different colored button, or like a new feature, or something of that nature.\n",
+       "\n",
+       "00:28:21.615 --> 00:28:25.615\n",
+       "So this is like also a huge thing like if you can demonstrate, and you can do this well like.\n",
+       "\n",
+       "00:28:25.758 --> 00:28:27.758\n",
+       "Huge skill. It is a lot.\n",
+       "\n",
+       "00:28:27.315 --> 00:28:29.315\n",
+       "Learning to do with it, but that.\n",
+       "\n",
+       "00:28:33.017 --> 00:28:34.017\n",
+       "Cool.\n",
+       "\n",
+       "00:28:34.316 --> 00:28:38.316\n",
+       "In. For instance, this has to do with a B testing. So imagine.\n",
+       "\n",
+       "00:28:38.860 --> 00:28:39.860\n",
+       "The.\n",
+       "\n",
+       "00:28:39.160 --> 00:28:44.160\n",
+       "Bogger colored blob in the middle is like the population or the control group.\n",
+       "\n",
+       "00:28:44.216 --> 00:28:53.216\n",
+       "You can infer other factors that are like, oh, what is this other group? Is this like a? Is this the same as this or not?\n",
+       "\n",
+       "00:28:52.959 --> 00:28:56.959\n",
+       "And this chart will show you that like oh, they're not actually the same.\n",
+       "\n",
+       "00:28:57.515 --> 00:28:58.515\n",
+       "Because of this factor.\n",
+       "\n",
+       "00:28:58.715 --> 00:29:01.715\n",
+       "This is called statistical inference.\n",
+       "\n",
+       "00:29:02.060 --> 00:29:07.060\n",
+       "Analysts will be called upon to try to understand, like, how similar data is.\n",
+       "\n",
+       "00:29:07.660 --> 00:29:09.660\n",
+       "So like when you're comparing to groups.\n",
+       "\n",
+       "00:29:10.116 --> 00:29:15.116\n",
+       "If they're exact, same if they're exactly the same like, there's no difference in like.\n",
+       "\n",
+       "00:29:15.216 --> 00:29:19.216\n",
+       "That's not very insightful. But if they're different on a certain measure, like.\n",
+       "\n",
+       "00:29:19.016 --> 00:29:24.016\n",
+       "These people buy a lot more than these people like. Then you could.\n",
+       "\n",
+       "00:29:23.616 --> 00:29:25.616\n",
+       "Effectively.\n",
+       "\n",
+       "00:29:26.215 --> 00:29:27.215\n",
+       "Somehow make more money.\n",
+       "\n",
+       "00:29:28.415 --> 00:29:30.415\n",
+       "Who knows the very famous story.\n",
+       "\n",
+       "00:29:29.559 --> 00:29:32.559\n",
+       "About it was either target.\n",
+       "\n",
+       "00:29:33.158 --> 00:29:34.158\n",
+       "Or Walgreens.\n",
+       "\n",
+       "00:29:34.166 --> 00:29:36.166\n",
+       "Identifying\n",
+       "\n",
+       "00:29:35.858 --> 00:29:37.858\n",
+       "Expecting parents.\n",
+       "\n",
+       "00:29:38.716 --> 00:29:42.716\n",
+       "Yeah. It was very ethically questionable. But.\n",
+       "\n",
+       "00:29:43.159 --> 00:29:44.159\n",
+       "They realized.\n",
+       "\n",
+       "00:29:46.415 --> 00:29:48.415\n",
+       "Expecting parents or people that just have children.\n",
+       "\n",
+       "00:29:49.558 --> 00:29:51.558\n",
+       "Spend a lot of money on stuff.\n",
+       "\n",
+       "00:29:50.816 --> 00:29:54.816\n",
+       "At their store. So they were like, we're gonna predict when they're.\n",
+       "\n",
+       "00:29:55.759 --> 00:30:02.759\n",
+       "Pregnant. And they did. And it was kind of questionably outlier analysis. So outliers.\n",
+       "\n",
+       "00:30:03.458 --> 00:30:06.458\n",
+       "Are really good and or bad.\n",
+       "\n",
+       "00:30:09.458 --> 00:30:14.458\n",
+       "Bad, because sometimes it's just an outlier that doesn't make it's like an error in the data.\n",
+       "\n",
+       "00:30:15.068 --> 00:30:16.068\n",
+       "Usually it is.\n",
+       "\n",
+       "00:30:16.917 --> 00:30:22.917\n",
+       "Bad, because sometimes as an outlier to the point where it's, it doesn't really make you money or doesn't.\n",
+       "\n",
+       "00:30:23.063 --> 00:30:25.063\n",
+       "Mean anything.\n",
+       "\n",
+       "00:30:24.658 --> 00:30:26.658\n",
+       "But they could also be good cause, like.\n",
+       "\n",
+       "00:30:28.359 --> 00:30:29.359\n",
+       "What if that person.\n",
+       "\n",
+       "00:30:30.116 --> 00:30:34.116\n",
+       "What if the average person spends a hundred dollars and that person spent a million dollars.\n",
+       "\n",
+       "00:30:33.915 --> 00:30:39.915\n",
+       "Right like you'd be like, well, how? Why do they spend a million dollars so identifying these and like.\n",
+       "\n",
+       "00:30:40.016 --> 00:30:42.016\n",
+       "Then being able to dig into them, and like.\n",
+       "\n",
+       "00:30:43.259 --> 00:30:45.259\n",
+       "Look at them like you can identify like.\n",
+       "\n",
+       "00:30:45.358 --> 00:30:47.358\n",
+       "Potential ways of doing it.\n",
+       "\n",
+       "00:30:47.959 --> 00:30:48.959\n",
+       "So.\n",
+       "\n",
+       "00:30:48.916 --> 00:30:54.916\n",
+       "It's really important. Who here has read the book called Outliers by Malcolm Gladwell.\n",
+       "\n",
+       "00:30:57.066 --> 00:30:59.066\n",
+       "Okay, you guys.\n",
+       "\n",
+       "00:30:58.816 --> 00:31:01.816\n",
+       "Malcolm Gladwell is an amazingly smart human.\n",
+       "\n",
+       "00:31:03.516 --> 00:31:13.516\n",
+       "I think he's technically a psychologist or mathematician, but he also writes books that are actually good to read. They're like entertaining books, he wrote the tipping point.\n",
+       "\n",
+       "00:31:14.715 --> 00:31:16.715\n",
+       "Which the roots named their album. After.\n",
+       "\n",
+       "00:31:17.616 --> 00:31:21.616\n",
+       "Like this is like inflection. Things, he wrote outliers, which is like.\n",
+       "\n",
+       "00:31:22.661 --> 00:31:23.661\n",
+       "He talks about like.\n",
+       "\n",
+       "00:31:24.959 --> 00:31:26.959\n",
+       "Like Bill Gates.\n",
+       "\n",
+       "00:31:28.759 --> 00:34:05.759\n",
+       "This is.\n",
+       "\n",
+       "00:31:29.315 --> 00:31:34.315\n",
+       "Like, have you guys ever heard the 10,000Ā h rule like you need 10,000Ā h like that's his term.\n",
+       "\n",
+       "00:31:34.115 --> 00:31:39.115\n",
+       "He's in. He also has a new podcast. That's really good. But it's just not doesn't come out. That often.\n",
+       "\n",
+       "00:31:39.516 --> 00:31:44.516\n",
+       "These good, these books, and you can buy this book for $4 like they're kind of older now. But like these are amazing.\n",
+       "\n",
+       "00:31:44.815 --> 00:31:47.815\n",
+       "Malcolm Gladwell. He's from New York.\n",
+       "\n",
+       "00:31:47.915 --> 00:31:50.915\n",
+       "But he's he's from Canada, but he lives in New York.\n",
+       "\n",
+       "00:31:52.359 --> 00:31:57.359\n",
+       "Talking to stranger outliers. Blank is another good one like you look at someone you can like.\n",
+       "\n",
+       "00:31:57.457 --> 00:31:59.457\n",
+       "Your initial judgment, or like.\n",
+       "\n",
+       "00:31:58.760 --> 00:32:02.760\n",
+       "In the blink of an eye like you can get like 80 to 90% of the picture.\n",
+       "\n",
+       "00:32:07.259 --> 00:32:12.259\n",
+       "Or like if it kpis outside of his normal range, like all of a sudden, sales dropped.\n",
+       "\n",
+       "00:32:12.016 --> 00:32:16.016\n",
+       "To an outlier person like outside of our normal things, like.\n",
+       "\n",
+       "00:32:16.816 --> 00:32:19.816\n",
+       "Why is that like? Oh, the website, the buy button was disabled.\n",
+       "\n",
+       "00:32:19.860 --> 00:32:21.860\n",
+       "Like, yeah, it'll happen.\n",
+       "\n",
+       "00:32:22.059 --> 00:32:23.059\n",
+       "That's how you can identify it.\n",
+       "\n",
+       "00:32:23.859 --> 00:32:28.859\n",
+       "But like, so going, all is like, well, okay, like, we have all this data like, how do we communicate it? How do we tell stories?\n",
+       "\n",
+       "00:32:27.915 --> 00:32:29.915\n",
+       "So like.\n",
+       "\n",
+       "00:32:30.315 --> 00:32:34.315\n",
+       "Pictures worth a thousand words. If I just give this you this, this exact data sheet to you like.\n",
+       "\n",
+       "00:32:34.859 --> 00:32:36.859\n",
+       "I see the date, and I see the money.\n",
+       "\n",
+       "00:32:37.116 --> 00:32:40.116\n",
+       "But like it's meaningless. All the dates about a.\n",
+       "\n",
+       "00:32:41.258 --> 00:32:42.258\n",
+       "Like.\n",
+       "\n",
+       "00:32:42.359 --> 00:32:44.359\n",
+       "What even kind of money is this.\n",
+       "\n",
+       "00:32:45.058 --> 00:32:47.058\n",
+       "Like, if you visualize it, it's very clear.\n",
+       "\n",
+       "00:32:47.516 --> 00:32:52.516\n",
+       "Like we were up. And now we're down and like we're in a kind of on a downward trend. It doesn't seem that crazy, but.\n",
+       "\n",
+       "00:32:52.859 --> 00:32:59.859\n",
+       "So I like to pose it like this way. There's like a lot of really bad ways to do it, even though you're presenting factual information. So.\n",
+       "\n",
+       "00:33:00.515 --> 00:33:02.515\n",
+       "Where was Covid? The worst? Right.\n",
+       "\n",
+       "00:33:02.657 --> 00:33:04.657\n",
+       "So I show you this visualization.\n",
+       "\n",
+       "00:33:04.759 --> 00:33:06.759\n",
+       "Who can tell me?\n",
+       "\n",
+       "00:33:06.816 --> 00:33:07.816\n",
+       "What's wrong.\n",
+       "\n",
+       "00:33:07.666 --> 00:33:10.666\n",
+       "Like what what you can improve with this.\n",
+       "\n",
+       "00:33:11.516 --> 00:33:12.516\n",
+       "The States are like.\n",
+       "\n",
+       "00:33:12.317 --> 00:33:13.317\n",
+       "Not.\n",
+       "\n",
+       "00:33:12.615 --> 00:33:16.615\n",
+       "It's so jumbled together I could barely even read it.\n",
+       "\n",
+       "00:33:13.060 --> 00:33:14.060\n",
+       "Readable.\n",
+       "\n",
+       "00:33:16.558 --> 00:33:20.558\n",
+       "I literally saw Andrew doing this. And it's like, yeah, 1st thing like.\n",
+       "\n",
+       "00:33:21.415 --> 00:33:23.415\n",
+       "You can't read it. Everything's close together.\n",
+       "\n",
+       "00:33:23.058 --> 00:33:25.058\n",
+       "They're sideways.\n",
+       "\n",
+       "00:33:26.915 --> 00:33:27.915\n",
+       "I'm like, okay, what if we fix that.\n",
+       "\n",
+       "00:33:28.516 --> 00:33:32.516\n",
+       "Yeah. Also, it wasn't labeled. There was no way to tell what the metric was even about.\n",
+       "\n",
+       "00:33:32.415 --> 00:33:37.415\n",
+       "Exactly what 70,000! What I don't 70,000 what.\n",
+       "\n",
+       "00:33:37.460 --> 00:33:38.460\n",
+       "Douglas.\n",
+       "\n",
+       "00:33:39.415 --> 00:33:44.415\n",
+       "Maybe sort it, too, that way. It's like clear which one's the tallest you got.\n",
+       "\n",
+       "00:33:43.415 --> 00:33:46.415\n",
+       "That's a really good thing. You're like, what's the 3.rd\n",
+       "\n",
+       "00:33:46.266 --> 00:33:48.266\n",
+       "Where is it? The worst? The 3.rd\n",
+       "\n",
+       "00:33:47.915 --> 00:33:53.915\n",
+       "So let's fix those things and be like, okay, these are total total Covid deaths.\n",
+       "\n",
+       "00:33:54.315 --> 00:33:57.315\n",
+       "And then we sorted it, and we're like, all right.\n",
+       "\n",
+       "00:33:57.015 --> 00:33:59.015\n",
+       "California is the worst.\n",
+       "\n",
+       "00:33:58.715 --> 00:34:00.715\n",
+       "Texas is the second worst.\n",
+       "\n",
+       "00:34:00.959 --> 00:34:02.959\n",
+       "Florida, New York. Who's.\n",
+       "\n",
+       "00:34:06.571 --> 00:35:53.571\n",
+       "Really.\n",
+       "\n",
+       "00:34:06.660 --> 00:34:08.660\n",
+       "Factual information. But why is this.\n",
+       "\n",
+       "00:34:08.315 --> 00:34:10.315\n",
+       "What's a problem with this.\n",
+       "\n",
+       "00:34:12.659 --> 00:34:14.659\n",
+       "There is no title.\n",
+       "\n",
+       "00:34:14.815 --> 00:34:16.815\n",
+       "That could be it.\n",
+       "\n",
+       "00:34:17.416 --> 00:34:18.416\n",
+       "There's something that's more misleading.\n",
+       "\n",
+       "00:34:18.259 --> 00:34:20.259\n",
+       "What is common about New York, California.\n",
+       "\n",
+       "00:34:19.859 --> 00:34:25.859\n",
+       "Could it? Could it do like with population like obviously California, Texas, because they have a higher population.\n",
+       "\n",
+       "00:34:25.161 --> 00:34:30.161\n",
+       "Yeah, it's gotta be like average per person or per 100,000. Maybe.\n",
+       "\n",
+       "00:34:30.660 --> 00:34:37.660\n",
+       "Yes, right? If I put a population be the basically the exact same thing. So like, yeah, you can be like, this is facts like.\n",
+       "\n",
+       "00:34:38.159 --> 00:34:46.159\n",
+       "California, like, yeah, no, Doug, California has the most. They just have the most people. Of course, they're gonna have the most desks like this and say anything. The technical.\n",
+       "\n",
+       "00:34:46.266 --> 00:34:48.266\n",
+       "It's too closely correlated with.\n",
+       "\n",
+       "00:34:49.559 --> 00:34:52.559\n",
+       "Alright, if I put a population map, it's gonna look almost exactly the same.\n",
+       "\n",
+       "00:34:52.916 --> 00:34:57.916\n",
+       "So what if we flip that? So then we divide it by per capita, or per 100 people.\n",
+       "\n",
+       "00:34:57.966 --> 00:34:58.966\n",
+       "Per State.\n",
+       "\n",
+       "00:34:59.015 --> 00:35:01.015\n",
+       "Draws a very new picture.\n",
+       "\n",
+       "00:35:00.816 --> 00:35:04.816\n",
+       "Right now we get totally different things. Mississippi.\n",
+       "\n",
+       "00:35:05.315 --> 00:35:06.315\n",
+       "Alabama.\n",
+       "\n",
+       "00:35:05.660 --> 00:35:10.660\n",
+       "Louisiana, New Jersey, like totally different results. And I'm like, Okay, like.\n",
+       "\n",
+       "00:35:11.515 --> 00:35:16.515\n",
+       "I know these States. I'm American. I've lived in the South like, I know. These are in the South.\n",
+       "\n",
+       "00:35:19.860 --> 00:35:22.860\n",
+       "But what what could we even do? That's better than this.\n",
+       "\n",
+       "00:35:25.166 --> 00:35:29.166\n",
+       "So this actually gives us a good indicator of like, okay, these are actually spots that are.\n",
+       "\n",
+       "00:35:29.517 --> 00:35:31.517\n",
+       "This. But, like, What's that? Step further? We could go.\n",
+       "\n",
+       "00:35:31.215 --> 00:35:33.215\n",
+       "There's 2 steps further.\n",
+       "\n",
+       "00:35:32.760 --> 00:35:33.760\n",
+       "David.\n",
+       "\n",
+       "00:35:33.815 --> 00:35:36.815\n",
+       "Maybe you could color code them by region.\n",
+       "\n",
+       "00:35:36.859 --> 00:35:38.859\n",
+       "Really, good idea, right?\n",
+       "\n",
+       "00:35:39.358 --> 00:35:43.358\n",
+       "We're like, okay, like, these are the wet. This is different data like these are the West.\n",
+       "\n",
+       "00:35:42.458 --> 00:35:44.458\n",
+       "This is the south.\n",
+       "\n",
+       "00:35:44.667 --> 00:35:48.667\n",
+       "Okay, cool, like, yeah. The west and the south is more hot.\n",
+       "\n",
+       "00:35:50.459 --> 00:35:52.459\n",
+       "What is an even better way to do it?\n",
+       "\n",
+       "00:35:53.559 --> 00:35:57.559\n",
+       "Like a map might look better like we did in the tableau.\n",
+       "\n",
+       "00:35:53.861 --> 00:43:46.861\n",
+       "Right.\n",
+       "\n",
+       "00:35:55.560 --> 00:35:56.560\n",
+       "Oh!\n",
+       "\n",
+       "00:35:56.816 --> 00:35:59.816\n",
+       "Yeah, where there's like, okay.\n",
+       "\n",
+       "00:36:00.059 --> 00:36:05.059\n",
+       "I get color like bars are like good, but like I get color and maps.\n",
+       "\n",
+       "00:36:06.215 --> 00:36:07.215\n",
+       "Like this.\n",
+       "\n",
+       "00:36:07.257 --> 00:36:08.257\n",
+       "And I'm like.\n",
+       "\n",
+       "00:36:07.917 --> 00:36:09.917\n",
+       "Not so bad over here.\n",
+       "\n",
+       "00:36:09.965 --> 00:36:11.965\n",
+       "Really bad over here.\n",
+       "\n",
+       "00:36:12.115 --> 00:36:14.115\n",
+       "Because like here, like.\n",
+       "\n",
+       "00:36:14.057 --> 00:36:21.057\n",
+       "Nevada and Arizona like Utah, like I don't know how close they are actually right like, I don't like.\n",
+       "\n",
+       "00:36:21.215 --> 00:36:24.215\n",
+       "I don't know which states these are closest to.\n",
+       "\n",
+       "00:36:24.360 --> 00:36:25.360\n",
+       "Like.\n",
+       "\n",
+       "00:36:25.116 --> 00:36:30.116\n",
+       "I don't know like, but here you can just see like an outbreak kind of you know.\n",
+       "\n",
+       "00:36:30.259 --> 00:36:33.259\n",
+       "Like like it. There's like a hotspot.\n",
+       "\n",
+       "00:36:35.316 --> 00:36:41.316\n",
+       "Always. The New York Times is like the best spot for any visualization. If you ever need to like.\n",
+       "\n",
+       "00:36:40.816 --> 00:36:53.816\n",
+       "Sometimes I'll have a visualization like in mind, or like an idea I want to do. And I'm like, not quite sure. And I just go there and like steal their designs. And I'm like, Oh, this is what I want. This is what I want it to look like and like I can fit my data to like.\n",
+       "\n",
+       "00:36:53.758 --> 00:36:56.758\n",
+       "This is what I'm trying to communicate, and just like Rip, it.\n",
+       "\n",
+       "00:36:57.116 --> 00:37:03.116\n",
+       "They do use. J. 3D. 3 js, which is a Javascript library. So it's kind of annoying for me to do it, but.\n",
+       "\n",
+       "00:37:02.857 --> 00:37:06.857\n",
+       "D. Through Js is Javascript. I hate Javascript, but.\n",
+       "\n",
+       "00:37:07.416 --> 00:37:08.416\n",
+       "Is really good.\n",
+       "\n",
+       "00:37:08.316 --> 00:37:13.316\n",
+       "Oh, the other thing they do here is like they have like, if you do, hover has, like a cases like.\n",
+       "\n",
+       "00:37:13.359 --> 00:37:15.359\n",
+       "They do it per 100,000 people.\n",
+       "\n",
+       "00:37:15.816 --> 00:37:22.816\n",
+       "They do it correctly. This I haven't. Read this book. Chris. Put this in here, the other instructor. 200 of nothing, he said. It's a really good book.\n",
+       "\n",
+       "00:37:22.959 --> 00:37:25.959\n",
+       "I I'm going to read it. I haven't read it yet.\n",
+       "\n",
+       "00:37:27.457 --> 00:37:33.457\n",
+       "Alright. So that's some good and bad. But like, I just wanna highlight some really bad ones. So like, I'm not gonna go too much on this.\n",
+       "\n",
+       "00:37:32.659 --> 00:37:35.659\n",
+       "So again, we're talking about bad ones. So like.\n",
+       "\n",
+       "00:37:35.966 --> 00:37:40.966\n",
+       "This is what we call cart they add, they make the things 3 dimensional.\n",
+       "\n",
+       "00:37:41.658 --> 00:37:45.658\n",
+       "But the 3rd dimension doesn't add any information.\n",
+       "\n",
+       "00:37:45.916 --> 00:37:49.916\n",
+       "The 3rd dimension here, for each one of these bars is just one.\n",
+       "\n",
+       "00:37:49.716 --> 00:37:54.716\n",
+       "Which means it's nothing. It would be the same exact amount of data if it was flat.\n",
+       "\n",
+       "00:37:56.616 --> 00:37:59.616\n",
+       "And this is like, I don't even know why they allow you to do 3D.\n",
+       "\n",
+       "00:37:58.658 --> 00:38:02.658\n",
+       "Charts in like excel without a Z access.\n",
+       "\n",
+       "00:38:02.866 --> 00:38:05.866\n",
+       "You can use 3D charts, but if you're gonna use it.\n",
+       "\n",
+       "00:38:05.860 --> 00:38:12.860\n",
+       "Use it. Don't just do it because it looks cooler, like if you want to use it. I wouldn't suggest it unless it's like, really.\n",
+       "\n",
+       "00:38:12.817 --> 00:38:13.817\n",
+       "Obvious.\n",
+       "\n",
+       "00:38:14.115 --> 00:38:18.115\n",
+       "But like make the z axis another feature.\n",
+       "\n",
+       "00:38:18.760 --> 00:38:20.760\n",
+       "Like time. Oh, not time! That would be a bad one, but like.\n",
+       "\n",
+       "00:38:20.515 --> 00:38:23.515\n",
+       "I don't know. Like depth like.\n",
+       "\n",
+       "00:38:24.358 --> 00:38:28.358\n",
+       "Something, another measurable feature that will change, make all the things differently.\n",
+       "\n",
+       "00:38:28.957 --> 00:38:30.957\n",
+       "This is just called cart junk.\n",
+       "\n",
+       "00:38:31.215 --> 00:38:36.215\n",
+       "Or bad data ink ratio. This is my favorite one.\n",
+       "\n",
+       "00:38:36.965 --> 00:38:39.965\n",
+       "I'll tell you one thing I really like about it.\n",
+       "\n",
+       "00:38:40.359 --> 00:38:45.359\n",
+       "Is that this call out? I think this is the only good thing about this visualization.\n",
+       "\n",
+       "00:38:45.558 --> 00:38:47.558\n",
+       "And his call outs like, Look here.\n",
+       "\n",
+       "00:38:47.121 --> 00:38:51.121\n",
+       "At 2,005, Florida enacted Stan, your grandma.\n",
+       "\n",
+       "00:38:50.858 --> 00:38:54.858\n",
+       "And I'm like, Oh, and then it's like, Oh, goes down. I'm like.\n",
+       "\n",
+       "00:38:55.058 --> 00:38:58.058\n",
+       "Wow! This is released by Reuters. Who can tell me what.\n",
+       "\n",
+       "00:38:57.815 --> 00:39:00.815\n",
+       "Why, why, I think this is a bad chart.\n",
+       "\n",
+       "00:39:01.858 --> 00:39:03.858\n",
+       "So y-axis is flipped.\n",
+       "\n",
+       "00:39:04.259 --> 00:39:07.259\n",
+       "Yes, dude, the axes are really important.\n",
+       "\n",
+       "00:39:07.420 --> 00:39:10.420\n",
+       "Do you guys see this? It goes from 0.\n",
+       "\n",
+       "00:39:10.458 --> 00:39:11.458\n",
+       "It goes down.\n",
+       "\n",
+       "00:39:12.660 --> 00:39:14.660\n",
+       "So actually, gun death went up.\n",
+       "\n",
+       "00:39:15.217 --> 00:39:16.217\n",
+       "When this happened.\n",
+       "\n",
+       "00:39:17.459 --> 00:39:20.459\n",
+       "It starts at 0, and it goes to a thousand.\n",
+       "\n",
+       "00:39:21.016 --> 00:39:22.016\n",
+       "So like.\n",
+       "\n",
+       "00:39:22.159 --> 00:39:23.159\n",
+       "We.\n",
+       "\n",
+       "00:39:23.815 --> 00:39:26.815\n",
+       "As humans read left to right, and we look up to down.\n",
+       "\n",
+       "00:39:25.959 --> 00:39:28.959\n",
+       "Like. That's just how we naturally do it.\n",
+       "\n",
+       "00:39:29.016 --> 00:39:30.016\n",
+       "And this.\n",
+       "\n",
+       "00:39:31.367 --> 00:39:34.367\n",
+       "I think it was like intentionally misleading, but maybe they're just bad.\n",
+       "\n",
+       "00:39:34.557 --> 00:39:37.557\n",
+       "But it also like looks very credible and like.\n",
+       "\n",
+       "00:39:37.916 --> 00:39:42.916\n",
+       "Release in Reuters, which is a credible so like they do a lot of good work.\n",
+       "\n",
+       "00:39:42.916 --> 00:39:44.916\n",
+       "But like.\n",
+       "\n",
+       "00:39:45.359 --> 00:39:51.359\n",
+       "People are misleading and like this is powerful. If you're just scrolling through Instagram and like you, just see this.\n",
+       "\n",
+       "00:39:51.960 --> 00:39:52.960\n",
+       "You might think that.\n",
+       "\n",
+       "00:39:53.116 --> 00:39:55.116\n",
+       "And unfortunately, that's the case.\n",
+       "\n",
+       "00:39:55.065 --> 00:39:56.065\n",
+       "You guys see that.\n",
+       "\n",
+       "00:39:58.357 --> 00:40:07.357\n",
+       "So Gundest actually went up. But what they did do really effectively is a call out, because, imagine, just down here, they just had text, like in 2,005.\n",
+       "\n",
+       "00:40:08.515 --> 00:40:10.515\n",
+       "Like they enacted this law, like.\n",
+       "\n",
+       "00:40:10.760 --> 00:40:14.760\n",
+       "I wouldn't. I might read the tap caption, but I wouldn't.\n",
+       "\n",
+       "00:40:15.316 --> 00:40:16.316\n",
+       "Like.\n",
+       "\n",
+       "00:40:16.915 --> 00:40:17.915\n",
+       "I wouldn't.\n",
+       "\n",
+       "00:40:18.459 --> 00:40:19.459\n",
+       "See the impact.\n",
+       "\n",
+       "00:40:20.316 --> 00:40:25.316\n",
+       "I wouldn't see the impact. And like here, you're forced to see it. So those are really important.\n",
+       "\n",
+       "00:40:24.959 --> 00:40:33.959\n",
+       "They are extremely hard to do programmatically what I highly recommend you doing. It is just like, take a screenshot of your graph and then go in. There.\n",
+       "\n",
+       "00:40:33.615 --> 00:40:37.615\n",
+       "With Photoshop or anything, and just draw it yourself.\n",
+       "\n",
+       "00:40:38.857 --> 00:40:41.857\n",
+       "Doing this programmatically is Madh doing it.\n",
+       "\n",
+       "00:40:41.315 --> 00:40:47.315\n",
+       "Not programmatically, and just getting it done is way easy, and we'll take your charts to a whole new level.\n",
+       "\n",
+       "00:40:49.566 --> 00:40:52.566\n",
+       "Live factor again. I'm not gonna get into this, but.\n",
+       "\n",
+       "00:40:52.659 --> 00:41:02.659\n",
+       "The effects shown in the Graphic is not the same as here. So this is actually a bad example. But let me actually go to last year's.\n",
+       "\n",
+       "00:41:03.217 --> 00:41:04.217\n",
+       "Spring.\n",
+       "\n",
+       "00:41:05.415 --> 00:41:07.415\n",
+       "Let me actually just look.\n",
+       "\n",
+       "00:41:07.057 --> 00:41:08.057\n",
+       "Quick!\n",
+       "\n",
+       "00:41:09.215 --> 00:41:12.215\n",
+       "Was it 2,000.\n",
+       "\n",
+       "00:41:12.267 --> 00:41:14.267\n",
+       "Re.\n",
+       "\n",
+       "00:41:13.915 --> 00:41:15.915\n",
+       "This is 24.\n",
+       "\n",
+       "00:41:17.267 --> 00:41:21.267\n",
+       "I have a better one. Georgia, as you remember, the data.\n",
+       "\n",
+       "00:41:21.366 --> 00:41:23.366\n",
+       "The the picture.\n",
+       "\n",
+       "00:41:23.358 --> 00:41:24.358\n",
+       "Is.\n",
+       "\n",
+       "00:41:28.315 --> 00:41:31.315\n",
+       "The one of, like the heights of the different.\n",
+       "\n",
+       "00:41:31.516 --> 00:41:34.516\n",
+       "Nationalities. You remember that one that's the one I'm looking for.\n",
+       "\n",
+       "00:41:43.215 --> 00:41:45.215\n",
+       "I should have put this in there, I would.\n",
+       "\n",
+       "00:41:44.558 --> 00:41:47.558\n",
+       "Let me let me check, if you have like.\n",
+       "\n",
+       "00:41:47.716 --> 00:41:49.716\n",
+       "I mean, I definitely have it.\n",
+       "\n",
+       "00:41:49.559 --> 00:41:51.559\n",
+       "I just\n",
+       "\n",
+       "00:41:56.817 --> 00:41:58.817\n",
+       "Sorry guys.\n",
+       "\n",
+       "00:41:59.415 --> 00:42:02.415\n",
+       "I was able to get to it in 2 seconds.\n",
+       "\n",
+       "00:42:05.715 --> 00:42:10.715\n",
+       "Summer, fall, 23, spring, 2024.\n",
+       "\n",
+       "00:42:21.916 --> 00:42:29.916\n",
+       "Okay, if you can find it, please. I'm gonna move forward just because I I honestly can't think of where to find it right now. But the concept of this is that.\n",
+       "\n",
+       "00:42:29.817 --> 00:42:30.817\n",
+       "Here.\n",
+       "\n",
+       "00:42:31.370 --> 00:42:32.370\n",
+       "The.\n",
+       "\n",
+       "00:42:33.158 --> 00:42:36.158\n",
+       "The effect is way bigger in the data.\n",
+       "\n",
+       "00:42:36.759 --> 00:42:37.759\n",
+       "Or.\n",
+       "\n",
+       "00:42:38.516 --> 00:42:42.516\n",
+       "Than it is being displayed. So it's like it's misleading. It's like making.\n",
+       "\n",
+       "00:42:42.059 --> 00:42:44.059\n",
+       "Lines that are not.\n",
+       "\n",
+       "00:42:44.215 --> 00:42:46.215\n",
+       "That are not properly.\n",
+       "\n",
+       "00:42:46.218 --> 00:42:47.218\n",
+       "Scaled.\n",
+       "\n",
+       "00:42:47.358 --> 00:42:49.358\n",
+       "Like it's a lie factor.\n",
+       "\n",
+       "00:42:49.057 --> 00:42:53.057\n",
+       "This is coined by Edward Tufte. Anyone know who that is.\n",
+       "\n",
+       "00:42:54.815 --> 00:42:55.815\n",
+       "Lives in New York as well.\n",
+       "\n",
+       "00:42:55.660 --> 00:43:09.660\n",
+       "The New York Times called them the Leaner da Vinci of data visualization. He's like an expert. He's got really nice coffee table books, and they're like really cool looking. I have all 4 of them. He gives talks in New York City I highly recommend, and they're for students.\n",
+       "\n",
+       "00:43:11.956 --> 00:43:14.956\n",
+       "I. It's a lot less money, and you also get the books.\n",
+       "\n",
+       "00:43:16.558 --> 00:43:19.558\n",
+       "Alright. So this is some examples of good data, visualization.\n",
+       "\n",
+       "00:43:19.058 --> 00:43:21.058\n",
+       "Who can?\n",
+       "\n",
+       "00:43:21.466 --> 00:43:24.466\n",
+       "Tell me why they think this is a good one. Just at 1st glance this is.\n",
+       "\n",
+       "00:43:24.015 --> 00:43:26.015\n",
+       "Mapping.\n",
+       "\n",
+       "00:43:25.866 --> 00:43:31.866\n",
+       "Where different types of relationships live so single, cohabiting versus married in London.\n",
+       "\n",
+       "00:43:32.015 --> 00:43:33.015\n",
+       "Douglas.\n",
+       "\n",
+       "00:43:34.616 --> 00:43:37.616\n",
+       "Well, those are all disjoint sets, so there shouldn't be any overlap.\n",
+       "\n",
+       "00:43:37.815 --> 00:43:38.815\n",
+       "Like\n",
+       "\n",
+       "00:43:39.160 --> 00:43:41.160\n",
+       "And it. It's gives you a lot of information.\n",
+       "\n",
+       "00:43:40.717 --> 00:43:41.717\n",
+       "2.\n",
+       "\n",
+       "00:43:42.857 --> 00:43:45.857\n",
+       "The married. The married state is like completely different from the single.\n",
+       "\n",
+       "00:43:45.959 --> 00:43:46.959\n",
+       "And you can.\n",
+       "\n",
+       "00:43:47.161 --> 00:43:48.161\n",
+       "Yeah.\n",
+       "\n",
+       "00:43:47.516 --> 00:43:51.516\n",
+       "So there's information again, and it's not like any overlapping or misleading.\n",
+       "\n",
+       "00:43:47.817 --> 00:43:53.817\n",
+       "Join.\n",
+       "\n",
+       "00:43:52.716 --> 00:43:53.716\n",
+       "Not overlapping, not.\n",
+       "\n",
+       "00:43:54.716 --> 00:51:50.716\n",
+       "Like.\n",
+       "\n",
+       "00:43:55.059 --> 00:43:56.059\n",
+       "And like.\n",
+       "\n",
+       "00:43:55.216 --> 00:44:01.216\n",
+       "It shows you that married people tend to live by the coast as opposed to single and cohabitation, which, more towards center.\n",
+       "\n",
+       "00:44:01.219 --> 00:44:02.219\n",
+       "Yeah.\n",
+       "\n",
+       "00:44:02.415 --> 00:44:04.415\n",
+       "Not necessarily the coast, but like.\n",
+       "\n",
+       "00:44:04.567 --> 00:44:07.567\n",
+       "Outside of the city cause. They're like, oh, these are the cities.\n",
+       "\n",
+       "00:44:08.257 --> 00:44:11.257\n",
+       "And like this like, if you just did like.\n",
+       "\n",
+       "00:44:12.359 --> 00:44:19.359\n",
+       "I'm sure they did it by population like. So here above, like well, the coloring is really good, because darker. We interpret as more.\n",
+       "\n",
+       "00:44:18.915 --> 00:44:21.915\n",
+       "If you flipped it, it would look opposite.\n",
+       "\n",
+       "00:44:23.315 --> 00:44:27.315\n",
+       "But they also just didn't do numbers, because more people live in the city regardless.\n",
+       "\n",
+       "00:44:27.258 --> 00:44:28.258\n",
+       "But.\n",
+       "\n",
+       "00:44:27.957 --> 00:44:31.957\n",
+       "They did averages like, Oh, this is well above average.\n",
+       "\n",
+       "00:44:31.658 --> 00:44:33.658\n",
+       "And this is well below average. So like.\n",
+       "\n",
+       "00:44:34.857 --> 00:44:36.857\n",
+       "And yeah, like, you're like in a snapshot.\n",
+       "\n",
+       "00:44:36.915 --> 00:44:38.915\n",
+       "You can see like.\n",
+       "\n",
+       "00:44:38.815 --> 00:44:42.815\n",
+       "Oh, married people move outside of the city, which, in a way like I get it. Douglas.\n",
+       "\n",
+       "00:44:44.716 --> 00:44:58.716\n",
+       "It's really cool, like the progression, too, because it almost kinda like it goes from single to cohabiting to married, which is like the natural progression of our relationship. Really, you know, you're single, and then you move in together. And then so it like, if there's a kind of a trend, the order makes sense.\n",
+       "\n",
+       "00:44:57.818 --> 00:44:59.818\n",
+       "Very good. Yeah.\n",
+       "\n",
+       "00:44:59.817 --> 00:45:00.817\n",
+       "Yeah.\n",
+       "\n",
+       "00:45:01.216 --> 00:45:06.216\n",
+       "So it's like, Okay, just live in the city. And then you're co- happening like they live in the city, too. But like.\n",
+       "\n",
+       "00:45:07.016 --> 00:45:10.016\n",
+       "Sorry to spread out and then married. They just book it. They're out.\n",
+       "\n",
+       "00:45:11.015 --> 00:45:13.015\n",
+       "Cool\n",
+       "\n",
+       "00:45:13.815 --> 00:45:16.815\n",
+       "Florence Nightingale. They were a\n",
+       "\n",
+       "00:45:15.916 --> 00:45:17.916\n",
+       "She was a.\n",
+       "\n",
+       "00:45:17.958 --> 00:45:19.958\n",
+       "A statistician during.\n",
+       "\n",
+       "00:45:20.115 --> 00:45:23.115\n",
+       "I believe the Civil War.\n",
+       "\n",
+       "00:45:23.815 --> 00:45:25.815\n",
+       "Or world of civil. We must have been civil war.\n",
+       "\n",
+       "00:45:26.359 --> 00:45:32.359\n",
+       "And through her data visualization, which is hard to read now, because she obviously did it by hand, because computers aren't around.\n",
+       "\n",
+       "00:45:32.116 --> 00:45:39.116\n",
+       "But with this data visualization, like, she saved thousands of people's lives and discovered that in war.\n",
+       "\n",
+       "00:45:39.256 --> 00:45:41.256\n",
+       "Most people don't die from like.\n",
+       "\n",
+       "00:45:42.360 --> 00:45:43.360\n",
+       "Bullets and stuff.\n",
+       "\n",
+       "00:45:43.060 --> 00:45:45.060\n",
+       "They die.\n",
+       "\n",
+       "00:45:45.515 --> 00:45:46.515\n",
+       "From infections.\n",
+       "\n",
+       "00:45:47.416 --> 00:45:53.416\n",
+       "Totally change the change the way we fight, unfortunately, but also saved a lot of people's lives. So the blue.\n",
+       "\n",
+       "00:45:53.216 --> 00:45:56.216\n",
+       "Is like. It's a timescale as a time wheel.\n",
+       "\n",
+       "00:45:57.416 --> 00:46:01.416\n",
+       "And the blue is like the desperation, and the black and the red.\n",
+       "\n",
+       "00:46:01.061 --> 00:46:04.061\n",
+       "Are like guess from battle.\n",
+       "\n",
+       "00:46:04.066 --> 00:46:05.066\n",
+       "Battle Ones.\n",
+       "\n",
+       "00:46:05.658 --> 00:46:06.658\n",
+       "So it's like.\n",
+       "\n",
+       "00:46:06.817 --> 00:46:07.817\n",
+       "Oh!\n",
+       "\n",
+       "00:46:07.815 --> 00:46:08.815\n",
+       "This is wild.\n",
+       "\n",
+       "00:46:10.016 --> 00:46:12.016\n",
+       "So that's pretty cool.\n",
+       "\n",
+       "00:46:12.159 --> 00:46:14.159\n",
+       "Another thing is that they're like.\n",
+       "\n",
+       "00:46:13.916 --> 00:46:16.916\n",
+       "They can be fun and easily comprehensible.\n",
+       "\n",
+       "00:46:17.116 --> 00:46:22.116\n",
+       "Specifically what I like about this chart, I mean not only I think the drawings are cute and fun.\n",
+       "\n",
+       "00:46:22.415 --> 00:46:25.415\n",
+       "But like, instead of having a dot and saying human.\n",
+       "\n",
+       "00:46:26.359 --> 00:46:27.359\n",
+       "It's a picture of a human.\n",
+       "\n",
+       "00:46:27.416 --> 00:46:34.416\n",
+       "Or, instead of what's even worse, is like, imagine it was a dot with a color, and then the side. It was like the color, and then it said, human, next to it.\n",
+       "\n",
+       "00:46:34.457 --> 00:46:35.457\n",
+       "I would never read that.\n",
+       "\n",
+       "00:46:35.515 --> 00:46:37.515\n",
+       "But I just look at this.\n",
+       "\n",
+       "00:46:37.066 --> 00:46:39.066\n",
+       "And I'm like cool.\n",
+       "\n",
+       "00:46:41.160 --> 00:46:45.160\n",
+       "It shows a trend, but it also, it's like, okay, like this.\n",
+       "\n",
+       "00:46:45.166 --> 00:46:49.166\n",
+       "Like if they're above this line, like the or smarter, if they're below this line.\n",
+       "\n",
+       "00:46:48.919 --> 00:46:52.919\n",
+       "Like a goldfish, is dumb like a fish is pretty dumb, you know.\n",
+       "\n",
+       "00:46:53.159 --> 00:46:56.159\n",
+       "Something cool. Who can tell me about the scale of this.\n",
+       "\n",
+       "00:46:57.815 --> 00:46:59.815\n",
+       "Why, it looks weird. Kind of.\n",
+       "\n",
+       "00:47:05.159 --> 00:47:10.159\n",
+       "It doesn't really increase increments like it starts\n",
+       "\n",
+       "00:47:10.515 --> 00:47:12.515\n",
+       "This goes by point O. 5, then.\n",
+       "\n",
+       "00:47:12.560 --> 00:47:17.560\n",
+       "What does increasing but like it shows the gap on how much the distance so.\n",
+       "\n",
+       "00:47:17.966 --> 00:47:19.966\n",
+       "I'm listening. It's terrible my apologies, but.\n",
+       "\n",
+       "00:47:20.359 --> 00:47:22.359\n",
+       "If it if it's like the.\n",
+       "\n",
+       "00:47:20.859 --> 00:47:23.859\n",
+       "Not linear. It's logarithm.\n",
+       "\n",
+       "00:47:22.116 --> 00:47:25.116\n",
+       "Yeah, exactly. Thank you. That's what I was.\n",
+       "\n",
+       "00:47:24.115 --> 00:47:26.115\n",
+       "This is a log scale.\n",
+       "\n",
+       "00:47:26.816 --> 00:47:31.816\n",
+       "And if we tried to plot this on a linear scale, it would be like goldfish are like.\n",
+       "\n",
+       "00:47:31.816 --> 00:47:33.816\n",
+       "This big and whales are.\n",
+       "\n",
+       "00:47:35.116 --> 00:47:38.116\n",
+       "Thousands and thousands and thousands of pounds and goldfish is like.\n",
+       "\n",
+       "00:47:39.315 --> 00:47:42.315\n",
+       "Very small, so you wouldn't be able to see anything.\n",
+       "\n",
+       "00:47:42.716 --> 00:47:48.716\n",
+       "But it's a nonlinear relationship. And luckily it's a logistic relationship of the size and like it, plots very nice.\n",
+       "\n",
+       "00:47:49.357 --> 00:47:51.357\n",
+       "And it's not misleading. It's just easier to read.\n",
+       "\n",
+       "00:47:52.860 --> 00:47:57.860\n",
+       "It's also good because it's like brain mass and body mast. You're like, okay, so it's like.\n",
+       "\n",
+       "00:47:58.060 --> 00:48:03.060\n",
+       "The size of the brain compared to the size of the body. If you just did brain size.\n",
+       "\n",
+       "00:48:03.117 --> 00:48:04.117\n",
+       "Like.\n",
+       "\n",
+       "00:48:04.759 --> 00:48:06.759\n",
+       "Humans would be like.\n",
+       "\n",
+       "00:48:07.258 --> 00:48:14.258\n",
+       "They wouldn't show that humans are the smartest. Who else can see why I really like this chart? There's like 2 thing. They're like Easter eggs.\n",
+       "\n",
+       "00:48:16.415 --> 00:48:17.415\n",
+       "Focusing on the fun part.\n",
+       "\n",
+       "00:48:18.517 --> 00:48:19.517\n",
+       "Douglas.\n",
+       "\n",
+       "00:48:20.759 --> 00:48:21.759\n",
+       "So the one thing that.\n",
+       "\n",
+       "00:48:21.716 --> 00:48:28.716\n",
+       "How do you read the mouse? Do you read it from the tail to like the body, or just the body like? No, no, to the right more. Not that one.\n",
+       "\n",
+       "00:48:28.415 --> 00:48:30.415\n",
+       "Yeah, I see that. Yeah, I don't really care.\n",
+       "\n",
+       "00:48:30.360 --> 00:48:36.360\n",
+       "Like, yeah, it's not trying to be a hundred 1,000%. It's just like.\n",
+       "\n",
+       "00:48:36.415 --> 00:48:38.415\n",
+       "Okay, this is the general zone.\n",
+       "\n",
+       "00:48:37.720 --> 00:48:39.720\n",
+       "No, nor.\n",
+       "\n",
+       "00:48:41.715 --> 00:48:44.715\n",
+       "There's a centaur, and the elephant has a crown.\n",
+       "\n",
+       "00:48:44.217 --> 00:48:45.217\n",
+       "Yeah.\n",
+       "\n",
+       "00:48:45.060 --> 00:48:47.060\n",
+       "Like cool.\n",
+       "\n",
+       "00:48:48.315 --> 00:48:49.315\n",
+       "They're just having fun.\n",
+       "\n",
+       "00:48:50.115 --> 00:48:53.115\n",
+       "Sabia is that if I mispronounce your name, please.\n",
+       "\n",
+       "00:48:54.116 --> 00:49:00.116\n",
+       "So, yeah, so like I saw that it's labeled on both sides like for each of the axis.\n",
+       "\n",
+       "00:49:01.015 --> 00:49:04.015\n",
+       "Oh, on the top, I didn't even notice that. Yeah.\n",
+       "\n",
+       "00:49:04.715 --> 00:49:07.715\n",
+       "You're right. I didn't notice that either. That's really good. Usually they're just labeled.\n",
+       "\n",
+       "00:49:06.916 --> 00:49:10.916\n",
+       "The side and the side, but this one has them all labeled, which is really cool.\n",
+       "\n",
+       "00:49:11.158 --> 00:49:13.158\n",
+       "Cool\n",
+       "\n",
+       "00:49:14.466 --> 00:49:19.466\n",
+       "That's so. That's fine, easily comprehensible. These are just some general guidelines like part pie charts. Get a lot of shit.\n",
+       "\n",
+       "00:49:19.856 --> 00:49:24.856\n",
+       "I like them a lot. I I personally think they're very. If they're done right, they're really helpful.\n",
+       "\n",
+       "00:49:24.915 --> 00:49:25.915\n",
+       "This is not right.\n",
+       "\n",
+       "00:49:26.015 --> 00:49:28.015\n",
+       "Too many categories.\n",
+       "\n",
+       "00:49:28.615 --> 00:49:29.615\n",
+       "If you got more than 6.\n",
+       "\n",
+       "00:49:28.715 --> 00:49:30.715\n",
+       "Don't use a pie chart.\n",
+       "\n",
+       "00:49:33.466 --> 00:49:35.466\n",
+       "Cause. It's just impossible to read.\n",
+       "\n",
+       "00:49:36.316 --> 00:49:40.316\n",
+       "The other thing is you have to do percentages of a whole. It has to be in percentages.\n",
+       "\n",
+       "00:49:40.416 --> 00:49:41.416\n",
+       "So like.\n",
+       "\n",
+       "00:49:41.116 --> 00:49:45.116\n",
+       "They have to add to a hundred percent cause. That's how our minds.\n",
+       "\n",
+       "00:49:46.166 --> 00:49:47.166\n",
+       "So it's like.\n",
+       "\n",
+       "00:49:47.359 --> 00:49:48.359\n",
+       "You're only including.\n",
+       "\n",
+       "00:49:49.015 --> 00:49:52.015\n",
+       "50% of the data. And you show a whole pie set.\n",
+       "\n",
+       "00:49:52.315 --> 00:49:56.315\n",
+       "It's, doesn't it? We interpret it as a hundred percent.\n",
+       "\n",
+       "00:49:56.016 --> 00:49:59.016\n",
+       "That's how we do it, and it will visualize as a hundred percent.\n",
+       "\n",
+       "00:50:01.756 --> 00:50:03.756\n",
+       "Line charts are really powerful.\n",
+       "\n",
+       "00:50:04.367 --> 00:50:06.367\n",
+       "For time, data.\n",
+       "\n",
+       "00:50:06.058 --> 00:50:09.058\n",
+       "We will naturally.\n",
+       "\n",
+       "00:50:08.660 --> 00:50:09.660\n",
+       "Think.\n",
+       "\n",
+       "00:50:10.516 --> 00:50:15.516\n",
+       "This is before, and this we read, left or right. This is earlier in time. This is later in time.\n",
+       "\n",
+       "00:50:15.615 --> 00:50:17.615\n",
+       "I have seen thousands of charts.\n",
+       "\n",
+       "00:50:16.758 --> 00:50:18.758\n",
+       "That are line charts.\n",
+       "\n",
+       "00:50:19.260 --> 00:50:20.260\n",
+       "And the.\n",
+       "\n",
+       "00:50:20.217 --> 00:50:26.217\n",
+       "Bottom access is not time, and it's just some category, and they connect it. And you're like these are not.\n",
+       "\n",
+       "00:50:26.816 --> 00:50:28.816\n",
+       "In sequence. They're UN sequenced.\n",
+       "\n",
+       "00:50:28.616 --> 00:50:32.616\n",
+       "And they. You're presenting them a sequence, and we interpret a sequence. But they're not.\n",
+       "\n",
+       "00:50:32.916 --> 00:50:37.916\n",
+       "Again. This is New York time, and they show trends like here.\n",
+       "\n",
+       "00:50:38.015 --> 00:50:40.015\n",
+       "Very clear thing, like.\n",
+       "\n",
+       "00:50:40.116 --> 00:50:41.116\n",
+       "Oh!\n",
+       "\n",
+       "00:50:41.215 --> 00:50:43.215\n",
+       "Right at this time like this was.\n",
+       "\n",
+       "00:50:42.859 --> 00:50:44.859\n",
+       "I was, I.\n",
+       "\n",
+       "00:50:44.916 --> 00:50:45.916\n",
+       "A peak.\n",
+       "\n",
+       "00:50:46.515 --> 00:50:47.515\n",
+       "And then it went down.\n",
+       "\n",
+       "00:50:47.058 --> 00:50:48.058\n",
+       "Like.\n",
+       "\n",
+       "00:50:48.315 --> 00:50:51.315\n",
+       "Oh, this is the 7 day average like call out.\n",
+       "\n",
+       "00:50:53.615 --> 00:50:56.615\n",
+       "Scatter plots.\n",
+       "\n",
+       "00:50:57.460 --> 00:51:03.460\n",
+       "So the one on the top left looks cool, right like kind of looks impressive. I'm like, Oh, there's all this data and colors and things.\n",
+       "\n",
+       "00:51:03.615 --> 00:51:06.615\n",
+       "So like a lot of people do this sort of stuff.\n",
+       "\n",
+       "00:51:06.757 --> 00:51:08.757\n",
+       "But like it's me.\n",
+       "\n",
+       "00:51:08.960 --> 00:51:11.960\n",
+       "There's no relationship. You're not showing.\n",
+       "\n",
+       "00:51:12.215 --> 00:51:16.215\n",
+       "Anything there's like no like. Look at the one below.\n",
+       "\n",
+       "00:51:16.315 --> 00:51:17.315\n",
+       "The Iris Data Set.\n",
+       "\n",
+       "00:51:18.615 --> 00:51:21.615\n",
+       "You can clearly see pedal length of pedal width.\n",
+       "\n",
+       "00:51:22.358 --> 00:51:26.358\n",
+       "Okay, the bigger the pedal with the bigger the pedal length I'm like, that's fine, like.\n",
+       "\n",
+       "00:51:26.015 --> 00:51:29.015\n",
+       "Kind of dub. But okay, that's information.\n",
+       "\n",
+       "00:51:29.215 --> 00:51:32.215\n",
+       "But then also you color them by their different.\n",
+       "\n",
+       "00:51:32.459 --> 00:51:35.459\n",
+       "Species of specific types. And you're like, okay, like.\n",
+       "\n",
+       "00:51:35.657 --> 00:51:39.657\n",
+       "The blues are all together, and the greens are all together, and the Reds are all together.\n",
+       "\n",
+       "00:51:40.216 --> 00:51:41.216\n",
+       "Look up here!\n",
+       "\n",
+       "00:51:42.359 --> 00:51:43.359\n",
+       "Nothing's clustered.\n",
+       "\n",
+       "00:51:43.515 --> 00:51:46.515\n",
+       "There is no linear relationship. There's nothing.\n",
+       "\n",
+       "00:51:47.115 --> 00:51:48.115\n",
+       "Yes, it looks.\n",
+       "\n",
+       "00:51:49.816 --> 00:51:51.816\n",
+       "Complex.\n",
+       "\n",
+       "00:51:51.657 --> 00:52:46.657\n",
+       "Like.\n",
+       "\n",
+       "00:51:51.915 --> 00:51:53.915\n",
+       "But like and like.\n",
+       "\n",
+       "00:51:54.216 --> 00:51:58.216\n",
+       "Kind of impressive, and it looks complicated. That's what it looks complicated. But like.\n",
+       "\n",
+       "00:51:57.815 --> 00:51:59.815\n",
+       "If you can't talk about it.\n",
+       "\n",
+       "00:51:59.859 --> 00:52:01.859\n",
+       "You don't do it.\n",
+       "\n",
+       "00:52:02.116 --> 00:52:05.116\n",
+       "So the dues like use it to.\n",
+       "\n",
+       "00:52:05.360 --> 00:52:07.360\n",
+       "Telemessage, story, or insight, like.\n",
+       "\n",
+       "00:52:07.315 --> 00:52:09.315\n",
+       "Bottom line. If you can't.\n",
+       "\n",
+       "00:52:09.566 --> 00:52:13.566\n",
+       "Talk about your visualization. You're just putting it up there like. Don't show it.\n",
+       "\n",
+       "00:52:14.715 --> 00:52:16.715\n",
+       "Like, that's it.\n",
+       "\n",
+       "00:52:17.656 --> 00:52:22.656\n",
+       "Caveat when you're doing dashboards sometimes. Not being able to talk about is a good thing.\n",
+       "\n",
+       "00:52:22.858 --> 00:52:26.858\n",
+       "Right? You're just like monitoring average performance level.\n",
+       "\n",
+       "00:52:27.715 --> 00:52:29.715\n",
+       "And then you're waiting for an outlier.\n",
+       "\n",
+       "00:52:29.859 --> 00:52:33.859\n",
+       "So you're like, Okay, like, nothing is wrong. And you want to show that's okay.\n",
+       "\n",
+       "00:52:33.667 --> 00:52:37.667\n",
+       "Well, I guess you could still talk about it like this is just everything's normal.\n",
+       "\n",
+       "00:52:38.666 --> 00:52:39.666\n",
+       "Everything is normal. Right now.\n",
+       "\n",
+       "00:52:40.456 --> 00:52:41.456\n",
+       "The don'ts like.\n",
+       "\n",
+       "00:52:41.616 --> 00:52:45.616\n",
+       "Don't purposely or in purposely, meet, mislead people.\n",
+       "\n",
+       "00:52:46.456 --> 00:52:49.456\n",
+       "For example, the extreme example is the.\n",
+       "\n",
+       "00:52:47.712 --> 01:08:08.712\n",
+       "On average.\n",
+       "\n",
+       "00:52:49.916 --> 00:52:50.916\n",
+       "This.\n",
+       "\n",
+       "00:52:51.515 --> 00:52:52.515\n",
+       "Flipping the axes.\n",
+       "\n",
+       "00:52:53.157 --> 00:52:55.157\n",
+       "But another example is just.\n",
+       "\n",
+       "00:52:56.815 --> 00:52:57.815\n",
+       "You know this chart.\n",
+       "\n",
+       "00:52:57.016 --> 00:53:00.016\n",
+       "Where you're like. Oh, this is where Covid's the worst.\n",
+       "\n",
+       "00:52:59.757 --> 00:53:03.757\n",
+       "Right. They're the most deaths in California, Texas, and New York. And you're like.\n",
+       "\n",
+       "00:53:04.116 --> 00:53:09.116\n",
+       "Well, yeah, there are. But they're also the most populated states. So it's like not really saying anything about where it's.\n",
+       "\n",
+       "00:53:09.716 --> 00:53:10.716\n",
+       "Impacting.\n",
+       "\n",
+       "00:53:12.016 --> 00:53:16.016\n",
+       "So let's get into business intelligence. So again, we're gonna.\n",
+       "\n",
+       "00:53:15.715 --> 00:53:18.715\n",
+       "Do this for a job which is business.\n",
+       "\n",
+       "00:53:19.717 --> 00:53:26.717\n",
+       "And Chris put this in there. It's pretty funny. It's like business. Intelligence is an oxymoronic. An oxymoron is a figure of speech where 2 things.\n",
+       "\n",
+       "00:53:27.458 --> 00:53:30.458\n",
+       "Don't make sense like combined. Don't make sense. His joke is like.\n",
+       "\n",
+       "00:53:30.015 --> 00:53:32.015\n",
+       "Businesses are stupid.\n",
+       "\n",
+       "00:53:32.255 --> 00:53:34.255\n",
+       "Which I will also kind of vouch.\n",
+       "\n",
+       "00:53:35.355 --> 00:53:37.355\n",
+       "But they also pay money, so.\n",
+       "\n",
+       "00:53:39.656 --> 00:53:42.656\n",
+       "Really business intelligence is like the collection of information.\n",
+       "\n",
+       "00:53:43.058 --> 00:53:48.058\n",
+       "For military or political value. This was made in 1,900. So now you could say for economic value.\n",
+       "\n",
+       "00:53:47.716 --> 00:53:50.716\n",
+       "A collection of information about a business to make money.\n",
+       "\n",
+       "00:53:52.015 --> 00:53:54.015\n",
+       "There are a lot of different tools you can use.\n",
+       "\n",
+       "00:53:54.616 --> 00:53:55.616\n",
+       "I.\n",
+       "\n",
+       "00:53:56.216 --> 00:53:59.216\n",
+       "Don't like tableau, but it's like the biggest one.\n",
+       "\n",
+       "00:53:59.356 --> 00:54:01.356\n",
+       "I like Looker, which is Google's.\n",
+       "\n",
+       "00:54:02.017 --> 00:54:03.017\n",
+       "But.\n",
+       "\n",
+       "00:54:03.256 --> 00:54:06.256\n",
+       "We're teaching you tableau because it is the biggest.\n",
+       "\n",
+       "00:54:07.516 --> 00:54:11.516\n",
+       "Most recognized one. Therefore you can put it on your resume, and it's a good resume keyword.\n",
+       "\n",
+       "00:54:12.615 --> 00:54:13.615\n",
+       "Remember what's our hidden.\n",
+       "\n",
+       "00:54:13.716 --> 00:54:21.716\n",
+       "Get you all jobs like this is a good thing to have on your thing, and most companies do use it. Looker studios like newer.\n",
+       "\n",
+       "00:54:22.256 --> 00:54:23.256\n",
+       "But\n",
+       "\n",
+       "00:54:23.216 --> 00:54:29.216\n",
+       "And the reason I don't like tableau is because, like I use looker data. And I got used to that went to tableau. And I'm like.\n",
+       "\n",
+       "00:54:29.315 --> 00:54:31.315\n",
+       "This is stupid, like.\n",
+       "\n",
+       "00:54:30.915 --> 00:54:33.915\n",
+       "Better you do it, you know I'm just a hater.\n",
+       "\n",
+       "00:54:33.960 --> 00:54:39.960\n",
+       "There are a bunch of other ones, the other really big one's Power BI, we were originally gonna show you that.\n",
+       "\n",
+       "00:54:40.616 --> 00:54:44.616\n",
+       "It's a little bit more complicated, but you have to have a credit card. It's not free.\n",
+       "\n",
+       "00:54:43.715 --> 00:54:45.715\n",
+       "Tableau is free, so.\n",
+       "\n",
+       "00:54:48.416 --> 00:54:56.416\n",
+       "Thing important thing about tableau, which you guys should have seen is like you 1st create widgets, which is just a chart, and then you combine widgets to make a dashboard.\n",
+       "\n",
+       "00:54:56.260 --> 00:55:02.260\n",
+       "And I do like about that, because then each thing is each like sheet or each page that you're working on is just one.\n",
+       "\n",
+       "00:55:02.016 --> 00:55:06.016\n",
+       "And then once you master, that you can go to the next one, the next one.\n",
+       "\n",
+       "00:55:06.157 --> 00:55:09.157\n",
+       "And then you put them, combine them together for a dashboard.\n",
+       "\n",
+       "00:55:11.659 --> 00:55:16.659\n",
+       "So what we're gonna do is we're gonna take a break. And then we're gonna code. Some stuff.\n",
+       "\n",
+       "00:55:17.916 --> 00:55:20.916\n",
+       "In python, and then we're gonna do the same thing in tableau.\n",
+       "\n",
+       "00:55:20.756 --> 00:55:25.756\n",
+       "And we're gonna chill. Alright, you guys cool with that. Let's take a 7 min break real quick.\n",
+       "\n",
+       "00:55:32.717 --> 00:55:33.717\n",
+       "Alright! I'll see you all in 7.\n",
+       "\n",
+       "01:01:08.607 --> 01:01:10.607\n",
+       "Right here. How cool is this.\n",
+       "\n",
+       "01:01:11.365 --> 01:01:13.365\n",
+       "It's like, how do I like.\n",
+       "\n",
+       "01:01:13.266 --> 01:01:17.266\n",
+       "Scroll around. Look, I like this is where I'm living.\n",
+       "\n",
+       "01:01:17.168 --> 01:01:18.168\n",
+       "Which I.\n",
+       "\n",
+       "01:01:18.466 --> 01:01:26.466\n",
+       "Whatever you guys 100 miles away from you guys. But look how cool like I got this little beach here like zoom out in this like little harbor.\n",
+       "\n",
+       "01:01:25.465 --> 01:01:27.465\n",
+       "Super quaint.\n",
+       "\n",
+       "01:01:29.609 --> 01:01:33.609\n",
+       "And last week last week my buddy and I traded houses. His.\n",
+       "\n",
+       "01:01:34.166 --> 01:01:36.166\n",
+       "He just had a baby, and his parents, wife.\n",
+       "\n",
+       "01:01:36.466 --> 01:01:39.466\n",
+       "Is wife's parents came, and I live a mile from him.\n",
+       "\n",
+       "01:01:40.565 --> 01:01:41.565\n",
+       "And he was like, Hey, like.\n",
+       "\n",
+       "01:01:42.710 --> 01:01:43.710\n",
+       "He was like.\n",
+       "\n",
+       "01:01:44.666 --> 01:01:48.666\n",
+       "He's like, Oh, do you want us like? Can I? Can they stay there like kind of trade houses.\n",
+       "\n",
+       "01:01:48.965 --> 01:01:52.965\n",
+       "For a week, and I was like sure he has a houseboat like this one.\n",
+       "\n",
+       "01:01:53.407 --> 01:01:58.407\n",
+       "And I was like sleeping on this boat in Montauk. It was like it's super. It was super fun.\n",
+       "\n",
+       "01:01:59.165 --> 01:02:02.165\n",
+       "I digress. We got 30 seconds left.\n",
+       "\n",
+       "01:02:03.510 --> 01:02:07.510\n",
+       "I sent you. The visualization slides for my code and slack.\n",
+       "\n",
+       "01:02:07.668 --> 01:02:08.668\n",
+       "Host.\n",
+       "\n",
+       "01:02:08.909 --> 01:02:11.909\n",
+       "I don't know if you can find something in there.\n",
+       "\n",
+       "01:02:11.865 --> 01:02:14.865\n",
+       "I send you a direct message.\n",
+       "\n",
+       "01:02:19.009 --> 01:02:21.009\n",
+       "Yep, this is it.\n",
+       "\n",
+       "01:02:21.766 --> 01:02:24.766\n",
+       "Is it a link, or the actual slide deck?\n",
+       "\n",
+       "01:02:26.711 --> 01:02:27.711\n",
+       "I send you this slide thing.\n",
+       "\n",
+       "01:02:26.865 --> 01:02:28.865\n",
+       "The actual slides.\n",
+       "\n",
+       "01:02:29.368 --> 01:02:30.368\n",
+       "Alex.\n",
+       "\n",
+       "01:02:29.511 --> 01:02:30.511\n",
+       "The air.\n",
+       "\n",
+       "01:02:30.113 --> 01:02:31.113\n",
+       "Yeah.\n",
+       "\n",
+       "01:02:32.165 --> 01:02:36.165\n",
+       "There's a lot I this slide deck they had to like really cut it down.\n",
+       "\n",
+       "01:02:37.869 --> 01:02:38.869\n",
+       "Where was it?\n",
+       "\n",
+       "01:02:39.917 --> 01:02:44.917\n",
+       "It was. Oh, it's not in here. This is old. This is older one, I guess.\n",
+       "\n",
+       "01:02:50.666 --> 01:02:57.666\n",
+       "Yeah, but I didn't do it. I did it in the spring in 2023. So I don't know. I just can't find it. So it is what it is. Okay.\n",
+       "\n",
+       "01:02:55.409 --> 01:03:00.409\n",
+       "Yeah, this 2022. So you are. If you did it, then it has to be 23.\n",
+       "\n",
+       "01:02:58.865 --> 01:03:01.865\n",
+       "Alright. Everyone back cameras on. Let's go.\n",
+       "\n",
+       "01:03:05.216 --> 01:03:12.216\n",
+       "So here we're gonna be going into the python lecture doing data visualizations in Python.\n",
+       "\n",
+       "01:03:13.109 --> 01:03:16.109\n",
+       "Python is great for data visualization.\n",
+       "\n",
+       "01:03:16.115 --> 01:03:24.115\n",
+       "The only thing is they're not really that great for interactive data visualization. But it's good for data visualization exploration. What I usually do is like.\n",
+       "\n",
+       "01:03:24.510 --> 01:03:27.510\n",
+       "Sketch out my ideas here and make sure there are patterns, and then.\n",
+       "\n",
+       "01:03:26.609 --> 01:03:29.609\n",
+       "If there are, I'll put it into a dashboard.\n",
+       "\n",
+       "01:03:30.110 --> 01:03:31.110\n",
+       "Personally.\n",
+       "\n",
+       "01:03:31.065 --> 01:03:36.065\n",
+       "So if I open up my, oh, yeah, one thing to remember. If you want this like.\n",
+       "\n",
+       "01:03:35.865 --> 01:03:38.865\n",
+       "Which you have to do. So do this, I guess. Now.\n",
+       "\n",
+       "01:03:38.808 --> 01:03:39.808\n",
+       "Is like.\n",
+       "\n",
+       "01:03:40.410 --> 01:03:47.410\n",
+       "You had to pull this new information into your thing, so you do it by going to your directory. Git fetch upstream.\n",
+       "\n",
+       "01:03:47.608 --> 01:03:49.608\n",
+       "Git merge upstream/main, git push.\n",
+       "\n",
+       "01:03:49.910 --> 01:03:56.910\n",
+       "Do those 3 commands. You should get the new information into your thing. But I'm gonna go into my editor.\n",
+       "\n",
+       "01:03:57.067 --> 01:03:58.067\n",
+       "Here.\n",
+       "\n",
+       "01:03:57.715 --> 01:03:59.715\n",
+       "Is it git push, or git pull?\n",
+       "\n",
+       "01:04:00.366 --> 01:04:07.366\n",
+       "No. So git pull. Only if you just do git pull, it works for like 10% of people, or like.\n",
+       "\n",
+       "01:04:07.509 --> 01:04:08.509\n",
+       "Cause. I that's I think that's what I did.\n",
+       "\n",
+       "01:04:07.611 --> 01:04:08.611\n",
+       "N.\n",
+       "\n",
+       "01:04:09.710 --> 01:04:12.710\n",
+       "But no, you gotta fetch it, and you gotta merge it, and then you gotta push it.\n",
+       "\n",
+       "01:04:12.112 --> 01:04:13.112\n",
+       "Okay. Okay.\n",
+       "\n",
+       "01:04:14.565 --> 01:04:17.565\n",
+       "But you're you're close to your almost right\n",
+       "\n",
+       "01:04:18.165 --> 01:04:20.165\n",
+       "Data, science. Tuesday.\n",
+       "\n",
+       "01:04:23.065 --> 01:04:26.065\n",
+       "I'm just gonna make another cut.\n",
+       "\n",
+       "01:04:28.066 --> 01:04:31.066\n",
+       "I'm just gonna because I just don't want to update it and mess it up.\n",
+       "\n",
+       "01:04:30.766 --> 01:04:34.766\n",
+       "So what we gotta do 1st is we're gonna be making some map.\n",
+       "\n",
+       "01:04:34.865 --> 01:04:38.865\n",
+       "Or like using dimensions and\n",
+       "\n",
+       "01:04:39.365 --> 01:04:43.365\n",
+       "You need to install new libraries. So in\n",
+       "\n",
+       "01:04:44.409 --> 01:04:48.409\n",
+       "In Python are specifically in Python, who's here familiar with Pip.\n",
+       "\n",
+       "01:04:49.967 --> 01:04:50.967\n",
+       "Pip is like.\n",
+       "\n",
+       "01:04:51.116 --> 01:04:54.116\n",
+       "The number one way to install new libraries on Python.\n",
+       "\n",
+       "01:04:53.811 --> 01:04:54.811\n",
+       "Right.\n",
+       "\n",
+       "01:04:56.465 --> 01:04:58.465\n",
+       "And these don't come built in with anaconda.\n",
+       "\n",
+       "01:04:57.709 --> 01:05:00.709\n",
+       "But geopandas was built.\n",
+       "\n",
+       "01:05:01.266 --> 01:05:02.266\n",
+       "Literally, just.\n",
+       "\n",
+       "01:05:02.465 --> 01:05:04.465\n",
+       "Pandas. But for geography.\n",
+       "\n",
+       "01:05:04.265 --> 01:05:08.265\n",
+       "Great, and then geoplot. So in\n",
+       "\n",
+       "01:05:10.165 --> 01:05:14.165\n",
+       "In Jupyter notebooks on Mac, if you do an exclamation.\n",
+       "\n",
+       "01:05:15.210 --> 01:05:20.210\n",
+       "It me in the beginning of a cell. It just means like, Oh, this is the same language.\n",
+       "\n",
+       "01:05:19.765 --> 01:05:21.765\n",
+       "As terminal.\n",
+       "\n",
+       "01:05:21.865 --> 01:05:23.865\n",
+       "Right. So if I view\n",
+       "\n",
+       "01:05:24.065 --> 01:05:26.065\n",
+       "Terminal. And I just do like.\n",
+       "\n",
+       "01:05:25.711 --> 01:05:28.711\n",
+       "Ls, it's the same exact thing.\n",
+       "\n",
+       "01:05:29.112 --> 01:05:33.112\n",
+       "It just says talk terminal. So if you do that, and you just do pip, install pipel.\n",
+       "\n",
+       "01:05:32.916 --> 01:05:35.916\n",
+       "It'll install it. Let me clear all outputs.\n",
+       "\n",
+       "01:05:35.866 --> 01:05:36.866\n",
+       "However.\n",
+       "\n",
+       "01:05:37.166 --> 01:05:39.166\n",
+       "If you don't have, if you're working on a\n",
+       "\n",
+       "01:05:39.713 --> 01:05:41.713\n",
+       "Pip is only for Mac.\n",
+       "\n",
+       "01:05:42.666 --> 01:05:46.666\n",
+       "And if you're already on a windows you can run these commands. Install this conda, install this.\n",
+       "\n",
+       "01:05:45.765 --> 01:05:48.765\n",
+       "If you're having problems, don't worry about it.\n",
+       "\n",
+       "01:05:49.070 --> 01:05:52.070\n",
+       "We can fix it before the end of today, and you don't need to do it.\n",
+       "\n",
+       "01:05:55.566 --> 01:05:57.566\n",
+       "Not installed. Did I pick the wrong.\n",
+       "\n",
+       "01:05:58.616 --> 01:05:59.616\n",
+       "Yeah, I picked the wrong term.\n",
+       "\n",
+       "01:05:59.512 --> 01:06:03.512\n",
+       "So I do this all the time I picked, or the kernel.\n",
+       "\n",
+       "01:06:03.066 --> 01:06:05.066\n",
+       "That isn't.\n",
+       "\n",
+       "01:06:05.110 --> 01:06:10.110\n",
+       "The anaconda terminal I picked just a regular one, and I was like, I don't know what you're talking about.\n",
+       "\n",
+       "01:06:11.011 --> 01:06:12.011\n",
+       "So here.\n",
+       "\n",
+       "01:06:12.065 --> 01:06:13.065\n",
+       "Run it!\n",
+       "\n",
+       "01:06:13.365 --> 01:06:15.365\n",
+       "So.\n",
+       "\n",
+       "01:06:18.366 --> 01:06:23.366\n",
+       "So you're here, we're gonna be working with the same data as before. Which is the listings data. Right?\n",
+       "\n",
+       "01:06:23.966 --> 01:06:28.966\n",
+       "And say from a stakeholder, they're like, all right. We wanna find out, like on average.\n",
+       "\n",
+       "01:06:29.417 --> 01:06:31.417\n",
+       "What type of room?\n",
+       "\n",
+       "01:06:31.166 --> 01:06:33.166\n",
+       "Is the most expensive and least expensive.\n",
+       "\n",
+       "01:06:33.211 --> 01:06:35.211\n",
+       "How would we do that? Anybody.\n",
+       "\n",
+       "01:06:35.465 --> 01:06:38.465\n",
+       "Because remember, room type. The room type is like.\n",
+       "\n",
+       "01:06:39.165 --> 01:06:40.165\n",
+       "Let's just do tail.\n",
+       "\n",
+       "01:06:41.865 --> 01:06:43.865\n",
+       "Is like\n",
+       "\n",
+       "01:06:44.266 --> 01:06:48.266\n",
+       "Entire room, apartment, private thing like, how do we look on average.\n",
+       "\n",
+       "01:06:48.915 --> 01:06:49.915\n",
+       "How much they go for.\n",
+       "\n",
+       "01:06:51.566 --> 01:06:53.566\n",
+       "Let's go.\n",
+       "\n",
+       "01:06:54.314 --> 01:06:55.314\n",
+       "Jake.\n",
+       "\n",
+       "01:06:57.719 --> 01:06:59.719\n",
+       "But who? I'm not.\n",
+       "\n",
+       "01:06:59.666 --> 01:07:03.666\n",
+       "Familiar. I'm not good with memorizing stuff off the top of my head.\n",
+       "\n",
+       "01:07:04.411 --> 01:07:05.411\n",
+       "Let me look at my other stuff.\n",
+       "\n",
+       "01:07:05.512 --> 01:07:08.512\n",
+       "Walk me through it like walk me through. We want to find we want to find.\n",
+       "\n",
+       "01:07:07.666 --> 01:07:13.666\n",
+       "What the average is for all of these things like, how would you just say that for all the room types.\n",
+       "\n",
+       "01:07:13.768 --> 01:07:14.768\n",
+       "Bottom.\n",
+       "\n",
+       "01:07:14.516 --> 01:07:16.516\n",
+       "I mean average. You would add up all the.\n",
+       "\n",
+       "01:07:16.666 --> 01:07:18.666\n",
+       "The price is then divided by the amount.\n",
+       "\n",
+       "01:07:18.667 --> 01:07:21.667\n",
+       "Yeah. But don't we do something first, right? Because, like.\n",
+       "\n",
+       "01:07:19.013 --> 01:07:20.013\n",
+       "Out of place.\n",
+       "\n",
+       "01:07:22.266 --> 01:07:26.266\n",
+       "We just add them all up, all the prices it would keep them all together. So how do we separate them?\n",
+       "\n",
+       "01:07:28.115 --> 01:07:29.115\n",
+       "My apologies.\n",
+       "\n",
+       "01:07:28.867 --> 01:07:30.867\n",
+       "Let me look at my.\n",
+       "\n",
+       "01:07:30.012 --> 01:07:32.012\n",
+       "If I want to bail them out.\n",
+       "\n",
+       "01:07:32.367 --> 01:07:34.367\n",
+       "If these.\n",
+       "\n",
+       "01:07:34.515 --> 01:07:35.515\n",
+       "Matthew.\n",
+       "\n",
+       "01:07:34.813 --> 01:07:36.813\n",
+       "Here to the rescue.\n",
+       "\n",
+       "01:07:36.615 --> 01:07:39.615\n",
+       "Can you? Can you filter it 1st using\n",
+       "\n",
+       "01:07:39.912 --> 01:07:41.912\n",
+       "Oh, my! Gosh!\n",
+       "\n",
+       "01:07:41.618 --> 01:07:42.618\n",
+       "DF.\n",
+       "\n",
+       "01:07:42.813 --> 01:07:46.813\n",
+       "The Df room type is equal to.\n",
+       "\n",
+       "01:07:46.814 --> 01:07:47.814\n",
+       "Whatever you're looking for.\n",
+       "\n",
+       "01:07:48.314 --> 01:07:50.314\n",
+       "That's 1 way the better way to do it, Douglas.\n",
+       "\n",
+       "01:07:51.565 --> 01:07:52.565\n",
+       "Can't you use groupby?\n",
+       "\n",
+       "01:07:51.665 --> 01:07:57.665\n",
+       "Buy right cause that that does all the filtering and partitioning for you. So room.\n",
+       "\n",
+       "01:07:58.468 --> 01:08:01.468\n",
+       "Type, and then we wanna what will we do next? Kevin.\n",
+       "\n",
+       "01:08:01.115 --> 01:08:04.115\n",
+       "So we group by this thing. Now we want to find.\n",
+       "\n",
+       "01:08:04.815 --> 01:08:05.815\n",
+       "The most.\n",
+       "\n",
+       "01:08:06.015 --> 01:08:08.015\n",
+       "Expensive ones.\n",
+       "\n",
+       "01:08:08.720 --> 01:09:49.720\n",
+       "How about now?\n",
+       "\n",
+       "01:08:09.013 --> 01:08:11.013\n",
+       "So we do\n",
+       "\n",
+       "01:08:10.665 --> 01:08:12.665\n",
+       "Well, price?\n",
+       "\n",
+       "01:08:12.966 --> 01:08:14.966\n",
+       "We do brackets price, and then.\n",
+       "\n",
+       "01:08:16.317 --> 01:08:18.317\n",
+       "Dot mean, I believe.\n",
+       "\n",
+       "01:08:18.017 --> 01:08:19.017\n",
+       "Held it.\n",
+       "\n",
+       "01:08:19.017 --> 01:08:20.017\n",
+       "Right.\n",
+       "\n",
+       "01:08:20.614 --> 01:08:21.614\n",
+       "Alright, cool.\n",
+       "\n",
+       "01:08:21.415 --> 01:08:24.415\n",
+       "This is hard, like I showed this to someone I'd be like.\n",
+       "\n",
+       "01:08:25.517 --> 01:08:26.517\n",
+       "Embarrassed.\n",
+       "\n",
+       "01:08:27.214 --> 01:08:29.214\n",
+       "Let's do. If you do reset.\n",
+       "\n",
+       "01:08:29.267 --> 01:08:30.267\n",
+       "Index.\n",
+       "\n",
+       "01:08:30.166 --> 01:08:33.166\n",
+       "It turns it back into a data frame and then.\n",
+       "\n",
+       "01:08:33.416 --> 01:08:37.416\n",
+       "This is just a pet peeve, and I think it's not just a pet peeve, but also just makes it look worse.\n",
+       "\n",
+       "01:08:38.215 --> 01:08:41.215\n",
+       "87.0 6 3 8 3 0.\n",
+       "\n",
+       "01:08:41.968 --> 01:08:44.968\n",
+       "Doesn't matter right?\n",
+       "\n",
+       "01:08:45.066 --> 01:08:46.066\n",
+       "Dot round.\n",
+       "\n",
+       "01:08:50.116 --> 01:08:54.116\n",
+       "Okay, this is better if you want to be like, really round one.\n",
+       "\n",
+       "01:08:54.166 --> 01:08:55.166\n",
+       "Or 2 actually.\n",
+       "\n",
+       "01:08:56.312 --> 01:08:57.312\n",
+       "To get the pennies.\n",
+       "\n",
+       "01:08:56.913 --> 01:08:58.913\n",
+       "So we got this information.\n",
+       "\n",
+       "01:09:00.067 --> 01:09:01.067\n",
+       "This away!\n",
+       "\n",
+       "01:09:01.766 --> 01:09:03.766\n",
+       "What can we? What can we infer from this.\n",
+       "\n",
+       "01:09:05.916 --> 01:09:06.916\n",
+       "Like.\n",
+       "\n",
+       "01:09:07.614 --> 01:09:09.614\n",
+       "What would we say here.\n",
+       "\n",
+       "01:09:12.065 --> 01:09:14.065\n",
+       "What's the most? What's the cheapest one.\n",
+       "\n",
+       "01:09:14.665 --> 01:09:15.665\n",
+       "On average.\n",
+       "\n",
+       "01:09:15.869 --> 01:09:16.869\n",
+       "Bungalow.\n",
+       "\n",
+       "01:09:17.113 --> 01:09:19.113\n",
+       "Bungalow. Yeah, let's do sort values.\n",
+       "\n",
+       "01:09:22.615 --> 01:09:26.615\n",
+       "Okay. Cool bungalow shared room, private room.\n",
+       "\n",
+       "01:09:26.914 --> 01:09:30.914\n",
+       "So this is enough information. Just be like, all right. Cool penthouses make the most money by a lot.\n",
+       "\n",
+       "01:09:31.665 --> 01:09:32.665\n",
+       "And bungalow's the cheapest.\n",
+       "\n",
+       "01:09:33.714 --> 01:09:37.714\n",
+       "Quick question, are you sharing your screen? Because for some reason I can't see it.\n",
+       "\n",
+       "01:09:36.214 --> 01:09:38.214\n",
+       "Am I not sharing my screen.\n",
+       "\n",
+       "01:09:39.168 --> 01:09:40.168\n",
+       "ER.\n",
+       "\n",
+       "01:09:39.365 --> 01:09:43.365\n",
+       "I don't know. I don't know if it's on my screen. I I think it's on my end.\n",
+       "\n",
+       "01:09:41.015 --> 01:09:43.015\n",
+       "Oh, you are! I see it. I see it. Yeah.\n",
+       "\n",
+       "01:09:41.265 --> 01:09:42.265\n",
+       "I see it.\n",
+       "\n",
+       "01:09:42.466 --> 01:09:43.466\n",
+       "You're sharing with you.\n",
+       "\n",
+       "01:09:43.016 --> 01:09:47.016\n",
+       "Okay, let me just re let me just redo it.\n",
+       "\n",
+       "01:09:49.768 --> 01:09:53.768\n",
+       "Alright!\n",
+       "\n",
+       "01:09:50.116 --> 01:09:52.116\n",
+       "Thank you. That fixed me.\n",
+       "\n",
+       "01:09:51.670 --> 01:09:52.670\n",
+       "Yeah, cool.\n",
+       "\n",
+       "01:09:53.615 --> 01:09:56.615\n",
+       "Well, here's the here's the thing is not.\n",
+       "\n",
+       "01:09:54.717 --> 01:10:09.717\n",
+       "Hmm.\n",
+       "\n",
+       "01:09:56.715 --> 01:09:58.715\n",
+       "This is right, but wrong.\n",
+       "\n",
+       "01:09:58.616 --> 01:10:02.616\n",
+       "And why? What? What makes you think it could be right by wrong, right for wrong.\n",
+       "\n",
+       "01:10:04.267 --> 01:10:06.267\n",
+       "This is right, but it's also wrong.\n",
+       "\n",
+       "01:10:07.468 --> 01:10:08.468\n",
+       "Douglas.\n",
+       "\n",
+       "01:10:09.816 --> 01:10:14.816\n",
+       "I don't think anyone's renting anything for 4 bucks on average. I don't understand the bungalow.\n",
+       "\n",
+       "01:10:10.367 --> 01:15:27.367\n",
+       "Set.\n",
+       "\n",
+       "01:10:15.114 --> 01:10:17.114\n",
+       "Right. And what's like a way that we can.\n",
+       "\n",
+       "01:10:17.214 --> 01:10:18.214\n",
+       "Do that like.\n",
+       "\n",
+       "01:10:17.967 --> 01:10:19.967\n",
+       "The these.\n",
+       "\n",
+       "01:10:18.816 --> 01:10:22.816\n",
+       "I would just look at like what those values are.\n",
+       "\n",
+       "01:10:22.716 --> 01:10:24.716\n",
+       "That's a really good way to do.\n",
+       "\n",
+       "01:10:24.267 --> 01:10:30.267\n",
+       "I would I would make a separate data frame where it was just the bungalow and just pull up like header. Maybe even samples.\n",
+       "\n",
+       "01:10:29.715 --> 01:10:32.715\n",
+       "Do that a few times like a sample, 50 or something.\n",
+       "\n",
+       "01:10:32.766 --> 01:10:34.766\n",
+       "See what it looks like.\n",
+       "\n",
+       "01:10:33.316 --> 01:10:34.316\n",
+       "Yeah. Let's go.\n",
+       "\n",
+       "01:10:34.916 --> 01:10:39.916\n",
+       "That's that is a bunch of different ways, like what I would do 1st is be like.\n",
+       "\n",
+       "01:10:39.966 --> 01:10:42.966\n",
+       "How many of each listing is there.\n",
+       "\n",
+       "01:10:43.615 --> 01:10:45.615\n",
+       "Right? So let's just do that real quick.\n",
+       "\n",
+       "01:10:45.266 --> 01:10:47.266\n",
+       "Df dot.\n",
+       "\n",
+       "01:10:46.966 --> 01:10:48.966\n",
+       "Group, by.\n",
+       "\n",
+       "01:10:48.965 --> 01:10:49.965\n",
+       "Room.\n",
+       "\n",
+       "01:10:50.116 --> 01:10:51.116\n",
+       "Type.\n",
+       "\n",
+       "01:10:52.119 --> 01:10:53.119\n",
+       "Dot count.\n",
+       "\n",
+       "01:10:53.814 --> 01:10:55.814\n",
+       "Or like.\n",
+       "\n",
+       "01:10:57.567 --> 01:10:58.567\n",
+       "Price.\n",
+       "\n",
+       "01:10:59.017 --> 01:11:00.017\n",
+       "Dot count.\n",
+       "\n",
+       "01:11:00.215 --> 01:11:02.215\n",
+       "What does this tell us.\n",
+       "\n",
+       "01:11:03.466 --> 01:11:06.466\n",
+       "And why is this more informational.\n",
+       "\n",
+       "01:11:09.366 --> 01:11:10.366\n",
+       "The samples for.\n",
+       "\n",
+       "01:11:10.669 --> 01:11:12.669\n",
+       "Size for bungalows. Only one.\n",
+       "\n",
+       "01:11:12.314 --> 01:11:13.314\n",
+       "So that's why.\n",
+       "\n",
+       "01:11:13.016 --> 01:11:16.016\n",
+       "There's only one bungalow. Yeah, it's 4 bucks.\n",
+       "\n",
+       "01:11:15.816 --> 01:11:19.816\n",
+       "What is the other one that's big. So if we go.\n",
+       "\n",
+       "01:11:20.015 --> 01:11:23.015\n",
+       "We just do this, right? DFRM dot.\n",
+       "\n",
+       "01:11:23.066 --> 01:11:24.066\n",
+       "Kind Equals.\n",
+       "\n",
+       "01:11:35.265 --> 01:11:37.265\n",
+       "Whoa! Whoa! Whoa! I'm sorry, guys.\n",
+       "\n",
+       "01:11:40.415 --> 01:11:42.415\n",
+       "You're the kind is\n",
+       "\n",
+       "01:11:42.215 --> 01:11:45.215\n",
+       "Bar H instead of bar. I think you meant to type.\n",
+       "\n",
+       "01:11:46.216 --> 01:11:51.216\n",
+       "So here we got this. And I'm like, all right. Well, like, yeah, penthouse is like way bigger.\n",
+       "\n",
+       "01:11:51.465 --> 01:11:53.465\n",
+       "Than the other ones. But if we.\n",
+       "\n",
+       "01:11:53.266 --> 01:11:57.266\n",
+       "If we actually look at the data a little more.\n",
+       "\n",
+       "01:11:57.667 --> 01:12:01.667\n",
+       "It's like, well, there's only 2 penthouses in one like that, doesn't.\n",
+       "\n",
+       "01:12:02.215 --> 01:12:04.215\n",
+       "Really make much sense to me.\n",
+       "\n",
+       "01:12:04.317 --> 01:12:05.317\n",
+       "So.\n",
+       "\n",
+       "01:12:04.716 --> 01:12:08.716\n",
+       "Let's let's do a bunch of different aggregations together.\n",
+       "\n",
+       "01:12:08.815 --> 01:12:10.815\n",
+       "So let's just do\n",
+       "\n",
+       "01:12:11.965 --> 01:12:13.965\n",
+       "Remember if we do ag.\n",
+       "\n",
+       "01:12:13.915 --> 01:12:18.915\n",
+       "I'll just do it by hand. So DFRM. Equals df group by.\n",
+       "\n",
+       "01:12:19.017 --> 01:12:21.017\n",
+       "Room, type.\n",
+       "\n",
+       "01:12:22.415 --> 01:12:25.415\n",
+       "Wow! This is like kind of pop up is annoying. And then I do, price.\n",
+       "\n",
+       "01:12:25.216 --> 01:12:28.216\n",
+       "And then, instead of doing mean, I do. Ag.\n",
+       "\n",
+       "01:12:28.367 --> 01:12:31.367\n",
+       "And then why is this like really getting in the way.\n",
+       "\n",
+       "01:12:31.566 --> 01:12:34.566\n",
+       "And then in ag, I can do the mean.\n",
+       "\n",
+       "01:12:35.216 --> 01:12:36.216\n",
+       "I can do the Count.\n",
+       "\n",
+       "01:12:36.915 --> 01:12:39.915\n",
+       "I can do the standard deviation.\n",
+       "\n",
+       "01:12:41.115 --> 01:12:43.115\n",
+       "And if I now display this.\n",
+       "\n",
+       "01:12:45.266 --> 01:12:46.266\n",
+       "And I do round.\n",
+       "\n",
+       "01:12:54.016 --> 01:12:58.016\n",
+       "This is this little like Pop up thing is really getting annoying.\n",
+       "\n",
+       "01:12:59.366 --> 01:13:03.366\n",
+       "So I'm like, all right. Bungalow only has a count of one, standard deviation nothing, like.\n",
+       "\n",
+       "01:13:04.567 --> 01:13:05.567\n",
+       "Can we trust that.\n",
+       "\n",
+       "01:13:05.715 --> 01:13:07.715\n",
+       "What's we.\n",
+       "\n",
+       "01:13:07.716 --> 01:13:12.716\n",
+       "There's other ones of 2 20,000 like that's bigger sample size right.\n",
+       "\n",
+       "01:13:14.065 --> 01:13:18.065\n",
+       "So one way to kind of use some statistics like.\n",
+       "\n",
+       "01:13:18.066 --> 01:13:23.066\n",
+       "Is it just like a little snippet of statistics? You, if you take a statistics class, you'll learn all about this. But, like.\n",
+       "\n",
+       "01:13:23.316 --> 01:13:27.316\n",
+       "You can get the standard deviation of the mean of the sample. So like.\n",
+       "\n",
+       "01:13:28.316 --> 01:13:29.316\n",
+       "By taking.\n",
+       "\n",
+       "01:13:29.515 --> 01:13:31.515\n",
+       "This square root of the Count.\n",
+       "\n",
+       "01:13:32.316 --> 01:13:35.316\n",
+       "And dividing that by the standard deviation you get this like.\n",
+       "\n",
+       "01:13:34.916 --> 01:13:36.916\n",
+       "Sample standard deviation.\n",
+       "\n",
+       "01:13:37.166 --> 01:13:40.166\n",
+       "And then, if you do 2 times that.\n",
+       "\n",
+       "01:13:41.266 --> 01:13:42.266\n",
+       "Minus the mean.\n",
+       "\n",
+       "01:13:43.316 --> 01:13:49.316\n",
+       "2 standard deviations below and 2 standard deviation standard deviations above. It'll give you like this, 95%.\n",
+       "\n",
+       "01:13:49.917 --> 01:13:50.917\n",
+       "Confidence.\n",
+       "\n",
+       "01:13:51.567 --> 01:13:53.567\n",
+       "That, like the real price.\n",
+       "\n",
+       "01:13:53.316 --> 01:13:56.316\n",
+       "From our sample will fall between these 2 values.\n",
+       "\n",
+       "01:13:56.116 --> 01:14:01.116\n",
+       "Why did I use 2 and not one, and not 3, standard deviations? Who knows that answer?\n",
+       "\n",
+       "01:14:03.315 --> 01:14:04.315\n",
+       "What does 2 mean, Douglas?\n",
+       "\n",
+       "01:14:04.315 --> 01:14:08.315\n",
+       "Isn't 2 97% of the population.\n",
+       "\n",
+       "01:14:08.219 --> 01:14:11.219\n",
+       "I think that's 3, 2 is 95.\n",
+       "\n",
+       "01:14:08.518 --> 01:14:09.518\n",
+       "Like to end up.\n",
+       "\n",
+       "01:14:10.466 --> 01:14:11.466\n",
+       "I think I.\n",
+       "\n",
+       "01:14:11.367 --> 01:14:13.367\n",
+       "Oh, you're right. You're yeah.\n",
+       "\n",
+       "01:14:13.315 --> 01:14:15.315\n",
+       "We use like 99. But yeah.\n",
+       "\n",
+       "01:14:14.016 --> 01:14:19.016\n",
+       "Yeah, it's not much of a difference between 2 and 3, but it's a massive difference between one.\n",
+       "\n",
+       "01:14:19.615 --> 01:14:21.615\n",
+       "These are standard deviations. So now I'm.\n",
+       "\n",
+       "01:14:22.216 --> 01:14:23.216\n",
+       "Oh, okay.\n",
+       "\n",
+       "01:14:22.916 --> 01:14:24.916\n",
+       "The low like.\n",
+       "\n",
+       "01:14:25.415 --> 01:14:26.415\n",
+       "Penthouses.\n",
+       "\n",
+       "01:14:27.415 --> 01:14:31.415\n",
+       "Range between negative 4,000. 14,000. I'm like.\n",
+       "\n",
+       "01:14:32.515 --> 01:14:34.515\n",
+       "What it literally doesn't make any sense.\n",
+       "\n",
+       "01:14:35.515 --> 01:14:42.515\n",
+       "And like it's not even available for this for the bungalows, because it doesn't do it. So I'm like, Oh, and look at the bound on like.\n",
+       "\n",
+       "01:14:42.315 --> 01:14:45.315\n",
+       "The ones with the most, so there are the most.\n",
+       "\n",
+       "01:14:46.466 --> 01:14:48.466\n",
+       "The most entire room, apartments.\n",
+       "\n",
+       "01:14:48.366 --> 01:14:53.366\n",
+       "Average is 200, and the lower bounds 1 95. In the upper bounds, 2 0. 5.\n",
+       "\n",
+       "01:14:52.915 --> 01:14:54.915\n",
+       "So we're like, Oh.\n",
+       "\n",
+       "01:14:55.015 --> 01:14:56.015\n",
+       "That makes sense.\n",
+       "\n",
+       "01:14:56.621 --> 01:14:57.621\n",
+       "Like.\n",
+       "\n",
+       "01:14:58.017 --> 01:15:03.017\n",
+       "That makes sense. The average thing is going to be around 295.\n",
+       "\n",
+       "01:15:03.617 --> 01:15:04.617\n",
+       "Upper 204.\n",
+       "\n",
+       "01:15:05.465 --> 01:15:08.465\n",
+       "But like, even if we go as low as the shared room.\n",
+       "\n",
+       "01:15:08.615 --> 01:15:10.615\n",
+       "Average is 80.\n",
+       "\n",
+       "01:15:10.816 --> 01:15:19.816\n",
+       "The lower bound is 65, and the upper bound is 1 10. So it's a bigger band. We are less confident because we have less samples.\n",
+       "\n",
+       "01:15:19.465 --> 01:15:21.465\n",
+       "We have less data points to be like.\n",
+       "\n",
+       "01:15:21.567 --> 01:15:22.567\n",
+       "Confirm our idea.\n",
+       "\n",
+       "01:15:22.666 --> 01:15:24.666\n",
+       "You guys get that.\n",
+       "\n",
+       "01:15:27.417 --> 01:26:02.417\n",
+       "Okay.\n",
+       "\n",
+       "01:15:31.416 --> 01:15:35.416\n",
+       "So if we just do some basic analysis, here's inference. So like.\n",
+       "\n",
+       "01:15:35.266 --> 01:15:40.266\n",
+       "I admitted. This last class like I don't really know how to read mathematical notation.\n",
+       "\n",
+       "01:15:39.865 --> 01:15:41.865\n",
+       "Personally, so like.\n",
+       "\n",
+       "01:15:42.065 --> 01:15:50.065\n",
+       "What Chris was trying is is showing you here is like, if we have, imagine this is our actual population.\n",
+       "\n",
+       "01:15:49.866 --> 01:15:53.866\n",
+       "We don't ever actually know the population. But like.\n",
+       "\n",
+       "01:15:54.066 --> 01:15:56.066\n",
+       "We're assuming this is the ground truth.\n",
+       "\n",
+       "01:15:57.815 --> 01:15:59.815\n",
+       "If you sample the ground, truth.\n",
+       "\n",
+       "01:16:00.115 --> 01:16:04.115\n",
+       "Just say we're gonna sample it a thousand times even just a hundred.\n",
+       "\n",
+       "01:16:03.866 --> 01:16:05.866\n",
+       "Let's just do a hundred times.\n",
+       "\n",
+       "01:16:06.015 --> 01:16:09.015\n",
+       "In each 100 time. We're gonna randomly select.\n",
+       "\n",
+       "01:16:10.265 --> 01:16:12.265\n",
+       "Each round. We're gonna randomly select.\n",
+       "\n",
+       "01:16:12.065 --> 01:16:15.065\n",
+       "100 people of this sample size. That's a.\n",
+       "\n",
+       "01:16:15.026 --> 01:16:16.026\n",
+       "10,000.\n",
+       "\n",
+       "01:16:16.365 --> 01:16:18.365\n",
+       "Nt. Average in the standard deviation.\n",
+       "\n",
+       "01:16:19.316 --> 01:16:24.316\n",
+       "We're gonna do that here. So I do. I randomly sample the population. I make sure it's shuffled.\n",
+       "\n",
+       "01:16:23.466 --> 01:16:27.466\n",
+       "And then I get the sample mean sample standard deviation.\n",
+       "\n",
+       "01:16:28.216 --> 01:16:31.216\n",
+       "I put that into a data frame and I plot it.\n",
+       "\n",
+       "01:16:32.116 --> 01:16:36.116\n",
+       "And it should look very similar to the thing.\n",
+       "\n",
+       "01:16:36.765 --> 01:16:38.765\n",
+       "The more samples you have.\n",
+       "\n",
+       "01:16:39.065 --> 01:16:43.065\n",
+       "The more it's gonna closely resemble the population.\n",
+       "\n",
+       "01:16:44.216 --> 01:16:51.216\n",
+       "And this is the idea of like inferring and using sample groups. A big thing of this is like political polls.\n",
+       "\n",
+       "01:16:50.915 --> 01:16:53.915\n",
+       "Right like you. It says like.\n",
+       "\n",
+       "01:16:53.619 --> 01:16:56.619\n",
+       "I don't know. Let's do 5 30 right now.\n",
+       "\n",
+       "01:16:57.215 --> 01:16:58.215\n",
+       "Just so. I'm like.\n",
+       "\n",
+       "01:16:58.816 --> 01:16:59.816\n",
+       "5.\n",
+       "\n",
+       "01:16:59.015 --> 01:17:02.015\n",
+       "38. You guys know 5, 38.\n",
+       "\n",
+       "01:17:02.315 --> 01:17:05.315\n",
+       "It's the number one spot for.\n",
+       "\n",
+       "01:17:06.115 --> 01:17:09.115\n",
+       "This, and this is who has a head in the polls.\n",
+       "\n",
+       "01:17:09.716 --> 01:17:15.716\n",
+       "And it's 48 to 45 like they can't go out and ask every single person.\n",
+       "\n",
+       "01:17:16.765 --> 01:17:18.765\n",
+       "How they think of each candidate right?\n",
+       "\n",
+       "01:17:18.915 --> 01:17:19.915\n",
+       "But like.\n",
+       "\n",
+       "01:17:20.566 --> 01:17:21.566\n",
+       "They can use samples.\n",
+       "\n",
+       "01:17:22.516 --> 01:17:27.516\n",
+       "Of like. They'll ask a hundred people here, a hundred people, there, 100 people, there, 100 people, there, 100 people here, 100 people there.\n",
+       "\n",
+       "01:17:27.615 --> 01:17:28.615\n",
+       "And then.\n",
+       "\n",
+       "01:17:27.815 --> 01:17:30.815\n",
+       "The use inferential statistics.\n",
+       "\n",
+       "01:17:31.017 --> 01:17:34.017\n",
+       "To be like, okay, we can generalize this to the entire population.\n",
+       "\n",
+       "01:17:35.167 --> 01:17:37.167\n",
+       "And what's crazy is like.\n",
+       "\n",
+       "01:17:38.965 --> 01:17:40.965\n",
+       "3% is.\n",
+       "\n",
+       "01:17:40.766 --> 01:17:44.766\n",
+       "Is so close it's like unbelievably like.\n",
+       "\n",
+       "01:17:45.416 --> 01:17:47.416\n",
+       "I just remember when I was a professional poker player like.\n",
+       "\n",
+       "01:17:48.416 --> 01:17:53.416\n",
+       "How many times I would get it all in with aces and be against 7 2. And it's like.\n",
+       "\n",
+       "01:17:53.265 --> 01:17:57.265\n",
+       "Even that you're 20 favorite like at best.\n",
+       "\n",
+       "01:17:58.416 --> 01:17:59.416\n",
+       "And like.\n",
+       "\n",
+       "01:17:59.665 --> 01:18:02.665\n",
+       "And you're you're gonna win 80 like. But like.\n",
+       "\n",
+       "01:18:02.316 --> 01:18:04.316\n",
+       "3% favorite, or like.\n",
+       "\n",
+       "01:18:04.566 --> 01:18:08.566\n",
+       "Below or above. That's that is, we call those coin flips.\n",
+       "\n",
+       "01:18:08.117 --> 01:18:09.117\n",
+       "Lesson.\n",
+       "\n",
+       "01:18:10.116 --> 01:18:15.116\n",
+       "So they're effectively quite. I'm div. I'm divulging or not divulging, but.\n",
+       "\n",
+       "01:18:16.267 --> 01:18:17.267\n",
+       "Tangent thing.\n",
+       "\n",
+       "01:18:20.515 --> 01:18:25.515\n",
+       "And so these should basically match your original\n",
+       "\n",
+       "01:18:27.465 --> 01:18:32.465\n",
+       "So now let's talk about like making a plot. This is how you would do it.\n",
+       "\n",
+       "01:18:33.221 --> 01:18:36.221\n",
+       "In Python, and it's really kind of annoying.\n",
+       "\n",
+       "01:18:35.916 --> 01:18:38.916\n",
+       "Like, we gotta do all this stuff. And we're gonna show you how. It's really easy.\n",
+       "\n",
+       "01:18:39.217 --> 01:18:47.217\n",
+       "But the important thing to take away from this is the latitude and longitude coordinates. Now, who knows? Like.\n",
+       "\n",
+       "01:18:51.017 --> 01:18:52.017\n",
+       "Like.\n",
+       "\n",
+       "01:18:51.621 --> 01:18:53.621\n",
+       "Who here is like familiar with.\n",
+       "\n",
+       "01:18:54.266 --> 01:18:55.266\n",
+       "Latitude and longitude.\n",
+       "\n",
+       "01:18:56.366 --> 01:19:01.366\n",
+       "Everybody should be their floats right, 40 degrees 7 5.\n",
+       "\n",
+       "01:19:03.117 --> 01:19:04.117\n",
+       "And.\n",
+       "\n",
+       "01:19:06.716 --> 01:19:07.716\n",
+       "They are numbers.\n",
+       "\n",
+       "01:19:07.966 --> 01:19:13.966\n",
+       "So technically you can plot them. But they're mapped when they're actually latitude and longitude. They're mapped differently.\n",
+       "\n",
+       "01:19:14.316 --> 01:19:20.316\n",
+       "They don't exactly behave as floats. The reason is because you have to project them onto a globe.\n",
+       "\n",
+       "01:19:20.965 --> 01:19:22.965\n",
+       "And that is called\n",
+       "\n",
+       "01:19:25.216 --> 01:19:32.216\n",
+       "It's not Cartesian is the measure, the the projection scale that we use. It's called. I think it's called projection or Mercator.\n",
+       "\n",
+       "01:19:32.915 --> 01:19:35.915\n",
+       "Like, instead of just doing it flat. It has to be like.\n",
+       "\n",
+       "01:19:36.067 --> 01:19:37.067\n",
+       "Bent a little bit.\n",
+       "\n",
+       "01:19:36.566 --> 01:19:42.566\n",
+       "And we use the Cartesian system. But like this, will you have to tell it.\n",
+       "\n",
+       "01:19:43.716 --> 01:19:45.716\n",
+       "It's actually Latin, long.\n",
+       "\n",
+       "01:19:45.415 --> 01:19:47.415\n",
+       "And if you do that it will do it correctly.\n",
+       "\n",
+       "01:19:48.816 --> 01:19:52.816\n",
+       "So here you can call them, and you can do it.\n",
+       "\n",
+       "01:19:53.116 --> 01:19:56.116\n",
+       "And like, yeah, this makes sense to me, like, okay, the red is.\n",
+       "\n",
+       "01:19:55.765 --> 01:19:57.765\n",
+       "Manhattan queens is.\n",
+       "\n",
+       "01:19:57.766 --> 01:19:58.766\n",
+       "Blue.\n",
+       "\n",
+       "01:19:58.916 --> 01:19:59.916\n",
+       "Siam, blue, yellow.\n",
+       "\n",
+       "01:20:02.916 --> 01:20:06.916\n",
+       "We can make histograms. So histograms are really important because it gives.\n",
+       "\n",
+       "01:20:07.766 --> 01:20:11.766\n",
+       "It tells you the distribution of each group.\n",
+       "\n",
+       "01:20:12.416 --> 01:20:14.416\n",
+       "So it's important to know that because.\n",
+       "\n",
+       "01:20:16.067 --> 01:20:22.067\n",
+       "Things usually follow normal distributions, and when they're the same type of distribution, then you can compare them.\n",
+       "\n",
+       "01:20:22.015 --> 01:20:28.015\n",
+       "If something is a uniform distribution and something else is like a Chi squared distribution, or like\n",
+       "\n",
+       "01:20:27.723 --> 01:20:30.723\n",
+       "Normal like you can't really compare them.\n",
+       "\n",
+       "01:20:32.515 --> 01:20:34.515\n",
+       "And like this is a big thing, because it's like.\n",
+       "\n",
+       "01:20:34.216 --> 01:20:36.216\n",
+       "Putting a round peg in a.\n",
+       "\n",
+       "01:20:36.516 --> 01:20:40.516\n",
+       "Square hole. It's like it doesn't fit our square peg in a round hole like it's not gonna fit.\n",
+       "\n",
+       "01:20:40.817 --> 01:20:43.817\n",
+       "So like you had to make sure that they're of same distributions.\n",
+       "\n",
+       "01:20:44.027 --> 01:20:47.027\n",
+       "And the histogram is a way to do that. So.\n",
+       "\n",
+       "01:20:49.416 --> 01:20:51.416\n",
+       "I don't want to be like who here.\n",
+       "\n",
+       "01:20:52.416 --> 01:20:58.416\n",
+       "No. Does everyone here know what a histogram is? Because I'm sure there's people that doesn't but like, can anybody here like.\n",
+       "\n",
+       "01:20:58.566 --> 01:21:01.566\n",
+       "Explain the basics behind a histogram.\n",
+       "\n",
+       "01:21:03.466 --> 01:21:05.466\n",
+       "Or who here doesn't know what it is.\n",
+       "\n",
+       "01:21:05.266 --> 01:21:07.266\n",
+       "Who here does know what it is.\n",
+       "\n",
+       "01:21:07.415 --> 01:21:08.415\n",
+       "Thumbs up.\n",
+       "\n",
+       "01:21:10.516 --> 01:21:13.516\n",
+       "Nobody. We have 2 people. Okay.\n",
+       "\n",
+       "01:21:15.915 --> 01:21:17.915\n",
+       "A histogram.\n",
+       "\n",
+       "01:21:19.417 --> 01:21:23.417\n",
+       "It's called. It's also called a probability density function.\n",
+       "\n",
+       "01:21:23.715 --> 01:21:26.715\n",
+       "And we have a list of numbers.\n",
+       "\n",
+       "01:21:26.966 --> 01:21:29.966\n",
+       "Right, we got a bunch of numbers like these. This is the price.\n",
+       "\n",
+       "01:21:30.116 --> 01:21:31.116\n",
+       "Df price.\n",
+       "\n",
+       "01:21:31.365 --> 01:21:33.365\n",
+       "We just do df, price right now.\n",
+       "\n",
+       "01:21:37.266 --> 01:21:41.266\n",
+       "It's a list of numbers, right? It's just a bunch of different numbers.\n",
+       "\n",
+       "01:21:41.515 --> 01:21:44.515\n",
+       "And what a histogram does is like, Okay.\n",
+       "\n",
+       "01:21:44.666 --> 01:21:46.666\n",
+       "All the numbers right here.\n",
+       "\n",
+       "01:21:46.817 --> 01:21:47.817\n",
+       "Between.\n",
+       "\n",
+       "01:21:47.716 --> 01:21:53.716\n",
+       "They put them into bins that bins them, and it's like, if you're between, if your price is between 0 and 10.\n",
+       "\n",
+       "01:21:54.366 --> 01:21:55.366\n",
+       "Put a count of one.\n",
+       "\n",
+       "01:21:55.716 --> 01:22:00.716\n",
+       "It was between 10 and 20. Put a count of 2, 1 like here, and it just does that.\n",
+       "\n",
+       "01:22:00.466 --> 01:22:02.466\n",
+       "So it shows us the.\n",
+       "\n",
+       "01:22:01.616 --> 01:22:04.616\n",
+       "Of how many things are in each bin.\n",
+       "\n",
+       "01:22:06.265 --> 01:22:07.265\n",
+       "Which is the distribution.\n",
+       "\n",
+       "01:22:06.917 --> 01:22:08.917\n",
+       "As a histogram.\n",
+       "\n",
+       "01:22:08.966 --> 01:22:09.966\n",
+       "Do you guys? Now.\n",
+       "\n",
+       "01:22:09.616 --> 01:22:11.616\n",
+       "Who here now knows what it is.\n",
+       "\n",
+       "01:22:13.966 --> 01:22:16.966\n",
+       "Yeah, it's just like binning it. You're like, okay.\n",
+       "\n",
+       "01:22:16.730 --> 01:22:19.730\n",
+       "Your prices. Let's just say 0 and 100. You're in bin one.\n",
+       "\n",
+       "01:22:20.016 --> 01:22:23.016\n",
+       "100 200 bin, 200 300 bin, 3.\n",
+       "\n",
+       "01:22:23.665 --> 01:22:24.665\n",
+       "So it's taking this.\n",
+       "\n",
+       "01:22:25.266 --> 01:22:26.266\n",
+       "Continuous value.\n",
+       "\n",
+       "01:22:25.916 --> 01:22:27.916\n",
+       "And putting it into it.\n",
+       "\n",
+       "01:22:28.216 --> 01:22:32.216\n",
+       "Discrete value. The difference between discrete and continuous is.\n",
+       "\n",
+       "01:22:32.225 --> 01:22:33.225\n",
+       "Continuous.\n",
+       "\n",
+       "01:22:34.466 --> 01:22:35.466\n",
+       "Can be any price.\n",
+       "\n",
+       "01:22:35.565 --> 01:22:36.565\n",
+       "It can be.\n",
+       "\n",
+       "01:22:36.915 --> 01:22:40.915\n",
+       "$75, $69, $420. It can be anything.\n",
+       "\n",
+       "01:22:40.616 --> 01:22:43.616\n",
+       "But it bins are like you are between one and 10.\n",
+       "\n",
+       "01:22:43.916 --> 01:22:46.916\n",
+       "You're between 10 and 20. It's still a number.\n",
+       "\n",
+       "01:22:46.965 --> 01:22:47.965\n",
+       "But a.\n",
+       "\n",
+       "01:22:49.117 --> 01:22:50.117\n",
+       "Discrete.\n",
+       "\n",
+       "01:22:50.866 --> 01:22:53.866\n",
+       "So he did all that.\n",
+       "\n",
+       "01:22:54.515 --> 01:22:55.515\n",
+       "All this he did.\n",
+       "\n",
+       "01:22:54.816 --> 01:22:59.816\n",
+       "It took them all this. I'm a little bit more familiar with these.\n",
+       "\n",
+       "01:23:00.066 --> 01:23:08.066\n",
+       "So I did like this, but also you can. I believe you can use? See? I I couldn't color it. But you can use familiar.\n",
+       "\n",
+       "01:23:09.165 --> 01:23:10.165\n",
+       "The best.\n",
+       "\n",
+       "01:23:10.466 --> 01:23:13.466\n",
+       "It's like the cheating way to do it. Let's go, David.\n",
+       "\n",
+       "01:23:13.725 --> 01:23:17.725\n",
+       "I just look at their gallery. I'm like, Okay, where's the histogram?\n",
+       "\n",
+       "01:23:18.465 --> 01:23:21.465\n",
+       "Where is the histogram? These look like histograms.\n",
+       "\n",
+       "01:23:21.168 --> 01:23:22.168\n",
+       "His plot.\n",
+       "\n",
+       "01:23:26.465 --> 01:23:27.465\n",
+       "Let's do this.\n",
+       "\n",
+       "01:23:28.017 --> 01:23:29.017\n",
+       "Import.\n",
+       "\n",
+       "01:23:29.315 --> 01:23:31.315\n",
+       "Let's do it. A new cell. I just like.\n",
+       "\n",
+       "01:23:30.915 --> 01:23:33.915\n",
+       "Kind of space on doing this and that space.\n",
+       "\n",
+       "01:23:37.665 --> 01:23:38.665\n",
+       "And the way you do.\n",
+       "\n",
+       "01:23:41.165 --> 01:23:44.165\n",
+       "You just pass in the data frame. So.\n",
+       "\n",
+       "01:23:43.915 --> 01:23:45.915\n",
+       "That hissed plot.\n",
+       "\n",
+       "01:23:46.326 --> 01:23:47.326\n",
+       "DF.\n",
+       "\n",
+       "01:23:47.029 --> 01:23:50.029\n",
+       "X Equals.\n",
+       "\n",
+       "01:23:51.217 --> 01:23:52.217\n",
+       "Price.\n",
+       "\n",
+       "01:23:53.165 --> 01:23:58.165\n",
+       "And let's do y equals neighborhood group.\n",
+       "\n",
+       "01:24:02.016 --> 01:24:03.016\n",
+       "Someone has a hand.\n",
+       "\n",
+       "01:24:03.565 --> 01:24:06.565\n",
+       "Yeah, in industry. Have you used? R.\n",
+       "\n",
+       "01:24:09.468 --> 01:24:10.468\n",
+       "No.\n",
+       "\n",
+       "01:24:10.030 --> 01:24:11.030\n",
+       "The language. Oh.\n",
+       "\n",
+       "01:24:10.866 --> 01:24:13.866\n",
+       "No, that's a good point. R. Is.\n",
+       "\n",
+       "01:24:14.316 --> 01:24:19.316\n",
+       "Really good at a limited amount of things, and that's it. And statistics.\n",
+       "\n",
+       "01:24:21.126 --> 01:24:23.126\n",
+       "Is I like to say.\n",
+       "\n",
+       "01:24:23.216 --> 01:24:25.216\n",
+       "R is like.\n",
+       "\n",
+       "01:24:27.715 --> 01:24:30.715\n",
+       "It's like python, for like infants.\n",
+       "\n",
+       "01:24:31.416 --> 01:24:38.416\n",
+       "It's like you'd only do one thing. It is good at what it does, which is statistics, but that's it. You can't do anything else like Python, you can literally do everything.\n",
+       "\n",
+       "01:24:39.765 --> 01:24:43.765\n",
+       "Why is this being so weird?\n",
+       "\n",
+       "01:24:44.212 --> 01:24:45.212\n",
+       "Is still brown.\n",
+       "\n",
+       "01:24:47.965 --> 01:24:48.965\n",
+       "Here we go!\n",
+       "\n",
+       "01:24:50.765 --> 01:24:52.765\n",
+       "Okay, x.\n",
+       "\n",
+       "01:24:52.904 --> 01:24:53.904\n",
+       "Oh, color!\n",
+       "\n",
+       "01:24:59.765 --> 01:25:00.765\n",
+       "Okay, we're not going to get into it.\n",
+       "\n",
+       "01:25:02.767 --> 01:25:06.767\n",
+       "Another really good way. Actually, why don't we just do this like.\n",
+       "\n",
+       "01:25:07.268 --> 01:25:08.268\n",
+       "EF dot.\n",
+       "\n",
+       "01:25:07.866 --> 01:25:08.866\n",
+       "Test.\n",
+       "\n",
+       "01:25:08.967 --> 01:25:09.967\n",
+       "Rights.\n",
+       "\n",
+       "01:25:13.497 --> 01:25:17.497\n",
+       "So here, it's like it because we have these crazy outliers. It's like.\n",
+       "\n",
+       "01:25:18.794 --> 01:25:21.794\n",
+       "Okay. And if we just adjust the bins to be like a hundred.\n",
+       "\n",
+       "01:25:21.065 --> 01:25:25.065\n",
+       "It'll start doing it better cause it defaults to 10.\n",
+       "\n",
+       "01:25:26.693 --> 01:25:32.693\n",
+       "So let's go to box and whisker plots like again. They're really hard kinda to do in python to do it effectively.\n",
+       "\n",
+       "01:25:32.993 --> 01:25:35.993\n",
+       "But boxing whisker parts are really important. Matthew is a question.\n",
+       "\n",
+       "01:25:36.466 --> 01:25:43.466\n",
+       "Am I getting mixed up with something else? I thought histograms were supposed to have like a linear line in it, or like a a line also with the Graphs.\n",
+       "\n",
+       "01:25:42.569 --> 01:25:44.569\n",
+       "No, definitely, not.\n",
+       "\n",
+       "01:25:45.296 --> 01:25:46.296\n",
+       "You know.\n",
+       "\n",
+       "01:25:46.496 --> 01:25:48.496\n",
+       "Those are like scatter plots.\n",
+       "\n",
+       "01:25:47.991 --> 01:25:51.991\n",
+       "Oh, scatter there! Oh, my God, yeah, plus. Okay, that's what it is. Sorry.\n",
+       "\n",
+       "01:25:52.190 --> 01:25:53.190\n",
+       "Yeah. No. Histogram.\n",
+       "\n",
+       "01:25:53.366 --> 01:25:58.366\n",
+       "You know, should not have like E plots, or like a line version of the histogram. But.\n",
+       "\n",
+       "01:25:58.191 --> 01:26:01.191\n",
+       "It just draws a line over the histogram instead of.\n",
+       "\n",
+       "01:26:01.317 --> 01:26:02.317\n",
+       "And\n",
+       "\n",
+       "01:26:02.466 --> 01:26:03.466\n",
+       "Like this.\n",
+       "\n",
+       "01:26:03.266 --> 01:31:31.266\n",
+       "It.\n",
+       "\n",
+       "01:26:03.816 --> 01:26:05.816\n",
+       "Okay, yeah, that's what I was looking at, too.\n",
+       "\n",
+       "01:26:06.068 --> 01:26:07.068\n",
+       "Katie.\n",
+       "\n",
+       "01:26:06.093 --> 01:26:07.093\n",
+       "Okay.\n",
+       "\n",
+       "01:26:06.816 --> 01:26:09.816\n",
+       "But like that's, it's no real different.\n",
+       "\n",
+       "01:26:11.266 --> 01:26:13.266\n",
+       "I saw. Yeah, I was just getting confused. All right. Thank you.\n",
+       "\n",
+       "01:26:12.965 --> 01:26:15.965\n",
+       "I think if we do, instead of color.\n",
+       "\n",
+       "01:26:17.568 --> 01:26:18.568\n",
+       "Hue.\n",
+       "\n",
+       "01:26:19.291 --> 01:26:21.291\n",
+       "Equals.\n",
+       "\n",
+       "01:26:24.365 --> 01:26:28.365\n",
+       "Didn't work. Okay? i i i guess he did it.\n",
+       "\n",
+       "01:26:28.368 --> 01:26:29.368\n",
+       "So can.\n",
+       "\n",
+       "01:26:28.916 --> 01:26:34.916\n",
+       "Like it took him so long because he wanted to color it differently. I know there's an easy way to do it. I just can't recall it.\n",
+       "\n",
+       "01:26:35.065 --> 01:26:38.065\n",
+       "But if you, if I spend 5 min on Google, I'd figure it out.\n",
+       "\n",
+       "01:26:38.592 --> 01:26:44.592\n",
+       "So box and whisker plots are really good, because it shows you outliers very clearly.\n",
+       "\n",
+       "01:26:45.265 --> 01:26:47.265\n",
+       "So basically.\n",
+       "\n",
+       "01:26:46.969 --> 01:26:49.969\n",
+       "These mean like, it shows the average.\n",
+       "\n",
+       "01:26:50.866 --> 01:27:00.866\n",
+       "It shows what should be. What is a normal range by these whisker plots, these whisker, these bars that come off of it, and then anything that's above or below that are considered to be outliers.\n",
+       "\n",
+       "01:26:59.666 --> 01:27:03.666\n",
+       "So these are like the confidence in roles you can think of.\n",
+       "\n",
+       "01:27:03.966 --> 01:27:07.966\n",
+       "And these are like, outside of what we consider to be normal.\n",
+       "\n",
+       "01:27:07.566 --> 01:27:09.566\n",
+       "These are all the data points.\n",
+       "\n",
+       "01:27:10.292 --> 01:27:11.292\n",
+       "But like I said like.\n",
+       "\n",
+       "01:27:12.693 --> 01:27:17.693\n",
+       "Python isn't really handy, because I'm like, what is the exact value here like I don't know, and like.\n",
+       "\n",
+       "01:27:17.365 --> 01:27:19.365\n",
+       "Is not interactive.\n",
+       "\n",
+       "01:27:18.965 --> 01:27:20.965\n",
+       "So we're gonna do this in tableau.\n",
+       "\n",
+       "01:27:20.691 --> 01:27:23.691\n",
+       "So everyone pull up their tableau.\n",
+       "\n",
+       "01:27:23.865 --> 01:27:26.865\n",
+       "I you can do it on the web, but I.\n",
+       "\n",
+       "01:27:27.666 --> 01:27:30.666\n",
+       "My Internet was really slow. When I was doing this, you can also just download it.\n",
+       "\n",
+       "01:27:32.590 --> 01:27:35.590\n",
+       "I also just it crashed, I mean, like 50 times. But.\n",
+       "\n",
+       "01:27:35.896 --> 01:27:37.896\n",
+       "And it's taking forever to load.\n",
+       "\n",
+       "01:27:37.916 --> 01:27:38.916\n",
+       "Yeah.\n",
+       "\n",
+       "01:27:39.365 --> 01:27:40.365\n",
+       "I want this.\n",
+       "\n",
+       "01:27:40.018 --> 01:27:41.018\n",
+       "There we go!\n",
+       "\n",
+       "01:27:42.716 --> 01:27:44.716\n",
+       "Okay.\n",
+       "\n",
+       "01:27:45.317 --> 01:27:46.317\n",
+       "Open.\n",
+       "\n",
+       "01:27:49.916 --> 01:27:54.916\n",
+       "Yeah, it's like it's always breaking. Let's do a new one.\n",
+       "\n",
+       "01:27:59.367 --> 01:28:00.367\n",
+       "Delete sure.\n",
+       "\n",
+       "01:28:01.565 --> 01:28:03.565\n",
+       "Why are you bumping around.\n",
+       "\n",
+       "01:28:04.766 --> 01:28:05.766\n",
+       "And save.\n",
+       "\n",
+       "01:28:05.790 --> 01:28:07.790\n",
+       "Alright! So we have a new book.\n",
+       "\n",
+       "01:28:08.991 --> 01:28:09.991\n",
+       "Click, on.\n",
+       "\n",
+       "01:28:10.165 --> 01:28:12.165\n",
+       "If you click on data source, it brings you back home.\n",
+       "\n",
+       "01:28:12.168 --> 01:28:18.168\n",
+       "One of the things I just. I just don't like tableau, but it's like it doesn't even have an option for Csv, it says File.\n",
+       "\n",
+       "01:28:18.166 --> 01:28:19.166\n",
+       "It is.\n",
+       "\n",
+       "01:28:20.593 --> 01:28:22.593\n",
+       "Like a Csv is a text file, but like.\n",
+       "\n",
+       "01:28:24.168 --> 01:28:25.168\n",
+       "I'm just.\n",
+       "\n",
+       "01:28:30.766 --> 01:28:34.766\n",
+       "Where and like doesn't show so we are in Ctp.\n",
+       "\n",
+       "01:28:34.992 --> 01:28:37.992\n",
+       "We are in Ctp. 24.\n",
+       "\n",
+       "01:28:38.594 --> 01:28:40.594\n",
+       "Fridays, this.\n",
+       "\n",
+       "01:28:41.192 --> 01:28:42.192\n",
+       "Data.\n",
+       "\n",
+       "01:28:43.317 --> 01:28:44.317\n",
+       "Data.\n",
+       "\n",
+       "01:28:45.465 --> 01:28:46.465\n",
+       "Listing ces.\n",
+       "\n",
+       "01:28:47.566 --> 01:28:48.566\n",
+       "Alright, cool.\n",
+       "\n",
+       "01:28:50.166 --> 01:28:56.166\n",
+       "So this is how you import it. You can import multiple files and join them together through this user interface. But\n",
+       "\n",
+       "01:28:56.916 --> 01:29:02.916\n",
+       "I put in the thing I'm like cool. Let's go to sheet one, and I'm like cool. So this is the general.\n",
+       "\n",
+       "01:29:03.565 --> 01:29:05.565\n",
+       "Interface for it. But can I zoom.\n",
+       "\n",
+       "01:29:06.166 --> 01:29:07.166\n",
+       "I can't zoom.\n",
+       "\n",
+       "01:29:08.865 --> 01:29:10.865\n",
+       "I want to make it bigger, but I can't.\n",
+       "\n",
+       "01:29:11.565 --> 01:29:14.565\n",
+       "So you have your table, and you have different types of metrics.\n",
+       "\n",
+       "01:29:14.416 --> 01:29:17.416\n",
+       "Who can kind of tell me why these are separated.\n",
+       "\n",
+       "01:29:18.265 --> 01:29:20.265\n",
+       "Like above and below, like host site.\n",
+       "\n",
+       "01:29:19.016 --> 01:29:23.016\n",
+       "Cause one of our measures. The others are back. Oh, wait, no, never mind.\n",
+       "\n",
+       "01:29:23.598 --> 01:29:26.598\n",
+       "Well, you did you? I think somebody said this in their slack thing.\n",
+       "\n",
+       "01:29:26.466 --> 01:29:29.466\n",
+       "Who's who's put it in their plaque? Was it David?\n",
+       "\n",
+       "01:29:29.816 --> 01:29:31.816\n",
+       "To mention. He measures.\n",
+       "\n",
+       "01:29:30.616 --> 01:29:34.616\n",
+       "Yeah, dimensions and measures like these are numbers.\n",
+       "\n",
+       "01:29:35.515 --> 01:29:37.515\n",
+       "Right? Availability, 65.\n",
+       "\n",
+       "01:29:37.766 --> 01:29:41.766\n",
+       "You just like click on it. It's like a number. You can't really see it. Well, but like Price is a number.\n",
+       "\n",
+       "01:29:42.315 --> 01:29:43.315\n",
+       "But like neighborhood.\n",
+       "\n",
+       "01:29:44.266 --> 01:29:48.266\n",
+       "Is a thing, and like usually when I am new to something, I just start clicking.\n",
+       "\n",
+       "01:29:48.095 --> 01:29:52.095\n",
+       "I'm just like, okay. I don't even know what I did. Sum up price by availability. 65.\n",
+       "\n",
+       "01:29:52.216 --> 01:29:54.216\n",
+       "Okay. But let's delete this.\n",
+       "\n",
+       "01:29:54.766 --> 01:29:56.766\n",
+       "And the way you delete them is this.\n",
+       "\n",
+       "01:29:57.499 --> 01:29:58.499\n",
+       "So we have.\n",
+       "\n",
+       "01:29:58.066 --> 01:29:59.066\n",
+       "Well.\n",
+       "\n",
+       "01:30:00.265 --> 01:30:02.265\n",
+       "What? Okay?\n",
+       "\n",
+       "01:30:04.692 --> 01:30:07.692\n",
+       "I don't know what this happened. Here, let's just do a new sheet.\n",
+       "\n",
+       "01:30:07.196 --> 01:30:08.196\n",
+       "I don't know.\n",
+       "\n",
+       "01:30:07.495 --> 01:30:11.495\n",
+       "So if we go to back to the.\n",
+       "\n",
+       "01:30:13.015 --> 01:30:18.015\n",
+       "Here, and we go to the instructions.\n",
+       "\n",
+       "01:30:17.866 --> 01:30:20.866\n",
+       "So what we gotta do, we're gonna make a map first.\n",
+       "\n",
+       "01:30:21.594 --> 01:30:24.594\n",
+       "So like I said before, it's like Latin, long.\n",
+       "\n",
+       "01:30:24.965 --> 01:30:27.965\n",
+       "Their numbers, and it's not quite things you have to.\n",
+       "\n",
+       "01:30:28.466 --> 01:30:30.466\n",
+       "Convert to a dimension. First.\n",
+       "\n",
+       "01:30:31.366 --> 01:30:35.366\n",
+       "And then it's like, knows that this is a latitude, and this is a longitude.\n",
+       "\n",
+       "01:30:35.565 --> 01:30:37.565\n",
+       "And when you name them Latin long.\n",
+       "\n",
+       "01:30:39.366 --> 01:30:47.366\n",
+       "It usually will default or like LAT. Ln, like it'll it'll know it sometimes you have to specify if it's just like XY, or something.\n",
+       "\n",
+       "01:30:47.616 --> 01:30:56.616\n",
+       "So if you click on these things, and the main way, you click charts is like the show me button, which doesn't make any sense to me, but there should be certain things highlighted or like available.\n",
+       "\n",
+       "01:30:56.965 --> 01:31:00.965\n",
+       "So you click on one. And you're like this is the only thing is available right now.\n",
+       "\n",
+       "01:31:02.366 --> 01:31:06.366\n",
+       "And I'm like, alright cool like this is tracking for me. It's got it correct.\n",
+       "\n",
+       "01:31:07.166 --> 01:31:10.166\n",
+       "So, instead of having we did in python like.\n",
+       "\n",
+       "01:31:10.365 --> 01:31:12.365\n",
+       "Having to explicitly say.\n",
+       "\n",
+       "01:31:12.867 --> 01:31:13.867\n",
+       "Make.\n",
+       "\n",
+       "01:31:13.997 --> 01:31:14.997\n",
+       "Here, like.\n",
+       "\n",
+       "01:31:14.870 --> 01:31:19.870\n",
+       "We were like, make this blue. Make this red, make this thing.\n",
+       "\n",
+       "01:31:20.465 --> 01:31:21.465\n",
+       "It's like.\n",
+       "\n",
+       "01:31:20.894 --> 01:31:25.894\n",
+       "You just drag the thing you want to be a color, and like, put it in the color tab.\n",
+       "\n",
+       "01:31:27.266 --> 01:31:30.266\n",
+       "I was like Bam, if we did a neighborhood itself.\n",
+       "\n",
+       "01:31:31.915 --> 01:37:30.915\n",
+       "Cool.\n",
+       "\n",
+       "01:31:35.165 --> 01:31:37.165\n",
+       "If we did, this might break.\n",
+       "\n",
+       "01:31:40.016 --> 01:31:42.016\n",
+       "Yeah, it's like, don't do this.\n",
+       "\n",
+       "01:31:43.067 --> 01:31:44.067\n",
+       "Let's just close it.\n",
+       "\n",
+       "01:31:46.566 --> 01:31:49.566\n",
+       "Who could tell me? Like an improvement? Where, like, where's the most dense area.\n",
+       "\n",
+       "01:31:51.966 --> 01:31:54.966\n",
+       "Can't really tell right what would be a way to fix this.\n",
+       "\n",
+       "01:31:56.696 --> 01:31:57.696\n",
+       "Douglas.\n",
+       "\n",
+       "01:31:58.967 --> 01:32:03.967\n",
+       "Maybe reduce the it looks like the dots are really big the size of each dot.\n",
+       "\n",
+       "01:32:04.066 --> 01:32:06.066\n",
+       "That's a really good start.\n",
+       "\n",
+       "01:32:07.716 --> 01:32:08.716\n",
+       "Okay, we did that.\n",
+       "\n",
+       "01:32:09.017 --> 01:32:12.017\n",
+       "Who can tell me where the densest spot in Manhattan is.\n",
+       "\n",
+       "01:32:15.792 --> 01:32:17.792\n",
+       "There's another trick we can do.\n",
+       "\n",
+       "01:32:18.367 --> 01:32:22.367\n",
+       "What if 50 dots are stacked on top of each other? The exact same apartment building.\n",
+       "\n",
+       "01:32:21.615 --> 01:32:24.615\n",
+       "Can you use like the color stepping.\n",
+       "\n",
+       "01:32:25.216 --> 01:32:29.216\n",
+       "I don't know what that means, but you can use maybe. But you can also use.\n",
+       "\n",
+       "01:32:29.315 --> 01:32:30.315\n",
+       "We'll pass it.\n",
+       "\n",
+       "01:32:30.966 --> 01:32:36.966\n",
+       "And now you start to see cause if 50 dots are stacked on top of each other, like all really, really close.\n",
+       "\n",
+       "01:32:38.466 --> 01:32:43.466\n",
+       "And they're all 100% capacity. It doesn't make any difference. It'll look the same as one spot on the map.\n",
+       "\n",
+       "01:32:42.791 --> 01:32:45.791\n",
+       "But now, if you change the opacity.\n",
+       "\n",
+       "01:32:45.765 --> 01:32:47.765\n",
+       "They add up on top of each other.\n",
+       "\n",
+       "01:32:48.165 --> 01:32:51.165\n",
+       "Opacity is just like transparency, or how see it is.\n",
+       "\n",
+       "01:32:52.415 --> 01:32:54.415\n",
+       "So then you can see a much more clear picture.\n",
+       "\n",
+       "01:32:54.415 --> 01:32:56.415\n",
+       "You can create\n",
+       "\n",
+       "01:32:56.567 --> 01:32:57.567\n",
+       "Filters.\n",
+       "\n",
+       "01:32:57.997 --> 01:33:01.997\n",
+       "So if you do the filter, and just say you do, let's do price as a filter.\n",
+       "\n",
+       "01:33:04.216 --> 01:33:06.216\n",
+       "And then it'll be like all values.\n",
+       "\n",
+       "01:33:07.366 --> 01:33:09.366\n",
+       "0, 10,000. Okay.\n",
+       "\n",
+       "01:33:09.815 --> 01:33:13.815\n",
+       "And then you had the right click and say, Show filter.\n",
+       "\n",
+       "01:33:14.616 --> 01:33:19.616\n",
+       "You can be like, okay, I just want to see the ones that are like over $3,000.\n",
+       "\n",
+       "01:33:19.965 --> 01:33:22.965\n",
+       "It's not really good. Let's just say, under a hundred.\n",
+       "\n",
+       "01:33:23.065 --> 01:33:24.065\n",
+       "Under a thousand.\n",
+       "\n",
+       "01:33:23.665 --> 01:33:25.665\n",
+       "Right. I like.\n",
+       "\n",
+       "01:33:25.792 --> 01:33:26.792\n",
+       "Okay.\n",
+       "\n",
+       "01:33:28.017 --> 01:33:29.017\n",
+       "Okay.\n",
+       "\n",
+       "01:33:30.195 --> 01:33:33.195\n",
+       "It's better, or you can just select.\n",
+       "\n",
+       "01:33:33.415 --> 01:33:34.415\n",
+       "Brooklyn.\n",
+       "\n",
+       "01:33:33.966 --> 01:33:35.966\n",
+       "Which is now a filter.\n",
+       "\n",
+       "01:33:36.767 --> 01:33:40.767\n",
+       "But if you want to do, I think a better would be like neighborhood.\n",
+       "\n",
+       "01:33:46.196 --> 01:33:47.196\n",
+       "Block from last.\n",
+       "\n",
+       "01:33:55.016 --> 01:34:00.016\n",
+       "And we, well, we're just the story. Yeah. And you're like, cool. Just this.\n",
+       "\n",
+       "01:34:02.915 --> 01:34:04.915\n",
+       "Again. I don't know what these areas are.\n",
+       "\n",
+       "01:34:04.767 --> 01:34:08.767\n",
+       "Maybe because the size is too small or the UN opaque.\n",
+       "\n",
+       "01:34:11.466 --> 01:34:12.466\n",
+       "Breezy point.\n",
+       "\n",
+       "01:34:12.567 --> 01:34:13.567\n",
+       "Wrighton Beach.\n",
+       "\n",
+       "01:34:13.266 --> 01:34:16.266\n",
+       "Okay. Anyway, I'm going off of it. Rails.\n",
+       "\n",
+       "01:34:16.391 --> 01:34:18.391\n",
+       "Are there any questions on how to make a map?\n",
+       "\n",
+       "01:34:22.817 --> 01:34:24.817\n",
+       "Let's go.\n",
+       "\n",
+       "01:34:25.496 --> 01:34:27.496\n",
+       "Back to the thing.\n",
+       "\n",
+       "01:34:27.467 --> 01:34:33.467\n",
+       "Now we're gonna make histograms just like we did before. But way easier. So we're gonna make a new thing.\n",
+       "\n",
+       "01:34:35.716 --> 01:34:37.716\n",
+       "We want to do it by the neighborhood.\n",
+       "\n",
+       "01:34:36.866 --> 01:34:39.866\n",
+       "So we put the neighborhood.\n",
+       "\n",
+       "01:34:42.692 --> 01:34:44.692\n",
+       "Into columns.\n",
+       "\n",
+       "01:34:45.365 --> 01:34:47.365\n",
+       "I'm sorry. Neighborhood group.\n",
+       "\n",
+       "01:34:47.567 --> 01:34:48.567\n",
+       "Into columns.\n",
+       "\n",
+       "01:34:49.165 --> 01:34:50.165\n",
+       "And then we do.\n",
+       "\n",
+       "01:34:50.592 --> 01:34:55.592\n",
+       "The listings that Csv. Count, which is an account. It's just a count of everything.\n",
+       "\n",
+       "01:34:56.413 --> 01:35:00.413\n",
+       "And you put that into rows. I believe. So we're like, okay, we got this.\n",
+       "\n",
+       "01:35:00.916 --> 01:35:01.916\n",
+       "Right.\n",
+       "\n",
+       "01:35:08.593 --> 01:35:12.593\n",
+       "Drag neighbor group in the column and color. Okay, we did that. Coloring wouldn't.\n",
+       "\n",
+       "01:35:11.698 --> 01:35:13.698\n",
+       "Do anything.\n",
+       "\n",
+       "01:35:15.366 --> 01:35:16.366\n",
+       "Wait. How do we make them.\n",
+       "\n",
+       "01:35:16.666 --> 01:35:18.666\n",
+       "Histogram of each.\n",
+       "\n",
+       "01:35:22.066 --> 01:35:26.066\n",
+       "I'm doing the wrong thing. Oh, who knows what I'm doing wrong?\n",
+       "\n",
+       "01:35:26.096 --> 01:35:27.096\n",
+       "This step.\n",
+       "\n",
+       "01:35:26.866 --> 01:35:29.866\n",
+       "Remember, I was like, you gotta do something to it first.st\n",
+       "\n",
+       "01:35:32.595 --> 01:35:39.595\n",
+       "You gotta bin them right. It's just showing the count of all of it, and I want to show the price of each thing in each neighborhood.\n",
+       "\n",
+       "01:35:39.793 --> 01:35:41.793\n",
+       "So I click the price.\n",
+       "\n",
+       "01:35:41.965 --> 01:35:44.965\n",
+       "And I do create. And I do bins.\n",
+       "\n",
+       "01:35:44.892 --> 01:35:49.892\n",
+       "And the size of the bins like before. Remember, I'll show you like 10 versus a hundred.\n",
+       "\n",
+       "01:35:50.316 --> 01:35:53.316\n",
+       "It. I don't know why it's defaulting 324. Let's just do.\n",
+       "\n",
+       "01:35:54.365 --> 01:35:55.365\n",
+       "30. What does this say?\n",
+       "\n",
+       "01:35:55.317 --> 01:35:56.317\n",
+       "30.\n",
+       "\n",
+       "01:35:56.970 --> 01:35:57.970\n",
+       "30 bins.\n",
+       "\n",
+       "01:35:59.165 --> 01:36:00.165\n",
+       "Let's just get rid of this.\n",
+       "\n",
+       "01:36:04.466 --> 01:36:09.466\n",
+       "And now we put that into columns. So now I have this new thing. It even looks like a little histogram. The icon.\n",
+       "\n",
+       "01:36:09.496 --> 01:36:11.496\n",
+       "And I put that into columns.\n",
+       "\n",
+       "01:36:12.093 --> 01:36:13.093\n",
+       "And now I put the.\n",
+       "\n",
+       "01:36:13.865 --> 01:36:16.865\n",
+       "Count of each one into the rows.\n",
+       "\n",
+       "01:36:17.666 --> 01:36:19.666\n",
+       "To me. This doesn't really make much sense.\n",
+       "\n",
+       "01:36:19.765 --> 01:36:21.765\n",
+       "Like, why they do it like this.\n",
+       "\n",
+       "01:36:22.067 --> 01:36:26.067\n",
+       "I just play around till it starts making sense. And I'm like, Okay.\n",
+       "\n",
+       "01:36:26.094 --> 01:36:27.094\n",
+       "What took us?\n",
+       "\n",
+       "01:36:27.269 --> 01:36:29.269\n",
+       "This many lines.\n",
+       "\n",
+       "01:36:32.066 --> 01:36:33.066\n",
+       "In python.\n",
+       "\n",
+       "01:36:33.016 --> 01:36:38.016\n",
+       "Took us like just dragging and clicking, and I didn't even know what I was doing.\n",
+       "\n",
+       "01:36:37.766 --> 01:36:39.766\n",
+       "Kinda you know what I mean.\n",
+       "\n",
+       "01:36:40.165 --> 01:36:41.165\n",
+       "Like you can just.\n",
+       "\n",
+       "01:36:41.693 --> 01:36:42.693\n",
+       "Figure it out.\n",
+       "\n",
+       "01:36:43.265 --> 01:36:44.265\n",
+       "Until you get there.\n",
+       "\n",
+       "01:36:44.566 --> 01:36:47.566\n",
+       "Like in Python like you need to know exactly what you're doing, and it's.\n",
+       "\n",
+       "01:36:47.216 --> 01:36:49.216\n",
+       "Not easy, even when you do.\n",
+       "\n",
+       "01:36:50.366 --> 01:36:57.366\n",
+       "So like this is distribution of each one of the prices like for this one. Now you hover over it, and you're like, Oh, in this.\n",
+       "\n",
+       "01:36:56.966 --> 01:36:58.966\n",
+       "Group of.\n",
+       "\n",
+       "01:36:58.595 --> 01:37:00.595\n",
+       "I wish they said.\n",
+       "\n",
+       "01:37:01.016 --> 01:37:06.016\n",
+       "This the Bin dimension, but between 30 and 60 in queens.\n",
+       "\n",
+       "01:37:05.665 --> 01:37:07.665\n",
+       "This is the bin of 60 s.\n",
+       "\n",
+       "01:37:07.792 --> 01:37:09.792\n",
+       "Plus or minus, I guess 10.\n",
+       "\n",
+       "01:37:10.015 --> 01:37:12.015\n",
+       "Does. It should say it. There's 1,500 houses that are.\n",
+       "\n",
+       "01:37:11.815 --> 01:37:14.815\n",
+       "In this group that are cost $60.\n",
+       "\n",
+       "01:37:16.465 --> 01:37:17.465\n",
+       "From 40 to 60, or whatever.\n",
+       "\n",
+       "01:37:17.693 --> 01:37:20.693\n",
+       "Maybe 30 to 60, it should say the low and the high.\n",
+       "\n",
+       "01:37:23.266 --> 01:37:29.266\n",
+       "And in Manhattan there's 3, 4,000, or 3 to 35. Brooklyn is 4,300.\n",
+       "\n",
+       "01:37:31.093 --> 01:37:33.093\n",
+       "Now, let's do about any questions about this.\n",
+       "\n",
+       "01:37:31.617 --> 01:40:21.617\n",
+       "Detail.\n",
+       "\n",
+       "01:37:34.766 --> 01:37:35.766\n",
+       "So.\n",
+       "\n",
+       "01:37:38.166 --> 01:37:41.166\n",
+       "We were doing decent on time. Actually,\n",
+       "\n",
+       "01:37:41.616 --> 01:37:45.616\n",
+       "So sheet for up, up.\n",
+       "\n",
+       "01:37:46.315 --> 01:37:49.315\n",
+       "Box and whisker. This is interesting.\n",
+       "\n",
+       "01:37:58.566 --> 01:38:00.566\n",
+       "So what we want to do.\n",
+       "\n",
+       "01:38:05.966 --> 01:38:06.966\n",
+       "What?\n",
+       "\n",
+       "01:38:07.966 --> 01:38:12.966\n",
+       "Under marks that's interesting. So we wanna do the box and whisper.\n",
+       "\n",
+       "01:38:13.093 --> 01:38:14.093\n",
+       "Of.\n",
+       "\n",
+       "01:38:13.593 --> 01:38:15.593\n",
+       "We want to do this.\n",
+       "\n",
+       "01:38:17.793 --> 01:38:18.793\n",
+       "We want to do.\n",
+       "\n",
+       "01:38:18.896 --> 01:38:19.896\n",
+       "This.\n",
+       "\n",
+       "01:38:20.565 --> 01:38:22.565\n",
+       "We did that.\n",
+       "\n",
+       "01:38:22.895 --> 01:38:24.895\n",
+       "We want to do this.\n",
+       "\n",
+       "01:38:25.096 --> 01:38:28.096\n",
+       "Right? So I'm like, Okay, I want the price.\n",
+       "\n",
+       "01:38:28.666 --> 01:38:29.666\n",
+       "And then I want to make.\n",
+       "\n",
+       "01:38:29.093 --> 01:38:33.093\n",
+       "Find the outliers for each one using blocks and wizard per borough.\n",
+       "\n",
+       "01:38:34.392 --> 01:38:37.392\n",
+       "So that the way that you do that.\n",
+       "\n",
+       "01:38:36.866 --> 01:38:39.866\n",
+       "Drag neighborhood group into comms and color.\n",
+       "\n",
+       "01:38:39.866 --> 01:38:44.866\n",
+       "Drag Count to detail under marks. I don't understand that part.\n",
+       "\n",
+       "01:38:45.666 --> 01:38:47.666\n",
+       "Drag. Let's start by driving Price to rows.\n",
+       "\n",
+       "01:38:47.665 --> 01:38:49.665\n",
+       "I'm like, Okay.\n",
+       "\n",
+       "01:38:51.266 --> 01:38:56.266\n",
+       "Now, if we do show me, and we do box and whisker, I'm like, Okay.\n",
+       "\n",
+       "01:38:55.597 --> 01:39:00.597\n",
+       "This has it for all of them. But, like now, I wanna do columns.\n",
+       "\n",
+       "01:39:01.266 --> 01:39:03.266\n",
+       "Is this, and I'm like, Okay, this is.\n",
+       "\n",
+       "01:39:03.597 --> 01:39:07.597\n",
+       "Technically right, but like it looks so small here.\n",
+       "\n",
+       "01:39:08.265 --> 01:39:10.265\n",
+       "And it looks so much different.\n",
+       "\n",
+       "01:39:10.394 --> 01:39:12.394\n",
+       "Here? Who can tell me what?\n",
+       "\n",
+       "01:39:18.994 --> 01:39:20.994\n",
+       "Remember, like good and bad.\n",
+       "\n",
+       "01:39:21.566 --> 01:39:22.566\n",
+       "Axes and scales.\n",
+       "\n",
+       "01:39:26.267 --> 01:39:29.267\n",
+       "The price scale is like too small.\n",
+       "\n",
+       "01:39:30.116 --> 01:39:34.116\n",
+       "Close. It's this is linear and the other one's logarithmic.\n",
+       "\n",
+       "01:39:34.669 --> 01:39:35.669\n",
+       "Log, scale.\n",
+       "\n",
+       "01:39:35.998 --> 01:39:39.998\n",
+       "And like linear like, they're just so far apart that it's hard to tell.\n",
+       "\n",
+       "01:39:40.266 --> 01:39:42.266\n",
+       "So you can easily.\n",
+       "\n",
+       "01:39:42.267 --> 01:39:43.267\n",
+       "Edit, access.\n",
+       "\n",
+       "01:39:44.466 --> 01:39:45.466\n",
+       "Logarithmic.\n",
+       "\n",
+       "01:39:45.515 --> 01:39:46.515\n",
+       "And it looks.\n",
+       "\n",
+       "01:39:46.668 --> 01:39:47.668\n",
+       "Bigger.\n",
+       "\n",
+       "01:39:48.717 --> 01:39:49.717\n",
+       "Now.\n",
+       "\n",
+       "01:39:51.216 --> 01:40:01.216\n",
+       "Under marks. We want to have the oh, okay. So now, this is just showing the prices. But it's not actually counting each one. I believe so. If you put this and do this.\n",
+       "\n",
+       "01:40:02.415 --> 01:40:04.415\n",
+       "Under marks.\n",
+       "\n",
+       "01:40:05.466 --> 01:40:11.466\n",
+       "I'm sorry I'm just not that good at this. It's a really important skill to have. I just never had to do it.\n",
+       "\n",
+       "01:40:11.616 --> 01:40:14.616\n",
+       "Drag listings into detail.\n",
+       "\n",
+       "01:40:14.696 --> 01:40:15.696\n",
+       "And Dick.\n",
+       "\n",
+       "01:40:16.394 --> 01:40:18.394\n",
+       "Into detail under marks.\n",
+       "\n",
+       "01:40:22.396 --> 01:40:26.396\n",
+       "Delete.\n",
+       "\n",
+       "01:40:26.404 --> 01:44:37.404\n",
+       "Cool.\n",
+       "\n",
+       "01:40:28.716 --> 01:40:30.716\n",
+       "Listen. Yeah. Did that? Rev.\n",
+       "\n",
+       "01:40:31.015 --> 01:40:33.015\n",
+       "Drag price the rows.\n",
+       "\n",
+       "01:40:32.566 --> 01:40:34.566\n",
+       "They were grouped columns.\n",
+       "\n",
+       "01:40:34.665 --> 01:40:36.665\n",
+       "Sum of price. Rose.\n",
+       "\n",
+       "01:40:42.015 --> 01:40:43.015\n",
+       "I mean.\n",
+       "\n",
+       "01:40:44.366 --> 01:40:46.366\n",
+       "Was. I don't know what I'm doing around here.\n",
+       "\n",
+       "01:40:46.193 --> 01:40:50.193\n",
+       "I did 5 times already. I don't know why I'm spacing. Can anyone help me out.\n",
+       "\n",
+       "01:40:54.116 --> 01:40:57.116\n",
+       "Sum of price. I think that's incorrect.\n",
+       "\n",
+       "01:41:06.465 --> 01:41:08.465\n",
+       "I've done this like 10 times.\n",
+       "\n",
+       "01:41:08.566 --> 01:41:09.566\n",
+       "Already.\n",
+       "\n",
+       "01:41:10.077 --> 01:41:11.077\n",
+       "Or open, maybe.\n",
+       "\n",
+       "01:41:15.565 --> 01:41:18.565\n",
+       "Titanic baby names, listings.\n",
+       "\n",
+       "01:41:28.766 --> 01:41:33.766\n",
+       "Yeah. Someone circled the box and whispered, Plot on the show. Me tab to the right.\n",
+       "\n",
+       "01:41:33.966 --> 01:41:34.966\n",
+       "Yeah, I did that.\n",
+       "\n",
+       "01:41:35.316 --> 01:41:39.316\n",
+       "And it does this. And I'm like, Okay, cool. But now I want to buy neighborhood.\n",
+       "\n",
+       "01:41:40.415 --> 01:41:41.415\n",
+       "I'd remove this.\n",
+       "\n",
+       "01:41:42.615 --> 01:41:44.615\n",
+       "And then do it by neighborhood group.\n",
+       "\n",
+       "01:41:45.267 --> 01:41:46.267\n",
+       "It works.\n",
+       "\n",
+       "01:41:50.666 --> 01:41:52.666\n",
+       "But it's not. It's not like the thing should be bigger.\n",
+       "\n",
+       "01:41:53.415 --> 01:41:54.415\n",
+       "Anyway.\n",
+       "\n",
+       "01:41:55.065 --> 01:41:58.065\n",
+       "Man. I did this last class in like 5 seconds.\n",
+       "\n",
+       "01:41:59.266 --> 01:42:00.266\n",
+       "And I don't remember.\n",
+       "\n",
+       "01:42:01.094 --> 01:42:05.094\n",
+       "Like these. Box and whiskers should be way bigger.\n",
+       "\n",
+       "01:42:05.165 --> 01:42:08.165\n",
+       "Did anyone have any other? 64 chats.\n",
+       "\n",
+       "01:42:09.566 --> 01:42:16.566\n",
+       "You need your notes to put neighborhood group under color. Yeah, I mean, that's gonna change, not gonna change much. It's gonna change the color.\n",
+       "\n",
+       "01:42:17.265 --> 01:42:22.265\n",
+       "Maybe you could right click the sum of the price. See how it's calculating, and maybe by group or.\n",
+       "\n",
+       "01:42:17.415 --> 01:42:18.415\n",
+       "Looks like you just added.\n",
+       "\n",
+       "01:42:23.065 --> 01:42:25.065\n",
+       "Sum of price.\n",
+       "\n",
+       "01:42:24.066 --> 01:42:27.066\n",
+       "We like to measure how they're calculating it, or something like that.\n",
+       "\n",
+       "01:42:26.765 --> 01:42:29.765\n",
+       "Yeah, it. That's I believe.\n",
+       "\n",
+       "01:42:30.167 --> 01:42:31.167\n",
+       "Dimension.\n",
+       "\n",
+       "01:42:30.801 --> 01:42:32.801\n",
+       "Attribute.\n",
+       "\n",
+       "01:42:32.867 --> 01:42:33.867\n",
+       "Nope.\n",
+       "\n",
+       "01:42:34.967 --> 01:42:35.967\n",
+       "That mentions that.\n",
+       "\n",
+       "01:42:35.665 --> 01:42:37.665\n",
+       "I guess that mentioned, worked.\n",
+       "\n",
+       "01:42:38.195 --> 01:42:39.195\n",
+       "Interesting.\n",
+       "\n",
+       "01:42:40.066 --> 01:42:43.066\n",
+       "Yeah, I think we were supposed to. If we go back to the instructions.\n",
+       "\n",
+       "01:42:44.293 --> 01:42:47.293\n",
+       "Neighborhood Group's listings, calendar detail.\n",
+       "\n",
+       "01:42:47.095 --> 01:42:50.095\n",
+       "Drag price to rows. I believe that's.\n",
+       "\n",
+       "01:42:53.265 --> 01:42:54.265\n",
+       "Okay, well, that worked.\n",
+       "\n",
+       "01:42:56.216 --> 01:43:00.216\n",
+       "Interesting that that worked because it was treating it as.\n",
+       "\n",
+       "01:43:00.266 --> 01:43:01.266\n",
+       "Not.\n",
+       "\n",
+       "01:43:01.666 --> 01:43:05.666\n",
+       "I guess it was discreet, as discrete and not continuous. That must be the.\n",
+       "\n",
+       "01:43:04.815 --> 01:43:07.815\n",
+       "Issue like, if I go back to discrete.\n",
+       "\n",
+       "01:43:07.893 --> 01:43:08.893\n",
+       "Well.\n",
+       "\n",
+       "01:43:09.466 --> 01:43:10.466\n",
+       "That's not right.\n",
+       "\n",
+       "01:43:15.915 --> 01:43:17.915\n",
+       "Logarithmic. Okay?\n",
+       "\n",
+       "01:43:17.915 --> 01:43:23.915\n",
+       "Now I got the same thing, and now it's really good is in this chart. You can see. Okay in Manhattan.\n",
+       "\n",
+       "01:43:24.266 --> 01:43:27.266\n",
+       "The upper whisker is 4, 1,400.\n",
+       "\n",
+       "01:43:26.766 --> 01:43:28.766\n",
+       "The average.\n",
+       "\n",
+       "01:43:29.366 --> 01:43:31.366\n",
+       "The meeting, I'm sorry, is 60.\n",
+       "\n",
+       "01:43:30.665 --> 01:43:32.665\n",
+       "The upper hinge.\n",
+       "\n",
+       "01:43:33.865 --> 01:43:35.865\n",
+       "Is 6, 41 80.\n",
+       "\n",
+       "01:43:36.165 --> 01:43:39.165\n",
+       "And then you can find literally click on all the outliers.\n",
+       "\n",
+       "01:43:41.317 --> 01:43:42.317\n",
+       "There we go!\n",
+       "\n",
+       "01:43:43.116 --> 01:43:46.116\n",
+       "And that's it. Oh, and now we gotta make a dashboard.\n",
+       "\n",
+       "01:43:46.397 --> 01:43:50.397\n",
+       "Now we got all the sheets right. We got this sheet. We got this sheet. We got our map all that stuff.\n",
+       "\n",
+       "01:43:50.216 --> 01:43:54.216\n",
+       "Now you make a new dashboard, so it's as easy as new dashboard.\n",
+       "\n",
+       "01:43:54.366 --> 01:43:56.366\n",
+       "And then you just be like I want.\n",
+       "\n",
+       "01:43:56.065 --> 01:43:58.065\n",
+       "This as my 1st sheet.\n",
+       "\n",
+       "01:43:59.167 --> 01:44:00.167\n",
+       "Let's select all.\n",
+       "\n",
+       "01:44:01.116 --> 01:44:06.116\n",
+       "And I want. This is my 1st pain. This is my next. This is my last.\n",
+       "\n",
+       "01:44:05.893 --> 01:44:07.893\n",
+       "You can adjust the layout.\n",
+       "\n",
+       "01:44:10.566 --> 01:44:12.566\n",
+       "Let's just do make it like way bigger.\n",
+       "\n",
+       "01:44:13.915 --> 01:44:15.915\n",
+       "1,200.\n",
+       "\n",
+       "01:44:18.565 --> 01:44:19.565\n",
+       "800, by 1,200.\n",
+       "\n",
+       "01:44:23.995 --> 01:44:25.995\n",
+       "Maybe 12. Let's do.\n",
+       "\n",
+       "01:44:26.015 --> 01:44:27.015\n",
+       "1,900.\n",
+       "\n",
+       "01:44:28.793 --> 01:44:29.793\n",
+       "There we go much better.\n",
+       "\n",
+       "01:44:29.717 --> 01:44:34.717\n",
+       "My sister's calling me. I love my sister, but we can't talk well.\n",
+       "\n",
+       "01:44:38.366 --> 01:49:41.366\n",
+       "Right.\n",
+       "\n",
+       "01:44:38.495 --> 01:44:39.495\n",
+       "Any questions.\n",
+       "\n",
+       "01:44:42.167 --> 01:44:43.167\n",
+       "Great.\n",
+       "\n",
+       "01:44:43.465 --> 01:44:46.465\n",
+       "Tableau is a really powerful thing.\n",
+       "\n",
+       "01:44:48.315 --> 01:44:50.315\n",
+       "Most companies use it.\n",
+       "\n",
+       "01:44:51.366 --> 01:44:52.366\n",
+       "And it's a great.\n",
+       "\n",
+       "01:44:51.720 --> 01:44:55.720\n",
+       "Keyword, and it's a really easy way to do things quickly.\n",
+       "\n",
+       "01:44:56.567 --> 01:45:00.567\n",
+       "The more you use it the better you get. I've not experienced in using it. I use.\n",
+       "\n",
+       "01:44:59.970 --> 01:45:04.970\n",
+       "Google data studio. And the reason I use Google data is because.\n",
+       "\n",
+       "01:45:05.167 --> 01:45:08.167\n",
+       "It's tied into Google bigquery. Who knows what bigquery is.\n",
+       "\n",
+       "01:45:13.065 --> 01:45:15.065\n",
+       "If we just do this.\n",
+       "\n",
+       "01:45:15.215 --> 01:45:18.215\n",
+       "Real quick. I'll just show you really, really fast.\n",
+       "\n",
+       "01:45:19.501 --> 01:45:22.501\n",
+       "But but but they have a bunch of public data in here.\n",
+       "\n",
+       "01:45:23.665 --> 01:45:29.665\n",
+       "And just say, I just do names. I believe the data set is.\n",
+       "\n",
+       "01:45:34.095 --> 01:45:37.095\n",
+       "County names by census, bureau.\n",
+       "\n",
+       "01:45:41.366 --> 01:45:42.366\n",
+       "Nope.\n",
+       "\n",
+       "01:45:46.066 --> 01:45:50.066\n",
+       "Gosh, darn it, I'm just trying to get a U.S.A. names. Here we go.\n",
+       "\n",
+       "01:45:52.494 --> 01:45:56.494\n",
+       "I can preview this data. And I'm like, all right, cool.\n",
+       "\n",
+       "01:45:52.666 --> 01:45:53.666\n",
+       "1910, to current.\n",
+       "\n",
+       "01:45:57.066 --> 01:45:58.066\n",
+       "Then I can just.\n",
+       "\n",
+       "01:45:58.565 --> 01:46:04.565\n",
+       "Query. It export to a looker studio which is now dashboard.\n",
+       "\n",
+       "01:46:04.167 --> 01:46:05.167\n",
+       "And you can just.\n",
+       "\n",
+       "01:46:05.316 --> 01:46:13.316\n",
+       "Do it. And then you're gonna run SQL queries on it specifically to like filter data easily. But you need no sequel. So what we're gonna do now.\n",
+       "\n",
+       "01:46:13.416 --> 01:46:14.416\n",
+       "Is\n",
+       "\n",
+       "01:46:15.266 --> 01:46:19.266\n",
+       "We're gonna talk about what's due next week. And then we're gonna do breakout rooms.\n",
+       "\n",
+       "01:46:19.966 --> 01:46:21.966\n",
+       "To start the homework.\n",
+       "\n",
+       "01:46:21.765 --> 01:46:25.765\n",
+       "But it's really important to talk about what's due next week, because.\n",
+       "\n",
+       "01:46:25.766 --> 01:46:26.766\n",
+       "Of.\n",
+       "\n",
+       "01:46:27.397 --> 01:46:28.397\n",
+       "One thing I mean.\n",
+       "\n",
+       "01:46:28.665 --> 01:46:30.665\n",
+       "Because of this one fact.\n",
+       "\n",
+       "01:46:30.168 --> 01:46:31.168\n",
+       "Right.\n",
+       "\n",
+       "01:46:32.295 --> 01:46:34.295\n",
+       "So the biggest part of your homework.\n",
+       "\n",
+       "01:46:33.965 --> 01:46:35.965\n",
+       "Is to fill out this slide deck.\n",
+       "\n",
+       "01:46:36.366 --> 01:46:40.366\n",
+       "The slide deck is your project ideation slide deck. If you go to this.\n",
+       "\n",
+       "01:46:39.994 --> 01:46:41.994\n",
+       "I'll go through the things.\n",
+       "\n",
+       "01:46:42.466 --> 01:46:46.466\n",
+       "But each buddy, every person has their name in here. If your name is not in here.\n",
+       "\n",
+       "01:46:46.096 --> 01:46:50.096\n",
+       "Add a sheet with your name somehow. It just didn't get put in. It's fine.\n",
+       "\n",
+       "01:46:52.293 --> 01:46:53.293\n",
+       "And you got to fill it out.\n",
+       "\n",
+       "01:46:53.066 --> 01:46:57.066\n",
+       "With the specific things. And I want to preface this. And I'm gonna hit this home.\n",
+       "\n",
+       "01:46:57.095 --> 01:46:58.095\n",
+       "Is, that.\n",
+       "\n",
+       "01:46:58.496 --> 01:47:02.496\n",
+       "We're gonna fill this out because next class we're having like a matchmaking.\n",
+       "\n",
+       "01:47:02.095 --> 01:47:04.095\n",
+       "Process 2 form teams.\n",
+       "\n",
+       "01:47:05.866 --> 01:47:07.866\n",
+       "And then, after that matchmaking process, day.\n",
+       "\n",
+       "01:47:08.416 --> 01:47:14.416\n",
+       "You have a week to form a team or starting right now. You're supposed to form a team, and you have to have a team in 2 weeks.\n",
+       "\n",
+       "01:47:14.695 --> 01:47:17.695\n",
+       "You don't have to have a project full project idea.\n",
+       "\n",
+       "01:47:18.266 --> 01:47:19.266\n",
+       "But you have to have a team.\n",
+       "\n",
+       "01:47:18.966 --> 01:47:20.966\n",
+       "And the reason this is so important.\n",
+       "\n",
+       "01:47:20.665 --> 01:47:22.665\n",
+       "Is the number.\n",
+       "\n",
+       "01:47:22.666 --> 01:47:23.666\n",
+       "One.\n",
+       "\n",
+       "01:47:25.366 --> 01:47:27.366\n",
+       "The number one.\n",
+       "\n",
+       "01:47:27.016 --> 01:47:29.016\n",
+       "How do I say this correctly?\n",
+       "\n",
+       "01:47:30.266 --> 01:47:32.266\n",
+       "When you don't form your own team.\n",
+       "\n",
+       "01:47:31.866 --> 01:47:37.866\n",
+       "The chances of you not completing your project or your project being.\n",
+       "\n",
+       "01:47:37.965 --> 01:47:40.965\n",
+       "Not good, is very, very high.\n",
+       "\n",
+       "01:47:41.066 --> 01:47:43.066\n",
+       "So I want to say.\n",
+       "\n",
+       "01:47:43.096 --> 01:47:49.096\n",
+       "If you don't like. That's the number one characteristic of people that either don't complete it.\n",
+       "\n",
+       "01:47:50.066 --> 01:47:54.066\n",
+       "Or just have it really bad. One is that they don't form their own team.\n",
+       "\n",
+       "01:47:54.566 --> 01:47:55.566\n",
+       "It happens.\n",
+       "\n",
+       "01:47:55.966 --> 01:47:59.966\n",
+       "Usually once. It's twice a semester, and one of the classes like.\n",
+       "\n",
+       "01:47:59.766 --> 01:48:03.766\n",
+       "People won't find their own team. And I just you're like, all right.\n",
+       "\n",
+       "01:48:04.066 --> 01:48:07.066\n",
+       "These 3 people didn't form your own team. Guess what you're on a team now.\n",
+       "\n",
+       "01:48:06.695 --> 01:48:09.695\n",
+       "And without fit, not without fail, but.\n",
+       "\n",
+       "01:48:10.266 --> 01:48:16.266\n",
+       "Very high percentage of the time they don't complete their project, or it's really, if they do complete it, it's really bad.\n",
+       "\n",
+       "01:48:15.615 --> 01:48:20.615\n",
+       "So use this few weeks to like form a team.\n",
+       "\n",
+       "01:48:21.516 --> 01:48:25.516\n",
+       "And the reason we do this is because we have a show. You guys vibe with each other right?\n",
+       "\n",
+       "01:48:25.616 --> 01:48:26.616\n",
+       "So.\n",
+       "\n",
+       "01:48:27.766 --> 01:48:29.766\n",
+       "Here are the data sources.\n",
+       "\n",
+       "01:48:29.867 --> 01:48:30.867\n",
+       "Like.\n",
+       "\n",
+       "01:48:31.166 --> 01:48:35.166\n",
+       "This is the instruction. This is what a good one looks like. I filled out a good one.\n",
+       "\n",
+       "01:48:35.798 --> 01:48:36.798\n",
+       "Hey!\n",
+       "\n",
+       "01:48:36.994 --> 01:48:38.994\n",
+       "This is a project idea of mine.\n",
+       "\n",
+       "01:48:39.266 --> 01:48:42.266\n",
+       "It's like car image recognitions from car cameras.\n",
+       "\n",
+       "01:48:42.265 --> 01:48:43.265\n",
+       "I put.\n",
+       "\n",
+       "01:48:43.094 --> 01:48:48.094\n",
+       "Just my general interest again, like you wanna be able to connect with them on anything.\n",
+       "\n",
+       "01:48:48.596 --> 01:48:54.596\n",
+       "So I'm like, I like board games. I like serving. Andrew and I are bounding over that just because we're both servers and like that's like enough right.\n",
+       "\n",
+       "01:48:54.668 --> 01:48:55.668\n",
+       "Like.\n",
+       "\n",
+       "01:48:55.566 --> 01:48:59.566\n",
+       "I like text generation. I like radio lab, I like podcasts like.\n",
+       "\n",
+       "01:48:59.766 --> 01:49:06.766\n",
+       "Surfing weather models are cool. I'm really into back-end and maybe finding a backam and building a back end. AI.\n",
+       "\n",
+       "01:49:06.865 --> 01:49:08.865\n",
+       "Back end is like a really cool board game.\n",
+       "\n",
+       "01:49:09.199 --> 01:49:14.199\n",
+       "My current project idea. You have to put 2. I just put one here is classifying vehicles.\n",
+       "\n",
+       "01:49:13.895 --> 01:49:15.895\n",
+       "From pictures or videos.\n",
+       "\n",
+       "01:49:16.167 --> 01:49:20.167\n",
+       "For autonomous cars. Using the system. I probably found this somewhere.\n",
+       "\n",
+       "01:49:21.594 --> 01:49:24.594\n",
+       "And then you got to list at least 2 data sets that interest you.\n",
+       "\n",
+       "01:49:24.716 --> 01:49:26.716\n",
+       "I want to like.\n",
+       "\n",
+       "01:49:27.596 --> 01:49:32.596\n",
+       "Caveat that, like you, are no way committed to this project idea. Your project idea can be as.\n",
+       "\n",
+       "01:49:31.765 --> 01:49:34.765\n",
+       "Creative and out there as possible.\n",
+       "\n",
+       "01:49:34.965 --> 01:49:40.965\n",
+       "You got to put 2 projects, and in no way are you committed to it. It is going. It will probably change.\n",
+       "\n",
+       "01:49:41.518 --> 01:57:53.518\n",
+       "We'll.\n",
+       "\n",
+       "01:49:42.096 --> 01:49:44.096\n",
+       "But this is just to show what you're interested in.\n",
+       "\n",
+       "01:49:45.197 --> 01:49:51.197\n",
+       "And then you've got to find 2 data sets. They don't have to be related to your projects. They should be, but they don't.\n",
+       "\n",
+       "01:49:51.295 --> 01:49:53.295\n",
+       "That just interests you.\n",
+       "\n",
+       "01:49:53.366 --> 01:49:58.366\n",
+       "And when you put them down there, don't just link to it. Describe it, and why you liked it.\n",
+       "\n",
+       "01:49:59.294 --> 01:50:00.294\n",
+       "This is a data set.\n",
+       "\n",
+       "01:50:00.066 --> 01:50:02.066\n",
+       "Real and fake face detections.\n",
+       "\n",
+       "01:50:02.466 --> 01:50:07.466\n",
+       "Like detect identities. If it was photoshopped or good for social networks like this is why I liked it.\n",
+       "\n",
+       "01:50:07.266 --> 01:50:11.266\n",
+       "Don't just link to it, because that doesn't says nothing about why you liked it.\n",
+       "\n",
+       "01:50:14.066 --> 01:50:16.066\n",
+       "These are specific instructions.\n",
+       "\n",
+       "01:50:16.167 --> 01:50:17.167\n",
+       "Find your slide.\n",
+       "\n",
+       "01:50:17.165 --> 01:50:22.165\n",
+       "Fill out the bios, add it minimum 2 projects. You can do 50 project ideas. If you want.\n",
+       "\n",
+       "01:50:21.695 --> 01:50:22.695\n",
+       "I don't care.\n",
+       "\n",
+       "01:50:22.865 --> 01:50:24.865\n",
+       "At a minimum. 2 data sets.\n",
+       "\n",
+       "01:50:27.065 --> 01:50:29.065\n",
+       "Are there any questions about this.\n",
+       "\n",
+       "01:50:33.015 --> 01:50:35.015\n",
+       "Be prepared next week.\n",
+       "\n",
+       "01:50:35.617 --> 01:50:36.617\n",
+       "Do you talk about it?\n",
+       "\n",
+       "01:50:36.996 --> 01:50:41.996\n",
+       "I am going to say, whoever wants to talk about their project idea, I'm gonna give you.\n",
+       "\n",
+       "01:50:42.194 --> 01:50:44.194\n",
+       "One and a half to 2Ā min.\n",
+       "\n",
+       "01:50:44.016 --> 01:50:48.016\n",
+       "They talk about it. If nobody does it. I'm just gonna randomly go through slides.\n",
+       "\n",
+       "01:50:47.594 --> 01:50:49.594\n",
+       "So be prepared to talk about it.\n",
+       "\n",
+       "01:50:50.266 --> 01:50:52.266\n",
+       "And why am I doing this.\n",
+       "\n",
+       "01:50:54.066 --> 01:50:56.066\n",
+       "What's the number? One reason that.\n",
+       "\n",
+       "01:50:56.915 --> 01:50:59.915\n",
+       "Wasn't. What's my underlying mode of overall.\n",
+       "\n",
+       "01:51:00.020 --> 01:51:02.020\n",
+       "To get us a job.\n",
+       "\n",
+       "01:51:00.318 --> 01:51:01.318\n",
+       "Job.\n",
+       "\n",
+       "01:51:02.294 --> 01:51:03.294\n",
+       "That you guys.\n",
+       "\n",
+       "01:51:04.315 --> 01:51:06.315\n",
+       "Best way to get you guys a job, have a good project.\n",
+       "\n",
+       "01:51:06.565 --> 01:51:11.565\n",
+       "What's the number one way to have a bad project, or the number one characteristics of groups that.\n",
+       "\n",
+       "01:51:11.216 --> 01:51:13.216\n",
+       "Don't complete or have bad projects.\n",
+       "\n",
+       "01:51:14.116 --> 01:51:16.116\n",
+       "Not creating your own group.\n",
+       "\n",
+       "01:51:16.365 --> 01:51:17.365\n",
+       "Now creating your own group.\n",
+       "\n",
+       "01:51:18.017 --> 01:51:24.017\n",
+       "That's it just is. I'm not saying they're worse people. But like, that's what I have found over years of doing this.\n",
+       "\n",
+       "01:51:24.616 --> 01:51:26.616\n",
+       "The project team size is 3.\n",
+       "\n",
+       "01:51:27.567 --> 01:51:32.567\n",
+       "You can only work with people in this section. Friday, 1230.\n",
+       "\n",
+       "01:51:32.767 --> 01:51:39.767\n",
+       "The reason being is because every week now we're gonna do check-ins, and you're gonna work as a team in breakout rooms. I know right now it's random.\n",
+       "\n",
+       "01:51:40.165 --> 01:51:41.165\n",
+       "But that's it.\n",
+       "\n",
+       "01:51:40.865 --> 01:51:45.865\n",
+       "The other homework's pretty normal. Are there any questions I'm happy to talk about this.\n",
+       "\n",
+       "01:51:45.866 --> 01:51:52.866\n",
+       "Yeah. I already filled mine out like during our break. I don't know if you wanna look at it. It's definitely not as good as as you showed it.\n",
+       "\n",
+       "01:51:53.716 --> 01:51:55.716\n",
+       "But I mean I have my idea out there. If.\n",
+       "\n",
+       "01:51:54.896 --> 01:51:56.896\n",
+       "Where? Which slide are you.\n",
+       "\n",
+       "01:51:58.666 --> 01:51:59.666\n",
+       "I don't know the number.\n",
+       "\n",
+       "01:52:00.315 --> 01:52:01.315\n",
+       "I have it linked in.\n",
+       "\n",
+       "01:52:03.168 --> 01:52:04.168\n",
+       "This one.\n",
+       "\n",
+       "01:52:04.466 --> 01:52:06.466\n",
+       "I'm line 28, yeah.\n",
+       "\n",
+       "01:52:06.096 --> 01:52:08.096\n",
+       "Yeah, so I, I.\n",
+       "\n",
+       "01:52:07.996 --> 01:52:13.996\n",
+       "Video games working out music. Okay? Who? Everyone does add more details. Again.\n",
+       "\n",
+       "01:52:12.067 --> 01:52:13.067\n",
+       "Yeah.\n",
+       "\n",
+       "01:52:13.094 --> 01:52:15.094\n",
+       "It was. Yeah. It was before you went through it.\n",
+       "\n",
+       "01:52:15.315 --> 01:52:16.315\n",
+       "Yeah.\n",
+       "\n",
+       "01:52:15.866 --> 01:52:20.866\n",
+       "I would like to use machine learning to identify poker cards via camera.\n",
+       "\n",
+       "01:52:21.002 --> 01:52:23.002\n",
+       "Using supervised learning to help.\n",
+       "\n",
+       "01:52:24.266 --> 01:52:25.266\n",
+       "Playing cards.\n",
+       "\n",
+       "01:52:25.995 --> 01:52:27.995\n",
+       "Yeah, like.\n",
+       "\n",
+       "01:52:26.467 --> 01:52:27.467\n",
+       "Black.\n",
+       "\n",
+       "01:52:27.601 --> 01:52:28.601\n",
+       "Okay. Yeah.\n",
+       "\n",
+       "01:52:28.866 --> 01:52:30.866\n",
+       "Maybe a best word routine.\n",
+       "\n",
+       "01:52:30.715 --> 01:52:33.715\n",
+       "Okay, this is this, is.\n",
+       "\n",
+       "01:52:34.267 --> 01:52:35.267\n",
+       "Proficient.\n",
+       "\n",
+       "01:52:35.565 --> 01:52:38.565\n",
+       "This is proficient. This is like this passes.\n",
+       "\n",
+       "01:52:39.215 --> 01:52:41.215\n",
+       "You can go into more details. You need.\n",
+       "\n",
+       "01:52:41.366 --> 01:52:48.366\n",
+       "This is kind of maybe best workout routines that give the best results. I'm like, I don't really see it. But okay.\n",
+       "\n",
+       "01:52:47.766 --> 01:52:51.766\n",
+       "It's fine. Put it out there. And you guys again are no way. But yeah, this is fine.\n",
+       "\n",
+       "01:52:52.266 --> 01:52:53.266\n",
+       "So good work.\n",
+       "\n",
+       "01:52:53.016 --> 01:52:59.016\n",
+       "So what would make a good project idea? Would it be like? How specific our pitches? How would we determine that.\n",
+       "\n",
+       "01:52:59.168 --> 01:53:04.168\n",
+       "That's a really good thing. That's your pre-class homework, for next thing, are like, I gave you guys some.\n",
+       "\n",
+       "01:53:03.866 --> 01:53:06.866\n",
+       "Some things. Here, let's watch these 2 real quick.\n",
+       "\n",
+       "01:53:06.996 --> 01:53:08.996\n",
+       "This this guy's a.\n",
+       "\n",
+       "01:53:09.266 --> 01:53:17.266\n",
+       "G in in a data, science and education. Hold on. Let me share my screen real quick. I'll share my audio of my screen.\n",
+       "\n",
+       "01:53:17.866 --> 01:53:18.866\n",
+       "Share.\n",
+       "\n",
+       "01:53:18.915 --> 01:53:20.915\n",
+       "Desktop to share sound.\n",
+       "\n",
+       "01:53:22.915 --> 01:53:25.915\n",
+       "Can you guys hear it now?\n",
+       "\n",
+       "01:53:25.398 --> 01:53:50.398\n",
+       "What? Why, Anita, come up with a data Science portfolio project, and I've got not a clue. Really the best way to come up with data. Science portfolio projects is to do something first, st funny or functional projects that I typically take a look at in the 1st category are typically using state of the art or cutting edge. Technologies think things like Gpt, 3, 2, and clip funny projects don't seem like a given program.\n",
+       "\n",
+       "01:54:15.477 --> 01:54:20.477\n",
+       "You're working on hopefully, it's giving you some ideas. Any others drop. And below.\n",
+       "\n",
+       "01:54:22.067 --> 01:54:24.067\n",
+       "So like that's that's a thing. But like.\n",
+       "\n",
+       "01:54:24.966 --> 01:54:29.966\n",
+       "One the most and most importantly, he said, make sure that you're passionate about it.\n",
+       "\n",
+       "01:54:31.766 --> 01:54:32.766\n",
+       "And.\n",
+       "\n",
+       "01:54:32.617 --> 01:54:35.617\n",
+       "What I'm trying to get you to do is make things that pop.\n",
+       "\n",
+       "01:54:36.316 --> 01:54:39.316\n",
+       "Doing things. Something 1st is really hard.\n",
+       "\n",
+       "01:54:39.616 --> 01:54:41.616\n",
+       "Right, but funny or functional.\n",
+       "\n",
+       "01:54:41.316 --> 01:54:42.316\n",
+       "Great.\n",
+       "\n",
+       "01:54:41.665 --> 01:54:44.665\n",
+       "Who here knows who Rick Rubin is?\n",
+       "\n",
+       "01:54:44.816 --> 01:54:49.816\n",
+       "Ever heard of the name? Who here knows who Adele or Eminem is.\n",
+       "\n",
+       "01:54:49.715 --> 01:54:52.715\n",
+       "Ed Sheeran, the beasty boys.\n",
+       "\n",
+       "01:54:52.866 --> 01:55:00.866\n",
+       "He produced. Every major artist you've ever heard of ever he's their music producer, or one of the producers.\n",
+       "\n",
+       "01:55:01.015 --> 01:55:06.015\n",
+       "This is him talking about making music or art, and I kind of apply it to projects.\n",
+       "\n",
+       "01:55:11.367 --> 01:55:12.367\n",
+       "If that.\n",
+       "\n",
+       "01:55:14.816 --> 01:55:19.816\n",
+       "Last, and I believe that I'm not making it for them.\n",
+       "\n",
+       "01:55:19.666 --> 01:55:21.666\n",
+       "I'm making it for me.\n",
+       "\n",
+       "01:55:22.417 --> 01:55:23.417\n",
+       "And.\n",
+       "\n",
+       "01:55:23.315 --> 01:55:37.315\n",
+       "It turns out that when you make something truly the best thing you possibly can for the audience so much of why, if you go to the movies. So many big movies. Just not good.\n",
+       "\n",
+       "01:55:36.865 --> 01:55:38.865\n",
+       "It's because they're.\n",
+       "\n",
+       "01:55:38.690 --> 01:55:50.690\n",
+       "They're not being made by a person who cares about it. They're being made by people who are trying to make something that they think someone else is going to like. And that's not how art works.\n",
+       "\n",
+       "01:55:51.416 --> 01:55:52.416\n",
+       "So like.\n",
+       "\n",
+       "01:55:52.016 --> 01:55:56.016\n",
+       "What, Nick said the 1st like, make sure you're passionate about it.\n",
+       "\n",
+       "01:55:56.016 --> 01:56:00.016\n",
+       "And what Rick Rubin's like when you're making something.\n",
+       "\n",
+       "01:56:01.615 --> 01:56:03.615\n",
+       "For to that youth.\n",
+       "\n",
+       "01:56:04.666 --> 01:56:08.666\n",
+       "When you're making it in a way that like Oh, I think this other person's gonna like it.\n",
+       "\n",
+       "01:56:09.465 --> 01:56:10.465\n",
+       "It sucks.\n",
+       "\n",
+       "01:56:10.168 --> 01:56:14.168\n",
+       "Right, like transformers have, like a budget of like 10 billion dollars like.\n",
+       "\n",
+       "01:56:13.516 --> 01:56:16.516\n",
+       "And it sucked because they were all writing it to.\n",
+       "\n",
+       "01:56:17.116 --> 01:56:18.116\n",
+       "For it, to.\n",
+       "\n",
+       "01:56:18.165 --> 01:56:20.165\n",
+       "Because they think that the audience will like this.\n",
+       "\n",
+       "01:56:21.465 --> 01:56:23.465\n",
+       "But what busy decides saying like, make it for you.\n",
+       "\n",
+       "01:56:24.165 --> 01:56:25.165\n",
+       "And it's gonna come out like that.\n",
+       "\n",
+       "01:56:25.966 --> 01:56:30.966\n",
+       "Make it for your team, you know, like, and I want you guys to stress that because when you're passionate about it.\n",
+       "\n",
+       "01:56:31.715 --> 01:56:35.715\n",
+       "And you're doing it kind of like. Oh, I would like this.\n",
+       "\n",
+       "01:56:36.315 --> 01:56:37.315\n",
+       "That's like it shows.\n",
+       "\n",
+       "01:56:37.715 --> 01:56:40.715\n",
+       "So these are things I want to hit home with. You guys.\n",
+       "\n",
+       "01:56:41.415 --> 01:56:42.415\n",
+       "There are there!\n",
+       "\n",
+       "01:56:43.466 --> 01:56:48.466\n",
+       "There are other. These are another thing of his projects, of nix projects.\n",
+       "\n",
+       "01:56:50.116 --> 01:56:53.116\n",
+       "These are pretty hard, though it's like baseline ease, like.\n",
+       "\n",
+       "01:56:52.815 --> 01:56:54.815\n",
+       "It easy is like mad.\n",
+       "\n",
+       "01:56:54.516 --> 01:57:00.516\n",
+       "Impactful data science. This is a Ted Talk. You have to watch all these videos. I mean, it's not that long.\n",
+       "\n",
+       "01:57:01.264 --> 01:57:08.264\n",
+       "You have to do the exercise, the project ideation. And then a Linkedin post is to find a data science project that you like, or a dashboard.\n",
+       "\n",
+       "01:57:08.915 --> 01:57:09.915\n",
+       "Talk, about.\n",
+       "\n",
+       "01:57:11.764 --> 01:57:12.764\n",
+       "Why you like it, etc, etc.\n",
+       "\n",
+       "01:57:13.667 --> 01:57:15.667\n",
+       "Any questions before I send you guys the breakout rooms.\n",
+       "\n",
+       "01:57:17.865 --> 01:57:20.865\n",
+       "Did. I did answer your question. I forgot who said it.\n",
+       "\n",
+       "01:57:23.817 --> 01:57:24.817\n",
+       "Ayana, I think.\n",
+       "\n",
+       "01:57:24.815 --> 01:57:26.815\n",
+       "Douglas has a question.\n",
+       "\n",
+       "01:57:28.415 --> 01:57:29.415\n",
+       "I'm having.\n",
+       "\n",
+       "01:57:29.816 --> 01:57:32.816\n",
+       "I'm really struggling with the Lincoln part.\n",
+       "\n",
+       "01:57:33.069 --> 01:57:34.069\n",
+       "As.\n",
+       "\n",
+       "01:57:34.316 --> 01:57:40.316\n",
+       "I don't know it. It just feels every time I log on Linkedin, and I look at the beat, everything just feels cringe and.\n",
+       "\n",
+       "01:57:40.616 --> 01:57:44.616\n",
+       "When I post I'll cringe and it. I don't mean to be disrespectful.\n",
+       "\n",
+       "01:57:43.815 --> 01:57:46.815\n",
+       "No, no, I'm glad you're sharing your feeling.\n",
+       "\n",
+       "01:57:46.415 --> 01:57:47.415\n",
+       "I'm just. It's.\n",
+       "\n",
+       "01:57:49.665 --> 01:57:52.665\n",
+       "I don't know. I just link it and just feels like really weird to me.\n",
+       "\n",
+       "01:57:53.365 --> 01:57:54.365\n",
+       "And I'm really struggling with.\n",
+       "\n",
+       "01:57:54.166 --> 01:57:57.166\n",
+       "That's understandable. Right?\n",
+       "\n",
+       "01:57:54.364 --> 01:58:58.364\n",
+       "Them.\n",
+       "\n",
+       "01:57:57.115 --> 01:58:00.115\n",
+       "It's and it's a platform that's.\n",
+       "\n",
+       "01:58:01.316 --> 01:58:05.316\n",
+       "All about status and jobs and stuff like that. But unfortunately.\n",
+       "\n",
+       "01:58:05.766 --> 01:58:08.766\n",
+       "It's the best tool we have right now to get you guys a job.\n",
+       "\n",
+       "01:58:08.515 --> 01:58:10.515\n",
+       "This is why I'm doing it.\n",
+       "\n",
+       "01:58:10.515 --> 01:58:13.515\n",
+       "I know it's not enjoyable, but like.\n",
+       "\n",
+       "01:58:14.065 --> 01:58:18.065\n",
+       "A couple years ago. Yeah, you could apply to jobs anonymously, in a way.\n",
+       "\n",
+       "01:58:18.466 --> 01:58:21.466\n",
+       "And maybe get some traction that doesn't really work.\n",
+       "\n",
+       "01:58:21.715 --> 01:58:26.715\n",
+       "One less than 1% way, less than 1% success rate.\n",
+       "\n",
+       "01:58:26.863 --> 01:58:30.863\n",
+       "Linkedin is the way. That's how I hired, and Chris.\n",
+       "\n",
+       "01:58:31.216 --> 01:58:35.216\n",
+       "And Hussam, who was the other. That's how I hired everyone this cohort right now.\n",
+       "\n",
+       "01:58:36.165 --> 01:58:38.165\n",
+       "So that's the why I'm doing it. And.\n",
+       "\n",
+       "01:58:37.665 --> 01:58:41.665\n",
+       "My supreme you now, but you will warm up to it. Just give it a chance.\n",
+       "\n",
+       "01:58:42.163 --> 01:58:45.163\n",
+       "Let me start breakout rooms.\n",
+       "\n",
+       "01:58:44.664 --> 01:58:46.664\n",
+       "We can talk more about it if you want.\n",
+       "\n",
+       "01:58:47.515 --> 01:58:50.515\n",
+       "Actually let me 1st make Giorgio.\n",
+       "\n",
+       "01:58:50.515 --> 01:58:55.515\n",
+       "Co-host. Where did Georgios go? There he is. What up, Georgius, what do you? What do you got any advice for, like.\n",
+       "\n",
+       "01:58:55.569 --> 01:58:56.569\n",
+       "Like.\n",
+       "\n",
+       "01:58:56.764 --> 01:58:57.764\n",
+       "Project, Ide.\n",
+       "\n",
+       "01:58:56.865 --> 01:58:57.865\n",
+       "Or the.\n",
+       "\n",
+       "01:58:58.465 --> 01:59:03.465\n",
+       "Yeah. Take your time to find good actually.\n",
+       "\n",
+       "01:58:58.717 --> 01:59:59.717\n",
+       "Yeah.\n",
+       "\n",
+       "01:59:03.065 --> 01:59:12.065\n",
+       "1st I pick time to think, would you like? And what problem that is out there in the real world that you can apply data, science.\n",
+       "\n",
+       "01:59:12.164 --> 01:59:13.164\n",
+       "On it.\n",
+       "\n",
+       "01:59:12.865 --> 01:59:14.865\n",
+       "I.\n",
+       "\n",
+       "01:59:15.565 --> 01:59:20.565\n",
+       "And then B, and try to find the assets on that particular problem that you'd like to solve.\n",
+       "\n",
+       "01:59:20.065 --> 01:59:28.065\n",
+       "Through data science, but make good ideation very, very.\n",
+       "\n",
+       "01:59:27.817 --> 01:59:29.817\n",
+       "Detailed.\n",
+       "\n",
+       "01:59:29.964 --> 01:59:34.964\n",
+       "Because that's way the pets the like. Much of partners.\n",
+       "\n",
+       "01:59:34.915 --> 01:59:38.915\n",
+       "My slide, I remember, was like full of text.\n",
+       "\n",
+       "01:59:39.365 --> 01:59:49.365\n",
+       "On each like data set, like when I'm thinking to do and how to do it. But that's why we had the best group. And then there's the best product. That's why.\n",
+       "\n",
+       "01:59:49.516 --> 01:59:51.516\n",
+       "Take time to do it, rush through it.\n",
+       "\n",
+       "01:59:50.917 --> 01:59:54.917\n",
+       "Because if you make a team that sucks.\n",
+       "\n",
+       "01:59:55.264 --> 02:00:04.264\n",
+       "Then you're gonna suffer for the whole list. So try to make everything right from the beginning. Actually, this will, this one would be may maybe.\n",
+       "\n",
+       "02:00:00.167 --> 02:00:11.167\n",
+       "Yeah.\n",
+       "\n",
+       "02:00:04.517 --> 02:00:10.517\n",
+       "One of the most important, if nobody, the most important homework.\n",
+       "\n",
+       "02:00:09.616 --> 02:00:11.616\n",
+       "Yeah.\n",
+       "\n",
+       "02:00:11.316 --> 02:00:43.316\n",
+       "Say.\n",
+       "\n",
+       "02:00:11.764 --> 02:00:12.764\n",
+       "Personable.\n",
+       "\n",
+       "02:00:13.116 --> 02:00:17.116\n",
+       "Put time and don't make it like present. You don't have to worry about being presentable.\n",
+       "\n",
+       "02:00:17.016 --> 02:00:22.016\n",
+       "Make it just the more text the better I will zoom in on it. Don't! I'll make it so. We can read it.\n",
+       "\n",
+       "02:00:22.465 --> 02:00:26.465\n",
+       "So you've been assigned breakout rooms. Hop into it Georgia's and I will hop around.\n",
+       "\n",
+       "02:00:27.363 --> 02:00:31.363\n",
+       "You can start the. You can start the homework. You can talk about anything you want. You can do whatever you want.\n",
+       "\n",
+       "02:00:30.916 --> 02:00:31.916\n",
+       "So like.\n",
+       "\n",
+       "02:00:33.365 --> 02:00:37.365\n",
+       "But I recommend oh, the rule is also. Everyone must be sharing their screen at all times.\n",
+       "\n",
+       "02:00:39.866 --> 02:00:42.866\n",
+       "And everyone talking off mute and cameras on.\n",
+       "\n",
+       "02:00:48.265 --> 02:00:50.265\n",
+       "Double Zachary.\n",
+       "\n",
+       "02:00:50.115 --> 02:00:52.115\n",
+       "There's 2 of you in here.\n",
+       "\n",
+       "02:00:51.565 --> 02:00:54.565\n",
+       "Oh, yeah, my zoom keeps crashing. So that's probably.\n",
+       "\n",
+       "02:00:53.815 --> 02:00:56.815\n",
+       "Oh, yeah, the other dude is like, you want me to boot the other guy.\n",
+       "\n",
+       "02:00:56.316 --> 02:01:01.316\n",
+       "Yeah, my my evil clone. You could you could. My past clone.\n",
+       "\n",
+       "02:01:03.365 --> 02:01:05.365\n",
+       "Remove. This is gonna probably drop.\n",
+       "\n",
+       "02:01:05.516 --> 02:01:06.516\n",
+       "Oh, there we go!\n",
+       "\n",
+       "02:01:06.815 --> 02:01:08.815\n",
+       "Also I don't see the.\n",
+       "\n",
+       "02:01:09.163 --> 02:01:11.163\n",
+       "Breakout rooms. I think it's cause I just joined back.\n",
+       "\n",
+       "02:01:11.215 --> 02:01:17.215\n",
+       "Let me see, are you? Yeah, you're not assigned I will put you in.\n",
+       "\n",
+       "02:01:16.015 --> 02:01:21.015\n",
+       "Sorry about that. I don't know. I gotta fix it. Zoom just keeps crashing. I don't know how.\n",
+       "\n",
+       "02:01:21.715 --> 02:01:22.715\n",
+       "It'll just stop.\n",
+       "\n",
+       "02:01:22.015 --> 02:01:24.015\n",
+       "You know everything will stop. I don't know.\n",
+       "\n",
+       "02:01:24.416 --> 02:01:28.416\n",
+       "Highly recommend when you're before you start. Applying to jobs is like.\n",
+       "\n",
+       "02:01:28.715 --> 02:01:30.715\n",
+       "Make sure you have a reliable zoom.\n",
+       "\n",
+       "02:01:30.616 --> 02:01:36.616\n",
+       "It's specifically just zoom, though, like everything else like Google meets and all that's just zoom. But yeah.\n",
+       "\n",
+       "02:01:36.715 --> 02:01:37.715\n",
+       "Gotta figure that out.\n",
+       "\n",
+       "02:01:37.015 --> 02:01:40.015\n",
+       "You can get chromebooks, for, like 200 bucks.\n",
+       "\n",
+       "02:01:40.116 --> 02:01:43.116\n",
+       "And like, just be like, this is my zoom book. You know what I mean.\n",
+       "\n",
+       "02:01:40.166 --> 02:01:41.166\n",
+       "Yeah.\n",
+       "\n",
+       "02:01:42.365 --> 02:01:46.365\n",
+       "Yeah, yeah, seriously, I get like a Linux machine where I could just put zoom on there. Anyways. Yeah.\n",
+       "\n",
+       "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "if not len(item_metadata):\n", + " display_html(\"

(There are no items to display.)

\")\n", + "else:\n", + " any_metadata = next(iter(item_metadata))\n", + " raw_bytes = google_drive_service.read_file_by_id(any_metadata.id)\n", + " display_html(f\"

{any_metadata.name}

\"\n", + " f\"

MIME type: {any_metadata.mime_type}

\"\n", + " f\"

Last modified: {any_metadata.modified_time}

\")\n", + " display_html(f\"
{escape(raw_bytes.decode(\"utf-8\"))}
\"\n", + " if any_metadata.mime_type.startswith(\"text/\")\n", + " else f\"
{\"\\n\".join(wrap(raw_bytes.hex(), 32))}
\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/google_drive_web_vtt_vectorizer_and_storer.ipynb b/notebooks/google_drive_web_vtt_vectorizer_and_storer.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..579983d2ff6665deb94b30628898d5d0e07d529e --- /dev/null +++ b/notebooks/google_drive_web_vtt_vectorizer_and_storer.ipynb @@ -0,0 +1,585 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Google Drive WebVTT Vectorizer and Storer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-19 19:21:27.333\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.core.config\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m14\u001b[0m - \u001b[34m\u001b[1mCreated Settings\u001b[0m\n", + "\u001b[32m2025-04-19 19:21:27.334\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.core.config\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m14\u001b[0m - \u001b[34m\u001b[1mCreated Settings\u001b[0m\n", + "\u001b[32m2025-04-19 19:21:27.337\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.google_drive_service\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m42\u001b[0m - \u001b[34m\u001b[1mCreated GoogleDriveService\u001b[0m\n", + "\u001b[32m2025-04-19 19:21:27.361\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.embeddings_model_service\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m22\u001b[0m - 
\u001b[34m\u001b[1mCreated EmbeddingsModelService\u001b[0m\n", + "\u001b[32m2025-04-19 19:21:27.362\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vectorization_service\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m22\u001b[0m - \u001b[34m\u001b[1mCreated VectorizationService\u001b[0m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-19 19:21:27.364\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36minit\u001b[0m:\u001b[36m175\u001b[0m - \u001b[1mInitializing MongoDB connection for database: ctp_slack_bot\u001b[0m\n", + "\u001b[32m2025-04-19 19:21:27.364\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m26\u001b[0m - \u001b[34m\u001b[1mCreated MongoDB\u001b[0m\n", + "\u001b[32m2025-04-19 19:21:27.364\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mconnect\u001b[0m:\u001b[36m32\u001b[0m - \u001b[34m\u001b[1mConnecting to MongoDB using URI: mongodb+srv://ctp-slack-bot.xkipuvm.mongodb.net/?retryWrites=true&w=majority&appName=ctp-slack-bot\u001b[0m\n", + "\u001b[32m2025-04-19 19:21:27.365\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mconnect\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mMongoDB client initialized for database: ctp_slack_bot\u001b[0m\n", + "\u001b[32m2025-04-19 19:21:27.825\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n", + "\u001b[32m2025-04-19 19:21:27.825\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36m_test_connection\u001b[0m:\u001b[36m186\u001b[0m - \u001b[1mMongoDB connection test successful!\u001b[0m\n", + "\u001b[32m2025-04-19 
19:21:27.825\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m21\u001b[0m - \u001b[34m\u001b[1mCreated VectorDatabaseService\u001b[0m\n" + ] + } + ], + "source": [ + "from datetime import datetime\n", + "from functools import partial\n", + "from html import escape\n", + "from IPython.display import display_html\n", + "from itertools import chain\n", + "from textwrap import wrap\n", + "from zoneinfo import ZoneInfo\n", + "\n", + "from ctp_slack_bot.containers import Container\n", + "from ctp_slack_bot.models import WebVTTContent\n", + "\n", + "display_html = partial(display_html, raw=True)\n", + "\n", + "container = Container()\n", + "google_drive_service = container.google_drive_service()\n", + "vectorization_service = container.vectorization_service()\n", + "vector_database_service = container.vector_database_service()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Configuration\n", + "\n", + "āš ļø Configure before running the code to avoid processing the wrong file type or re-uploading past files which were already uploaded." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "MIME_TYPE = \"text/vtt\" # This should probably not be changed.\n", + "\n", + "MODIFICATION_TIME_CUTOFF = datetime(2024, 8, 30, tzinfo=ZoneInfo(\"UTC\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Upload" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

Found 7 files/folders.

" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "

7 files/folders pass the modification time (2024-08-30 00:00:00+00:00) cut-off.

" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "

7 files/folders pass the modification time (2024-08-30 00:00:00+00:00) cut-off and MIME type (text/vtt) criterion.

" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "item_metadata = google_drive_service.list_directory(\"\")\n", + "display_html(f\"

Found {len(item_metadata)} files/folders.

\")\n", + "display_html(\"\".join(chain(\"\")))\n", + "\n", + "recent_metadata = tuple(filter(lambda metadata: MODIFICATION_TIME_CUTOFF <= metadata.modified_time, item_metadata))\n", + "display_html(f\"

{len(recent_metadata)} files/folders pass the modification time ({MODIFICATION_TIME_CUTOFF}) cut-off.

\")\n", + "display_html(\"\".join(chain(\"\")))\n", + "\n", + "metadata_to_process = tuple(filter(lambda metadata: metadata.mime_type == MIME_TYPE, recent_metadata))\n", + "display_html(f\"

{len(metadata_to_process)} files/folders pass the modification time ({MODIFICATION_TIME_CUTOFF}) cut-off and MIME type ({MIME_TYPE}) criterion.

\")\n", + "display_html(\"\".join(chain(\"\")))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Processed 7 files." + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "web_vtts = tuple(WebVTTContent.from_bytes(f\"googledrive:{metadata.folder_path}/{metadata.name}\",\n", + " {\n", + " \"filename\": metadata.name,\n", + " \"mimeType\": metadata.mime_type,\n", + " \"modificationTime\": metadata.modified_time\n", + " },\n", + " google_drive_service.read_file_by_id(metadata.id))\n", + " for metadata\n", + " in metadata_to_process)\n", + "\n", + "display_html(f\"Processed {len(web_vtts)} files.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Chunked Week-03-Analytics-Friday-2024-09-13.cc.vtt into 496 chunks." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-19 19:21:37.826\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.embeddings_model_service\u001b[0m:\u001b[36mget_embeddings\u001b[0m:\u001b[36m36\u001b[0m - \u001b[34m\u001b[1mCreating embeddings for 496 text string(s)…\u001b[0m\n" + ] + }, + { + "data": { + "text/html": [ + "Vectorized Week-03-Analytics-Friday-2024-09-13.cc.vtt’s 496 chunks." 
+ ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-19 19:21:42.297\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m38\u001b[0m - \u001b[34m\u001b[1mGetting vectors collection for storing 496 chunks\u001b[0m\n", + "\u001b[32m2025-04-19 19:21:42.319\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n", + "\u001b[32m2025-04-19 19:21:42.320\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n", + "\u001b[32m2025-04-19 19:21:42.340\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n", + "\u001b[32m2025-04-19 19:21:42.341\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m42\u001b[0m - \u001b[34m\u001b[1mCreating vector search index for vectors collection\u001b[0m\n", + "\u001b[32m2025-04-19 19:21:42.360\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n", + "\u001b[32m2025-04-19 19:21:42.360\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n", + "\u001b[32m2025-04-19 
19:21:42.380\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n", + "\u001b[32m2025-04-19 19:21:42.500\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mcreate_indexes\u001b[0m:\u001b[36m153\u001b[0m - \u001b[1mVector search index 'vectors_vector_index' created for collection vectors.\u001b[0m\n", + "\u001b[32m2025-04-19 19:21:42.505\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m62\u001b[0m - \u001b[34m\u001b[1mInserting 496 documents into vectors collection\u001b[0m\n", + "\u001b[32m2025-04-19 19:21:48.862\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m64\u001b[0m - \u001b[1mStored 496 vector chunks in database\u001b[0m\n" + ] + }, + { + "data": { + "text/html": [ + "Stored Week-03-Analytics-Friday-2024-09-13.cc.vtt’s 496 vectorized chunks to the database." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Chunked Week-07-Regressors-via-Linear-Regression-Friday-2024-10-18.transcript.vtt into 321 chunks." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-19 19:21:48.866\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.embeddings_model_service\u001b[0m:\u001b[36mget_embeddings\u001b[0m:\u001b[36m36\u001b[0m - \u001b[34m\u001b[1mCreating embeddings for 321 text string(s)…\u001b[0m\n" + ] + }, + { + "data": { + "text/html": [ + "Vectorized Week-07-Regressors-via-Linear-Regression-Friday-2024-10-18.transcript.vtt’s 321 chunks." 
+ ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-19 19:21:52.629\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m38\u001b[0m - \u001b[34m\u001b[1mGetting vectors collection for storing 321 chunks\u001b[0m\n", + "\u001b[32m2025-04-19 19:21:52.652\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n", + "\u001b[32m2025-04-19 19:21:52.652\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n", + "\u001b[32m2025-04-19 19:21:52.671\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n", + "\u001b[32m2025-04-19 19:21:52.672\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m42\u001b[0m - \u001b[34m\u001b[1mCreating vector search index for vectors collection\u001b[0m\n", + "\u001b[32m2025-04-19 19:21:52.691\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n", + "\u001b[32m2025-04-19 19:21:52.691\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n", + "\u001b[32m2025-04-19 
19:21:52.712\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n", + "\u001b[32m2025-04-19 19:21:52.829\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mcreate_indexes\u001b[0m:\u001b[36m153\u001b[0m - \u001b[1mVector search index 'vectors_vector_index' created for collection vectors.\u001b[0m\n", + "\u001b[32m2025-04-19 19:21:52.831\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m62\u001b[0m - \u001b[34m\u001b[1mInserting 321 documents into vectors collection\u001b[0m\n", + "\u001b[32m2025-04-19 19:21:58.227\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m64\u001b[0m - \u001b[1mStored 321 vector chunks in database\u001b[0m\n" + ] + }, + { + "data": { + "text/html": [ + "Stored Week-07-Regressors-via-Linear-Regression-Friday-2024-10-18.transcript.vtt’s 321 vectorized chunks to the database." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Chunked Week-06-Classifiers-via-Logistic-Regression-Friday-2024-10-11.transcript.vtt into 337 chunks." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-19 19:21:58.231\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.embeddings_model_service\u001b[0m:\u001b[36mget_embeddings\u001b[0m:\u001b[36m36\u001b[0m - \u001b[34m\u001b[1mCreating embeddings for 337 text string(s)…\u001b[0m\n" + ] + }, + { + "data": { + "text/html": [ + "Vectorized Week-06-Classifiers-via-Logistic-Regression-Friday-2024-10-11.transcript.vtt’s 337 chunks." 
+ ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-19 19:22:02.126\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m38\u001b[0m - \u001b[34m\u001b[1mGetting vectors collection for storing 337 chunks\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:02.147\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:02.147\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:02.167\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:02.167\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m42\u001b[0m - \u001b[34m\u001b[1mCreating vector search index for vectors collection\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:02.186\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:02.187\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n", + "\u001b[32m2025-04-19 
19:22:02.207\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:02.352\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mcreate_indexes\u001b[0m:\u001b[36m153\u001b[0m - \u001b[1mVector search index 'vectors_vector_index' created for collection vectors.\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:02.354\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m62\u001b[0m - \u001b[34m\u001b[1mInserting 337 documents into vectors collection\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:08.520\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m64\u001b[0m - \u001b[1mStored 337 vector chunks in database\u001b[0m\n" + ] + }, + { + "data": { + "text/html": [ + "Stored Week-06-Classifiers-via-Logistic-Regression-Friday-2024-10-11.transcript.vtt’s 337 vectorized chunks to the database." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Chunked Week-09-AI-Part-1-Neural-Networks-Intro-to-HuggingFace-Friday-2024-11-01.cc.vtt into 341 chunks." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-19 19:22:08.524\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.embeddings_model_service\u001b[0m:\u001b[36mget_embeddings\u001b[0m:\u001b[36m36\u001b[0m - \u001b[34m\u001b[1mCreating embeddings for 341 text string(s)…\u001b[0m\n" + ] + }, + { + "data": { + "text/html": [ + "Vectorized Week-09-AI-Part-1-Neural-Networks-Intro-to-HuggingFace-Friday-2024-11-01.cc.vtt’s 341 chunks." 
+ ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-19 19:22:12.675\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m38\u001b[0m - \u001b[34m\u001b[1mGetting vectors collection for storing 341 chunks\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:12.712\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:12.712\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:12.731\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:12.731\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m42\u001b[0m - \u001b[34m\u001b[1mCreating vector search index for vectors collection\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:12.750\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:12.751\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n", + "\u001b[32m2025-04-19 
19:22:12.773\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:12.924\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mcreate_indexes\u001b[0m:\u001b[36m153\u001b[0m - \u001b[1mVector search index 'vectors_vector_index' created for collection vectors.\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:12.926\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m62\u001b[0m - \u001b[34m\u001b[1mInserting 341 documents into vectors collection\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:18.356\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m64\u001b[0m - \u001b[1mStored 341 vector chunks in database\u001b[0m\n" + ] + }, + { + "data": { + "text/html": [ + "Stored Week-09-AI-Part-1-Neural-Networks-Intro-to-HuggingFace-Friday-2024-11-01.cc.vtt’s 341 vectorized chunks to the database." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Chunked Week-08-Decision-Trees-Random-Forest-Tuesday-2024-10-22.cc.vtt into 378 chunks." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-19 19:22:18.360\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.embeddings_model_service\u001b[0m:\u001b[36mget_embeddings\u001b[0m:\u001b[36m36\u001b[0m - \u001b[34m\u001b[1mCreating embeddings for 378 text string(s)…\u001b[0m\n" + ] + }, + { + "data": { + "text/html": [ + "Vectorized Week-08-Decision-Trees-Random-Forest-Tuesday-2024-10-22.cc.vtt’s 378 chunks." 
+ ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-19 19:22:21.808\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m38\u001b[0m - \u001b[34m\u001b[1mGetting vectors collection for storing 378 chunks\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:21.841\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:21.841\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:21.873\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:21.874\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m42\u001b[0m - \u001b[34m\u001b[1mCreating vector search index for vectors collection\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:21.894\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:21.894\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n", + "\u001b[32m2025-04-19 
19:22:21.914\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:22.029\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mcreate_indexes\u001b[0m:\u001b[36m153\u001b[0m - \u001b[1mVector search index 'vectors_vector_index' created for collection vectors.\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:22.035\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m62\u001b[0m - \u001b[34m\u001b[1mInserting 378 documents into vectors collection\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:28.108\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m64\u001b[0m - \u001b[1mStored 378 vector chunks in database\u001b[0m\n" + ] + }, + { + "data": { + "text/html": [ + "Stored Week-08-Decision-Trees-Random-Forest-Tuesday-2024-10-22.cc.vtt’s 378 vectorized chunks to the database." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Chunked Week-02-Finding-Cleaning-Data-Friday-2024-09-06.vtt into 680 chunks." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-19 19:22:28.113\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.embeddings_model_service\u001b[0m:\u001b[36mget_embeddings\u001b[0m:\u001b[36m36\u001b[0m - \u001b[34m\u001b[1mCreating embeddings for 680 text string(s)…\u001b[0m\n" + ] + }, + { + "data": { + "text/html": [ + "Vectorized Week-02-Finding-Cleaning-Data-Friday-2024-09-06.vtt’s 680 chunks." 
+ ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-19 19:22:34.652\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m38\u001b[0m - \u001b[34m\u001b[1mGetting vectors collection for storing 680 chunks\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:34.671\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:34.671\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:34.705\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:34.705\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m42\u001b[0m - \u001b[34m\u001b[1mCreating vector search index for vectors collection\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:34.720\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:34.720\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n", + "\u001b[32m2025-04-19 
19:22:34.740\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:34.859\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mcreate_indexes\u001b[0m:\u001b[36m153\u001b[0m - \u001b[1mVector search index 'vectors_vector_index' created for collection vectors.\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:34.866\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m62\u001b[0m - \u001b[34m\u001b[1mInserting 680 documents into vectors collection\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:43.431\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m64\u001b[0m - \u001b[1mStored 680 vector chunks in database\u001b[0m\n" + ] + }, + { + "data": { + "text/html": [ + "Stored Week-02-Finding-Cleaning-Data-Friday-2024-09-06.vtt’s 680 vectorized chunks to the database." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Chunked Week-01-Setup-Pandas-Friday-2024-08-30.vtt into 742 chunks." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-19 19:22:43.438\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.embeddings_model_service\u001b[0m:\u001b[36mget_embeddings\u001b[0m:\u001b[36m36\u001b[0m - \u001b[34m\u001b[1mCreating embeddings for 742 text string(s)…\u001b[0m\n" + ] + }, + { + "data": { + "text/html": [ + "Vectorized Week-01-Setup-Pandas-Friday-2024-08-30.vtt’s 742 chunks." 
+ ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2025-04-19 19:22:50.402\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m38\u001b[0m - \u001b[34m\u001b[1mGetting vectors collection for storing 742 chunks\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:50.426\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:50.426\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:50.452\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:50.452\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m42\u001b[0m - \u001b[34m\u001b[1mCreating vector search index for vectors collection\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:50.475\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mping\u001b[0m:\u001b[36m85\u001b[0m - \u001b[34m\u001b[1mMongoDB connection is active!\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:50.475\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m106\u001b[0m - \u001b[34m\u001b[1mChecking if collection 'vectors' exists…\u001b[0m\n", + "\u001b[32m2025-04-19 
19:22:50.508\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mget_collection\u001b[0m:\u001b[36m115\u001b[0m - \u001b[34m\u001b[1mCollection 'vectors' already exists!\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:50.617\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.db.mongo_db\u001b[0m:\u001b[36mcreate_indexes\u001b[0m:\u001b[36m153\u001b[0m - \u001b[1mVector search index 'vectors_vector_index' created for collection vectors.\u001b[0m\n", + "\u001b[32m2025-04-19 19:22:50.626\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m62\u001b[0m - \u001b[34m\u001b[1mInserting 742 documents into vectors collection\u001b[0m\n", + "\u001b[32m2025-04-19 19:23:01.166\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mctp_slack_bot.services.vector_database_service\u001b[0m:\u001b[36mstore\u001b[0m:\u001b[36m64\u001b[0m - \u001b[1mStored 742 vector chunks in database\u001b[0m\n" + ] + }, + { + "data": { + "text/html": [ + "Stored Week-01-Setup-Pandas-Friday-2024-08-30.vtt’s 742 vectorized chunks to the database." 
+ ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "for web_vtt in web_vtts:\n", + " chunks = web_vtt.get_chunks()\n", + " display_html(f\"Chunked {web_vtt.get_metadata().get(\"filename\")} into {len(chunks)} chunks.\")\n", + " vectorized_chunks = vectorization_service.vectorize(chunks)\n", + " display_html(f\"Vectorized {web_vtt.get_metadata().get(\"filename\")}’s {len(vectorized_chunks)} chunks.\")\n", + " await (await vector_database_service).store(vectorized_chunks)\n", + " display_html(f\"Stored {web_vtt.get_metadata().get(\"filename\")}’s {len(vectorized_chunks)} vectorized chunks to the database.\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/web_vtt.ipynb b/notebooks/web_vtt.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..12a005c3fd4b78b38bce6acf2fd9992f6141a227 --- /dev/null +++ b/notebooks/web_vtt.ipynb @@ -0,0 +1,355 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# WebVTT Reading and Chunking Test" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Pure `webvtt-py` as Proof-of-concept" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import datetime, timedelta\n", + "from functools import partial\n", + "from html import escape\n", + "from io import BytesIO\n", + "from IPython.display import display_html\n", + "from itertools import chain\n", + "import re\n", + "from webvtt import Caption, WebVTT\n", + "from webvtt.models import Timestamp\n", + "from zoneinfo 
import ZoneInfo\n", + "\n", + "display_html = partial(display_html, raw=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "FILE_PATH = \"GMT20250411-223535_Recording.transcript.vtt\"\n", + "TIME_ZONE = ZoneInfo(\"America/New_York\")\n", + "BASE_TIME = datetime(2025, 4, 11, hour=22, minute=35, second=35, tzinfo=ZoneInfo(\"GMT\")).astimezone(TIME_ZONE)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "with open(FILE_PATH, \"rb\") as file:\n", + " web_vtt = WebVTT.from_buffer(BytesIO(file.read()))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "display_html(\"\".join(chain(\"\")))" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " Caption #344\n", + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "speaker_speech_pattern = re.compile(\"(?:([^:]+): )?(.*)\")\n", + "\n", + "match web_vtt.captions[343]:\n", + " case Caption(identifier=identifier, start_time=start_time, end_time=end_time, text=text):\n", + " match speaker_speech_pattern.search(text).groups():\n", + " case (speaker, speech):\n", + " display_html(f\"\"\"\n", + " Caption #{identifier}\n", + " \n", + " \"\"\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Chunking\n", + "\n", + "In order for chunking to produce bits with useful context, we must not only use the caption (frame) itself, but bundle it with its surrounding frames (before and after messages)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "from more_itertools import windowed" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "CHUNK_FRAMES_OVERLAP = 1\n", + "CHUNK_FRAMES_WINDOW = 5" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
A
B
C
D
E
F
G
H
I
J
K
L
M
N
O
P
Q
R
S
T
U
V
W
X
Y
Z
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "items = tuple(chr(code_point) for code_point in range(ord('A'), ord('[')))\n", + "display_html(f\"{\"\".join(map(\"\".format, items))}
{}
\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
ABCDE
EFGHI
IJKLM
MNOPQ
QRSTU
UVWXY
YZ
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "chunks = tuple(windowed(items, CHUNK_FRAMES_WINDOW, step=(CHUNK_FRAMES_WINDOW - CHUNK_FRAMES_OVERLAP)))\n", + "display_html(f\"{\"\".join(f\"{\"\".join(f\"\" for item in chunk)}\" for chunk in chunks)}
{item if item else \"\"}
\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Using the `WebVTTFile` Class" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import datetime\n", + "from hashlib import sha256\n", + "from zoneinfo import ZoneInfo\n", + "\n", + "from ctp_slack_bot.models import WebVTTContent" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "FILE_PATH = \"GMT20250411-223535_Recording.transcript.vtt\"\n", + "TIME_ZONE = ZoneInfo(\"America/New_York\")\n", + "MODIFICATION_TIME = datetime(2025, 4, 11, hour=22, minute=35, second=35, tzinfo=ZoneInfo(\"GMT\")).astimezone(TIME_ZONE)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "with open(FILE_PATH, \"rb\") as file:\n", + " bytes = file.read()\n", + " web_vtt_content = WebVTTContent.from_bytes(sha256(bytes).hexdigest(), {\"modification_time\": MODIFICATION_TIME}, bytes)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(Chunk(text=\"iyeshia: For the workshop. We want to set you up.\\n\\niyeshia: Thank you, Kevin, for a question. We want to set you up for success in year one. And so this workshop is to help you kind of like\\n\\niyeshia: figure out, or how to adjust, as you're coming into your careers what to expect like your 30 days of work, 60 days of work, 90 days of work when you are starting your full time roles. So with that, said, let us get started.\\n\\niyeshia: So the topic, of course, is going to be discussing things of like the onboarding process of what it looks like when you start your jobs. How to maneuver or move around in your workplace environments. We'll discuss negotiating raises, because last time we didn't negotiating offers. So now we pass that you already got the offer. 
So now we'd be at the\\n\\niyeshia: the race card after that year. Don't try to come into your job already. 5 days in somebody to raise. Wait, and then from there we'll do activity on asking for feedback when you have, like your supervisor or manager, and you want to discuss things like that.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='1-5', metadata={'start': datetime.timedelta(0), 'end': datetime.timedelta(seconds=60, microseconds=379000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: the race card after that year. Don't try to come into your job already. 5 days in somebody to raise. Wait, and then from there we'll do activity on asking for feedback when you have, like your supervisor or manager, and you want to discuss things like that.\\n\\niyeshia: So let's kick it off with the onboarding process.\\n\\niyeshia: So with this, what you can expect ideally when you start your your job. There could be some type of welcome package. They might have a folder. They might have an email electronically or things like that. But it's gonna describe the details of like the company's environment. What your 1st day, or your 1st week or 1st month, a couple of months, might look like. As you're starting your onboarding process and the paperwork they might even show with you on the 1st day\\n\\niyeshia: work. You might be paired up with a Buddy or other people who might be hired at the same day, or maybe someone who was hired a year before, and they might be shadowing you to help you join and to get comfortable with your work environment.\\n\\niyeshia: and then also, your manager will. Hopefully, our supervisor would let you know what to expect. 
As you're starting your new\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='5-9', metadata={'start': datetime.timedelta(seconds=45, microseconds=930000), 'end': datetime.timedelta(seconds=108, microseconds=640000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: and then also, your manager will. Hopefully, our supervisor would let you know what to expect. As you're starting your new\\n\\niyeshia: job or career, and then from there, if you're unsure about your onboarding process as you're starting off, please ask questions to your manager or supervisor. The best part is to ask as many questions as you can. You're new, you're learning. They understand that. So they want to hear from you and your input\\n\\niyeshia: from there, I would say, I'm just looking at the\\n\\niyeshia: the chat. Yes, prepare for a lot of paperwork. Yes, I mean W. 2 W. Fours. They might have you fill out all those things. And that was 2. Okay, all right, Kevin.\\n\\niyeshia: So from there we'll kick it off. So an idea of what that could look like for you from 30 days to 60 days to 90 days to infinity and beyond like buzz light year, but from there you would hopefully to have intros with your your team, your manager, different departments. When you're starting\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='9-13', metadata={'start': datetime.timedelta(seconds=102, microseconds=82000), 'end': datetime.timedelta(seconds=166, microseconds=199000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: So from there we'll kick it off. So an idea of what that could look like for you from 30 days to 60 days to 90 days to infinity and beyond like buzz light year, but from there you would hopefully to have intros with your your team, your manager, different departments. When you're starting\\n\\niyeshia: they'll go over etiquette with you of like what you can expect. 
At the job that can include your attire, your desk hygiene communication, checking in with managers or teams.\\n\\niyeshia: Once you, after the 30 days we get to maybe days, 60 days, and then you're able to develop like your needs. Gain a better understanding of the company, develop plans and deliverables and outcomes. And then you go into your 90 days of being on the job where you're kind of learning your role. You're kind of getting adjust, you're being more effective and being becoming more independent.\\n\\niyeshia: And then from there you be able to understand, like, after the 90 days that you're kind of like settled in maybe months 4 to 6, or maybe the whole year. You should be settled into your role, understanding what's going on understanding how different departments move and things like that. So this is just the overview of what that looks like. It's not necessarily concrete, because every job is different.\\n\\niyeshia: But this is just to give an idea of what you can expect of that. And please just be mindful like with every workshop. I'm definitely going to send you the Powerpoint at the end. So if you want to look over that on your own time, you definitely can.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='13-17', metadata={'start': datetime.timedelta(seconds=147, microseconds=8000), 'end': datetime.timedelta(seconds=233, microseconds=730000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: But this is just to give an idea of what you can expect of that. And please just be mindful like with every workshop. I'm definitely going to send you the Powerpoint at the end. So if you want to look over that on your own time, you definitely can.\\n\\niyeshia: And so now that we've got through the onboarding process, this is probably the quickest we've done onboarding process because Kevin did it in 2 weeks. 
So from there we are going to move to navigating the workplace environment.\\n\\niyeshia: And so with that said, some things that are really important in your workplace environment is building relationships. Whether that's with your peers, your colleagues. Your manager. Trying to have a mentor mentee connection. All relationships are important.\\n\\niyeshia: With that I would say that when it comes to identifying your relationship needs, you want to know what you're expecting like, what? How do you need to show up in your role. What do you need from others? Understanding those type of things can help build better, I would say. Connections with your teammates and things of that nature when it's time to like cover problems or solve projects and things like that.\\n\\niyeshia: Another thing, too, you want to focus on is your Eiq. Emotional intelligence and communication that is basically pretty much helpful on the ability of recognizing your own emotions. Are you adequate enough, or know where your emotions are where you can get things done, what you need, what you don't need? Can you articulate that to your employer when you know those you can be able to identify and handle your emotions.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='17-21', metadata={'start': datetime.timedelta(seconds=220, microseconds=406000), 'end': datetime.timedelta(seconds=315, microseconds=170000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: Another thing, too, you want to focus on is your Eiq. Emotional intelligence and communication that is basically pretty much helpful on the ability of recognizing your own emotions. Are you adequate enough, or know where your emotions are where you can get things done, what you need, what you don't need? 
Can you articulate that to your employer when you know those you can be able to identify and handle your emotions.\\n\\niyeshia: And you can add basically help also to learn how to understand and help others. As well.\\n\\niyeshia: Another thing, as far as building relationships goes, is practicing, mindful listening. So the best way to truly listen is to talk less, and of course to understand more. And so when you learn from your teammates, listen as much as you can gain as much knowledge as you can from others, and that's gonna help you kinda conduct, or, you know, be a better team player. In your work environment.\\n\\niyeshia: And then a few things that you can do is\\n\\niyeshia: another way to help build a relationship is manager boundaries, you know, saying what is for you, scheduling time? With colleagues trying not to go over certain tasks or assignments. So that time management is gonna definitely help when you want to focus on your boundaries and you want to set schedules to maybe build connections with your team, and these are ways that you can go about it. Introduce yourself to people, whether your peers, whether it's\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='21-25', metadata={'start': datetime.timedelta(seconds=288, microseconds=600000), 'end': datetime.timedelta(seconds=376, microseconds=110000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: another way to help build a relationship is manager boundaries, you know, saying what is for you, scheduling time? With colleagues trying not to go over certain tasks or assignments. So that time management is gonna definitely help when you want to focus on your boundaries and you want to set schedules to maybe build connections with your team, and these are ways that you can go about it. Introduce yourself to people, whether your peers, whether it's\\n\\niyeshia: I don't care if it's a janitor security. 
The Cfo treat everybody equal and the same. And get to know. Get to know people because you just never know when you're going to need someone or work with someone. During that time.\\n\\niyeshia: And so those are the ways you can go about it. Greet people. You can invite people to coffee breaks, do quick message, check-in, and things of that nature, and then from there the 6 or 7 1, i think, are really important in the workplace environment. Some of the things you want to do is show gratitude, embrace others, give.\\n\\niyeshia: you know, credit where credit is due. Don't try to take anybody's ideas. If it comes to projects and things like that, that is a serious no-no show gratitude, and by any means necessary, try to avoid any gossip, any issues with office politics stay out of it. This is your first.st\\n\\niyeshia: This might be your 1st real like role, as far as like full time. In your career. So you just want to make sure you just keep in the peace and be respectful from there. Gossiping is kind of a big deal and a big no-no as well. So just be mindful of that.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='25-29', metadata={'start': datetime.timedelta(seconds=351, microseconds=10000), 'end': datetime.timedelta(seconds=438, microseconds=590000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: This might be your 1st real like role, as far as like full time. In your career. So you just want to make sure you just keep in the peace and be respectful from there. Gossiping is kind of a big deal and a big no-no as well. So just be mindful of that.\\n\\niyeshia: So the next thing, as far as we're talking about building relationship goals, you definitely want to also build those relationships, as I stated, with your peers. And things like that. Your coworkers? But you want to make sure you build a relationship with your manager. 
And just remember that it's important to have a relationship with your manager. But that's not the only relationship that's like you should focus on, you know. Like, I said before, you want to be a team play. You want to treat everybody equally because you just never know who you connect with.\\n\\niyeshia: But when it comes to that manager time, or asking for I would say, supervisions or meetings with them. You can ask questions. Those are always encouraged. You can ask them about their you know, supervisor style. Are they transformative? Are they hands on?\\n\\niyeshia: Do they like feedback directly towards them? Is everything written email? How are they? What's their work? Style? You can even ask them for the expectations of what is this like in a role like, what are your expectations, as far as how you show up in your role to them? And what are they looking for like with the measurements of success. Of course we always tell fellows to document everything that you do, as far as like when it comes to any goals that you bring any success.\\n\\niyeshia: rate, that you have many tasks that you might have brought to the table any of your accomplishments I know some people carry, or they write down like a accomplishment form of all the things that they've done, which, while they were at work to help with the ideas of what they bring to the table when it's time to come up for that, raise negotiation process. 
So just make sure you also update your resume as we go along, too.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='29-33', metadata={'start': datetime.timedelta(seconds=424, microseconds=830000), 'end': datetime.timedelta(seconds=536, microseconds=219000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: rate, that you have many tasks that you might have brought to the table any of your accomplishments I know some people carry, or they write down like a accomplishment form of all the things that they've done, which, while they were at work to help with the ideas of what they bring to the table when it's time to come up for that, raise negotiation process. So just make sure you also update your resume as we go along, too.\\n\\niyeshia: and then to talk with your manager about not only your successes and what you accomplish, but maybe areas of where you can grow and what you've been struggling to focus on so they can help support you with that as well.\\n\\niyeshia: Be observant in meetings when you're meeting with your team and other people. So that way you could learn about what else is going on, or whatever what everybody else is doing. So you can see how things work together. If you want to connect and socialize, you can ask people to lunch or coffee chats and things like that, and then always just remain proactive. You know it's always a good gesture to ask for teammate. It's like, Hey, is there anything you need before you know the end of the day? Or before I'm about to leave. You know things like that. It's always\\n\\niyeshia: helpful, too, because you never know when it's like your time, and someone is asking or offering help to you. And you're like, Oh, yeah, definitely need help with this. 
So it's always great to return their favor.\\n\\niyeshia: And so\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='33-37', metadata={'start': datetime.timedelta(seconds=511, microseconds=850000), 'end': datetime.timedelta(seconds=589, microseconds=330000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: And so\\n\\niyeshia: from there I would say, overall in regards of meeting with your supervisor, depending on how they do it. It could be quarterly it could be every other month. It could be 3 times throughout the year. They have a performance review. And so some companies like to start with, maybe January, you start, or maybe June, you started\\n\\niyeshia: working with them, and you track goals and what you could accomplish. With your manager until, like the next meeting, you have to go over just to make sure that you're on track with your goals throughout the throughout the year, as you've been working with your with your company.\\n\\niyeshia: That you got hired by, and so sometimes they'll do like a mid year review report to see your progress. If there's any touch points they could assist you with or support you with. You can meet with them with one on one meetings. If you feel like that's too long, and you want to make suggestions to meet with them sooner. Maybe you want to do every 3 months\\n\\niyeshia: just to see what's going on and how you can stay on track, and so I would say. Performance reviews, I guess, could be nerve wracking if it's like your 1st time, because you don't know what to expect.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='37-41', metadata={'start': datetime.timedelta(seconds=587, microseconds=800000), 'end': datetime.timedelta(seconds=654, microseconds=640000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: just to see what's going on and how you can stay on track, and so I would say. 
Performance reviews, I guess, could be nerve wracking if it's like your 1st time, because you don't know what to expect.\\n\\niyeshia: but of course you'll get used to it. As it progresses. But then, of course, you're still maintaining those connections with your supervisor, so you can definitely ask them questions of what you can expect from a performance review and things like that.\\n\\niyeshia: I'll pause here. If anybody has any questions about anything that I've mentioned. Anything like that?\\n\\niyeshia: Any questions? Are we all good.\\n\\nCUNY Tech Prep (CTP): Now's your chance before you forget what you wanted to ask.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='41-45', metadata={'start': datetime.timedelta(seconds=645, microseconds=172000), 'end': datetime.timedelta(seconds=682, microseconds=250000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'iyeshia'})}),\n", + " Chunk(text=\"CUNY Tech Prep (CTP): Now's your chance before you forget what you wanted to ask.\\n\\nCUNY Tech Prep (CTP): No takers.\\n\\nCUNY Tech Prep (CTP): I have a few comments.\\n\\niyeshia: You want to go ahead, Kevin.\\n\\nCUNY Tech Prep (CTP): Well, self, I see self document as also having a secondary goal, particularly if you find yourself in\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='45-49', metadata={'start': datetime.timedelta(seconds=678, microseconds=110000), 'end': datetime.timedelta(seconds=700, microseconds=910000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'iyeshia'})}),\n", + " Chunk(text=\"CUNY Tech Prep (CTP): Well, self, I see self document as also having a secondary goal, particularly if you find yourself in\\n\\nCUNY Tech Prep (CTP): not such a nice work environment.\\n\\nCUNY Tech Prep (CTP): It helps prevent people from gaslighting. You, for example.\\n\\nCUNY Tech Prep (CTP): And like it keeps you out of trouble. 
Let's say cause if you self document, then\\n\\nCUNY Tech Prep (CTP): you know exactly what was decided on.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='49-53', metadata={'start': datetime.timedelta(seconds=693, microseconds=509000), 'end': datetime.timedelta(seconds=720, microseconds=809000), 'speakers': frozenset({'CUNY Tech Prep (CTP)'})}),\n", + " Chunk(text=\"CUNY Tech Prep (CTP): you know exactly what was decided on.\\n\\nCUNY Tech Prep (CTP): And you're just following exactly what was said.\\n\\niyeshia: That is correct.\\n\\nCUNY Tech Prep (CTP): And then the setting boundaries right.\\n\\nCUNY Tech Prep (CTP): and there are some. There are some\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='53-57', metadata={'start': datetime.timedelta(seconds=717, microseconds=970000), 'end': datetime.timedelta(seconds=732, microseconds=590000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'iyeshia'})}),\n", + " Chunk(text=\"CUNY Tech Prep (CTP): and there are some. There are some\\n\\nCUNY Tech Prep (CTP): bosses who will push your boundaries. Try to get you to like\\n\\nCUNY Tech Prep (CTP): do overtime. Stay longer than like\\n\\nCUNY Tech Prep (CTP): your stay longer than what's on like the contract, or whatever.\\n\\nCUNY Tech Prep (CTP): If you give an inch sometimes they'll take a mile, so\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='57-61', metadata={'start': datetime.timedelta(seconds=729, microseconds=400000), 'end': datetime.timedelta(seconds=749, microseconds=960000), 'speakers': frozenset({'CUNY Tech Prep (CTP)'})}),\n", + " Chunk(text=\"CUNY Tech Prep (CTP): If you give an inch sometimes they'll take a mile, so\\n\\nCUNY Tech Prep (CTP): you should be very clear on\\n\\nCUNY Tech Prep (CTP): your time. 
Your time limits, like.\\n\\nCUNY Tech Prep (CTP): you know, have always have an out, for\\n\\nCUNY Tech Prep (CTP): when too much is being requested of you.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='61-65', metadata={'start': datetime.timedelta(seconds=745, microseconds=275000), 'end': datetime.timedelta(seconds=767, microseconds=120000), 'speakers': frozenset({'CUNY Tech Prep (CTP)'})}),\n", + " Chunk(text=\"CUNY Tech Prep (CTP): when too much is being requested of you.\\n\\nCUNY Tech Prep (CTP): My usual go to is like, Oh, I I have like I have a meeting for Ctp, or like I have class.\\n\\niyeshia: Very good. That's good to good to know. And I know. David. Put in the chat like for an example of documentation. On March 16, th at 4, 35, you said, and I quote that is, that is exactly.\\n\\nCUNY Tech Prep (CTP): Under my lap.\\n\\niyeshia: But if you're in that situation, you definitely, it's so fresh, and it's so like truthful, like someone's like, no, I'm not going to doubt that someone made that.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='65-69', metadata={'start': datetime.timedelta(seconds=764, microseconds=400000), 'end': datetime.timedelta(seconds=803, microseconds=550000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: But if you're in that situation, you definitely, it's so fresh, and it's so like truthful, like someone's like, no, I'm not going to doubt that someone made that.\\n\\nCUNY Tech Prep (CTP): Yeah.\\n\\niyeshia: We wrote that and gave them the time so absolutely documentation goals for the good and for the bad. So definitely. Thank you for sharing that Kevin and David?\\n\\niyeshia: And so with that said, We'll go on to the the next slide. Which is a question of is my manager the same as having a mentor. 
Does anybody want to come off the come off mute and say yes or no?\\n\\niyeshia: I can just call on Kyle.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='69-73', metadata={'start': datetime.timedelta(seconds=795, microseconds=400000), 'end': datetime.timedelta(seconds=831, microseconds=790000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: I can just call on Kyle.\\n\\nCUNY Tech Prep (CTP): Kyle, you there.\\n\\nKyle Schoenhardt: No, it's not.\\n\\niyeshia: Okay, let's see.\\n\\niyeshia: Yay, good job, PAL. The answer is, no.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='73-77', metadata={'start': datetime.timedelta(seconds=828, microseconds=820000), 'end': datetime.timedelta(seconds=844, microseconds=930000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'Kyle Schoenhardt', 'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: Yay, good job, PAL. The answer is, no.\\n\\niyeshia: Did you want to give more input?\\n\\nKyle Schoenhardt: Yeah. Sure.\\n\\niyeshia: Yeah.\\n\\nKyle Schoenhardt: Well, I mean, sometimes you can just have really bad managers who are there to cover their own self, make themselves look good sometimes at your expense, or they micromanage, or you just don't click well with that person. 
For whatever reason a mentor is akin to a leader, I think they are there to lift you up and show you\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='77-81', metadata={'start': datetime.timedelta(seconds=841, microseconds=340000), 'end': datetime.timedelta(seconds=869, microseconds=440000), 'speakers': frozenset({'Kyle Schoenhardt', 'iyeshia'})}),\n", + " Chunk(text=\"Kyle Schoenhardt: Well, I mean, sometimes you can just have really bad managers who are there to cover their own self, make themselves look good sometimes at your expense, or they micromanage, or you just don't click well with that person. For whatever reason a mentor is akin to a leader, I think they are there to lift you up and show you\\n\\nKyle Schoenhardt: how you can improve on yourself like a coach.\\n\\nKyle Schoenhardt: Constantly giving you feedback, whether positive or negative.\\n\\nKyle Schoenhardt: I would say someone you would\\n\\nKyle Schoenhardt: go to immediately like. If the 1st person you think of that you need help with something is not your manager, then that's\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='81-85', metadata={'start': datetime.timedelta(seconds=850, microseconds=340000), 'end': datetime.timedelta(seconds=885, microseconds=510000), 'speakers': frozenset({'Kyle Schoenhardt'})}),\n", + " Chunk(text=\"Kyle Schoenhardt: go to immediately like. If the 1st person you think of that you need help with something is not your manager, then that's\\n\\nKyle Schoenhardt: a good indicator, that that person is not a mentor, or, if you need help with something, your your 1st go to person to that you think of is\\n\\nKyle Schoenhardt: someone else that is probably who your mentor is most likely to be, could be a coworker. It could be a manager, but it's not always.\\n\\niyeshia: Got it. Thank you, Kevin. I mean. Thank you, Kyle, said Kevin. Thank you. Kyle. Appreciate that. 
With that, said, I don't feel like I need to add any more. I feel like Kyle took that. So I'm gonna move on to the day.\\n\\niyeshia: So the next question is, should my manager, be my mentor.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='85-89', metadata={'start': datetime.timedelta(seconds=879, microseconds=360000), 'end': datetime.timedelta(seconds=919, microseconds=30000), 'speakers': frozenset({'Kyle Schoenhardt', 'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: So the next question is, should my manager, be my mentor.\\n\\niyeshia: Alison.\\n\\nAllison Lee: Well, you you can't force a mentor mentee relationship if that's not how it's going to work.\\n\\nAllison Lee: But it is possible for your manager to be some kind of mentor figure.\\n\\niyeshia: Thank you.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='89-93', metadata={'start': datetime.timedelta(seconds=914, microseconds=565000), 'end': datetime.timedelta(seconds=945, microseconds=810000), 'speakers': frozenset({'iyeshia', 'Allison Lee'})}),\n", + " Chunk(text=\"iyeshia: Thank you.\\n\\niyeshia: So with that, said.\\n\\niyeshia: that depends. So I appreciate Allison. Your response. It definitely depends. Can't force them. But of course, if you do get along with your supervisor, and you want to ask them that\\n\\niyeshia: by all means. But good, answers everyone.\\n\\niyeshia: So now we go more in depth of what can good mentorship look like? And so from there I would say, mentors, as\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='93-97', metadata={'start': datetime.timedelta(seconds=944, microseconds=920000), 'end': datetime.timedelta(seconds=975, microseconds=362000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: So now we go more in depth of what can good mentorship look like? 
And so from there I would say, mentors, as\\n\\niyeshia: Kyle touched on was that they provide support, wisdom to help you succeed in certain examples are, this is pretty much sharing any ideas you might have with them from paying program with you on a code base providing feedback, maybe on a slide deck to helping you remind that it's impossible to know everything. So they're kind of reassuring you in your in your role as you're starting your career.\\n\\niyeshia: and then you want to make sure your mentor is a is a safe space for you at the time. Sometimes your mentor. You can talk to your mentor about your manager sometimes if they are difficult or not, and so from there it's a form of trust\\n\\niyeshia: with your with your mentor. So if you have, if you are blessed to have a supervisor who can be both roles, a manager and a mentor. Go for it, if you're like. I'm still learning. I'm only 3, 30 days in 60 days, 90 days. Take your time, then. So that is definitely something to to know from that.\\n\\niyeshia: And then questions of Where can I find? A mentor? And so, before I even answer this question, who can tell me what erg stands for\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='97-101', metadata={'start': datetime.timedelta(seconds=964, microseconds=630000), 'end': datetime.timedelta(seconds=1046, microseconds=430000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: And then questions of Where can I find? A mentor? And so, before I even answer this question, who can tell me what erg stands for\\n\\niyeshia: anyone?\\n\\niyeshia: Go ahead, Devon, please.\\n\\nDevin Xie (no cam): Employee resource groups.\\n\\niyeshia: Thank you so much, Devin. I appreciate you and blouse right there. Next to erg. So the examples of that can be any groups that they have at your job related to Lgbtq. It could be groups related to race and identity. It could be anything from parenthood. 
I wish they had groups related for auntiehood and things of that nature. But it's all about finding your community and resources for things to help support you while you're working\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='101-105', metadata={'start': datetime.timedelta(seconds=1035, microseconds=839000), 'end': datetime.timedelta(seconds=1085, microseconds=780000), 'speakers': frozenset({'iyeshia', 'Devin Xie (no cam)'})}),\n", + " Chunk(text=\"iyeshia: Thank you so much, Devin. I appreciate you and blouse right there. Next to erg. So the examples of that can be any groups that they have at your job related to Lgbtq. It could be groups related to race and identity. It could be anything from parenthood. I wish they had groups related for auntiehood and things of that nature. But it's all about finding your community and resources for things to help support you while you're working\\n\\niyeshia: in some of your environments. And then, when you have your community, you can always reflect on interests related to tech.\\n\\niyeshia: or maybe research on your company like, who's in your area. And you could always reach out to some people for informational interviews. If you're really trying to seek this mentor Mentee relationship from people who are at your company. So just to keep that in mind.\\n\\niyeshia: I think I saw something.\\n\\niyeshia: Auntie Hood. Yes, and then I think, Mingle, said Manager supervisors are not your friend. Their one and only job is to find a person that can get the job done. Okay, come on, now, very good. And so\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='105-109', metadata={'start': datetime.timedelta(seconds=1057, microseconds=780000), 'end': datetime.timedelta(seconds=1131, microseconds=240000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: Auntie Hood. 
Yes, and then I think, Mingle, said Manager supervisors are not your friend. Their one and only job is to find a person that can get the job done. Okay, come on, now, very good. And so\\n\\niyeshia: with that, said, I think y'all know the roles between manager and mentor, and I appreciate that.\\n\\niyeshia: So now the next part is negotiating raises. So the last workshop we did was negotiating offers, as I stated before. So this one's gonna be a little different. You got the job. So now, after that whole success in your 1st year you want to start discussing maybe time for a raise. So let's get into that.\\n\\niyeshia: So you did a great job.\\n\\niyeshia: 1st year you knocked it out. You got outcomes, you got successes. You're amazing. On the 1st year what happens now?\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='109-113', metadata={'start': datetime.timedelta(seconds=1114, microseconds=170000), 'end': datetime.timedelta(seconds=1167, microseconds=119000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: 1st year you knocked it out. You got outcomes, you got successes. You're amazing. On the 1st year what happens now?\\n\\niyeshia: Your success is going to be measured by achievements, contributions into your organization, and that could be rewarded with\\n\\niyeshia: money or something else you value that could be related to time. Things of that nature. You want to go up based off your benefits. As we stated before, in the last workshop, you might wanna negotiate that. But if you want to talk about money first.st That's okay, too.\\n\\niyeshia: And these are gonna help you, too, as well with your I would say. Manager or supervisor. 
Meetings\\n\\niyeshia: from there.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='113-117', metadata={'start': datetime.timedelta(seconds=1160, microseconds=790000), 'end': datetime.timedelta(seconds=1199, microseconds=450000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: from there.\\n\\niyeshia: So just remember that it's okay when you when you flex those negotiating offers or flex those muscles during conversations around raises. It's not bragging. If you're talking about your achievements and things like that. It's okay to to talk about your successes, you know, especially during a raise time, because you're trying to show your manager or prove what you brought to the to the table. So keep that in mind.\\n\\niyeshia: So how does it look.\\n\\nCUNY Tech Prep (CTP): Comments, sorry.\\n\\niyeshia: Yeah, that is.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='117-121', metadata={'start': datetime.timedelta(seconds=1198, microseconds=703000), 'end': datetime.timedelta(seconds=1228, microseconds=390000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'iyeshia'})}),\n", + " Chunk(text='iyeshia: Yeah, that is.\\n\\nCUNY Tech Prep (CTP): Something you would also document. If your manager praises you, you document that.\\n\\niyeshia: That.\\n\\nCUNY Tech Prep (CTP): Is evidence you can use in your negotiations.\\n\\niyeshia: That is such a fact.', parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='121-125', metadata={'start': datetime.timedelta(seconds=1227, microseconds=350000), 'end': datetime.timedelta(seconds=1240, microseconds=380000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: That is such a fact.\\n\\niyeshia: I literally just copy to paste everything, my manager said. Yep, one of my negotiation days. Yep, so thank you, Kevin, for saying that? 
So with that said, if you have those those meetings with them, document not only what you say, but what they said, as Kevin mentioned.\\n\\niyeshia: That was great in the negotiating offer. So how else do we prepare for this?\\n\\niyeshia: You're going to research? Yes, you're going to gather all your feedback, whether it's from your colleagues and meetings, whether it's from the success that you hear from your manager or tips from people that you work with, you're going to make sure you learn about your role. What's going on in the market. Just research is going to be your best.\\n\\niyeshia: Put input on this as well. When you're talking about your salary. The next thing you want to do is list the accomplishments. Keep those documents. Don't wait to the last minute you get to the end of the year. You're like, what did I do? It's been 12 months, like.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='125-129', metadata={'start': datetime.timedelta(seconds=1238, microseconds=990000), 'end': datetime.timedelta(seconds=1296, microseconds=189000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: Put input on this as well. When you're talking about your salary. The next thing you want to do is list the accomplishments. Keep those documents. Don't wait to the last minute you get to the end of the year. You're like, what did I do? It's been 12 months, like.\\n\\niyeshia: yeah, document everything, because you might forget some stuff. So that's definitely gonna help, too.\\n\\niyeshia: With that, said, you want to make sure you remind everyone. Maybe you save a bunch of money for the company. Oh, maybe you help them with other accomplishments, or maybe you spend off a project that's done really well. For your department. Share it. So please feel free to do that.\\n\\niyeshia: and then that will also help you keep your resume updated as well. 
So you don't have to worry about trying to\\n\\niyeshia: scatter or get all your thoughts together at the last minute.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='129-133', metadata={'start': datetime.timedelta(seconds=1281, microseconds=940000), 'end': datetime.timedelta(seconds=1331, microseconds=399000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: scatter or get all your thoughts together at the last minute.\\n\\niyeshia: And then with that status also, your manager needs to have the facts, too, to convince their boss to approve you for a raise. So if your manager is giving you the praises already, they're like, yeah, I did say that like\\n\\niyeshia: as well. Even if they make a joke like saying to you like, Hey, you deserve a raise document that you could go right back to like, you know. April 11th at 5, at 6 58 pm. You said, I deserve a raise this time like it. Just everything will just work for you in your favor for that, so please feel free to do that.\\n\\niyeshia: And so now you did the you did the raise. You had the meeting with your your manager. They're proposing it to the Supervisor, or things of that nature. I know different companies work in different ways, so they might have you go directly to your boss's boss to talk about the raise, or whoever is in charge of that\\n\\niyeshia: common, to negotiate that with them. But every company is different. But if they say yes, that's great job all done. Now, what if you get to a conversation where they say, No, what do you do, then? Well, there are alternatives for that. You can ask to work on, maybe towards a promotion. You know what I'm saying as far as if they say based off your level. 
We can't go any higher than that\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='133-137', metadata={'start': datetime.timedelta(seconds=1328, microseconds=370000), 'end': datetime.timedelta(seconds=1410, microseconds=720000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: common, to negotiate that with them. But every company is different. But if they say yes, that's great job all done. Now, what if you get to a conversation where they say, No, what do you do, then? Well, there are alternatives for that. You can ask to work on, maybe towards a promotion. You know what I'm saying as far as if they say based off your level. We can't go any higher than that\\n\\niyeshia: negotiate for promotion which would include maybe getting a title change, or better money that comes with it. This is why we say research, because you can definitely research what's going on in the market saying, Hey, that's my job. But the title is different.\\n\\niyeshia: Look that up and like definitely propose that if you want to. You can even ask for a faster review cycle. If they say something like, Hey, we can't give that to you. Just yet today. But let's revisit this topic on the 6 months, maybe, like, hey? Can we meet sooner, maybe in 3 months, to discuss more about how I can go about this\\n\\niyeshia: and then you could simply, if they say no. Ask why? Because you don't want to hear anything as far as like knowing that period. No, they should give you an explanation for it. So always ask questions with that to help like what's driving? That? Was it bad timing? Is there a gap? Is there their cap? Is there certain budgets. Did I miss anything that could help? So they can definitely\\n\\niyeshia: share with you and tell you that information of why they might have done. It could be a whole timing thing. It could be a budget thing. But just keep in mind to keep so just to keep in mind you could ask for like. 
Go around it 3 these ways, let's say 3 different ways. You can go about the answer and no from there. With that, said, does anyone have any questions so far?\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='137-141', metadata={'start': datetime.timedelta(seconds=1386, microseconds=520000), 'end': datetime.timedelta(seconds=1487, microseconds=429000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: share with you and tell you that information of why they might have done. It could be a whole timing thing. It could be a budget thing. But just keep in mind to keep so just to keep in mind you could ask for like. Go around it 3 these ways, let's say 3 different ways. You can go about the answer and no from there. With that, said, does anyone have any questions so far?\\n\\niyeshia: Nobody. Okay. Devin.\\n\\nCUNY Tech Prep (CTP): Devin does Devon.\\n\\nDevin Xie (no cam): Just curious. So like, say, we\\n\\nDevin Xie (no cam): find some opportunity after we graduate from Cuny Tech fair.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='141-145', metadata={'start': datetime.timedelta(seconds=1467, microseconds=260000), 'end': datetime.timedelta(seconds=1503, microseconds=140000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'iyeshia', 'Devin Xie (no cam)'})}),\n", + " Chunk(text=\"Devin Xie (no cam): find some opportunity after we graduate from Cuny Tech fair.\\n\\nDevin Xie (no cam): And then we have questions about this stuff like.\\n\\nDevin Xie (no cam): let's say we work there for like a year. And we\\n\\nDevin Xie (no cam): we stop. We we want to ask for some advice. Can we still hit you guys up.\\n\\niyeshia: Yeah, but you become alumni. You're not just gonna drop you all off in May and be like, bye. No, you can definitely you'll be invited. 
May like, after the graduation, I want to say in the summertime you'll get an invite to the alumni slack channel and you can join\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='145-149', metadata={'start': datetime.timedelta(seconds=1499, microseconds=630000), 'end': datetime.timedelta(seconds=1531, microseconds=469000), 'speakers': frozenset({'iyeshia', 'Devin Xie (no cam)'})}),\n", + " Chunk(text=\"iyeshia: Yeah, but you become alumni. You're not just gonna drop you all off in May and be like, bye. No, you can definitely you'll be invited. May like, after the graduation, I want to say in the summertime you'll get an invite to the alumni slack channel and you can join\\n\\niyeshia: that, and I will be gladly to assist you. There. We have a career coach there, but usually all the the staff is on the Ctv team is on the alumni channel. So yeah, definitely. But we also like, I said before, Devin, save the Powerpoint, too.\\n\\niyeshia: Just putting that out there? So yeah, good question.\\n\\niyeshia: Okay?\\n\\niyeshia: And so the next part is after the conversation for the the raise. You want to make sure. The conversation goes well, timing is going to be a part of that. So clarifying the process, asking them like, you know, when should I expect the raise? You know that's not being thirsty. That's that's your money. You can ask questions about it. And what's the next step for that?\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='149-153', metadata={'start': datetime.timedelta(seconds=1513, microseconds=30000), 'end': datetime.timedelta(seconds=1577, microseconds=890000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: And so the next part is after the conversation for the the raise. You want to make sure. The conversation goes well, timing is going to be a part of that. So clarifying the process, asking them like, you know, when should I expect the raise? 
You know that's not being thirsty. That's that's your money. You can ask questions about it. And what's the next step for that?\\n\\niyeshia: You can always confirm with your manager? Like. If the reason they said no, was it because there's certain maybe I would say physical years of like, how they what deadline they have for the New Year or the new budget. Time or deadline, was it? Did I miss it when I asked for a salary? Or when's the next time I should ask for a salary. Increase, and things like that. Cause your your department, or you would hope the team that you're on will show you throughout the year of like what's coming up and what you can expect.\\n\\niyeshia: So you definitely want to plan ahead next time. If they say no, and then review the work and the feedback asking for feedback. Was it my, the way that I would propose the raise? Is there anything I could do to get? You know better on that? That would help with the mentor, of course.\\n\\niyeshia: Cause the person you're proposing it to might not give the input. But definitely, a mentor is gonna help you with that as well to see what's going on. You could definitely check in with your manager. If they had any feedback they might tell your manager to like, let them know like this is why they might have said No or this? Why, they might have said, Not yet, or they'll say yes later. 
So keep that in mind.\\n\\niyeshia: and then let's see right\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='153-157', metadata={'start': datetime.timedelta(seconds=1553, microseconds=290000), 'end': datetime.timedelta(seconds=1648, microseconds=679000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: and then let's see right\\n\\niyeshia: from there we'll go to the activity.\\n\\niyeshia: And so from there, this is an activity of asking for feedback.\\n\\niyeshia: And we're gonna do a scenario of you want to ask for feedback from your manager.\\n\\niyeshia: and you previously had passed up for raise and want to learn more about how you can ensure success earning one in the next review cycle.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='157-161', metadata={'start': datetime.timedelta(seconds=1641, microseconds=970000), 'end': datetime.timedelta(seconds=1673, microseconds=539000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: and you previously had passed up for raise and want to learn more about how you can ensure success earning one in the next review cycle.\\n\\niyeshia: So this part is, how would you start that conversation in your weekly check in?\\n\\niyeshia: So since we're virtual, we're gonna have, I'm gonna give you about 30 seconds to come up with your own answer, and then type it in the chat.\\n\\niyeshia: So review the scenario now and then we'll start in 30 seconds.\\n\\niyeshia: So\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='161-165', metadata={'start': datetime.timedelta(seconds=1665, microseconds=550000), 'end': datetime.timedelta(seconds=1692, microseconds=620000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text='iyeshia: So\\n\\niyeshia: we set the timer for 30.\\n\\niyeshia: Okay?\\n\\niyeshia: Goes now\\n\\niyeshia: 10 seconds.', 
parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='165-169', metadata={'start': datetime.timedelta(seconds=1691, microseconds=890000), 'end': datetime.timedelta(seconds=1727, microseconds=70000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text='iyeshia: 10 seconds.\\n\\niyeshia: Okay, time is up.\\n\\niyeshia: Okay, nice.\\n\\niyeshia: And look for a raise on to guarantee a raise in this performance. Review. Awesome. Thank you. Ty\\n\\niyeshia: and Mckenzie. Thank you.', parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='169-173', metadata={'start': datetime.timedelta(seconds=1725, microseconds=970000), 'end': datetime.timedelta(seconds=1767, microseconds=160000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text='iyeshia: and Mckenzie. Thank you.\\n\\niyeshia: 13.\\n\\niyeshia: Some feedback to see what I can build. Awesome.\\n\\niyeshia: Hey, boys!\\n\\niyeshia: Oh, my God this time to reach out a bit. Okay, okay for me.', parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='173-177', metadata={'start': datetime.timedelta(seconds=1765, microseconds=20000), 'end': datetime.timedelta(seconds=1785, microseconds=509000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text='iyeshia: Oh, my God this time to reach out a bit. Okay, okay for me.\\n\\niyeshia: No.\\n\\niyeshia: Okay.\\n\\niyeshia: Any improvement that you see that I cannot. Okay, thank you.\\n\\niyeshia: Let me check in with you.', parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='177-181', metadata={'start': datetime.timedelta(seconds=1780, microseconds=400000), 'end': datetime.timedelta(seconds=1810, microseconds=859000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: Let me check in with you.\\n\\niyeshia: There we go.\\n\\niyeshia: Okay, perfect.\\n\\niyeshia: So what I can make for the next recycle. Awesome. 
Thank you all for sharing so far, I'm gonna move on to the the next part. I think I kind of skipped\\n\\niyeshia: ahead.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='181-185', metadata={'start': datetime.timedelta(seconds=1807, microseconds=139000), 'end': datetime.timedelta(seconds=1830, microseconds=670000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: ahead.\\n\\niyeshia: Okay.\\n\\niyeshia: so right now, we have a role play example between a manager and you. Let's say you would.\\n\\niyeshia: it could be data science. Related. Right? So from here, I'm going to\\n\\niyeshia: probably volunteer, because I'm not sure if people will volunteer to be the manager and someone be you\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='185-189', metadata={'start': datetime.timedelta(seconds=1829, microseconds=480000), 'end': datetime.timedelta(seconds=1857, microseconds=657000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: probably volunteer, because I'm not sure if people will volunteer to be the manager and someone be you\\n\\niyeshia: So let me see who I can get.\\n\\niyeshia: Okay, I'll go with David for manager, and I'll go for\\n\\niyeshia: Let's try, Kevin for you.\\n\\niyeshia: If you have to read this role, play example.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='189-193', metadata={'start': datetime.timedelta(seconds=1850, microseconds=520000), 'end': datetime.timedelta(seconds=1877, microseconds=689000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text='iyeshia: If you have to read this role, play example.\\n\\nDavid Rodriguez: Should I start now?\\n\\nCUNY Tech Prep (CTP): Kevin, you there?\\n\\nCUNY Tech Prep (CTP): Kevin? 
Chen.\\n\\nKevin Zheng: Right, right.', parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='193-197', metadata={'start': datetime.timedelta(seconds=1874, microseconds=660000), 'end': datetime.timedelta(seconds=1892, microseconds=270000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'David Rodriguez', 'iyeshia', 'Kevin Zheng'})}),\n", + " Chunk(text=\"Kevin Zheng: Right, right.\\n\\nCUNY Tech Prep (CTP): Alright!\\n\\nDavid Rodriguez: Great I'll start.\\n\\nDavid Rodriguez: Is there anything else you'd like to talk about?\\n\\nKevin Zheng: Yes, as you know, I've been taking on additional responsibilities since we used the team, and I'd like to speak to you about my conversation package.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='197-201', metadata={'start': datetime.timedelta(seconds=1891, microseconds=450000), 'end': datetime.timedelta(seconds=1910, microseconds=499000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'David Rodriguez', 'Kevin Zheng'})}),\n", + " Chunk(text=\"Kevin Zheng: Yes, as you know, I've been taking on additional responsibilities since we used the team, and I'd like to speak to you about my conversation package.\\n\\nDavid Rodriguez: We really appreciate your hard work.\\n\\nDavid Rodriguez: but it's still a tough economy, and we're not really in a position to give you anything more than a 2% raise. We can talk about a raise at your next review in about 6 months.\\n\\nKevin Zheng: I do understand that the economy has made things difficult. 
Can we set a time to discuss my compensation again before my next schedule Review.\\n\\nKevin Zheng: I appreciate an opportunity to talk in more detail on the additional work I've taken on, and its impact.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='201-205', metadata={'start': datetime.timedelta(seconds=1901, microseconds=690000), 'end': datetime.timedelta(seconds=1938, microseconds=959000), 'speakers': frozenset({'David Rodriguez', 'Kevin Zheng'})}),\n", + " Chunk(text=\"Kevin Zheng: I appreciate an opportunity to talk in more detail on the additional work I've taken on, and its impact.\\n\\nDavid Rodriguez: Sure that makes sense.\\n\\nDavid Rodriguez: I want to make sure you heard how about a month.\\n\\nKevin Zheng: Great. Thank you. I'll find some time on your calendar for us to meet.\\n\\niyeshia: Thank you. So with that, said, I. Just want to open up the the floor. To everyone. What did you notice?\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='205-209', metadata={'start': datetime.timedelta(seconds=1933, microseconds=720000), 'end': datetime.timedelta(seconds=1967, microseconds=303000), 'speakers': frozenset({'David Rodriguez', 'iyeshia', 'Kevin Zheng'})}),\n", + " Chunk(text=\"iyeshia: Thank you. So with that, said, I. Just want to open up the the floor. To everyone. What did you notice?\\n\\niyeshia: that during the the role play. That the let's say the data scientists who was played by Kevin,\\n\\niyeshia: did as far as like, maybe something different from your responses that you put in the chat. Did y'all notice anything differently?\\n\\niyeshia: Hey, Devin?\\n\\nDevin Xie (no cam): I don't know if I'm correct. 
But I think\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='209-213', metadata={'start': datetime.timedelta(seconds=1957, microseconds=300000), 'end': datetime.timedelta(seconds=2005, microseconds=496000), 'speakers': frozenset({'iyeshia', 'Devin Xie (no cam)'})}),\n", + " Chunk(text=\"Devin Xie (no cam): I don't know if I'm correct. But I think\\n\\nDevin Xie (no cam): the data scientists or us in this situation, we try to like Scheduler, a review like\\n\\nDevin Xie (no cam): in a later time.\\n\\niyeshia: absolutely. Thank you. He took initiative and be like, you know, hey, let me, let me get on your calendar for next time, instead of just like waiting around, you know, people be like, Oh, I'll get back to you and things like that. He's like, no, we can. We can discuss later, like, what's your schedule like? So that\\n\\niyeshia: that forwardness of just, you know, following up and seeing it through is definitely helpful.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='213-217', metadata={'start': datetime.timedelta(seconds=2002, microseconds=950000), 'end': datetime.timedelta(seconds=2041, microseconds=590000), 'speakers': frozenset({'iyeshia', 'Devin Xie (no cam)'})}),\n", + " Chunk(text=\"iyeshia: that forwardness of just, you know, following up and seeing it through is definitely helpful.\\n\\niyeshia: So and so, for now I would say this would take about maybe\\n\\niyeshia: so final reflection. We could talk about this for like maybe 3\\xa0min, or anybody could just like popcorn it out unless I just call on them. But for today's learning from the workshop what are some things you can generally expect when you 1st join a company? What is a manager's role in your success? And how do you find out your measures of success? 
Does anyone want to\\n\\niyeshia: volunteer and answer any of the any of the 3 questions that are of their choice\\n\\niyeshia: before I call on someone.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='217-221', metadata={'start': datetime.timedelta(seconds=2035, microseconds=850000), 'end': datetime.timedelta(seconds=2087, microseconds=550000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: before I call on someone.\\n\\niyeshia: Okay, anybody but Devin.\\n\\niyeshia: See, I'm gonna go with anthony.\\n\\nAnthony Jerez: Yes, I'm here.\\n\\niyeshia: Which question would you like to answer? You had to reflect.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='221-225', metadata={'start': datetime.timedelta(seconds=2086, microseconds=20000), 'end': datetime.timedelta(seconds=2122, microseconds=210000), 'speakers': frozenset({'Anthony Jerez', 'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: Which question would you like to answer? You had to reflect.\\n\\nAnthony Jerez: On, I would say the 1st one.\\n\\niyeshia: Okay, go for it.\\n\\nAnthony Jerez: So some major things that I would expect would be we're going through like sessions like orientation, and like onboarding\\n\\nAnthony Jerez: also knowledge about like some some resources resources that we would have access to at any point.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='225-229', metadata={'start': datetime.timedelta(seconds=2119, microseconds=390000), 'end': datetime.timedelta(seconds=2147, microseconds=390000), 'speakers': frozenset({'Anthony Jerez', 'iyeshia'})}),\n", + " Chunk(text=\"Anthony Jerez: also knowledge about like some some resources resources that we would have access to at any point.\\n\\nAnthony Jerez: And yeah, stuff like that. 
I would say.\\n\\niyeshia: Thank you, Anthony, for sharing.\\n\\niyeshia: and then let me see, trying to see who's not making eye contact. Oh, oh, not everybody looks okay. So let's go with\\n\\niyeshia: Ibrahim.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='229-233', metadata={'start': datetime.timedelta(seconds=2139, microseconds=43000), 'end': datetime.timedelta(seconds=2167, microseconds=810000), 'speakers': frozenset({'Anthony Jerez', 'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: Ibrahim.\\n\\nIbrahim Faruquee: Yeah, I'll answer question, too.\\n\\nIbrahim Faruquee: So your manager's role is mainly like for the company to manage like people and make sure that the right persons for the right job, but they can be like a mentor figure for you. So like, if there can be like good mentors who like help you throughout the process and help you with a raise, or they could also like, be difficult and make that like harder for you. But they're kind of. It's not like there's nothing to be, I guess, expected from a manager. It's just like\\n\\nIbrahim Faruquee: what they like. What do you, I guess. What do you end up with.\\n\\nIbrahim Faruquee: or what do you make the most of.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='233-237', metadata={'start': datetime.timedelta(seconds=2166, microseconds=780000), 'end': datetime.timedelta(seconds=2208, microseconds=880000), 'speakers': frozenset({'iyeshia', 'Ibrahim Faruquee'})}),\n", + " Chunk(text=\"Ibrahim Faruquee: or what do you make the most of.\\n\\niyeshia: Awesome. 
Thank you.\\n\\niyeshia: And then for the 3rd question.\\n\\niyeshia: and we're gonna go for Isabel.\\n\\nIsabel LoƧi: Hello!\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='237-241', metadata={'start': datetime.timedelta(seconds=2207, microseconds=390000), 'end': datetime.timedelta(seconds=2223, microseconds=750000), 'speakers': frozenset({'Isabel LoƧi', 'iyeshia', 'Ibrahim Faruquee'})}),\n", + " Chunk(text=\"Isabel LoƧi: Hello!\\n\\niyeshia: Hello!\\n\\nIsabel LoƧi: Sorry. My Internet's horrible, and might I might disconnect?\\n\\nIsabel LoƧi: I'll see if I can answer the 3rd one. How do you find your measures of success.\\n\\nIsabel LoƧi: I would say, ask for feedback from other people elsewhere, from other colleagues, from your manager.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='241-245', metadata={'start': datetime.timedelta(seconds=2222, microseconds=900000), 'end': datetime.timedelta(seconds=2245, microseconds=189000), 'speakers': frozenset({'Isabel LoƧi', 'iyeshia'})}),\n", + " Chunk(text=\"Isabel LoƧi: I would say, ask for feedback from other people elsewhere, from other colleagues, from your manager.\\n\\nIsabel LoƧi: That way you get a better understanding of where you are right now. And also I would say to also look back on the goals that you've set for yourself, and see if you've reached those goals as well, and that would be a good measure of success.\\n\\niyeshia: Okay, very good. All right.\\n\\niyeshia: So yeah, definitely helped make my life easier with this presentation. So thank you. I'm glad things are sticking and so with that said, We will go and launch Kahoo. But before I do that I definitely want to say just be mindful of these things.\\n\\niyeshia: When you are starting in your 1st year, in your career. As it was stated in one of the slides, you don't have to have it all figured out is the perfect time to ask questions. 
You're gonna make mistakes, or you're not. But if you do, it's okay. Because it's all gonna be a learning process. For your 1st year, and your managers expect that.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='245-249', metadata={'start': datetime.timedelta(seconds=2238, microseconds=660000), 'end': datetime.timedelta(seconds=2306, microseconds=319000), 'speakers': frozenset({'Isabel LoƧi', 'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: When you are starting in your 1st year, in your career. As it was stated in one of the slides, you don't have to have it all figured out is the perfect time to ask questions. You're gonna make mistakes, or you're not. But if you do, it's okay. Because it's all gonna be a learning process. For your 1st year, and your managers expect that.\\n\\niyeshia: So just keep that in mind.\\n\\niyeshia: And then, if you are going to seek, you know, support, I think. It was great that it's a bell, stated asking for feedback from your manager, but you could also ask for feedback from your teammates, too. Cause they, if you work with them closely. If you have a team to see, like what your areas of strengths are your areas of growth.\\n\\niyeshia: and things that you're learning. That could be helpful. Towards that process if you're going up for a raise. But sometimes people could see our strengths stronger or clearer, or even faster than we can, and we don't even realize it.\\n\\niyeshia: And then even asking your mentors, too, as well, can be helpful. And then.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='249-253', metadata={'start': datetime.timedelta(seconds=2282, microseconds=771000), 'end': datetime.timedelta(seconds=2345, microseconds=799000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: And then even asking your mentors, too, as well, can be helpful. 
And then.\\n\\niyeshia: if you are going to negotiate, remember to keep for raise, to keep that documented focus on your skills. Make sure you do your research on the market and definitely, just try to figure out if you can negotiate other things.\\n\\niyeshia: And when it comes to relationships, at work, you wanna make sure to treat everybody equally so I hope that that helps. If you didn't get anything else. I hope that's what helps you with them\\n\\niyeshia: with your 1st year? As you enter into your careers. And so with that said, we'll go into Kahoot.\\n\\niyeshia: and so I'm going to launch it now.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='253-257', metadata={'start': datetime.timedelta(seconds=2341, microseconds=80000), 'end': datetime.timedelta(seconds=2390, microseconds=330000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: and so I'm going to launch it now.\\n\\niyeshia: Let's get it started.\\n\\niyeshia: I don't think my headphones died so\\n\\niyeshia: got 33 people on here, and only 16.\\n\\niyeshia: Okay.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='257-261', metadata={'start': datetime.timedelta(seconds=2387, microseconds=420000), 'end': datetime.timedelta(seconds=2445, microseconds=90000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: Okay.\\n\\niyeshia: sound. Good.\\n\\niyeshia: 33.\\n\\niyeshia: Well, I didn't cut myself. That's Kevin. 
You're playing too.\\n\\niyeshia: Figure out how to be successful on my own.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='261-265', metadata={'start': datetime.timedelta(seconds=2444, microseconds=230000), 'end': datetime.timedelta(seconds=2550, microseconds=965000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: Figure out how to be successful on my own.\\n\\niyeshia: Oh, you do not have to figure that out.\\n\\niyeshia: That's why we tell you, have mentors, extra peers and things of that nature.\\n\\niyeshia: Well, yeah, shout out to the 22. It's okay. One. I'll take the 22 others, you know. Wow!\\n\\niyeshia: Your boss. My goodness, okay, is in the lead.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='265-269', metadata={'start': datetime.timedelta(seconds=2547, microseconds=780000), 'end': datetime.timedelta(seconds=2583, microseconds=779000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: Your boss. My goodness, okay, is in the lead.\\n\\niyeshia: So let's go ahead\\n\\niyeshia: who should not go to\\n\\niyeshia: thank you definitely. The worst thing you could do is talk to no one. If you need support with something.\\n\\niyeshia: So I hope.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='269-273', metadata={'start': datetime.timedelta(seconds=2578, microseconds=507000), 'end': datetime.timedelta(seconds=2624, microseconds=130000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: So I hope.\\n\\nCUNY Tech Prep (CTP): I am shocked.\\n\\niyeshia: That one should you not go to? So yeah.\\n\\niyeshia: let's see. 
Okay, Jamie is in the name.\\n\\niyeshia: Okay, let's go.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='273-277', metadata={'start': datetime.timedelta(seconds=2622, microseconds=675000), 'end': datetime.timedelta(seconds=2641, microseconds=959000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: Okay, let's go.\\n\\niyeshia: 3rd question, what are not considerations to mention when providing reasons for a salary increase.\\n\\niyeshia: There aren't enough.\\n\\niyeshia: Okay? 18. Yes, the cost of living. That is correct. You should not consider that\\n\\niyeshia: They don't, they don't. They don't care so definitely the other ones. You could do that on your own when you're doing your negotiating your your budget. But don't come out and say, like, Hey, the cost of living in this city? They're like\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='277-281', metadata={'start': datetime.timedelta(seconds=2640, microseconds=140000), 'end': datetime.timedelta(seconds=2695, microseconds=309000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: They don't, they don't. They don't care so definitely the other ones. You could do that on your own when you're doing your negotiating your your budget. But don't come out and say, like, Hey, the cost of living in this city? They're like\\n\\niyeshia: or virtual.\\n\\niyeshia: our office in California, we have no idea. So yeah, just just keep that in mind. 
So good job to the the cost of living folks.\\n\\niyeshia: Okay, David Rv is in the lead.\\n\\niyeshia: Okay, let's go to the next question.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='281-285', metadata={'start': datetime.timedelta(seconds=2680, microseconds=250000), 'end': datetime.timedelta(seconds=2715, microseconds=419000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: Okay, let's go to the next question.\\n\\niyeshia: what is a thoughtful way to actually negotiate?\\n\\niyeshia: So we can negotiate? Very good. It's a thoughtful way to act\\n\\niyeshia: and I think most of y'all got that in the chat. I saw some other answers. I'm gonna leave that questionable. But for the ones who did shout out to y'all.\\n\\niyeshia: So I think this is the last question.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='285-289', metadata={'start': datetime.timedelta(seconds=2712, microseconds=460000), 'end': datetime.timedelta(seconds=2758, microseconds=389000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: So I think this is the last question.\\n\\niyeshia: But Kyle is in the lead now, and so shouts to Kyle. So here goes the last question.\\n\\niyeshia: The most important relationship at work is with my manager.\\n\\niyeshia: Shout out to the people who said, False I said, it is important, but not the most important. Yeah, there's team this\\n\\niyeshia: Ceos, what about yourself? You know, things like that? So I just want to keep that in mind. So\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='289-293', metadata={'start': datetime.timedelta(seconds=2755, microseconds=680000), 'end': datetime.timedelta(seconds=2795, microseconds=579000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: Ceos, what about yourself? You know, things like that? So I just want to keep that in mind. 
So\\n\\niyeshia: yeah, let's always about that. So let's go to the windows.\\n\\niyeshia: Okay, let's okay.\\n\\niyeshia: Number one.\\n\\niyeshia: Okay, at the bottom.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='293-297', metadata={'start': datetime.timedelta(seconds=2788, microseconds=670000), 'end': datetime.timedelta(seconds=2827, microseconds=966000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: Okay, at the bottom.\\n\\niyeshia: Okay, with that, said\\n\\niyeshia: the last thing I will do. These are some follow up questions that you can ask your career coach. If I'm your career coach, you could definitely ask me that.\\n\\niyeshia: But how much of a raise. Can you ask for? When do you? Should you start a retirement fund? I would say, Asap, how long should you take to figure out if your company is a good fit, and how do you approach a conflict with a manager or coworker? So if you have any questions about those, please feel free to reach out to me or your career coach, if you would like to discuss further details, and I do want to be mindful of time.\\n\\niyeshia: And so I want to thank you for your time, and just want to let you know. This is the feedback form that really helps me with this presentation\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='297-301', metadata={'start': datetime.timedelta(seconds=2822, microseconds=600000), 'end': datetime.timedelta(seconds=2879, microseconds=310000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: And so I want to thank you for your time, and just want to let you know. This is the feedback form that really helps me with this presentation\\n\\niyeshia: and help me to deliver it better or worse. So if I did a good job, that's great. But I'm going to put this in the chat.\\n\\niyeshia: So you could fill that out now and then. Also want to invite you all to Rsvp. 
For Ctp's graduation.\\n\\niyeshia: So I would say, you can do that right now as well\\n\\niyeshia: and please register as a student. For those who can attend. You're more than welcome for the I believe the May 20th ones. If you cannot attend because you have a final, you have an internship. It is okay. There's no pressure. We're not going to be like, Hey, you can't you got to make it? No, we totally get it, I mean, we understand. So blessings on your finals\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='301-305', metadata={'start': datetime.timedelta(seconds=2870, microseconds=460000), 'end': datetime.timedelta(seconds=2919, microseconds=640000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: and please register as a student. For those who can attend. You're more than welcome for the I believe the May 20th ones. If you cannot attend because you have a final, you have an internship. It is okay. There's no pressure. We're not going to be like, Hey, you can't you got to make it? No, we totally get it, I mean, we understand. So blessings on your finals\\n\\niyeshia: and your projects. But for those who can't attend come through. It's going to be great to see your projects to see each other one last time, like Demo Night. And it's gonna be it's going to be a great time as we close out the the cohort in in May. So, and also to Devin's question, just one more time. We won't leave you hanging you will get an invite to be alumni\\n\\niyeshia: for Ctp, and that way you'll be with everybody who did the cohorts before your cohorts, one through 9 and so it'll be one through 10 now. And so that'll be like over a thousand people in that slack channel. So you can definitely network with your peers and the people who came before you. So yeah, just keep that in mind.\\n\\niyeshia: So thank you all. And I will stop sharing.\\n\\niyeshia: And yeah, please. Rsvp for the graduation. And please fill out that feedback form. 
It is greatly appreciative. I want to thank you for your time lessons on your projects. And yeah, if any of my fellows have any questions about the presentation, you can highlight me on slack. I am there to support you, and other than that. I want to thank you. And, Kevin, I think it's all yours now.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='305-309', metadata={'start': datetime.timedelta(seconds=2901, microseconds=130000), 'end': datetime.timedelta(seconds=2988, microseconds=469000), 'speakers': frozenset({'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: And yeah, please. Rsvp for the graduation. And please fill out that feedback form. It is greatly appreciative. I want to thank you for your time lessons on your projects. And yeah, if any of my fellows have any questions about the presentation, you can highlight me on slack. I am there to support you, and other than that. I want to thank you. And, Kevin, I think it's all yours now.\\n\\nCUNY Tech Prep (CTP): Definitely. Thank you, Aisha, for the valuable tips. I think. A lot of students, a lot of the students I've spoken to, at least are.\\n\\nCUNY Tech Prep (CTP): have got recently gotten jobs or are very close to getting them, and\\n\\nCUNY Tech Prep (CTP): they will find this material very useful. I'm actually kind of glad I remember to click record at the beginning, because some of them are like in traffic right now.\\n\\niyeshia: Got it. Okay.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='309-313', metadata={'start': datetime.timedelta(seconds=2964, microseconds=60000), 'end': datetime.timedelta(seconds=3011, microseconds=947000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'iyeshia'})}),\n", + " Chunk(text=\"iyeshia: Got it. Okay.\\n\\niyeshia: I'm glad.\\n\\nCUNY Tech Prep (CTP): Okay, thank you. So I'm gonna give you all 10\\xa0min to fill this out. Since you got 2 things to fill out. 
One is the inviting yourself to the graduation, and then 2 is the survey.\\n\\nCUNY Tech Prep (CTP): Alright, so we will come back at 7, 35.\\n\\nCUNY Tech Prep (CTP): Oh, yes, there's good news for those of you who missed it.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='313-317', metadata={'start': datetime.timedelta(seconds=3010, microseconds=980000), 'end': datetime.timedelta(seconds=3063, microseconds=720000), 'speakers': frozenset({'CUNY Tech Prep (CTP)', 'iyeshia'})}),\n", + " Chunk(text=\"CUNY Tech Prep (CTP): Oh, yes, there's good news for those of you who missed it.\\n\\nCUNY Tech Prep (CTP): There's no homework for the next 2 weeks, and there's spring break. So which means.\\n\\nCUNY Tech Prep (CTP): after this class, I'll be seeing you the second Friday from now.\\n\\nCUNY Tech Prep (CTP): Not next Friday.\\n\\nCUNY Tech Prep (CTP): No, a break is not exactly a break, so you have projects.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='317-321', metadata={'start': datetime.timedelta(seconds=3060, microseconds=740000), 'end': datetime.timedelta(seconds=3115, microseconds=180000), 'speakers': frozenset({'CUNY Tech Prep (CTP)'})}),\n", + " Chunk(text='CUNY Tech Prep (CTP): No, a break is not exactly a break, so you have projects.\\n\\nCUNY Tech Prep (CTP): This is time to do your projects.\\n\\nCUNY Tech Prep (CTP): Alright, so just as a gift to all the people who are in class.\\n\\nCUNY Tech Prep (CTP): If you check the homework sheet.\\n\\nCUNY Tech Prep (CTP): there is actually a column where you can grade yourselves. 
You can give yourself any emoji you want.', parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='321-325', metadata={'start': datetime.timedelta(seconds=3110, microseconds=350000), 'end': datetime.timedelta(seconds=3275, microseconds=10000), 'speakers': frozenset({'CUNY Tech Prep (CTP)'})}),\n", + " Chunk(text=\"CUNY Tech Prep (CTP): there is actually a column where you can grade yourselves. You can give yourself any emoji you want.\\n\\nCUNY Tech Prep (CTP): I'll let you figure out which one that is\\n\\nCUNY Tech Prep (CTP): alright. We're back.\\n\\nCUNY Tech Prep (CTP): So go for the rest of this day. So we're gonna I'm gonna put you in breakout rooms\\n\\nCUNY Tech Prep (CTP): for your projects.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='325-329', metadata={'start': datetime.timedelta(seconds=3269, microseconds=390000), 'end': datetime.timedelta(seconds=3591, microseconds=359000), 'speakers': frozenset({'CUNY Tech Prep (CTP)'})}),\n", + " Chunk(text='CUNY Tech Prep (CTP): for your projects.\\n\\nCUNY Tech Prep (CTP): And what I want you to do is I need to think about the state of the project. You, the the state the project is in.\\n\\nCUNY Tech Prep (CTP): I will be coming around to check in\\n\\nCUNY Tech Prep (CTP): because you have 2 weeks and no homework.\\n\\nCUNY Tech Prep (CTP): I want you to put your all into the project. So', parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='329-333', metadata={'start': datetime.timedelta(seconds=3589, microseconds=600000), 'end': datetime.timedelta(seconds=3613, microseconds=269000), 'speakers': frozenset({'CUNY Tech Prep (CTP)'})}),\n", + " Chunk(text='CUNY Tech Prep (CTP): I want you to put your all into the project. So\\n\\nCUNY Tech Prep (CTP): let me make the breakout rooms first.st\\n\\nCUNY Tech Prep (CTP): Basically, what I want you to do is plan out the next 2 weeks. 
Okay, what do you want? What? What is missing from\\n\\nCUNY Tech Prep (CTP): your project that you need to complete it?\\n\\nCUNY Tech Prep (CTP): And how are you going to get there in the next 2 weeks?', parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='333-337', metadata={'start': datetime.timedelta(seconds=3609, microseconds=440000), 'end': datetime.timedelta(seconds=3646, microseconds=619000), 'speakers': frozenset({'CUNY Tech Prep (CTP)'})}),\n", + " Chunk(text=\"CUNY Tech Prep (CTP): And how are you going to get there in the next 2 weeks?\\n\\nCUNY Tech Prep (CTP): Because after the next 2 weeks you literally have only 2 weeks left.\\n\\nCUNY Tech Prep (CTP): There's class. There's week 11, and then there's week 12\\n\\nCUNY Tech Prep (CTP): week. 13 is like May May 10th or May 9, th\\n\\nCUNY Tech Prep (CTP): and then the week after that, I believe, is\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='337-341', metadata={'start': datetime.timedelta(seconds=3643, microseconds=720000), 'end': datetime.timedelta(seconds=3672, microseconds=696000), 'speakers': frozenset({'CUNY Tech Prep (CTP)'})}),\n", + " Chunk(text=\"CUNY Tech Prep (CTP): and then the week after that, I believe, is\\n\\nCUNY Tech Prep (CTP): when you're going to do Demos.\\n\\nCUNY Tech Prep (CTP): I could be wrong.\\n\\nCUNY Tech Prep (CTP): Alright. You can pick the rooms. 
Now go into your rooms.\", parent_id='38b3a5ac7de4b38806edbcce9f913d8518ebd2976083d54bad21f6b15fce4313', chunk_id='341-344', metadata={'start': datetime.timedelta(seconds=3670, microseconds=320000), 'end': datetime.timedelta(seconds=3682, microseconds=370000), 'speakers': frozenset({'CUNY Tech Prep (CTP)'})}))" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "web_vtt_content.get_chunks()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/pyproject.toml b/pyproject.toml index 0ad4d9311a11c499b266bc62cecdd6dd90bfe7b4..498d253a5c62774854ba080d03eb6b70d77804b6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ name = "ctp-slack-bot" version = "0.1.0" description = "A Slack bot for processing and analyzing Zoom transcripts using AI" readme = "README.md" -requires-python = ">=3.11.9" +requires-python = ">=3.12" license = {text = "MIT"} authors = [ {name = "Your Name", email = "your.email@example.com"} @@ -19,26 +19,27 @@ classifiers = [ "Operating System :: OS Independent", ] dependencies = [ - "dependency-injector>=4.46.0", "pydantic>=2.11.2", "pydantic-settings>=2.8.1", - "fastapi>=0.115.12", - "uvicorn>=0.34.0", - "loguru>=0.7.3", + "cachetools>=5.5.2", + "more-itertools>=10.6.0", "python-dotenv>=1.1.0", - "httpx>=0.28.1", - "tenacity>=9.1.2", - "pybreaker>=1.3.0", + "loguru>=0.7.3", + "dependency-injector>=4.46.0", "pytz>=2025.2", "apscheduler>=3.11.0", +# "tenacity>=9.1.2", +# "pybreaker>=1.3.0", + "aiohttp>=3.11.16", + "webvtt-py>=0.5.1", "slack-sdk>=3.35.0", + "slack_bolt>=1.23.0", "pymongo>=4.11.3 ", - 
"numpy>=2.2.4", - "webvtt-py>=0.5.1", + "motor>=3.7.0", "openai>=1.70.0", -# "langchain>=0.3.23", -# "transformers>=4.51.0", -# "torch>=2.6.0", + "google-api-python-client>=2.167.0", + "google-auth>=2.39.0", + "google-auth-oauthlib>=1.2.1" ] [project.optional-dependencies] @@ -49,7 +50,7 @@ dev = [ "types-pytz>=2025.2", "black>=25.1.0", "isort>=6.0.1", - "ruff>=0.11.4", + "ruff>=0.11.4" ] [project.urls] diff --git a/scripts/run-dev.sh b/scripts/run-dev.sh index bf5322f35c6304884a2877d8f320f9772136a738..dc28ef95c6323c25697c89807cdca4e70d86e6d8 100755 --- a/scripts/run-dev.sh +++ b/scripts/run-dev.sh @@ -2,4 +2,4 @@ parent_path=$(cd "$(dirname "${BASH_SOURCE[0]}")"; pwd -P) -python3 "${parent_path}/../src/ctp_slack_bot/api/main.py" +LOG_LEVEL=DEBUG python3 "${parent_path}/../src/ctp_slack_bot/app.py" diff --git a/src/ctp_slack_bot/__init__.py b/src/ctp_slack_bot/__init__.py index 4333a5918642c8ef15511cec1a8dd11d7b2ca52b..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 100644 --- a/src/ctp_slack_bot/__init__.py +++ b/src/ctp_slack_bot/__init__.py @@ -1 +0,0 @@ -from ctp_slack_bot.containers import Container diff --git a/src/ctp_slack_bot/api/__init__.py b/src/ctp_slack_bot/api/__init__.py deleted file mode 100644 index e237842bd3e748e0f536a15b70641214e71a74df..0000000000000000000000000000000000000000 --- a/src/ctp_slack_bot/api/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from ctp_slack_bot.api.main import app, run diff --git a/src/ctp_slack_bot/api/main.py b/src/ctp_slack_bot/api/main.py deleted file mode 100644 index 42f0d467d40c72b3da35cb924be82798fb301938..0000000000000000000000000000000000000000 --- a/src/ctp_slack_bot/api/main.py +++ /dev/null @@ -1,70 +0,0 @@ -from contextlib import asynccontextmanager -from fastapi import FastAPI, HTTPException, Depends -from loguru import logger -from typing import AsyncGenerator -from dependency_injector.wiring import inject, Provide - -from ctp_slack_bot import Container -from ctp_slack_bot.api.routes import router -from 
ctp_slack_bot.core import Settings, setup_logging -from ctp_slack_bot.core.response_rendering import PrettyJSONResponse -from ctp_slack_bot.tasks import start_scheduler, stop_scheduler - -@asynccontextmanager -async def lifespan(app: FastAPI) -> AsyncGenerator: - """ - Lifespan context manager for FastAPI application. - Handles startup and shutdown events. - """ - # Initialize container and wire the container to modules that need dependency injection. - container = Container() - container.wire(packages=['ctp_slack_bot']) - app.container = container - - # Setup logging. - setup_logging(container) - logger.info("Starting application") - - # Start the scheduler. - scheduler = start_scheduler(container) - logger.info("Started scheduler") - - yield # control to FastAPI until shutdown. - - # Shutdown. - logger.info("Shutting down application") - stop_scheduler(scheduler) - logger.info("Stopped scheduler") - - -app = FastAPI( - title="CTP Slack Bot", - description="A Slack bot for processing and analyzing Zoom transcripts using AI", - version="0.1.0", - lifespan=lifespan, -) - -# Include routers. -app.include_router(router) - -# Provide a minimalist health check endpoint for clients to detect availability. 
-@app.get("/health") -async def get_health() -> dict[str, str]: - """Health check""" - return { - "status": "healthy" - } - -# Alternate starting path for development -def run() -> None: - import uvicorn - settings = Settings() # type: ignore - uvicorn.run( - "main:app", - host=settings.API_HOST, - port=settings.API_PORT, - reload=settings.DEBUG - ) - -if __name__ == "__main__": - run() diff --git a/src/ctp_slack_bot/api/routes.py b/src/ctp_slack_bot/api/routes.py deleted file mode 100644 index c2d489c95b2ae579edede5e1795379644623f02e..0000000000000000000000000000000000000000 --- a/src/ctp_slack_bot/api/routes.py +++ /dev/null @@ -1,67 +0,0 @@ -from fastapi import APIRouter, Depends, HTTPException, status -from dependency_injector.wiring import inject, Provide -from loguru import logger - -from ctp_slack_bot import Container -from ctp_slack_bot.core import Settings -from ctp_slack_bot.services import SlackService - -router = APIRouter(prefix="/api/v1") - -@router.get("/env", response_model=Settings) -@inject -async def get_env(settings: Settings = Depends(Provide[Container.settings])) -> Settings: - if not settings.DEBUG: - raise HTTPException(status_code=404) - return settings - -# @router.post("/transcripts/analyze", response_model=TranscriptResponse) -# async def analyze_transcript( -# request: TranscriptRequest, -# transcript_service: TranscriptService = Depends(get_transcript_service), -# ): -# """ -# Analyze a Zoom transcript and return insights. 
-# """ -# logger.info(f"Analyzing transcript: {request.transcript_id}") -# try: -# result = await transcript_service.analyze_transcript(request) -# return result -# except Exception as e: -# logger.error(f"Error analyzing transcript: {e}") -# raise HTTPException( -# status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, -# detail="Failed to analyze transcript", -# ) - - -# @router.post("/slack/message") -# async def send_slack_message( -# channel: str, -# message: str, -# slack_service: SlackService = Depends(get_slack_service), -# ): -# """ -# Send a message to a Slack channel. -# """ -# logger.info(f"Sending message to Slack channel: {channel}") -# try: -# result = await slack_service.send_message(channel, message) -# return {"status": "success", "message_ts": result.get("ts")} -# except Exception as e: -# logger.error(f"Error sending Slack message: {e}") -# raise HTTPException( -# status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, -# detail="Failed to send Slack message", -# ) - - -# @router.post("/slack/webhook", include_in_schema=False) -# async def slack_webhook( -# slack_service: SlackService = Depends(get_slack_service), -# ): -# """ -# Webhook endpoint for Slack events. 
-# """ -# # This would typically handle Slack verification and event processing -# return {"challenge": "challenge_token"} diff --git a/src/ctp_slack_bot/app.py b/src/ctp_slack_bot/app.py new file mode 100644 index 0000000000000000000000000000000000000000..e6b47b6f21981db814e1173b07354cfaadfbe7e5 --- /dev/null +++ b/src/ctp_slack_bot/app.py @@ -0,0 +1,53 @@ +from asyncio import all_tasks, CancelledError, create_task, current_task, get_running_loop, run +from loguru import logger +from signal import SIGINT, SIGTERM +from typing import Any, Callable + +from ctp_slack_bot.containers import Container +from ctp_slack_bot.core.logging import setup_logging + +async def handle_shutdown_signal() -> None: + logger.info("Received shutdown signal.") + for task in all_tasks(): + if task is not current_task() and not task.done(): + task.cancel() + logger.trace("Cancelled task {}.", task.get_name()) + logger.info("Cancelled all tasks.") + +def create_shutdown_signal_handler() -> Callable[[], None]: + def shutdown_signal_handler() -> None: + create_task(handle_shutdown_signal()) + return shutdown_signal_handler + +async def main() -> None: + # Setup logging. + setup_logging() + logger.info("Starting application…") + + # Set up dependency injection container. + container = Container() + container.wire(packages=['ctp_slack_bot']) + + # Kick off services which should be active from the start. + container.content_ingestion_service() + container.question_dispatch_service() + container.schedule_service() + + # Start the Slack socket mode handler in the background. 
+ socket_mode_handler = container.socket_mode_handler() + slack_bolt_task = create_task(socket_mode_handler.start_async()) + shutdown_signal_handler = create_shutdown_signal_handler() + loop = get_running_loop() + loop.add_signal_handler(SIGINT, shutdown_signal_handler) + loop.add_signal_handler(SIGTERM, shutdown_signal_handler) + try: + logger.info("Starting Slack Socket Mode handler…") + await slack_bolt_task + except CancelledError: + logger.info("Shutting down application…") + finally: + await socket_mode_handler.close_async() + await container.shutdown_resources() + +if __name__ == "__main__": + run(main()) diff --git a/src/ctp_slack_bot/containers.py b/src/ctp_slack_bot/containers.py index 8692407ccd40fdef0f2e907d1e1f4fb301f79aca..924a2d45295b511cf712a001d696154ca1fe8959 100644 --- a/src/ctp_slack_bot/containers.py +++ b/src/ctp_slack_bot/containers.py @@ -1,44 +1,40 @@ from dependency_injector.containers import DeclarativeContainer -from dependency_injector.providers import Factory, Singleton -from openai import OpenAI +from dependency_injector.providers import Resource, Singleton +from slack_bolt.adapter.socket_mode.async_handler import AsyncSocketModeHandler +from slack_bolt.async_app import AsyncApp from ctp_slack_bot.core.config import Settings -from ctp_slack_bot.db.mongo_db import MongoDB +from ctp_slack_bot.db.mongo_db import MongoDBResource +from ctp_slack_bot.db.repositories import MongoVectorizedChunkRepository from ctp_slack_bot.services.answer_retrieval_service import AnswerRetrievalService from ctp_slack_bot.services.content_ingestion_service import ContentIngestionService from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService +from ctp_slack_bot.services.embeddings_model_service import EmbeddingsModelService from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService +from ctp_slack_bot.services.google_drive_service import GoogleDriveService +from ctp_slack_bot.services.language_model_service 
import LanguageModelService from ctp_slack_bot.services.question_dispatch_service import QuestionDispatchService -from ctp_slack_bot.services.slack_service import SlackService +from ctp_slack_bot.services.schedule_service import ScheduleServiceResource +from ctp_slack_bot.services.slack_service import SlackServiceResource from ctp_slack_bot.services.vector_database_service import VectorDatabaseService from ctp_slack_bot.services.vectorization_service import VectorizationService class Container(DeclarativeContainer): settings = Singleton(Settings) - event_brokerage_service = Singleton(EventBrokerageService) - - mongo_db = Singleton(MongoDB, settings=settings) - - # Repositories - # transcript_repository = Factory( - # # Your transcript repository class - # db=db - # ) - - open_ai_client = Factory(OpenAI, api_key=settings.provided.OPENAI_API_KEY) # TODO: poor practice to do it this way; create a LanguageModelService that creates an OpenAI client. - + schedule_service = Resource(ScheduleServiceResource, settings=settings) + mongo_db = Resource(MongoDBResource, settings=settings) # TODO: generalize to any database. 
+ vectorized_chunk_repository = Singleton(MongoVectorizedChunkRepository, mongo_db=mongo_db) vector_database_service = Singleton(VectorDatabaseService, settings=settings, mongo_db=mongo_db) - - vectorization_service = Singleton(VectorizationService, settings=settings, client=open_ai_client) - + embeddings_model_service = Singleton(EmbeddingsModelService, settings=settings) + vectorization_service = Singleton(VectorizationService, settings=settings, embeddings_model_service=embeddings_model_service) content_ingestion_service = Singleton(ContentIngestionService, settings=settings, event_brokerage_service=event_brokerage_service, vector_database_service=vector_database_service, vectorization_service=vectorization_service) - context_retrieval_service = Singleton(ContextRetrievalService, settings=settings, vectorization_service=vectorization_service, vector_database_service=vector_database_service) - - answer_retrieval_service = Singleton(AnswerRetrievalService, settings=settings, event_brokerage_service=event_brokerage_service, client=open_ai_client) - + language_model_service = Singleton(LanguageModelService, settings=settings) + answer_retrieval_service = Singleton(AnswerRetrievalService, settings=settings, event_brokerage_service=event_brokerage_service, language_model_service=language_model_service) question_dispatch_service = Singleton(QuestionDispatchService, settings=settings, event_brokerage_service=event_brokerage_service, content_ingestion_service=content_ingestion_service, context_retrieval_service=context_retrieval_service, answer_retrieval_service=answer_retrieval_service) - - slack_service = Singleton(SlackService, settings=settings, event_brokerage_service=event_brokerage_service) + slack_bolt_app = Singleton(AsyncApp, token=settings.provided.SLACK_BOT_TOKEN().get_secret_value()) + slack_service = Resource(SlackServiceResource, event_brokerage_service=event_brokerage_service, slack_bolt_app=slack_bolt_app) + socket_mode_handler = Singleton(lambda _, app, 
app_token: AsyncSocketModeHandler(app, app_token), slack_service, slack_bolt_app, settings.provided.SLACK_APP_TOKEN().get_secret_value()) + google_drive_service = Singleton(GoogleDriveService, settings=settings) diff --git a/src/ctp_slack_bot/core/__init__.py b/src/ctp_slack_bot/core/__init__.py index bbbda2c170c5e042cec6bb6799a50e646659db21..33059dcd073fba33ae489a6a9c7e049931809d42 100644 --- a/src/ctp_slack_bot/core/__init__.py +++ b/src/ctp_slack_bot/core/__init__.py @@ -1,2 +1 @@ from ctp_slack_bot.core.config import Settings -from ctp_slack_bot.core.logging import logger, setup_logging diff --git a/src/ctp_slack_bot/core/config.py b/src/ctp_slack_bot/core/config.py index b77c1aaa9962ca7bd4655b48bcd3c59123630dcf..cdf59422df397635982ef64dc35323b43477e6b4 100644 --- a/src/ctp_slack_bot/core/config.py +++ b/src/ctp_slack_bot/core/config.py @@ -1,28 +1,29 @@ +from loguru import logger from pydantic import Field, MongoDsn, NonNegativeFloat, NonNegativeInt, PositiveInt, SecretStr from pydantic_settings import BaseSettings, SettingsConfigDict -from typing import Literal, Optional +from types import MappingProxyType +from typing import Literal, Mapping, Optional, Self -class Settings(BaseSettings): # TODO: Strong guarantees of validity, because garbage in = garbage out, and settings flow into all the nooks and crannies +class Settings(BaseSettings): """ Application settings loaded from environment variables. 
""" - # Application Configuration - DEBUG: bool = False - # Logging Configuration + def __init__(self: Self, **data) -> None: + super().__init__(**data) + logger.debug("Created {}", self.__class__.__name__) + if self.__pydantic_extra__: + logger.warning("Extra unrecognized environment variables were provided: {}", ", ".join(self.__pydantic_extra__)) + + # Logging Configuration ― not actually used to configure Loguru, but defined to prevent warnings about ā€œunknownā€ environment variables LOG_LEVEL: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = Field(default_factory=lambda data: "DEBUG" if data.get("DEBUG", False) else "INFO") LOG_FORMAT: Literal["text", "json"] = "json" # APScheduler Configuration - SCHEDULER_TIMEZONE: str = "UTC" - - # API Configuration - API_HOST: str - API_PORT: PositiveInt + SCHEDULER_TIMEZONE: Optional[str] = "UTC" # Slack Configuration SLACK_BOT_TOKEN: SecretStr - SLACK_SIGNING_SECRET: SecretStr SLACK_APP_TOKEN: SecretStr # Vectorization Configuration @@ -31,23 +32,45 @@ class Settings(BaseSettings): # TODO: Strong guarantees of validity, because gar CHUNK_SIZE: PositiveInt CHUNK_OVERLAP: NonNegativeInt TOP_K_MATCHES: PositiveInt - + # MongoDB Configuration MONGODB_URI: SecretStr # TODO: Contemplate switching to MongoDsn type for the main URL, and separate out the credentials to SecretStr variables. MONGODB_NAME: str + SCORE_THRESHOLD: NonNegativeFloat # Hugging Face Configuration - HF_API_TOKEN: Optional[SecretStr] = None + HF_API_TOKEN: Optional[SecretStr] = None # TODO: Currently, this is unused. 
# OpenAI Configuration - OPENAI_API_KEY: Optional[SecretStr] = None + OPENAI_API_KEY: SecretStr CHAT_MODEL: str MAX_TOKENS: PositiveInt TEMPERATURE: NonNegativeFloat SYSTEM_PROMPT: str + # Google Drive Configuration + GOOGLE_DRIVE_ROOT_ID: str + GOOGLE_PROJECT_ID: str + GOOGLE_PRIVATE_KEY_ID: SecretStr + GOOGLE_PRIVATE_KEY: SecretStr + GOOGLE_CLIENT_ID: str + GOOGLE_CLIENT_EMAIL: str + GOOGLE_AUTH_URI: str = "https://accounts.google.com/o/oauth2/auth" + GOOGLE_TOKEN_URI: str = "https://oauth2.googleapis.com/token" + GOOGLE_AUTH_PROVIDER_CERT_URL: str = "https://www.googleapis.com/oauth2/v1/certs" + GOOGLE_CLIENT_CERT_URL: str = "https://www.googleapis.com/robot/v1/metadata/x509/ctp-slack-bot-714%40voltaic-reducer-294821.iam.gserviceaccount.com" + GOOGLE_UNIVERSE_DOMAIN: str = "googleapis.com" + + # File Monitoring Configuration + FILE_MONITOR_ROOT_PATH: Optional[str] = None + model_config = SettingsConfigDict( env_file=".env", env_file_encoding="utf-8", case_sensitive=True, + extra="allow", + frozen=True ) + + def get_extra_environment_variables(self: Self) -> Mapping[str, str]: + return MappingProxyType(self.__pydantic_extra__) diff --git a/src/ctp_slack_bot/core/logging.py b/src/ctp_slack_bot/core/logging.py index 60ae5307af4ea0753d36bcc757a6606f2be52af1..fcb5fd917480876bcf7cc6dce83ed4f5a2560a4c 100644 --- a/src/ctp_slack_bot/core/logging.py +++ b/src/ctp_slack_bot/core/logging.py @@ -1,7 +1,8 @@ -from logging import __file__ as logging_file, basicConfig, currentframe, getLogger, Handler, INFO, LogRecord +from logging import __file__ as logging_file, basicConfig, currentframe, getLogger, Handler, INFO, LogRecord, WARNING from loguru import logger +from os import getenv from sys import stderr -from typing import Dict, Union +from typing import Self class InterceptHandler(Handler): """ @@ -11,7 +12,7 @@ class InterceptHandler(Handler): to Loguru, allowing unified logging across the application. 
""" - def emit(self, record: LogRecord) -> None: + def emit(self: Self, record: LogRecord) -> None: # Get corresponding Loguru level if it exists try: level = logger.level(record.levelname).name @@ -29,22 +30,23 @@ class InterceptHandler(Handler): ) -def setup_logging(container: "Container") -> None: +def setup_logging() -> None: """ Configure logging with Loguru. - This function sets up Loguru as the main logging provider, - configures the log format based on settings, and intercepts - standard logging messages. + This function sets up Loguru as the main logging provider, configures the log format based on environment variables, + and intercepts standard logging messages. """ - from ctp_slack_bot import Container - settings = container.settings() if container else Provide[Container.settings] - # Remove default loguru handler + # Get logger configuration from environment variables. + log_level = getenv("LOG_LEVEL", "INFO") + log_format = getenv("LOG_FORMAT", "text") + + # Remove default loguru handler. logger.remove() - # Determine log format - if settings.LOG_FORMAT == "json": + # Determine log format. + if log_format == "json": log_format = { "time": "{time:YYYY-MM-DD HH:mm:ss.SSS}", "level": "{level}", @@ -62,33 +64,35 @@ def setup_logging(container: "Container") -> None: "{message}" ) - # Add console handler + # Add console handler. logger.add( stderr, format=format_string, - level=settings.LOG_LEVEL, - serialize=(settings.LOG_FORMAT == "json"), + level=log_level, + serialize=(log_format == "json"), backtrace=True, diagnose=True, ) - # Add file handler for non-DEBUG environments - if settings.LOG_LEVEL != "DEBUG": - logger.add( - "logs/app.log", - rotation="10 MB", - retention="1 week", - compression="zip", - format=format_string, - level=settings.LOG_LEVEL, - serialize=(settings.LOG_FORMAT == "json"), - ) + # Add file handler for non-DEBUG environments. 
+ # if log_level != "DEBUG": + # logger.add( + # "/data/app.log", + # rotation="10 MB", + # retention="1 week", + # compression="zip", + # format=format_string, + # level=log_level, + # serialize=(log_format == "json"), + # ) - # Intercept standard logging messages + # Intercept standard logging messages. basicConfig(handlers=[InterceptHandler()], level=0, force=True) - # Update logging levels for some noisy libraries - for logger_name in ("uvicorn", "uvicorn.error", "fastapi", "httpx", "apscheduler", "pymongo"): + # Update logging levels for some noisy libraries. + for logger_name in ("uvicorn", "uvicorn.error", "fastapi", "httpx", "pymongo"): getLogger(logger_name).setLevel(INFO) + for logger_name in ("apscheduler"): + getLogger(logger_name).setLevel(WARNING) - logger.info(f"Logging configured with level {settings.LOG_LEVEL}") + logger.info(f"Logging configured with level {log_level}") diff --git a/src/ctp_slack_bot/core/response_rendering.py b/src/ctp_slack_bot/core/response_rendering.py deleted file mode 100644 index f91589131dfa9a93c560e0eea52d2d3f111c1697..0000000000000000000000000000000000000000 --- a/src/ctp_slack_bot/core/response_rendering.py +++ /dev/null @@ -1,13 +0,0 @@ -from json import dumps -from starlette.responses import JSONResponse -from typing import Any, Self - -class PrettyJSONResponse(JSONResponse): - def render(self: Self, content: Any) -> bytes: - return dumps( - content, - ensure_ascii=False, - allow_nan=False, - indent=4, - separators=(", ", ": "), - ).encode("utf-8") diff --git a/src/ctp_slack_bot/db/mongo_db.py b/src/ctp_slack_bot/db/mongo_db.py index 38337572472579cee6c1bde18ec41f4347877e1d..f4b03a80b51a4e3dedc032945bde8c76b16dad25 100644 --- a/src/ctp_slack_bot/db/mongo_db.py +++ b/src/ctp_slack_bot/db/mongo_db.py @@ -1,125 +1,198 @@ -from pymongo import MongoClient, ASCENDING +from asyncio import create_task +from dependency_injector.resources import AsyncResource +from motor.motor_asyncio import AsyncIOMotorClient +from 
pymongo.errors import ConnectionFailure, ServerSelectionTimeoutError +from pymongo.operations import SearchIndexModel from loguru import logger -from pydantic import BaseModel, model_validator, ConfigDict -from typing import Optional, Self, Any +from pydantic import BaseModel, PrivateAttr +from typing import Any, Dict, Optional, Self from ctp_slack_bot.core.config import Settings +from ctp_slack_bot.utils import sanitize_mongo_db_uri class MongoDB(BaseModel): """ - MongoDB connection and initialization class. - Handles connection to MongoDB, database selection, and index creation. + MongoDB connection manager using Motor for async operations. """ - settings: Settings - client: Optional[MongoClient] = None - db: Optional[Any] = None - vector_collection: Optional[Any] = None - initialized: bool = False - - model_config = ConfigDict(arbitrary_types_allowed=True) - - @model_validator(mode='after') - def post_init(self: Self) -> Self: - logger.debug("Created {}", self.__class__.__name__) - return self + _client: PrivateAttr = PrivateAttr() + _db: PrivateAttr = PrivateAttr() - def connect(self): - """ - Connect to MongoDB using connection string from settings. 
- """ - if self.client is not None: - return + class Config: + arbitrary_types_allowed = True - if not self.settings.MONGODB_URI: - raise ValueError("MONGODB_URI is not set in environment variables") + def __init__(self: Self, **data: Dict[str, Any]) -> None: + super().__init__(**data) + logger.debug("Created {}", self.__class__.__name__) + def connect(self: Self) -> None: + """Initialize MongoDB client with settings.""" try: - # Create MongoDB connection - self.client = MongoClient(self.settings.MONGODB_URI.get_secret_value()) - self.db = self.client[self.settings.MONGODB_NAME] - self.vector_collection = self.db["vector_store"] - logger.info(f"Connected to MongoDB: {self.settings.MONGODB_NAME}") + connection_string = self.settings.MONGODB_URI.get_secret_value() + logger.debug("Connecting to MongoDB using URI: {}", sanitize_mongo_db_uri(connection_string)) + + # Create client with appropriate settings + self._client = AsyncIOMotorClient( + connection_string, + serverSelectionTimeoutMS=5000, + connectTimeoutMS=10000, + socketTimeoutMS=45000, + maxPoolSize=100, + retryWrites=True, + w="majority" + ) + + # Set database + db_name = self.settings.MONGODB_NAME + + self._db = self._client[db_name] + logger.debug("MongoDB client initialized for database: {}", db_name) + except Exception as e: - logger.error(f"Error connecting to MongoDB: {str(e)}") + logger.error("Failed to initialize MongoDB client: {}", e) + self._client = None + self._db = None raise - def initialize(self): + @property + def client(self: Self) -> AsyncIOMotorClient: + """Get the MongoDB client instance.""" + if not hasattr(self, '_client') or self._client is None: + logger.warning("MongoDB client not initialized. 
Attempting to initialize…") + self.connect() + if not hasattr(self, '_client') or self._client is None: + raise ConnectionError("Failed to initialize MongoDB client.") + return self._client + + @property + def db(self: Self) -> Any: + """Get the MongoDB database instance.""" + if not hasattr(self, '_db') or self._db is None: + logger.warning("MongoDB database not initialized. Attempting to initialize client…") + self.connect() + if not hasattr(self, '_db') or self._db is None: + raise ConnectionError("Failed to initialize MongoDB database.") + return self._db + + async def ping(self: Self) -> bool: + """Check if MongoDB connection is alive.""" + try: + # Get client to ensure we're connected + client = self.client + + # Try a simple ping command + await client.admin.command('ping') + logger.debug("MongoDB connection is active!") + return True + except (ConnectionFailure, ServerSelectionTimeoutError) as e: + logger.error("MongoDB connection failed: {}", e) + return False + except Exception as e: + logger.error("Unexpected error during MongoDB ping: {}", e) + return False + + async def get_collection(self: Self, name: str) -> Any: """ - Initialize MongoDB with required collections and indexes. + Get a collection by name with validation. + Creates the collection if it doesn't exist. 
""" - if self.initialized: - return - - if not self.client: - self.connect() - + # First ensure we can connect at all + if not await self.ping(): + logger.error("Cannot get collection '{}' because a MongoDB connection is not available.", name) + raise ConnectionError("MongoDB connection is not available.") + try: - # Create vector index for similarity search - self.create_vector_index() - self.initialized = True - logger.info("MongoDB initialized successfully") + # Get all collection names to check if this one exists + logger.debug("Checking if collection '{}' exists…", name) + collection_names = await self.db.list_collection_names() + + if name not in collection_names: + logger.info("Collection '{}' does not exist. Creating it…", name) + # Create the collection + await self.db.create_collection(name) + logger.debug("Successfully created collection: {}", name) + else: + logger.debug("Collection '{}' already exists!", name) + + # Get and return the collection + collection = self.db[name] + return collection except Exception as e: - logger.error(f"Error initializing MongoDB: {str(e)}") + logger.error("Error accessing collection '{}': {}", name, e) raise - def create_vector_index(self): + async def create_indexes(self: Self, collection_name: str) -> None: """ - Create vector index for similarity search using MongoDB Atlas Vector Search. + Create a vector search index on a collection. 
+ + Args: + collection_name: Name of the collection """ + collection = await self.get_collection(collection_name) + try: - # Check if index already exists - existing_indexes = list(self.vector_collection.list_indexes()) - index_names = [index.get('name') for index in existing_indexes] - - if "vector_index" not in index_names: - # Create vector search index - index_definition = { - "mappings": { - "dynamic": True, - "fields": { - "embedding": { - "dimensions": self.settings.VECTOR_DIMENSION, - "similarity": "cosine", - "type": "knnVector" - } - } - } - } - - # Create the index - self.db.command({ - "createIndexes": self.vector_collection.name, - "indexes": [ + # Create search index model using MongoDB's recommended approach + search_index_model = SearchIndexModel( + definition={ + "fields": [ { - "name": "vector_index", - "key": {"embedding": "vector"}, - "weights": {"embedding": 1}, - "vectorSearchOptions": index_definition + "type": "vector", + "path": "embedding", + "numDimensions": self.settings.VECTOR_DIMENSION, + "similarity": "cosine", + "quantization": "scalar" } ] - }) - - # Create additional metadata indexes for filtering - self.vector_collection.create_index([("metadata.source", ASCENDING)]) - self.vector_collection.create_index([("metadata.timestamp", ASCENDING)]) - - logger.info("Vector search index created") + }, + name=f"{collection_name}_vector_index", + type="vectorSearch" + ) + + # Create the search index using the motor collection + result = await collection.create_search_index(search_index_model) + logger.info("Vector search index '{}' created for collection {}.", result, collection_name) + + except Exception as e: + if "command not found" in str(e).lower(): + logger.warning("Vector search not supported by this MongoDB instance. 
Some functionality may be limited.") + # Create a fallback standard index on embedding field + await collection.create_index("embedding") + logger.info("Created standard index on 'embedding' field as fallback.") + else: + logger.error("Failed to create vector index: {}", e) + raise + + async def close(self: Self) -> None: + """Close MongoDB connection.""" + if self._client: + self._client.close() + logger.info("Closed MongoDB connection.") + self._client = None + self._db = None + +class MongoDBResource(AsyncResource): + async def init(self: Self, settings: Settings) -> MongoDB: + logger.info("Initializing MongoDB connection for database: {}", settings.MONGODB_NAME) + mongo_db = MongoDB(settings=settings) + mongo_db.connect() + await self._test_connection(mongo_db) + return mongo_db + + async def _test_connection(self: Self, mongo_db: MongoDB) -> None: + """Test MongoDB connection and log the result.""" + try: + is_connected = await mongo_db.ping() + if is_connected: + logger.info("MongoDB connection test successful!") else: - logger.info("Vector search index already exists") - + logger.error("MongoDB connection test failed!") except Exception as e: - logger.error(f"Error creating vector index: {str(e)}") + logger.error("Error testing MongoDB connection: {}", e) raise - def close(self): - """ - Close MongoDB connection. 
- """ - if self.client: - self.client.close() - self.client = None - self.db = None - self.vector_collection = None - self.initialized = False - logger.info("MongoDB connection closed") + async def shutdown(self: Self, mongo_db: MongoDB) -> None: + """Close MongoDB connection on shutdown.""" + try: + await mongo_db.close() + except Exception as e: + logger.error("Error closing MongoDB connection: {}", e) diff --git a/src/ctp_slack_bot/db/repositories/__init__.py b/src/ctp_slack_bot/db/repositories/__init__.py index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..6fc17ef43a46bfd014826324c4e88357fc454a14 100644 --- a/src/ctp_slack_bot/db/repositories/__init__.py +++ b/src/ctp_slack_bot/db/repositories/__init__.py @@ -0,0 +1,2 @@ +from ctp_slack_bot.db.repositories.mongo_db_vectorized_chunk_repository import MongoVectorizedChunkRepository +from ctp_slack_bot.db.repositories.vectorized_chunk_repository import VectorizedChunkRepository diff --git a/src/ctp_slack_bot/db/repositories/mongo_db_vectorized_chunk_repository.py b/src/ctp_slack_bot/db/repositories/mongo_db_vectorized_chunk_repository.py new file mode 100644 index 0000000000000000000000000000000000000000..ac01d06f131e5895f88c14113fe3c07fe2bfe25d --- /dev/null +++ b/src/ctp_slack_bot/db/repositories/mongo_db_vectorized_chunk_repository.py @@ -0,0 +1,65 @@ +from typing import List, Optional, Dict, Any +import pymongo +from bson import ObjectId + +from ctp_slack_bot.db import MongoDB +from ctp_slack_bot.db.repositories.vectorized_chunk_repository import VectorizedChunkRepository +from ctp_slack_bot.models.base import VectorizedChunk + +class MongoVectorizedChunkRepository(VectorizedChunkRepository): + """MongoDB implementation of VectorizedChunkRepository.""" + + def __init__(self, mongo_db: MongoDB): + self.mongo_db = mongo_db + self.collection = self.mongo_db.db.get_collection("vectorized_chunks") + + # Create indexes for efficient queries + self.collection.create_index("chunk_id") + 
self.collection.create_index("parent_id") + + async def find_by_id(self, id: str) -> Optional[VectorizedChunk]: + doc = await self.collection.find_one({"_id": ObjectId(id)}) + return self._map_to_entity(doc) if doc else None + + async def find_all(self) -> List[VectorizedChunk]: + cursor = self.collection.find({}) + return [self._map_to_entity(doc) async for doc in cursor] + + async def find_by_parent_id(self, parent_id: str) -> List[VectorizedChunk]: + cursor = self.collection.find({"parent_id": parent_id}) + return [self._map_to_entity(doc) async for doc in cursor] + + async def save(self, chunk: VectorizedChunk) -> VectorizedChunk: + doc = self._map_to_document(chunk) + + if "_id" in doc and doc["_id"]: + # Update existing document + await self.collection.replace_one({"_id": doc["_id"]}, doc) + else: + # Insert new document + result = await self.collection.insert_one(doc) + doc["_id"] = result.inserted_id + + return self._map_to_entity(doc) + + async def delete(self, id: str) -> bool: + result = await self.collection.delete_one({"_id": ObjectId(id)}) + return result.deleted_count > 0 + + async def find_by_metadata(self, metadata_query: Dict[str, Any]) -> List[VectorizedChunk]: + # Convert the metadata query to MongoDB query format + query = {f"metadata.{k}": v for k, v in metadata_query.items()} + cursor = self.collection.find(query) + return [self._map_to_entity(doc) async for doc in cursor] + + def _map_to_document(self, chunk: VectorizedChunk) -> Dict[str, Any]: + """Convert a VectorizedChunk to a MongoDB document.""" + doc = chunk.model_dump() + # Handle any special conversions needed + return doc + + def _map_to_entity(self, doc: Dict[str, Any]) -> VectorizedChunk: + """Convert a MongoDB document to a VectorizedChunk.""" + if "_id" in doc: + doc["id"] = str(doc.pop("_id")) + return VectorizedChunk(**doc) diff --git a/src/ctp_slack_bot/db/repositories/vectorized_chunk_repository.py b/src/ctp_slack_bot/db/repositories/vectorized_chunk_repository.py new file 
mode 100644 index 0000000000000000000000000000000000000000..c396ba599bf24acf99b57d29a9d62dc29cd39f29 --- /dev/null +++ b/src/ctp_slack_bot/db/repositories/vectorized_chunk_repository.py @@ -0,0 +1,30 @@ +from typing import List, Optional, Dict, Any + +from ctp_slack_bot.models.base import VectorizedChunk + +class VectorizedChunkRepository: + """Repository interface for VectorizedChunk entities.""" + + async def find_by_id(self, id: str) -> Optional[VectorizedChunk]: + """Find a chunk by its ID.""" + pass + + async def find_all(self) -> List[VectorizedChunk]: + """Find all chunks.""" + pass + + async def find_by_parent_id(self, parent_id: str) -> List[VectorizedChunk]: + """Find chunks by parent document ID.""" + pass + + async def save(self, chunk: VectorizedChunk) -> VectorizedChunk: + """Save a chunk to the database.""" + pass + + async def delete(self, id: str) -> bool: + """Delete a chunk by its ID.""" + pass + + async def find_by_metadata(self, metadata_query: Dict[str, Any]) -> List[VectorizedChunk]: + """Find chunks by metadata criteria.""" + pass diff --git a/src/ctp_slack_bot/enums.py b/src/ctp_slack_bot/enums.py new file mode 100644 index 0000000000000000000000000000000000000000..894019ed00852c44cc4ffa31f09ca79f70094a17 --- /dev/null +++ b/src/ctp_slack_bot/enums.py @@ -0,0 +1,6 @@ +from enum import auto, StrEnum + +class EventType(StrEnum): + INCOMING_CONTENT = auto() + INCOMING_SLACK_MESSAGE = auto() + OUTGOING_SLACK_RESPONSE = auto() diff --git a/src/ctp_slack_bot/models/__init__.py b/src/ctp_slack_bot/models/__init__.py index e56314b445dec25c42cf0f8f7158226c822defa1..5f23a44769081fa0f05a4a1e7c02d5f63fe8bd09 100644 --- a/src/ctp_slack_bot/models/__init__.py +++ b/src/ctp_slack_bot/models/__init__.py @@ -1,4 +1,4 @@ -from ctp_slack_bot.models.base import Content, Ingestible, Metadata -from ctp_slack_bot.models.content import RetreivedContext -from ctp_slack_bot.models.slack import SlackMessage -from ctp_slack_bot.models.vector_query import VectorQuery 
+from ctp_slack_bot.models.base import Chunk, Content, VectorizedChunk, VectorQuery +from ctp_slack_bot.models.google_drive import GoogleDriveMetadata +from ctp_slack_bot.models.slack import SlackEventPayload, SlackMessage, SlackReaction, SlackResponse, SlackUserTimestampPair +from ctp_slack_bot.models.webvtt import WebVTTContent, WebVTTFrame diff --git a/src/ctp_slack_bot/models/base.py b/src/ctp_slack_bot/models/base.py index 1de973730b26a219f2d42c0a0e9408ee9b475dcf..7ddf13da24dd22b3951bfb5f45e15a1389df5f2b 100644 --- a/src/ctp_slack_bot/models/base.py +++ b/src/ctp_slack_bot/models/base.py @@ -1,61 +1,58 @@ from abc import ABC, abstractmethod -from datetime import datetime -from pydantic import BaseModel, Field, validator -from typing import Dict, List, Optional, Union, Any, ClassVar -import hashlib -import json +from pydantic import BaseModel, ConfigDict, Field +from typing import Any, final, Mapping, Self, Sequence, Optional -class Metadata(BaseModel): - """A class representing metadata about content.""" +class Chunk(BaseModel): + """A class representing a chunk of content.""" - id: str # The content’s identity consistent across modifications - modification_time: datetime # The content’s modification for detection of alterations - hash: str # The content’s hash for detection of alterations + text: str # The text representation + parent_id: str # The source content’s identity + chunk_id: str # This chunk’s identity—unique within the source content + metadata: Mapping[str, Any] + model_config = ConfigDict(frozen=True) -class Content(BaseModel): - """A class representing ingested content.""" - metadata: Metadata +@final +class VectorQuery(BaseModel): + """Model for vector database similarity search queries. 
+ + Attributes: + query_text: The text to be vectorized and used for similarity search + k: Number of similar documents to retrieve + score_threshold: Minimum similarity score threshold for inclusion in results + filter_metadata: Optional filters for metadata fields + """ + + query_embeddings: Sequence[float] + k: int + score_threshold: float = Field(default=0.7) + filter_metadata: Optional[Mapping[str, Any]] = None + model_config = ConfigDict(frozen=True) + + +@final +class VectorizedChunk(Chunk): + """A class representing a vectorized chunk of content.""" + embedding: Sequence[float] # The vector representation -class Ingestible(ABC, BaseModel): - """An abstract base class for ingestible content.""" - metadata: Metadata +class Content(ABC, BaseModel): + """An abstract base class for all types of content.""" + + model_config = ConfigDict(frozen=True) - @property @abstractmethod - def content(self) -> Content: - """ - Return content ready for vectorization. - - This could be: - - A single string - - A list of strings (pre-chunked) - - A more complex structure that can be recursively processed - """ + def get_id(self: Self) -> str: + pass + + @abstractmethod + def get_chunks(self: Self) -> Sequence[Chunk]: + pass + + @abstractmethod + def get_metadata(self: Self) -> Mapping[str, Any]: pass - - def get_chunks(self) -> List[str]: - """ - Split content into chunks suitable for vectorization. - Override this in subclasses for specialized chunking logic. 
- """ - content = self.content - if isinstance(content, str): - # Simple chunking by character count - return [content[i:i+self.chunk_size] - for i in range(0, len(content), self.chunk_size)] - elif isinstance(content, list): - # Content is already chunked - return content - else: - raise ValueError(f"Unsupported content type: {type(content)}") - - @property - def key(self) -> str: - """Convenience accessor for the metadata key.""" - return self.metadata.key diff --git a/src/ctp_slack_bot/models/content.py b/src/ctp_slack_bot/models/content.py deleted file mode 100644 index 57d4e92b4a8bfcbf361fd327a8fe31a703fe12c3..0000000000000000000000000000000000000000 --- a/src/ctp_slack_bot/models/content.py +++ /dev/null @@ -1,19 +0,0 @@ -from pydantic import BaseModel, Field -from typing import Optional, List, Dict, Any -from ctp_slack_bot.models.slack import SlackMessage - -class RetreivedContext(BaseModel): - """Represents a the context of a question from Slack returned from the Vector Store Database. - - contextual_text: The text that is relevant to the question. - metadata_source: The source of the contextual text. - similarity_score: The similarity score of the contextual text to the question. - - in_reation_to_question: OPTINAL: The question that the contextual text is related to. 
- """ - contextual_text: str - metadata_source: str - similarity_score: float - - said_by: str = Optional[None] - in_reation_to_question: str = Optional[None] diff --git a/src/ctp_slack_bot/models/google_drive.py b/src/ctp_slack_bot/models/google_drive.py new file mode 100644 index 0000000000000000000000000000000000000000..b7b9fed82b078abc37297b4323ad9b18ef9fc87c --- /dev/null +++ b/src/ctp_slack_bot/models/google_drive.py @@ -0,0 +1,25 @@ +from datetime import datetime +from pydantic import BaseModel, ConfigDict +from typing import Self + +from ctp_slack_bot.models import FileContent + + +class GoogleDriveMetadata(BaseModel): + """Represents Google Drive file or folder metadata.""" + + id: str + name: str + modified_time: datetime + mime_type: str + folder_path: str + + model_config = ConfigDict(frozen=True) + + @classmethod + def from_folder_path_and_dict(cls: type["GoogleDriveMetadata"], folder_path: str, dict: dict) -> Self: + id = dict["id"] + name = dict["name"] + modified_time = datetime.fromisoformat(dict["modifiedTime"]) + mime_type = dict["mimeType"] + return GoogleDriveMetadata(id=id, name=name, modified_time=modified_time, mime_type=mime_type, folder_path=folder_path) diff --git a/src/ctp_slack_bot/models/slack.py b/src/ctp_slack_bot/models/slack.py index dff4f86a2228d6e31fe669bffc568041f9acab4e..16607959734e073db5938ddf1531852a2a737741 100644 --- a/src/ctp_slack_bot/models/slack.py +++ b/src/ctp_slack_bot/models/slack.py @@ -1,16 +1,84 @@ -from pydantic import BaseModel, Field -from typing import Optional, List, Dict, Any +from datetime import datetime +from json import dumps +from pydantic import BaseModel, ConfigDict, PositiveInt, PrivateAttr +from types import MappingProxyType +from typing import Any, Dict, Literal, Mapping, Optional, Self, Sequence -class SlackMessage(BaseModel): +from ctp_slack_bot.models.base import Chunk, Content + +class SlackEventPayload(BaseModel): + """Represents a general event payload from Slack.""" + type: str + event_ts: 
str + + model_config = ConfigDict(extra='allow', frozen=True) + +class SlackEvent(BaseModel): + """Represents a general event from Slack.""" + + token: str + team_id: str + api_app_id: str + event: SlackEventPayload + type: str + event_id: str + event_time: int + authed_users: Sequence[str] + + model_config = ConfigDict(frozen=True) + +class SlackUserTimestampPair(BaseModel): + """Represents a Slack user-timestamp pair.""" + + user: str + ts: str + + model_config = ConfigDict(frozen=True) + +class SlackReaction(BaseModel): + """Represents Slack reaction information.""" + + name: str + count: PositiveInt + users: Sequence[str] + + model_config = ConfigDict(frozen=True) + +class SlackMessage(Content): """Represents a message from Slack after adaptation.""" - channel_id: str - user_id: str - text: str + + type: Literal["app_mention", "message"] + subtype: Optional[str] = None + channel: str + channel_type: Optional[str] = None + user: Optional[str] = None + bot_id: Optional[str] = None thread_ts: Optional[str] = None - timestamp: str - is_question: bool = False - - @property - def key(self) -> str: + text: str + ts: str + edited: Optional[SlackUserTimestampPair] = None + event_ts: str + deleted_ts: Optional[str] = None + hidden: bool = False + is_starred: Optional[bool] = None + pinned_to: Optional[Sequence[str]] = None + reactions: Optional[Sequence[SlackReaction]] = None + + def get_id(self: Self) -> str: """Unique identifier for this message.""" - return f"slack:{self.channel_id}:{self.timestamp}" + return f"slack-message:{self.channel}:{self.ts}" + + def get_chunks(self: Self) -> Sequence[Chunk]: + return (Chunk(text=self.text, parent_id=self.get_id(), chunk_id="", metadata=self.get_metadata()), ) + + def get_metadata(self: Self) -> Mapping[str, Any]: + return MappingProxyType({ + "modificationTime": datetime.fromtimestamp(float(self.ts)) + }) + +class SlackResponse(BaseModel): # TODO: This should also be based on Content as it is a SlackMessage―just not one for
which we know the identity yet. + """Represents a response message to be sent to Slack.""" + + text: str + channel: Optional[str] + thread_ts: Optional[str] = None diff --git a/src/ctp_slack_bot/models/vector_query.py b/src/ctp_slack_bot/models/vector_query.py deleted file mode 100644 index b54f0f9d65d880a62e6b452ab5955924c2e5e686..0000000000000000000000000000000000000000 --- a/src/ctp_slack_bot/models/vector_query.py +++ /dev/null @@ -1,16 +0,0 @@ -from pydantic import BaseModel, Field, validator -from typing import Optional, List, Dict, Any - -class VectorQuery(BaseModel): - """Model for vector database similarity search queries. - - Attributes: - query_text: The text to be vectorized and used for similarity search - k: Number of similar documents to retrieve - score_threshold: Minimum similarity score threshold for inclusion in results - filter_metadata: Optional filters for metadata fields - """ - query_text: str - k: int - score_threshold: float = Field(default=0.7) - filter_metadata: Optional[Dict[str, Any]] = None diff --git a/src/ctp_slack_bot/models/webvtt.py b/src/ctp_slack_bot/models/webvtt.py new file mode 100644 index 0000000000000000000000000000000000000000..906528084f018e34f003864d8dbbb166ff558526 --- /dev/null +++ b/src/ctp_slack_bot/models/webvtt.py @@ -0,0 +1,73 @@ +from datetime import datetime, timedelta +from io import BytesIO +from itertools import starmap +from json import dumps +from more_itertools import windowed +from pydantic import BaseModel, ConfigDict, Field, PositiveInt, PrivateAttr +from types import MappingProxyType +from typing import Any, Dict, Literal, Mapping, Optional, Self, Sequence +from webvtt import Caption, WebVTT + +from ctp_slack_bot.models.base import Chunk, Content + +CHUNK_FRAMES_OVERLAP = 1 +CHUNK_FRAMES_WINDOW = 5 +SPEAKER_SPEECH_TEXT_SEPARATOR = ": " + +class WebVTTFrame(BaseModel): + """Represents a WebVTT frame""" + + identifier: str + start: timedelta + end: timedelta + speaker: Optional[str] = None + speech: 
str + + model_config = ConfigDict(frozen=True) + + @classmethod + def from_webvtt_caption(cls: type["WebVTTFrame"], index: int, caption: Caption) -> Self: + identifier = caption.identifier if caption.identifier else str(index) + start = timedelta(**caption.start_time.__dict__) + end = timedelta(**caption.end_time.__dict__) + match caption.text.split(SPEAKER_SPEECH_TEXT_SEPARATOR, 1): + case [speaker, speech]: + return cls(identifier=identifier, start=start, end=end, speaker=speaker, speech=speech) + case [speech]: + return cls(identifier=identifier, start=start, end=end, speech=speech) + + +class WebVTTContent(Content): + """Represents parsed WebVTT content.""" + + id: str + metadata: Mapping[str, Any] = Field(default_factory=dict) + frames: Sequence[WebVTTFrame] + + def get_id(self: Self) -> str: + return self.id + + def get_chunks(self: Self) -> Sequence[Chunk]: + windows = (tuple(filter(None, window)) + for window + in windowed(self.frames, CHUNK_FRAMES_WINDOW, step=CHUNK_FRAMES_WINDOW-CHUNK_FRAMES_OVERLAP)) + return tuple(Chunk(text="\n\n".join(": ".join(filter(None, (frame.speaker, frame.speech))) + for frame + in frames), + parent_id=self.get_id(), + chunk_id=f"{frames[0].identifier}-{frames[-1].identifier}", + metadata={ + "start": str(frames[0].start), # TODO: This is a harder problem: to get the offsets to become real datetimes so that they can be queryable using MongoDB. 
+ "end": str(frames[-1].end), + "speakers": [frame.speaker for frame in frames if frame.speaker] + }) + for frames + in windows) + + def get_metadata(self: Self) -> Mapping[str, Any]: + return MappingProxyType(self.metadata) + + @classmethod + def from_bytes(cls: type["WebVTTContent"], id: str, metadata: Mapping[str, Any], buffer: bytes) -> Self: + frames = tuple(starmap(WebVTTFrame.from_webvtt_caption, enumerate(WebVTT.from_buffer(BytesIO(buffer)).captions, 1))) + return WebVTTContent(id=id, metadata=MappingProxyType(metadata), frames=frames) diff --git a/src/ctp_slack_bot/services/GOOGLE_DRIVE_README.md b/src/ctp_slack_bot/services/GOOGLE_DRIVE_README.md deleted file mode 100644 index dfb7c44ebeb1d4318983479691a36f075cd7d222..0000000000000000000000000000000000000000 --- a/src/ctp_slack_bot/services/GOOGLE_DRIVE_README.md +++ /dev/null @@ -1,228 +0,0 @@ -# Google Drive Access Module - -This Python module provides a simplified way to interact with Google Drive, focusing on easy access to files in nested folders using path-like syntax. It handles various Google file formats and provides comprehensive metadata for files and folders. - -## Features - -- **Path-based folder access**: Access files using simple paths like `folder1/folder2/folder3` -- **Efficient caching**: Folder IDs are cached to improve performance -- **Comprehensive metadata**: Get detailed information about files and folders -- **Read various file types**: - - Text files - - Google Docs - - VTT files -- **Robust folder finding**: Works with exact and partial name matching -- **Simple API**: Designed for ease of use with minimal code - -## Setup Instructions - -### 1. Create a Google Cloud Project - -1. Go to the [Google Cloud Console](https://console.cloud.google.com/) -2. Click on the project dropdown at the top of the page and select "New Project" -3. Enter a project name and click "Create" -4. Once created, make sure your new project is selected in the dropdown - -### 2. 
Enable the Google Drive API - -1. In the Google Cloud Console, navigate to "APIs & Services" > "Library" in the left sidebar -2. Search for "Google Drive API" in the search bar -3. Click on "Google Drive API" in the results -4. Click the "Enable" button - -### 3. Create OAuth Credentials - -1. In the Google Cloud Console, go to "APIs & Services" > "Credentials" in the left sidebar -2. Click "Create Credentials" at the top and select "OAuth client ID" -3. If prompted to configure the OAuth consent screen: - - Choose "External" user type (or "Internal" if you're in a Google Workspace organization) - - Fill in the required information (App name, User support email, Developer contact email) - - Click "Save and Continue" - - Add the following scopes: - - `.../auth/drive` (Full access to Google Drive) - - Click "Save and Continue" and complete the registration -4. Return to the "Create OAuth client ID" screen -5. Select "Desktop application" as the Application type -6. Enter a name for your OAuth client (e.g., "Google Drive Access Desktop") -7. Click "Create" -8. Download the JSON file (this is your `client_secret.json`) - -### 4. Project Setup - -1. Setup a virtual environment and install dependencies: -```bash -python -m venv venv -source venv/bin/activate # On Windows: venv\Scripts\activate -pip install -r requirements.txt -``` - -2. Place your credentials: - - Create a `credentials` directory in your project root - - Move the downloaded OAuth client JSON file to the `credentials` directory - - Rename it to `client_secret.json` - -### 5. Authentication Process - -When you run the application for the first time: -1. A browser window will open automatically -2. You'll be asked to sign in to your Google account -3. You'll see a consent screen asking for permission to access your Google Drive -4. After granting permission, the browser will display a success message -5. 
The application will save a token file (`token.pickle`) in the credentials directory for future use - -## Usage Guide - -The `EasyGoogleDrive` class provides several methods to interact with Google Drive. Here's how to use the core functionality: - -### Basic Usage - -```python -from google_drive_access import EasyGoogleDrive - -# Initialize the Google Drive client -drive = EasyGoogleDrive() - -# Example folder path - replace with your actual folder path -folder_path = "Spring-2025-BAI" -subfolder_path = "Spring-2025-BAI/transcripts" -``` - -### Listing Folders - -```python -# List folders in a directory -folders = drive.get_folders_in_folder(folder_path) - -# Access folder properties -for folder in folders: - print(f"Folder: {folder['name']}") - print(f" Created: {folder.get('createdTimeFormatted', 'Unknown')}") - print(f" Modified: {folder.get('modifiedTimeFormatted', 'Unknown')}") -``` - -### Listing Files - -```python -# List files in a directory -files = drive.get_files_in_folder(subfolder_path) - -# Access file properties -for file in files: - print(f"File: {file['name']}") - print(f" Type: {file.get('fileType', 'Unknown')}") - print(f" Created: {file.get('createdTimeFormatted', 'Unknown')}") - print(f" Modified: {file.get('modifiedTimeFormatted', 'Unknown')}") - print(f" Size: {file.get('sizeFormatted', 'Unknown')}") -``` - -### Getting a Specific File - -```python -# Get a specific file with metadata -file = drive.get_file("example.txt", subfolder_path, include_metadata=True) - -if file: - print(f"File: {file['name']}") - print(f" Type: {file.get('fileType', 'Unknown')}") - print(f" Created: {file.get('createdTimeFormatted', 'Unknown')}") - print(f" Modified: {file.get('modifiedTimeFormatted', 'Unknown')}") - print(f" Size: {file.get('sizeFormatted', 'Unknown')}") -``` - -### Getting All Items in a Folder - -```python -# Get all items (files and folders) in a folder -all_items = drive.get_all_files_in_folder(folder_path) - -# Access item properties -for 
item in all_items: - item_type = "Folder" if item.get('mimeType') == drive.MIME_TYPES['folder'] else item.get('fileType', 'Unknown') - print(f"Item: {item['name']} ({item_type})") -``` - -### Checking if a File Exists - -```python -# Check if a file exists -exists = drive.file_exists("example.txt", subfolder_path) -print(f"File exists: {exists}") -``` - -### Getting File Modified Time - -```python -# Get file modified time -modified_time = drive.get_file_modified_time("example.txt", subfolder_path) -if modified_time: - print(f"Last modified: {modified_time}") -``` - -### Reading File Content - -```python -# Get file with content -file_with_content = drive.get_file("example.txt", subfolder_path, include_content=True) - -if file_with_content and 'file_content' in file_with_content: - content = file_with_content['file_content'] - if content: - print(f"Content: {content[:100]}...") # Print first 100 characters -``` - -## Complete Example - -For a complete example of how to use the `EasyGoogleDrive` class, see the `basic_usage.py` file included in this package. This file demonstrates all the core functionality with practical examples. - -## Key Concepts - -### Path-based Folder Access - -The module uses a simple path-like syntax to access folders: - -```python -# Access a deeply nested folder -folder_path = "folder1/folder2/folder3" -files = drive.get_files_in_folder(folder_path) -``` - -This makes it much easier to work with nested folder structures compared to using folder IDs. 
- -### Metadata Fields - -The module provides comprehensive metadata for files and folders, including: - -- **Creation and modification dates**: Both as datetime objects and formatted strings -- **File size**: Both in bytes and human-readable format (KB, MB, GB) -- **File type**: Simplified type based on MIME type -- **Owner information**: Names and email addresses of file owners -- **Sharing status**: Whether the file is shared -- **Web links**: Direct links to view the file in a browser - -## Error Handling - -The module includes comprehensive error handling: - -- **Authentication errors**: Clear messages when credentials are missing or invalid -- **Folder not found**: Helpful messages when a folder in the path cannot be found -- **File not found**: Attempts partial name matching before giving up -- **Decoding errors**: Handles issues with file content encoding - -## Dependencies - -- **Required**: - - google-auth-oauthlib - - google-auth-httplib2 - - google-api-python-client - - python-dateutil - -## Security Notes - -- Never commit your `client_secret.json` or token files to version control -- Add `credentials/` to your `.gitignore` file -- Keep your credentials secure and don't share them -- For production applications, consider using service accounts with the minimum required permissions - -## Contributing - -Feel free to contribute to this project by submitting issues or pull requests. 
diff --git a/src/ctp_slack_bot/services/__init__.py b/src/ctp_slack_bot/services/__init__.py index 1afe81850a93156a80252865ae590a4b9036d72f..a2adc9da725a74bdafedf47de9d7c0ec89749514 100644 --- a/src/ctp_slack_bot/services/__init__.py +++ b/src/ctp_slack_bot/services/__init__.py @@ -1,7 +1,10 @@ from ctp_slack_bot.services.answer_retrieval_service import AnswerRetrievalService from ctp_slack_bot.services.content_ingestion_service import ContentIngestionService from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService +from ctp_slack_bot.services.embeddings_model_service import EmbeddingsModelService from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService +from ctp_slack_bot.services.google_drive_service import GoogleDriveService +from ctp_slack_bot.services.language_model_service import LanguageModelService from ctp_slack_bot.services.question_dispatch_service import QuestionDispatchService from ctp_slack_bot.services.slack_service import SlackService from ctp_slack_bot.services.vector_database_service import VectorDatabaseService diff --git a/src/ctp_slack_bot/services/answer_retrieval_service.py b/src/ctp_slack_bot/services/answer_retrieval_service.py index 5fd43e84e57be149525f889d67c5f211cd1de7a4..0801a4c79a45e1592a9408c5c1bc4727e7ea6d0d 100644 --- a/src/ctp_slack_bot/services/answer_retrieval_service.py +++ b/src/ctp_slack_bot/services/answer_retrieval_service.py @@ -1,65 +1,34 @@ -# from asyncio import create_task from loguru import logger -from openai import OpenAI -from pydantic import BaseModel, model_validator -from typing import List, Optional, Self, Tuple +from pydantic import BaseModel +from typing import Collection, Self from ctp_slack_bot.core import Settings -from ctp_slack_bot.models import RetreivedContext, SlackMessage +from ctp_slack_bot.enums import EventType +from ctp_slack_bot.models import Chunk, SlackMessage, SlackResponse from ctp_slack_bot.services.event_brokerage_service import 
EventBrokerageService +from ctp_slack_bot.services.language_model_service import LanguageModelService -class AnswerRetrievalService(BaseModel): # TODO: this should separate the OpenAI backend out into its own service. +class AnswerRetrievalService(BaseModel): """ - Service for language model operations. + Service for context-based answer retrieval from a language model. """ settings: Settings event_brokerage_service: EventBrokerageService - client: OpenAI # TODO: this should separate the OpenAI backend out into its own service, one that is agnostic. + language_model_service: LanguageModelService class Config: - arbitrary_types_allowed = True + frozen=True - @model_validator(mode='after') - def post_init(self: Self) -> Self: + def __init__(self: Self, **data) -> None: + super().__init__(**data) logger.debug("Created {}", self.__class__.__name__) - return self - def generate_answer(self, question: SlackMessage, context: List[RetreivedContext]) -> str: - """Generate a response using OpenAI's API with retrieved context. - - Args: - question (str): The user's question - context (List[RetreivedContext]): List of RetreivedContext - - Returns: - str: Generated answer - """ - # Prepare context string from retrieved chunks - context_str = "" - for c in context: - context_str += f"{c.contextual_text}\n" - - - # logger.info(f"Generating response for question: {question}") - # logger.info(f"Using {len(context)} context chunks") - - # Create messages for the chat completion - messages = [ - {"role": "system", "content": settings.SYSTEM_PROMPT}, - {"role": "user", "content": - f"""Student Auestion: {question.text} - Context from class materials and transcripts: {context_str} - Please answer the Student Auestion based on the Context from class materials and transcripts.
If the context doesn't contain relevant information, acknowledge that and suggest asking the professor."""} - ] - - # Generate response - response = self.client.chat.completions.create( - model=settings.CHAT_MODEL, - messages=messages, - max_tokens=settings.MAX_TOKENS, - temperature=settings.TEMPERATURE - ) - - return response.choices[0].message.content + async def push(self: Self, question: SlackMessage, context: Collection[Chunk]) -> None: + channel_to_respond_to = question.channel + thread_to_respond_to = question.thread_ts if question.thread_ts else question.ts + answer = self.language_model_service.answer_question(question.text, context) + logger.debug("Pushing response to channel {} and thread {}: {}", channel_to_respond_to, thread_to_respond_to, answer) + slack_response = SlackResponse(text=answer, channel=channel_to_respond_to, thread_ts=thread_to_respond_to) + await self.event_brokerage_service.publish(EventType.OUTGOING_SLACK_RESPONSE, slack_response) diff --git a/src/ctp_slack_bot/services/application_database_service.py b/src/ctp_slack_bot/services/application_database_service.py new file mode 100644 index 0000000000000000000000000000000000000000..a581b0728910723fe204f7aac238a860114eca2e --- /dev/null +++ b/src/ctp_slack_bot/services/application_database_service.py @@ -0,0 +1,29 @@ +from datetime import datetime +from loguru import logger +from pydantic import BaseModel, PrivateAttr +from typing import Iterable, Mapping, Self + +from ctp_slack_bot.core import Settings +from ctp_slack_bot.db import MongoDB + + +class ApplicationDatabaseService(BaseModel): + """Service for retrieving and persisting application state.""" + + settings: Settings + mongo_db: MongoDB # TODO: This should be replaced following the repository pattern―one repository class per collection.
+ + class Config: + frozen=True + + def __init__(self: Self, **data) -> None: + super().__init__(**data) + logger.debug("Created {}", self.__class__.__name__) + + async def get_last_modification_times_by_file_paths(self: Self, file_paths: Iterable[str]) -> Mapping[str, datetime]: + """Retrieve the last modification time for each file path.""" + raise NotImplementedError() # TODO + + async def set_last_modification_time_by_file_path(self: Self, file_path: str, modification_time: datetime) -> None: + """Set the last modification time for a file path.""" + raise NotImplementedError() # TODO diff --git a/src/ctp_slack_bot/services/content_ingestion_service.py b/src/ctp_slack_bot/services/content_ingestion_service.py index 6e10f5516644394532780c0db5e4f70c5ea9f7e6..e162c1fee30ff32156be09db68c9702177e73824 100644 --- a/src/ctp_slack_bot/services/content_ingestion_service.py +++ b/src/ctp_slack_bot/services/content_ingestion_service.py @@ -1,8 +1,11 @@ from loguru import logger -from pydantic import BaseModel, model_validator -from typing import Self +from pydantic import BaseModel +from typing import Self, Sequence from ctp_slack_bot.core import Settings +from ctp_slack_bot.enums import EventType +from ctp_slack_bot.models import Chunk, Content, SlackMessage +from ctp_slack_bot.services.event_brokerage_service import EventBrokerageService from ctp_slack_bot.services.vector_database_service import VectorDatabaseService from ctp_slack_bot.services.vectorization_service import VectorizationService @@ -12,10 +15,35 @@ class ContentIngestionService(BaseModel): """ settings: Settings + event_brokerage_service: EventBrokerageService vector_database_service: VectorDatabaseService vectorization_service: VectorizationService - @model_validator(mode='after') - def post_init(self: Self) -> Self: + class Config: + frozen=True + + def __init__(self: Self, **data) -> None: + super().__init__(**data) + self.event_brokerage_service.subscribe(EventType.INCOMING_CONTENT, 
self.process_incoming_content) + self.event_brokerage_service.subscribe(EventType.INCOMING_SLACK_MESSAGE, self.process_incoming_slack_message) logger.debug("Created {}", self.__class__.__name__) - return self + + async def process_incoming_content(self: Self, content: Content) -> None: + logger.debug("Content ingestion service received content with metadata: {}", content.get_metadata()) + # if self.vector_database_service.has_content(content.get_id()) # TODO + # logger.debug("Ignored content with ID {} because it already exists in the database.", content.get_id()) + # return + chunks = content.get_chunks() + await self.__vectorize_and_store_chunks_in_database(chunks) + logger.debug("Stored {} vectorized chunk(s) in the database.", len(chunks)) + + async def process_incoming_slack_message(self: Self, slack_message: SlackMessage) -> None: + logger.debug("Content ingestion service received a Slack message: {}", slack_message.text) + chunks = slack_message.get_chunks() + await self.__vectorize_and_store_chunks_in_database(chunks) + logger.debug("Stored {} vectorized chunk(s) in the database.", len(chunks)) + + async def __vectorize_and_store_chunks_in_database(self: Self, chunks: Sequence[Chunk]) -> None: + vectorized_chunks = self.vectorization_service.vectorize(chunks) # TODO + await self.vector_database_service.store(vectorized_chunks) # TODO + diff --git a/src/ctp_slack_bot/services/context_retrieval_service.py b/src/ctp_slack_bot/services/context_retrieval_service.py index 854057829e7933c17ef4dd28bbed1fac17811520..73bf39829da02dd162c5c2a4d6945f6e335b93ec 100644 --- a/src/ctp_slack_bot/services/context_retrieval_service.py +++ b/src/ctp_slack_bot/services/context_retrieval_service.py @@ -1,9 +1,9 @@ from loguru import logger -from pydantic import BaseModel, model_validator -from typing import Self, List +from pydantic import BaseModel +from typing import Self, Sequence from ctp_slack_bot.core.config import Settings -from ctp_slack_bot.models import 
RetreivedContext, SlackMessage, VectorQuery +from ctp_slack_bot.models import Chunk, SlackMessage, VectorQuery, VectorizedChunk from ctp_slack_bot.services.vector_database_service import VectorDatabaseService from ctp_slack_bot.services.vectorization_service import VectorizationService @@ -16,57 +16,51 @@ class ContextRetrievalService(BaseModel): vectorization_service: VectorizationService vector_database_service: VectorDatabaseService - @model_validator(mode='after') - def post_init(self: Self) -> Self: + class Config: + frozen=True + + def __init__(self: Self, **data) -> None: + super().__init__(**data) logger.debug("Created {}", self.__class__.__name__) - return self - - def get_context(self, message: SlackMessage) -> List[RetreivedContext]: + + async def get_context(self: Self, message: SlackMessage) -> Sequence[Chunk]: """ - Retrieve relevant context for a given Slack message. - - This function: - 1. Extracts the question text from the message - 2. Vectorizes the question using VectorizationService - 3. Queries VectorDatabaseService for similar context - 4. Returns the relevant context as a list of RetreivedContext objects + Retrieve relevant context for a given SlackMessage by vectorizing the message and + querying the vectorstore. 
Args: message: The SlackMessage containing the user's question Returns: - List[RetreivedContext]: List of retrieved context items with similarity scores + Sequence[Chunk]: List of retrieved context items with similarity scores """ - if not message.is_question: - logger.debug(f"Message {message.key} is not a question, skipping context retrieval") + # Extract chunks from the message + message_chunks = message.get_chunks() + + # Vectorize the chunks + vectorized_chunks = self.vectorization_service.vectorize(message_chunks) + + # Create vector query using the first chunk's embedding (typically there's only one chunk for a message) + if not vectorized_chunks: + logger.warning("No vectorized chunks were created for message") return [] + + query = VectorQuery( + query_embeddings=vectorized_chunks[0].embedding, + k=self.settings.TOP_K_MATCHES, + score_threshold=self.settings.SCORE_THRESHOLD, + filter_metadata=None # Can be expanded to include filters based on message metadata + ) + # Perform similarity search try: - # Vectorize the message text - embeddings = self.vectorization_service.get_embeddings([message.text]) - if embeddings is None or len(embeddings) == 0: - logger.error(f"Failed to generate embedding for message: {message.key}") - return [] - - query_embedding = embeddings[0].tolist() - - # Create vector query - vector_query = VectorQuery( - query_text=message.text, - k=self.settings.TOP_K_MATCHES, - score_threshold=0.7 # Minimum similarity threshold - ) - - # Search for similar content in vector database - context_results = self.vector_database_service.search_by_similarity( - query=vector_query, - query_embedding=query_embedding - ) - - logger.info(f"Retrieved {len(context_results)} context items for message: {message.key}") - return context_results - + results = await self.vector_database_service.search_by_similarity(query) + # logger.info(f"Retrieved {len(results)} context chunks for query") + return results except Exception as e: - logger.error(f"Error 
retrieving context for message {message.key}: {str(e)}") + logger.error(f"Error retrieving context: {str(e)}") return [] - \ No newline at end of file + + # test return statement + # return (VectorizedChunk(text="Mock context chunk", parent_id="lol", chunk_id="no", metadata={}, embedding=tuple()), + # VectorizedChunk(text="Moar mock context chunk", parent_id="lol", chunk_id="wut", metadata={}, embedding=tuple())) diff --git a/src/ctp_slack_bot/services/embeddings_model_service.py b/src/ctp_slack_bot/services/embeddings_model_service.py new file mode 100644 index 0000000000000000000000000000000000000000..16ca354f05596693dddb4527d6da3d58ffcb73a6 --- /dev/null +++ b/src/ctp_slack_bot/services/embeddings_model_service.py @@ -0,0 +1,47 @@ +from loguru import logger +from openai import OpenAI +from pydantic import BaseModel, PrivateAttr +from typing import Any, Dict, Sequence, Self + +from ctp_slack_bot.core import Settings + +class EmbeddingsModelService(BaseModel): + """ + Service for embeddings model operations. + """ + + settings: Settings + _open_ai_client: PrivateAttr = PrivateAttr() + + class Config: + frozen=True + + def __init__(self: Self, **data: Dict[str, Any]) -> None: + super().__init__(**data) + self._open_ai_client = OpenAI(api_key=self.settings.OPENAI_API_KEY.get_secret_value()) + logger.debug("Created {}", self.__class__.__name__) + + def get_embeddings(self: Self, texts: Sequence[str]) -> Sequence[Sequence[float]]: + """Get embeddings for a collection of texts using OpenAI’s API. + + Args: + texts (Sequence[str]): Sequence of text chunks to embed + + Returns: + Sequence[Sequence[float]]: One embedding per text, each expected to have VECTOR_DIMENSION entries + + Raises: + ValueError: If the embedding dimensions don't match expected size + """ + logger.debug("Creating embeddings for {} text string(s)…", len(texts)) + response = self._open_ai_client.embeddings.create( + model=self.settings.EMBEDDING_MODEL, + input=texts, + encoding_format="float" # Ensure we get raw float values.
+ ) + embeddings = tuple(tuple(data.embedding) for data in response.data) + match embeddings: + case (first, _) if len(first) != self.settings.VECTOR_DIMENSION: + logger.error("Embedding dimension mismatch and/or misconfiguration: expected configured dimension {}, but got {}.", self.settings.VECTOR_DIMENSION, len(first)) + raise ValueError() # TODO: raise a more specific type. + return embeddings diff --git a/src/ctp_slack_bot/services/event_brokerage_service.py b/src/ctp_slack_bot/services/event_brokerage_service.py index 8a8fccaab6183b66b152b325800f27bf4ca70220..adae3948df50f826a6b545549683c110caca0429 100644 --- a/src/ctp_slack_bot/services/event_brokerage_service.py +++ b/src/ctp_slack_bot/services/event_brokerage_service.py @@ -1,38 +1,47 @@ -# from asyncio import create_task +from asyncio import create_task, iscoroutinefunction, to_thread +from collections import defaultdict from loguru import logger -from openai import OpenAI -from pydantic import BaseModel, model_validator +from pydantic import BaseModel, PrivateAttr from typing import Any, Callable, Dict, List, Self -from ctp_slack_bot.core import Settings -from ctp_slack_bot.models import RetreivedContext, SlackMessage -from ctp_slack_bot.services.content_ingestion_service import ContentIngestionService -from ctp_slack_bot.services.context_retrieval_service import ContextRetrievalService - +from ctp_slack_bot.enums import EventType class EventBrokerageService(BaseModel): """ Service for brokering events between services. 
""" - subscribers: Dict[str, List[Callable]] = {} + _subscribers: PrivateAttr = PrivateAttr(default_factory=lambda: defaultdict(list)) class Config: - arbitrary_types_allowed = True + frozen=True - @model_validator(mode='after') - def post_init(self: Self) -> Self: + def __init__(self: Self, **data) -> None: + super().__init__(**data) logger.debug("Created {}", self.__class__.__name__) - return self - def subscribe(self: Self, event_type: str, callback: Callable) -> None: + def subscribe(self: Self, type: EventType, callback: Callable) -> None: """Subscribe to an event type with a callback function.""" - if event_type not in self.subscribers: - self.subscribers[event_type] = [] - self.subscribers[event_type].append(callback) - - def publish(self: Self, event_type: str, data: Any = None) -> None: + logger.debug("1 new subscriber is listening for {} events.", type) + subscribers = self._subscribers[type] + subscribers.append(callback) + logger.debug("Event type {} has {} subscriber(s).", type, len(subscribers)) + + async def publish(self: Self, type: EventType, data: Any = None) -> None: """Publish an event with optional data to all subscribers.""" - if event_type in self.subscribers: - for callback in self.subscribers[event_type]: - callback(data) + subscribers = self._subscribers[type] + if not subscribers: + logger.debug("No subscribers handle event {}: {}", type, len(subscribers), data) + return + logger.debug("Broadcasting event {} to {} subscriber(s): {}", type, len(subscribers), data) + for callback in subscribers: + if iscoroutinefunction(callback): + task = create_task(callback(data)) + task.add_done_callback(lambda done_task: logger.error("Error in asynchronous event callback handling event {}: {}", type, done_task.exception()) + if done_task.exception() + else None) + else: + try: + create_task(to_thread(callback, data)) + except Exception as e: + logger.error("Error scheduling synchronous callback to handle event {}: {}", type, e) diff --git 
a/src/ctp_slack_bot/services/google_drive_access.py b/src/ctp_slack_bot/services/google_drive_access.py deleted file mode 100644 index 0a9dd6fa370897ef681937eea136361712add05c..0000000000000000000000000000000000000000 --- a/src/ctp_slack_bot/services/google_drive_access.py +++ /dev/null @@ -1,623 +0,0 @@ -""" -Easy Google Drive Access - -A simplified module for accessing Google Drive files in nested folders. -Designed to make it as easy as possible to access files using path-like syntax. -""" - -import os -import pickle -import io -import datetime -from typing import List, Dict, Optional, Any, Union - -from google.oauth2.credentials import Credentials -from google_auth_oauthlib.flow import InstalledAppFlow -from google.auth.transport.requests import Request -from googleapiclient.discovery import build -from googleapiclient.http import MediaIoBaseDownload -from googleapiclient.errors import HttpError - - -class EasyGoogleDrive: - """ - Simplified Google Drive access focused on accessing files in nested folders. 
def _get_credentials(self) -> Credentials:
    """Load, refresh, or interactively obtain Google Drive API credentials.

    A saved token is read from ``self.token_path`` when that file exists. If
    the token is missing, unreadable, or not valid, it is refreshed when it
    is expired and has a refresh token; otherwise the installed-app OAuth
    flow is run from ``self.credentials_path``. New or refreshed credentials
    are written back to ``self.token_path``.

    Returns:
        The credentials to build the Drive client with.

    Raises:
        FileNotFoundError: If the OAuth flow is needed but the client secrets
            file does not exist.
    """
    creds = None

    # Load existing token if it exists
    if os.path.exists(self.token_path):
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file. Only safe while the credentials directory is writable by
        # trusted users alone; consider a JSON token format instead.
        try:
            with open(self.token_path, 'rb') as token:
                creds = pickle.load(token)
        except (pickle.UnpicklingError, EOFError) as error:
            # A truncated or corrupt token should trigger re-authorisation,
            # not crash construction of the client.
            print(f"Ignoring unreadable token file {self.token_path}: {error}")
            creds = None

    if creds and creds.valid:
        return creds

    if creds and creds.expired and creds.refresh_token:
        creds.refresh(Request())
    else:
        if not os.path.exists(self.credentials_path):
            raise FileNotFoundError(
                f"Client secrets file not found at {self.credentials_path}. "
                "Please follow the setup instructions in the README."
            )

        flow = InstalledAppFlow.from_client_secrets_file(
            self.credentials_path, self.SCOPES)
        creds = flow.run_local_server(port=0)

    # Save the credentials for future use
    with open(self.token_path, 'wb') as token:
        pickle.dump(creds, token)

    return creds
def get_folder_id(self, folder_path: str) -> Optional[str]:
    """
    Get a folder ID from a path like 'folder1/folder2/folder3'.

    Each path component is looked up by exact name first and, if that finds
    nothing, by a "name contains" search; the first match is used. Every
    resolved prefix is stored in ``self.folder_id_cache``.

    Args:
        folder_path: Path to the folder, using '/' as separator

    Returns:
        The folder ID if found, None otherwise
    """
    # Check if we've already resolved this path
    if folder_path in self.folder_id_cache:
        return self.folder_id_cache[folder_path]

    # If it looks like an ID already, return it
    if len(folder_path) > 25 and '/' not in folder_path:
        return folder_path

    # Start from the root
    current_folder_id = None
    current_path = ""

    # Traverse the path one folder at a time
    for folder_name in folder_path.split('/'):
        if not folder_name:  # Skip empty parts
            continue

        # Update the current path for caching
        current_path = f"{current_path}/{folder_name}" if current_path else folder_name

        # Check if we've already resolved this subpath
        if current_path in self.folder_id_cache:
            current_folder_id = self.folder_id_cache[current_path]
            continue

        # Backslashes and single quotes must be escaped inside a Drive query
        # string literal, or a name such as "Bob's notes" breaks the query.
        escaped_name = folder_name.replace("\\", "\\\\").replace("'", "\\'")
        parent_clause = f" and '{current_folder_id}' in parents" if current_folder_id else ""

        try:
            files = []
            # Exact match first, then a more flexible search if it finds nothing.
            for name_clause in (f"name='{escaped_name}'", f"name contains '{escaped_name}'"):
                results = self.service.files().list(
                    q=f"mimeType='{self.MIME_TYPES['folder']}' and {name_clause}{parent_clause}",
                    spaces='drive',
                    fields='files(id, name)',
                    pageSize=10
                ).execute()
                files = results.get('files', [])
                if files:
                    break
        except HttpError as error:
            print(f"Error finding folder: {error}")
            return None

        if not files:
            print(f"Could not find folder '{folder_name}' in path '{folder_path}'")
            return None

        # Use the first match
        current_folder_id = files[0]['id']

        # Cache this result
        self.folder_id_cache[current_path] = current_folder_id

    return current_folder_id
- - Args: - folder_path: Path to the folder, using '/' as separator - include_metadata: Whether to include detailed metadata (default: True) - include_content: Whether to include file content (default: False) - - Returns: - List of file metadata dictionaries, optionally including file content - """ - # Get the folder ID - folder_id = self.get_folder_id(folder_path) - if not folder_id: - print(f"Could not find folder: '{folder_path}'") - return [] - - # List all non-folder files in this folder - query = f"'{folder_id}' in parents and mimeType != '{self.MIME_TYPES['folder']}'" - - try: - results = self.service.files().list( - q=query, - spaces='drive', - fields=f'files({self.FILE_FIELDS})' if include_metadata else 'files(id, name, mimeType)', - pageSize=1000 - ).execute() - - files = results.get('files', []) - - # Format metadata if requested - if include_metadata and files: - files = [self._format_metadata(file) for file in files] - - # Add file content if requested - if include_content and files: - for file in files: - try: - # Skip files that are likely not text-based - if any(ext in file['name'].lower() for ext in ['.jpg', '.png', '.gif', '.mp3', '.mp4']): - print(f"Skipping content for non-text file: {file['name']}") - file['file_content'] = None - continue - - # Read the file content - content = self.read_file_from_object(file) - file['file_content'] = content - - if content is not None: - print(f"Successfully read content for: {file['name']} ({len(content)} characters)") - else: - print(f"Unable to read content for: {file['name']}") - except Exception as e: - print(f"Error reading content for {file['name']}: {e}") - file['file_content'] = None - - if files: - print(f"Found {len(files)} files in '{folder_path}':") - for file in files: - if include_metadata and 'createdTimeFormatted' in file: - print(f" - {file['name']} ({file.get('fileType', 'Unknown')}, Created: {file['createdTimeFormatted']})") - else: - print(f" - {file['name']} ({file.get('mimeType', 
def get_file(self, file_name: str, folder_path: str, include_metadata: bool = True, include_content: bool = False) -> Optional[Dict[str, Any]]:
    """
    Get a specific file by name from a folder.

    The file is searched by exact name first and, if that finds nothing, by
    a "name contains" search; the first match is used.

    Args:
        file_name: Name of the file to get
        folder_path: Path to the folder containing the file
        include_metadata: Whether to include detailed metadata (default: True)
        include_content: Whether to include file content (default: False)

    Returns:
        File metadata dictionary, optionally including content, or None if file not found
    """
    # Get the folder ID
    folder_id = self.get_folder_id(folder_path)
    if not folder_id:
        print(f"Could not find folder: '{folder_path}'")
        return None

    # Backslashes and single quotes must be escaped inside a Drive query
    # string literal, or a name such as "it's.txt" breaks the query.
    escaped_name = file_name.replace("\\", "\\\\").replace("'", "\\'")
    fields = f'files({self.FILE_FIELDS})' if include_metadata else 'files(id, name, mimeType)'
    # (name clause, page size): exact match first, then a more flexible search.
    attempts = (
        (f"name = '{escaped_name}'", 1),
        (f"name contains '{escaped_name}'", 10),
    )
    # Suffixes of files that are likely not text-based.
    non_text_suffixes = ('.jpg', '.png', '.gif', '.mp3', '.mp4')

    try:
        files = []
        for name_clause, page_size in attempts:
            results = self.service.files().list(
                q=f"'{folder_id}' in parents and {name_clause}",
                spaces='drive',
                fields=fields,
                pageSize=page_size
            ).execute()
            files = results.get('files', [])
            if files:
                break

        if not files:
            print(f"Could not find file '{file_name}' in '{folder_path}'")
            return None

        # Use the first match
        file = files[0]

        # Format metadata if requested
        if include_metadata:
            file = self._format_metadata(file)

        # Add file content if requested
        if include_content:
            try:
                # Match on the suffix only: a substring test would also skip
                # text files such as "notes.jpg.txt".
                if file['name'].lower().endswith(non_text_suffixes):
                    print(f"Skipping content for non-text file: {file['name']}")
                    file['file_content'] = None
                else:
                    # Read the file content
                    content = self.read_file_from_object(file)
                    file['file_content'] = content

                    if content is not None:
                        print(f"Successfully read content for: {file['name']} ({len(content)} characters)")
                    else:
                        print(f"Unable to read content for: {file['name']}")
            except Exception as e:
                # Best effort: a content failure must not lose the metadata.
                print(f"Error reading content for {file['name']}: {e}")
                file['file_content'] = None

        print(f"Found file: {file['name']}")
        return file

    except HttpError as error:
        print(f"Error getting file: {error}")
        return None
def file_exists(self, file_name: str, folder_path: str) -> bool:
    """
    Check if a file exists at the specified path in Google Drive.

    An exact-name search is tried first, then a "name contains" search; any
    match from either counts as the file existing.

    Args:
        file_name: Name of the file to check
        folder_path: Path to the folder containing the file

    Returns:
        True if the file exists, False otherwise (including when the folder
        cannot be found or the Drive API reports an error)
    """
    # Get the folder ID
    folder_id = self.get_folder_id(folder_path)
    if not folder_id:
        print(f"Could not find folder: '{folder_path}'")
        return False

    # Backslashes and single quotes must be escaped inside a Drive query
    # string literal, or a name such as "it's.txt" breaks the query.
    escaped_name = file_name.replace("\\", "\\\\").replace("'", "\\'")
    # (name clause, page size): exact match first, then a more flexible search.
    attempts = (
        (f"name = '{escaped_name}'", 1),
        (f"name contains '{escaped_name}'", 10),
    )

    try:
        for name_clause, page_size in attempts:
            results = self.service.files().list(
                q=f"'{folder_id}' in parents and {name_clause}",
                spaces='drive',
                fields='files(id, name)',
                pageSize=page_size
            ).execute()
            if results.get('files', []):
                # File exists
                print(f"File '{file_name}' exists in '{folder_path}'")
                return True
    except HttpError as error:
        print(f"Error checking if file exists: {error}")
        return False

    print(f"File '{file_name}' does not exist in '{folder_path}'")
    return False
def _print_preview(items, noun, describe, empty_message, showing="Showing first 3", remainder_noun=None, limit=3):
    """Print a count line, describe the first `limit` items, and summarize the rest.

    Prints `empty_message` instead when `items` is empty; always ends with a
    blank line. `describe` is called with each shown item and does the printing.
    """
    if items:
        print(f"Found {len(items)} {noun}. {showing}:")
        for item in items[:limit]:
            describe(item)
        if len(items) > limit:
            print(f" ... and {len(items) - limit} more {remainder_noun or noun}")
    else:
        print(empty_message)
    print()


def main():
    """
    Main function demonstrating the basic usage of EasyGoogleDrive.

    Runs nine read-only examples against a fixed folder and prints a short
    preview of each result.
    """
    # Initialize the Google Drive client
    # This will prompt for authentication the first time it's run
    drive = EasyGoogleDrive()

    # Example folder path - replace with your actual folder path
    folder_path = "Spring-2025-BAI"
    subfolder_path = "Spring-2025-BAI/transcripts"

    print("=== Basic Usage Examples for EasyGoogleDrive ===\n")

    # Example 1: List folders in a directory
    print("Example 1: Listing folders in a directory")
    print("----------------------------------------")
    folders = drive.get_folders_in_folder(folder_path)
    _print_preview(
        folders,
        "folders",
        lambda folder: print(f" - {folder['name']} (Created: {folder.get('createdTimeFormatted', 'Unknown')})"),
        "No folders found.",
    )

    # Example 2: List files in a directory
    print("Example 2: Listing files in a directory")
    print("--------------------------------------")
    files = drive.get_files_in_folder(subfolder_path)

    def describe_file(file):
        file_type = file.get('fileType', 'Unknown')
        created_time = file.get('createdTimeFormatted', 'Unknown')
        print(f" - {file['name']} ({file_type}, Created: {created_time})")

    _print_preview(files, "files", describe_file, "No files found.")

    # Example 3: Get a specific file
    print("Example 3: Getting a specific file")
    print("--------------------------------")
    # Use the last file listed in the previous example, or a default if none were found
    file_name = files[-1]['name'] if files else "example.txt"

    file = drive.get_file(file_name, subfolder_path, include_metadata=True)
    if file:
        print(f"File found: {file['name']}")
        print(f" Type: {file.get('fileType', 'Unknown')}")
        print(f" Created: {file.get('createdTimeFormatted', 'Unknown')}")
        print(f" Modified: {file.get('modifiedTimeFormatted', 'Unknown')}")
        print(f" Size: {file.get('sizeFormatted', 'Unknown')}")
    else:
        print(f"File '{file_name}' not found.")
    print()

    # Example 4: Get all items in a folder (files and folders)
    print("Example 4: Getting all items in a folder")
    print("--------------------------------------")
    all_items = drive.get_all_files_in_folder(folder_path)

    def describe_item(item):
        item_type = "Folder" if item.get('mimeType') == drive.MIME_TYPES['folder'] else item.get('fileType', 'Unknown')
        created_time = item.get('createdTimeFormatted', 'Unknown')
        print(f" - {item['name']} ({item_type}, Created: {created_time})")

    _print_preview(all_items, "items", describe_item, "No items found.")

    # Example 5: Check if a file exists (same file name as Example 3)
    print("Example 5: Checking if a file exists")
    print("----------------------------------")
    exists = drive.file_exists(file_name, subfolder_path)
    print(f"File '{file_name}' {'exists' if exists else 'does not exist'} in '{subfolder_path}'.")
    print()

    # Example 6: Get file modified time (same file name as Example 3)
    print("Example 6: Getting file modified time")
    print("-----------------------------------")
    modified_time = drive.get_file_modified_time(file_name, subfolder_path)
    if modified_time:
        print(f"File '{file_name}' was last modified on: {modified_time}")
    else:
        print(f"Could not get modified time for '{file_name}'.")
    print()

    # Example 7: Get file with content (same file name as Example 3)
    print("Example 7: Getting file with content")
    print("----------------------------------")
    file_with_content_obj = drive.get_file(file_name, subfolder_path, include_content=True)
    if file_with_content_obj and 'file_content' in file_with_content_obj:
        content = file_with_content_obj['file_content']
        if content:
            print(f"File '{file_name}' content (first 100 chars):")
            print(f" {content[:100]}...")
        else:
            print(f"File '{file_name}' has no content or content could not be read.")
    else:
        print(f"File '{file_name}' not found or content could not be retrieved.")
    print()

    # Example 8: Get contents of all files in a folder
    print("Example 8: Getting contents of all files in a folder")
    print("------------------------------------------------")
    # Get all files with content
    all_files_with_content = drive.get_files_in_folder(subfolder_path, include_content=True)

    def describe_content(file):
        print(f" File: {file['name']}")
        content = file.get('file_content')
        if content:
            print(f" Content preview: {content[:50]}...")
        else:
            print(" No content available or file is not text-based.")

    _print_preview(
        all_files_with_content,
        "files",
        describe_content,
        "No files found or no content could be retrieved.",
        showing="Showing content preview for first 3",
        remainder_noun="files with content",
    )

    # Example 9: Get content from a specific file using read_file_from_object
    print("Example 9: Getting content from a specific file using read_file_from_object")
    print("------------------------------------------------------------------------")
    # Get a file object first
    file_obj = drive.get_file(file_name, subfolder_path)

    if file_obj:
        # Read the content directly from the file object
        content = drive.read_file_from_object(file_obj)
        if content:
            print(f"File '{file_obj['name']}' content (first 100 chars):")
            print(f" {content[:100]}...")
        else:
            print(f"File '{file_obj['name']}' has no content or content could not be read.")
    else:
        print(f"File '{file_name}' not found.")
    print()

    print("=== End of Examples ===")
import BytesIO +from loguru import logger +from pydantic import BaseModel, PrivateAttr +from typing import Collection, Dict, List, Optional, Self + +from ctp_slack_bot.core import Settings +from ctp_slack_bot.models import GoogleDriveMetadata + + +FOLDER_MIME_TYPE: str = "application/vnd.google-apps.folder" +PATH_SEPARATOR: str = "/" + + +class GoogleDriveService(BaseModel): + """Service for interacting with Google Drive.""" + + settings: Settings + _google_drive_client: PrivateAttr = PrivateAttr() + _folder_cache: PrivateAttr = PrivateAttr(default_factory=lambda: TTLCache(maxsize=256, ttl=60)) + + class Config: + frozen=True + + def __init__(self: Self, **data) -> None: + super().__init__(**data) + credentials = service_account.Credentials.from_service_account_info({ + "type": "service_account", + "project_id": self.settings.GOOGLE_PROJECT_ID, + "private_key_id": self.settings.GOOGLE_PRIVATE_KEY_ID.get_secret_value(), + "private_key": self.settings.GOOGLE_PRIVATE_KEY.get_secret_value(), + "client_email": self.settings.GOOGLE_CLIENT_EMAIL, + "client_id": self.settings.GOOGLE_CLIENT_ID, + "token_uri": self.settings.GOOGLE_TOKEN_URI, + }, scopes=["https://www.googleapis.com/auth/drive"]) + self._google_drive_client = build('drive', 'v3', credentials=credentials) + logger.debug("Created {}", self.__class__.__name__) + + def _resolve_folder_id(self: Self, folder_path: str) -> Optional[str]: + """Resolve a folder path to a Google Drive ID.""" + + if not folder_path: + return self.settings.GOOGLE_DRIVE_ROOT_ID + + if folder_path in self._folder_cache: + return self._folder_cache[folder_path] + + current_id = self.settings.GOOGLE_DRIVE_ROOT_ID + try: + for part in folder_path.split(PATH_SEPARATOR): + results = self._google_drive_client.files().list( + q=f"name='{part.replace("\\", "\\\\").replace("'", "\\'")}' and mimeType='{FOLDER_MIME_TYPE}' and '{current_id}' in parents", + fields="files(id,name)", + supportsAllDrives=True, + includeItemsFromAllDrives=True + 
).execute() + match results: + case {"files": [ {"id": id} ]}: + current_id = id + case _: + logger.debug("Folder not found by path: {}", folder_path) + return None + except HttpError as e: + logger.error("Error resolving folder path: {}", folder_path) + return None + + self._folder_cache[folder_path] = current_id + return current_id + + def list_directory(self: Self, folder_path: str) -> Collection[GoogleDriveMetadata]: + """List contents of a directory with basic metadata.""" + + folder_id = self._resolve_folder_id(folder_path) + if not folder_id: + logger.debug("Folder not found by path: {}", folder_path) + return () + + try: + results = self._google_drive_client.files().list( + q=f"'{folder_id}' in parents", + fields="files(id,name,mimeType,modifiedTime)", + supportsAllDrives=True, + includeItemsFromAllDrives=True, + pageSize=1000 + ).execute() + return tuple(GoogleDriveMetadata.from_folder_path_and_dict(folder_path, result) + for result + in results.get('files', ())) + except HttpError as e: + logger.error("Error listing folder by path, {}: {}", folder_path, e) + return () + + def get_metadata(self: Self, item_path: str) -> Optional[GoogleDriveMetadata]: + """Get metadata for a specific file/folder by path.""" + + match item_path.rsplit(PATH_SEPARATOR, 1): + case [item_name]: + folder_path = "" + folder_id = self.settings.GOOGLE_DRIVE_ROOT_ID + case [folder_path, item_name]: + folder_id = self._resolve_folder_id(folder_path) + + if not folder_id: + logger.debug("Folder not found by path: {}", folder_path) + return None + + try: + results = self._google_drive_client.files().list( + q=f"name='{item_name}' and '{folder_id}' in parents", + fields="files(id,name,mimeType,modifiedTime)", + supportsAllDrives=True, + includeItemsFromAllDrives=True, + pageSize=1 + ).execute() + match results: + case {"files": [result]}: + return GoogleDriveMetadata.from_folder_path_and_dict(folder_path, result) + except HttpError as e: + logger.error("Error getting metadata for item by 
path, {}: {}", item_path, e) + + logger.debug("Item not found by path: {}", item_path) + return None + + def read_file_by_id(self: Self, file_id: str) -> Optional[bytes]: + """Read contents of a file by its unique identifier.""" + + try: + request = self._google_drive_client.files().get_media(fileId=file_id) + buffer = BytesIO() + downloader = MediaIoBaseDownload(buffer, request) + done = False + while not done: + _, done = downloader.next_chunk() + return buffer.getvalue() + except HttpError as e: + logger.error("Error reading file by ID, {}: {}", file_id, e) + return None diff --git a/src/ctp_slack_bot/services/language_model_service.py b/src/ctp_slack_bot/services/language_model_service.py new file mode 100644 index 0000000000000000000000000000000000000000..f3dbb00978181d9decb1b3f7dc1ec7f4c15a52ca --- /dev/null +++ b/src/ctp_slack_bot/services/language_model_service.py @@ -0,0 +1,55 @@ +from loguru import logger +from openai import OpenAI +from openai.types.chat import ChatCompletion +from pydantic import BaseModel, PrivateAttr +from typing import Collection, Self + +from ctp_slack_bot.core import Settings +from ctp_slack_bot.models import Chunk + +class LanguageModelService(BaseModel): + """ + Service for language model operations. + """ + + settings: Settings + _open_ai_client: PrivateAttr = PrivateAttr() + + class Config: + frozen=True + + def __init__(self: Self, **data) -> None: + super().__init__(**data) + self._open_ai_client = OpenAI(api_key=self.settings.OPENAI_API_KEY.get_secret_value()) + logger.debug("Created {}", self.__class__.__name__) + + def answer_question(self, question: str, context: Collection[Chunk]) -> str: + """Generate a response using OpenAI’s API with retrieved context. 
+ + Args: + question (str): The user’s question + context (List[RetreivedContext]): The context retreived for answering the question + + Returns: + str: Generated answer + """ + logger.debug("Generating response for question ā€œ{}ā€ using {} context chunks…", question, len(context)) + messages = [ + {"role": "system", "content": self.settings.SYSTEM_PROMPT}, + {"role": "user", "content": + f"""Student Question: {question} + + Context from class materials and transcripts: + {'\n'.join(chunk.text for chunk in context)} + + Please answer the Student Question based on the Context from class materials and transcripts. If the context doesn’t contain relevant information, acknowledge that and suggest asking the professor."""} + ] + response: ChatCompletion = self._open_ai_client.chat.completions.create( + model=self.settings.CHAT_MODEL, + messages=messages, + max_tokens=self.settings.MAX_TOKENS, + temperature=self.settings.TEMPERATURE + ) + + return response.choices[0].message.content + # return f"Mock response to ā€œ{question}ā€" diff --git a/src/ctp_slack_bot/services/question_dispatch_service.py b/src/ctp_slack_bot/services/question_dispatch_service.py index be7464caba78f3b3bc90b29d1d41537c1919e2e8..1d53a742c280bf66c790abe447821a92f24c8002 100644 --- a/src/ctp_slack_bot/services/question_dispatch_service.py +++ b/src/ctp_slack_bot/services/question_dispatch_service.py @@ -1,11 +1,11 @@ # from asyncio import create_task from loguru import logger -from openai import OpenAI -from pydantic import BaseModel, model_validator -from typing import List, Optional, Self, Tuple +from pydantic import BaseModel +from typing import Self from ctp_slack_bot.core import Settings -from ctp_slack_bot.models import RetreivedContext, SlackMessage +from ctp_slack_bot.enums import EventType +from ctp_slack_bot.models import Chunk, SlackMessage from ctp_slack_bot.services.answer_retrieval_service import AnswerRetrievalService from ctp_slack_bot.services.context_retrieval_service import 
class ScheduleService(BaseModel):
    """
    Service for running scheduled tasks.

    Wraps an APScheduler ``AsyncIOScheduler`` configured with the time zone
    named by ``settings.SCHEDULER_TIMEZONE``.
    """

    settings: Settings
    _scheduler: PrivateAttr = PrivateAttr()

    class Config:
        frozen=True

    def __init__(self: Self, **data) -> None:
        """Create the scheduler, then register jobs on it."""
        super().__init__(**data)
        zone = self.settings.SCHEDULER_TIMEZONE
        # The scheduler must exist before jobs are configured: _configure_jobs
        # is meant to call self._scheduler.add_job(...).
        self._scheduler = AsyncIOScheduler(timezone=timezone(zone))
        self._configure_jobs()
        logger.debug("Created {}", self.__class__.__name__)

    def _configure_jobs(self: Self) -> None:
        """Register scheduled jobs on the scheduler (currently none)."""
        # Example jobs (uncomment and implement as needed)
        # self._scheduler.add_job(
        #     send_error_report,
        #     CronTrigger(hour=7, minute=0),
        #     id="daily_error_report",
        #     name="Daily Error Report",
        #     replace_existing=True,
        # )
        # self._scheduler.add_job(
        #     cleanup_old_transcripts,
        #     CronTrigger(day_of_week="sun", hour=1, minute=0),
        #     id="weekly_transcript_cleanup",
        #     name="Weekly Transcript Cleanup",
        #     replace_existing=True,
        # )
        pass

    def start(self: Self) -> None:
        """Start the underlying scheduler."""
        self._scheduler.start()

    def stop(self: Self) -> None:
        """Shut the scheduler down if it is running; otherwise only log."""
        if self._scheduler.running:
            self._scheduler.shutdown()
        else:
            logger.debug("The scheduler is not running. There is no scheduler to shut down.")
""" - settings: Settings event_brokerage_service: EventBrokerageService + slack_bolt_app: AsyncApp - @model_validator(mode='after') - def post_init(self: Self) -> Self: + class Config: + arbitrary_types_allowed = True + frozen=True + + def __init__(self: Self, **data) -> None: + super().__init__(**data) + self.event_brokerage_service.subscribe(EventType.OUTGOING_SLACK_RESPONSE, self.send_message) logger.debug("Created {}", self.__class__.__name__) - return self + + def adapt_event_payload(self: Self, event: Mapping[str, Any]) -> SlackMessage: + return SlackMessage( + type=event.get("type"), + subtype=event.get("subtype"), + channel=event.get("channel"), + channel_type=event.get("channel_type"), + user=event.get("user"), + bot_id=event.get("bot_id"), + thread_ts=event.get("thread_ts"), + text=event.get("text", ""), + ts=event.get("ts"), + event_ts=event.get("event_ts") + ) + + async def process_message(self: Self, event: Mapping[str, Any]) -> None: + slack_message = self.adapt_event_payload(event.get("event", {})) + logger.debug("Received message from Slack: {}", slack_message) + await self.event_brokerage_service.publish(EventType.INCOMING_SLACK_MESSAGE, slack_message) + + async def send_message(self: Self, message: SlackResponse) -> None: + await self.slack_bolt_app.client.chat_postMessage(channel=message.channel, text=message.text, thread_ts=message.thread_ts) + + async def handle_message_event(self: Self, body: Mapping[str, Any]) -> None: + logger.debug("Ignored regular message: {}", body.get("event", {}).get("text")) + # await self.process_message(body) + + async def handle_app_mention_event(self: Self, body: Mapping[str, Any]) -> None: + logger.debug("Received app mention for processing: {}", body.get("event", {}).get("text")) + await self.process_message(body) + + def register(self: Self) -> None: + self.slack_bolt_app.event("message")(self.handle_message_event) + self.slack_bolt_app.event("app_mention")(self.handle_app_mention_event) + 
async def store(self: Self, chunks: Collection[VectorizedChunk]) -> None:
    """
    Stores vectorized chunks and their embedding vectors in the database.

    Args:
        chunks: Collection of VectorizedChunk objects to store

    Returns: None

    Raises:
        Exception: Any error raised while obtaining the collection, creating
            indexes, or inserting is logged and re-raised unchanged.
    """
    if not chunks:
        logger.debug("No chunks to store")
        return

    try:
        # Get the vector collection (presumably created on demand by the
        # MongoDB wrapper; confirm against MongoDB.get_collection).
        logger.debug("Getting vectors collection for storing {} chunks", len(chunks))
        vector_collection = await self.mongo_db.get_collection("vectors")

        # Ensure vector search index exists
        logger.debug("Creating vector search index for vectors collection")
        await self.mongo_db.create_indexes("vectors")

        # Create documents to store, ensuring compatibility with BSON:
        # embeddings are converted to plain lists when they are not lists already.
        documents = [
            {
                "text": chunk.text,
                "embedding": chunk.embedding if isinstance(chunk.embedding, list) else list(chunk.embedding),
                "metadata": chunk.metadata,
                "parent_id": chunk.parent_id,
                "chunk_id": chunk.chunk_id
            }
            for chunk in chunks
        ]

        # Insert into collection as a batch
        logger.debug("Inserting {} documents into vectors collection", len(documents))
        result = await vector_collection.insert_many(documents)
        logger.info("Stored {} vector chunks in database", len(result.inserted_ids))

    except Exception as e:
        # This service no longer has a `settings` field, so the handler must
        # not dereference self.settings: doing so would raise AttributeError
        # here and hide the original failure.
        logger.error("Error storing vector embeddings: {}", str(e))
        raise
async def search_by_similarity(self: Self, query: VectorQuery) -> Sequence[Chunk]:
    """
    Query the vector database for similar documents.

    Args:
        query: VectorQuery object with search parameters

    Returns:
        Sequence[Chunk]: List of similar chunks; each chunk's metadata carries
        a "similarity_score" entry (0 when the result has no score, as in the
        fallback search).

    Raises:
        Exception: Errors outside the vector-search call itself are logged
            and re-raised unchanged.
    """
    try:
        # Get the vector collection
        logger.debug("Getting vectors collection for similarity search")
        vector_collection = await self.mongo_db.get_collection("vectors")

        # Build aggregation pipeline for vector search using official MongoDB format
        logger.debug("Building vector search pipeline with query embedding dimension: {}", len(query.query_embeddings))
        pipeline = [
            {
                "$vectorSearch": {
                    "index": "vectors_vector_index",
                    "path": "embedding",
                    "queryVector": query.query_embeddings,
                    "numCandidates": query.k * 10,
                    "limit": query.k
                }
            },
            {
                "$project": {
                    "text": 1,
                    "metadata": 1,
                    "parent_id": 1,
                    "chunk_id": 1,
                    "score": { "$meta": "vectorSearchScore" }
                }
            }
        ]

        # Apply metadata filters, if any, directly after the vector search stage.
        if query.filter_metadata:
            metadata_filter = {f"metadata.{k}": v for k, v in query.filter_metadata.items()}
            pipeline.insert(1, {"$match": metadata_filter})
            logger.debug("Added metadata filters to search: {}", query.filter_metadata)

        # Add score threshold filter if needed
        if query.score_threshold > 0:
            pipeline.append({
                "$match": {
                    "score": { "$gte": query.score_threshold }
                }
            })
            logger.debug("Added score threshold filter: {}", query.score_threshold)

        try:
            # Execute the vector search pipeline
            logger.debug("Executing vector search pipeline")
            results = await vector_collection.aggregate(pipeline).to_list(length=query.k)
            logger.debug("Vector search returned {} results", len(results))
        except Exception as e:
            logger.warning("Vector search failed: {}. Falling back to basic text search.", str(e))
            # Fall back to basic filtering with limit. NOTE: these results are
            # not ranked by similarity and carry no score.
            query_filter = {}
            if query.filter_metadata:
                query_filter.update({f"metadata.{k}": v for k, v in query.filter_metadata.items()})

            logger.debug("Executing fallback basic search with filter: {}", query_filter)
            results = await vector_collection.find(query_filter).limit(query.k).to_list(length=query.k)
            logger.debug("Fallback search returned {} results", len(results))

        # Convert results to Chunk objects
        chunks = [
            Chunk(
                text=result["text"],
                parent_id=result["parent_id"],
                chunk_id=result["chunk_id"],
                metadata={
                    **result["metadata"],
                    "similarity_score": result.get("score", 0)
                }
            )
            for result in results
        ]

        logger.info("Found {} similar chunks with similarity search", len(chunks))
        return chunks

    except Exception as e:
        # This service no longer has a `settings` field, so the handler must
        # not dereference self.settings: doing so would raise AttributeError
        # here and hide the original failure.
        logger.error("Error in similarity search: {}", str(e))
        logger.debug("Query details: k={}, dimension={}",
                     query.k, len(query.query_embeddings) if query.query_embeddings else "None")
        raise
- - Args: - texts (List[str]): List of text chunks to embed - - Returns: - np.ndarray: Array of embeddings with shape (n_texts, VECTOR_DIMENSION) - - Raises: - ValueError: If the embedding dimensions don't match expected size - """ - try: - # Use the initialized client instead of the global openai module - response = self.client.embeddings.create( - model=self.settings.EMBEDDING_MODEL, - input=texts, - encoding_format="float" # Ensure we get raw float values - ) - - # Extract embeddings and verify dimensions - embeddings = np.array([data.embedding for data in response.data]) - - if embeddings.shape[1] != self.settings.VECTOR_DIMENSION: - raise ValueError( - f"Embedding dimension mismatch. Expected {self.settings.VECTOR_DIMENSION}, " - f"but got {embeddings.shape[1]}. Please update VECTOR_DIMENSION " - f"in config.py to match the model's output." - ) - - return embeddings - - except Exception as e: - print(f"Error getting embeddings: {str(e)}") - pass - def _test(self, list_of_strings: List[str] = ['Hello my sweet Svetlana.', 'You mean the world to me.']): - """ - Test the vectorization service. 
def vectorize(self: Self, chunks: Sequence[Chunk]) -> Sequence[VectorizedChunk]:
    """Attach an embedding vector to every chunk.

    The texts of all chunks are sent to the embeddings model service in one
    call, and each returned embedding is paired with its chunk by position.

    Args:
        chunks: Chunks to vectorize.

    Returns:
        A tuple of VectorizedChunk objects, one per input chunk, in input order.

    Raises:
        ValueError: If the embeddings service returns a different number of
            embeddings than there are chunks.
    """
    embeddings = self.embeddings_model_service.get_embeddings([chunk.text for chunk in chunks])
    # strict=True: a count mismatch would otherwise silently drop chunks
    # (or embeddings) instead of surfacing the backend problem.
    return tuple(VectorizedChunk(
                     text=chunk.text,
                     parent_id=chunk.parent_id,
                     chunk_id=chunk.chunk_id,
                     metadata=chunk.metadata,
                     embedding=embedding
                 )
                 for chunk, embedding
                 in zip(chunks, embeddings, strict=True))
def sanitize_mongo_db_uri(uri: str) -> str:
    """Return *uri* with any ``user:password@`` credentials removed.

    Only the user-info portion of the network location is dropped; the
    scheme, host list (including ports and multiple comma-separated hosts),
    path, query and fragment are kept as they are, so the result is safe to
    log.

    Args:
        uri: A MongoDB connection URI, with or without credentials.

    Returns:
        The same URI without the user-info part.
    """
    parts = urlparse(uri)
    # Everything after the last "@" is host information. Slicing the raw
    # netloc (rather than reading .hostname/.port) keeps integer ports and
    # multi-host seed lists intact instead of raising.
    host_info = parts.netloc.rpartition("@")[2]
    return urlunparse(parts._replace(netloc=host_info))