Spaces:

SoumyaJ
/

DynamicScheduleRecommendInAstra

Sleeping

App Files Files Community

SoumyaJ committed on May 19

Commit

585ebd2

verified ·

1 Parent(s): 53945ce

Upload 4 files

Browse files

Files changed (4) hide show

app.py +158 -0
config.yaml +8 -0
requirements.txt +10 -0
tools.py +130 -0

app.py ADDED Viewed

	@@ -0,0 +1,158 @@

+from tools import RetrievalTool
+from dotenv import load_dotenv
+from langchain_core.prompts import PromptTemplate
+from langchain_groq import ChatGroq
+from pydantic import BaseModel, Field
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import JSONResponse
+import pandas as pd
+import uvicorn
+import re
+import os
+# Load variables from a local .env file (if present) before the environment is read.
+load_dotenv()
+app = FastAPI()
+# NOTE(review): wildcard origins together with allow_credentials=True is very
+# permissive; narrow allow_origins before exposing this service more widely.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Environment variables the service expects. Re-assigning os.environ[name] from
+# os.getenv(name) changes nothing when the variable is set and fails with an
+# opaque TypeError when it is not, so check explicitly and name what is missing.
+_REQUIRED_ENV_VARS = (
+    "ASTRA_DB_API_ENDPOINT",
+    "ASTRA_DB_APPLICATION_TOKEN",
+    "ASTRA_DB_NAMESPACE",
+    "HF_TOKEN",
+    "GROQ_API_KEY",
+    "CALENDARIFIC_API_KEY",
+)
+_missing_env_vars = [name for name in _REQUIRED_ENV_VARS if os.getenv(name) is None]
+if _missing_env_vars:
+    raise RuntimeError("Missing required environment variables: " + ", ".join(_missing_env_vars))
+# Shared retrieval helper; its constructor reads the Astra DB and Calendarific settings checked above.
+retrieval_tool = RetrievalTool()
+class ScheduleRecommendationModel(BaseModel):
+    """Response payload: the generated schedule text plus a summary of the reasoning."""
+    programme_schedule: str = Field(
+        description="The entire list of recommended programs..",
+    )
+    reasoning: str = Field(
+        description="The reasoning behind the recommendation.",
+    )
+# Prompt for the scheduling model. The placeholders {genre}, {day_type},
+# {holiday_event}, {start_time} and {program_list} are filled in by
+# get_dynamic_schedule().
+template = """
+You are a smart TV schedule assistant.
+Your job is to generate a clean, formatted program schedule for a specific day.
+Constraints:
+- Do NOT include any explanation, notes, or Markdown.
+- Do NOT repeat any program on the same day.
+- Format must be exactly: HH:MM - HH:MM : ProgramName
+- Use the provided channel start time as the beginning of the schedule.
+- Prime time is from 18:00 to 22:00 — prioritize the highest-rated programs here.
+- If the date is a holiday (e.g., Christmas), ensure 2 to 3 holiday-themed programs (based on keywords like "Christmas", "Santa", or "Carol" in the synopsis) are included in the schedule.
+- If it's a weekend, favor family-friendly or entertainment-heavy content.
+- If it's a weekday, prefer shorter or lighter content during the day and prioritize core genre in prime time.
+- Do not schedule past 23:59.
+Inputs:
+- Genre: {genre}
+- DayType: {day_type}  # Either "weekday" or "weekend"
+- Holiday: {holiday_event}  # Either "Christmas", "New year" or None
+- Start Time: {start_time}
+- Available Programs:
+{program_list}
+Now generate the full day schedule starting from {start_time} using the above constraints.
+"""
+# Prompt for the summarising model. The single placeholder {reasoning} is
+# filled in by summarize_reasoning().
+summary_template = """
+You are a smart TV reasoning summary assistant.
+Your task is to clearly explain the thought process behind a given TV schedule recommendation.
+The summary should help the user understand why specific programs were selected, why they appear at certain times, and how the genre, ratings, time of day, day type (weekday/weekend), and special events (e.g., holidays like Christmas) influenced the schedule.
+✳️ Instructions:
+Do not add any information that is not already present in the reasoning.Do not hallucinate or make assumptions.
+The summary must reflect the actual reasoning provided by the model.
+Write in a natural, human-readable tone, suitable for a user reading a TV planner explanation.
+Keep it concise but detailed enough to convey scheduling logic (approx. 8-10 lines).
+Highlight how prime-time slots (18:00–22:00) were used for high-rated programs.
+If applicable, explain how holiday content or weekend scheduling influenced the selection.
+Use the reasoning provided to you and summarize it in a clear and concise manner.
+{reasoning}
+"""
+# Wrap the raw template strings as LangChain prompt templates.
+prompt = PromptTemplate.from_template(template)
+summary_prompt = PromptTemplate.from_template(summary_template)
+# Two Groq-hosted chat models: one produces the schedule (its reply is searched
+# for a <think>...</think> section in get_dynamic_schedule), the other
+# summarises that reasoning text.
+llm = ChatGroq(model_name = "deepseek-r1-distill-llama-70b", api_key = os.environ["GROQ_API_KEY"])
+summary_llm = ChatGroq(model_name = "gemma2-9b-it", api_key = os.environ["GROQ_API_KEY"])
+# Each chain pipes its prompt template into its model.
+chain = prompt | llm
+summary_chain = summary_prompt | summary_llm
+def get_dynamic_schedule(program_df:str, genre:str, start_time:str, day_type:str, holiday_event:str):
+    """Generate a day schedule with the LLM chain and separate it from the model's reasoning.
+
+    Args:
+        program_df: Newline-separated programme descriptions inserted into the prompt.
+        genre: Genre inserted into the prompt.
+        start_time: Channel start time inserted into the prompt.
+        day_type: Day classification inserted into the prompt ("weekday" or "weekend").
+        holiday_event: Holiday name inserted into the prompt; may be empty.
+
+    Returns:
+        A ScheduleRecommendationModel on success. On any failure the string
+        "Error: <message>" is returned instead of raising, so callers must
+        check the type of the result.
+    """
+    try:
+        response = chain.invoke({"program_list": program_df,
+                                 "genre": genre,
+                                 "day_type": day_type,
+                                 "holiday_event": holiday_event,
+                                 "start_time": start_time})
+        text_data = response.content
+        think_match = re.search(r'<think>(.*?)</think>', text_data, re.DOTALL)
+        if think_match is None:
+            # No reasoning section: the whole reply is the schedule. Pass the
+            # reply text rather than the message object, because
+            # programme_schedule is a str field and would fail validation.
+            return ScheduleRecommendationModel(programme_schedule=text_data.strip(), reasoning="Error while generating reasoning.")
+        reasoning = think_match.group(1).strip()
+        # Everything after the last closing tag is the schedule itself.
+        final_answer = text_data.split("</think>")[-1].strip()
+        reasoning_answer = summarize_reasoning(reasoning)
+        return ScheduleRecommendationModel(programme_schedule=final_answer, reasoning=reasoning_answer)
+    except Exception as e:
+        # Best-effort contract kept from the original: report the failure as text.
+        return f"Error: {str(e)}"
+def get_weekday_or_weekend(date:str):
+    """Classify a date as "weekday" (Monday to Friday) or "weekend" (Saturday, Sunday).
+
+    Args:
+        date: Date text parseable by pandas.to_datetime, e.g. "2023-12-25".
+
+    Returns:
+        "weekday" or "weekend".
+
+    Raises:
+        ValueError: If the date is empty, missing or cannot be parsed.
+    """
+    try:
+        schedule_date = pd.to_datetime(date)
+    except (ValueError, TypeError) as err:
+        raise ValueError("Invalid date format. Please use YYYY-MM-DD.") from err
+    # Empty or missing input parses to NaT/None instead of raising; reject it
+    # here rather than letting the weekday comparison classify it silently.
+    if pd.isna(schedule_date):
+        raise ValueError("Invalid date format. Please use YYYY-MM-DD.")
+    return "weekday" if schedule_date.weekday() < 5 else "weekend"
+def get_schedule_recommendation(genre:str, date:str, start_time:str):
+    """Fetch candidate programmes for the genre and date, then ask the LLM for a schedule.
+
+    Returns whatever get_dynamic_schedule returns, after printing it.
+    """
+    candidates, holiday = retrieval_tool.get_relevant_programmes(genre, date)
+    recommendation = get_dynamic_schedule(
+        candidates,
+        genre,
+        start_time,
+        get_weekday_or_weekend(date),
+        holiday,
+    )
+    print("Schedule Recommendation:", recommendation)
+    return recommendation
+def summarize_reasoning(reasoning:str):
+    """Summarise the model's reasoning text with the summary chain.
+
+    Returns a fixed fallback message when there is no reasoning to summarise.
+    """
+    if not reasoning:
+        return "Error while generating reasoning."
+    summary = summary_chain.invoke({"reasoning": reasoning})
+    return summary.content
+@app.post("/api/v1/getScheduleRecommendation/")
+async def extract_details(genre:str, date:str, start_time:str):
+    """Return a schedule recommendation for the given genre, date and start time.
+
+    Responses:
+        200: the recommendation returned by get_schedule_recommendation.
+        400: {"error": ...} when the input is rejected with a ValueError.
+        500: {"error": ...} for any other failure, including the
+             "Error: ..." text that schedule generation returns on failure.
+    """
+    try:
+        result = get_schedule_recommendation(genre, date, start_time)
+    except ValueError as e:
+        # Raised for invalid dates, missing genre/date or an empty search result.
+        return JSONResponse(status_code=400, content={"error": str(e)})
+    except Exception as e:
+        # Also covers HTTPException, which was previously the only case handled.
+        return JSONResponse(status_code=500, content={"error": str(e)})
+    if isinstance(result, str):
+        # Schedule generation reports failures as plain text; do not return that with status 200.
+        return JSONResponse(status_code=500, content={"error": result})
+    return result
+# When run as a script, serve the API with uvicorn on all interfaces, port 8000.
+if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=8000)
+# Manual smoke test without the HTTP layer, kept for reference:
+# if __name__ == "__main__":
+#     get_schedule_recommendation('comedy', '2023-12-25', '09:00')

config.yaml ADDED Viewed

	@@ -0,0 +1,8 @@

+# "k" values passed as search_kwargs to the Astra DB retrievers built in tools.py.
+astra_db:
+  # Genre-filtered search used when a holiday event was detected for the date.
+  genreSearchWithEvent:
+    k : 35
+  # Holiday-keyword search, built only when a holiday event was detected.
+  holidaySearch:
+    k : 5
+  # Genre-filtered search used when no holiday event applies.
+  genreSearchWithoutEvent:
+    k : 40

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+langchain
+langchain-community
+langchain-core
+langchain-groq
+langchain-astradb
+langchain-huggingface
+# Backend for the local HuggingFaceEmbeddings model used in tools.py.
+sentence-transformers
+python-dotenv
+pandas
+fastapi
+uvicorn
+# Imported directly by app.py / tools.py, so listed explicitly.
+pydantic
+requests
+PyYAML

tools.py ADDED Viewed

	@@ -0,0 +1,130 @@

+from langchain_astradb import AstraDBVectorStore
+from langchain_huggingface import HuggingFaceEndpointEmbeddings
+from langchain.tools.retriever import create_retriever_tool
+from langchain_huggingface import HuggingFaceEmbeddings
+import os
+import pandas as pd
+import requests
+import yaml
+# Lower-case holiday event name -> keywords used to filter programme synopses.
+HOLIDAY_KEYWORDS = {
+    "christmas": ["christmas", "santa", "carol", "holiday"],
+}
+class RetrievalTool:
+    """Fetch candidate programmes from the Astra DB vector store for a genre and date.
+
+    A genre-filtered search is combined with an optional holiday-keyword
+    search; whether a holiday applies is looked up through the Calendarific API.
+    """
+    def __init__(self):
+        """Create the embedding model and vector store handle, and load config.yaml.
+
+        Raises:
+            KeyError: If a required environment variable is not set.
+        """
+        # self.embeddings = HuggingFaceEndpointEmbeddings(
+        # model= "sentence-transformers/all-MiniLM-L6-v2",
+        # task="feature-extraction",
+        # huggingfacehub_api_token= os.environ["HF_TOKEN"])
+        self.embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+        self.vector_store = AstraDBVectorStore(collection_name="program_astra",
+                                           embedding=self.embeddings,
+                                           api_endpoint=os.environ["ASTRA_DB_API_ENDPOINT"],
+                                           token= os.environ["ASTRA_DB_APPLICATION_TOKEN"],
+                                           namespace= os.environ["ASTRA_DB_NAMESPACE"])
+        self.calendarKey = os.environ["CALENDARIFIC_API_KEY"]
+        # Context manager so the config file handle is closed after loading.
+        with open("config.yaml", encoding="utf-8") as config_file:
+            self.config = yaml.safe_load(config_file)
+    def get_HolidayInCalendar(self,scheduleDate:str):
+        """Return the holiday event found in the month of scheduleDate.
+
+        US holidays for the date's year and month are requested from
+        Calendarific. The lookup is best effort: any request or response
+        problem yields an empty string.
+
+        Returns:
+            "christmas", "new year" or "" when neither is listed.
+
+        Raises:
+            ValueError: If scheduleDate cannot be parsed as a date.
+        """
+        schedule_date = pd.to_datetime(scheduleDate, errors='coerce')
+        if pd.isna(schedule_date):
+            raise ValueError("Invalid date format. Please use YYYY-MM-DD.")
+        # Let requests build the query string; the hand-written URL sent the
+        # country as 'US' with literal quote characters.
+        query = {
+            "api_key": self.calendarKey,
+            "country": "US",
+            "year": schedule_date.year,
+            "month": schedule_date.month,
+        }
+        try:
+            response = requests.get("https://calendarific.com/api/v2/holidays", params=query, timeout=10)
+        except requests.RequestException:
+            return ""
+        if response.status_code != 200:
+            return ""
+        try:
+            holidays = response.json()['response']['holidays']
+        except (ValueError, KeyError, TypeError):
+            return ""
+        # Compare case-insensitively against name and description, so that
+        # "New Year's Day" is recognised as well as "Christmas Day".
+        holiday_texts = [
+            f"{holiday.get('name', '')} {holiday.get('description', '')}".lower()
+            for holiday in holidays
+        ]
+        if any("christmas" in text for text in holiday_texts):
+            return "christmas"
+        if any("new year" in text for text in holiday_texts):
+            return "new year"
+        return ""
+    # Genre and schedule date arrive from the front end and drive the Astra filtering.
+    def buildDataToIncludeHolidayEvents(self, genre:str, scheduleDate:str):
+        """Normalise the genre and look up the holiday event for the date.
+
+        Returns:
+            (genre stripped and lower-cased, holiday event name or "").
+
+        Raises:
+            ValueError: If genre or scheduleDate is empty, or the date is invalid.
+        """
+        if not genre or not scheduleDate:
+            raise ValueError("Genre and schedule date are required.")
+        genre_data = genre.strip().lower()
+        holidayEvent = self.get_HolidayInCalendar(scheduleDate)
+        return genre_data, holidayEvent
+    def get_retrievers(self, user_genres: list, holiday_event: str = None):
+        """Build the retrievers for a genre search and, if applicable, a holiday search.
+
+        Returns:
+            (genre retriever, holiday retriever or None). The "k" of each
+            retriever comes from the astra_db section of config.yaml.
+        """
+        astraConfig = self.config["astra_db"]
+        astra_filter_genre = {"genre": {"$in": user_genres}}
+        if not holiday_event:
+            retriever = self.vector_store.as_retriever(search_kwargs={"filter": astra_filter_genre, "k": astraConfig["genreSearchWithoutEvent"]["k"]})
+            return retriever, None
+        retriever_genre = self.vector_store.as_retriever(search_kwargs={"filter": astra_filter_genre, "k": astraConfig["genreSearchWithEvent"]["k"]})
+        keywords = HOLIDAY_KEYWORDS.get(holiday_event.lower(), [])
+        if not keywords:
+            # No keywords are defined for this event (e.g. "new year"); an
+            # empty $in list would presumably match nothing, so skip that search.
+            return retriever_genre, None
+        # NOTE(review): $in compares whole field values - confirm that synopsis
+        # is stored in a form these keywords can match.
+        astra_filter_holiday = {
+            "$or": [
+                {"synopsis": {"$in": keywords}}
+            ]}
+        retriever_holiday = self.vector_store.as_retriever(search_kwargs={"filter": astra_filter_holiday, "k": astraConfig["holidaySearch"]["k"]})
+        return retriever_genre, retriever_holiday
+    @staticmethod
+    def _as_text(value):
+        """Join list values with spaces; convert anything else with str()."""
+        return " ".join(value) if isinstance(value, list) else str(value)
+    @staticmethod
+    def _format_programme(row):
+        """Render one programme record as a single prompt line."""
+        synopsis = RetrievalTool._as_text(row['synopsis'])
+        programme_genre = RetrievalTool._as_text(row['genre'])
+        return (
+            f"programme_title: {row['programme_title']}, duration: {row['duration']}, "
+            f"ratings: {row['ratings']}, synopsis: {synopsis}, genre: {programme_genre}"
+        )
+    # Pull the relevant documents for the genre and date.
+    def get_relevant_programmes(self, genre: str, scheduleDate: str) -> tuple:
+        """Retrieve candidate programmes for the genre and date from the vector store.
+
+        Returns:
+            (programme lines joined by newlines, holiday event name or "").
+
+        Raises:
+            ValueError: If the store is not initialised, the input is invalid
+                or no documents are found.
+        """
+        if not self.vector_store or not self.embeddings:
+            raise ValueError("Vector store or embeddings not initialized.")
+        genre, holidayEvents = self.buildDataToIncludeHolidayEvents(genre, scheduleDate)
+        retriever_genre, retriever_holiday = self.get_retrievers([genre], holidayEvents)
+        documents = []
+        if retriever_holiday:
+            documents.extend(retriever_holiday.invoke(holidayEvents))
+        if retriever_genre:
+            documents.extend(retriever_genre.invoke(f"{genre} genre based programs"))
+        if not documents:
+            raise ValueError("No relevant documents found.")
+        program_df = pd.DataFrame([doc.metadata for doc in documents])
+        formatted_entries = [self._format_programme(row) for _, row in program_df.iterrows()]
+        # Join all formatted strings with newline
+        docs = "\n".join(formatted_entries)
+        return docs, holidayEvents
+# if __name__ == "__main__":
+#     tool = RetrievalTool()
+#     programmes, holiday_event = tool.get_relevant_programmes("comedy", "2023-10-25")
+#     print(programmes, holiday_event)