Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -727,7 +727,7 @@ def find_similar_movies(query_description, top_n=3):
|
|
727 |
return movie_kb.find_similar_movies(query_description, top_n)
|
728 |
|
729 |
# Original functions for analysis
|
730 |
-
def predict_box_office(movie_description, similar_movies):
|
731 |
|
732 |
import numpy as np
|
733 |
|
@@ -742,9 +742,13 @@ def predict_box_office(movie_description, similar_movies):
|
|
742 |
for movie_info in similar_movies:
|
743 |
movie = movie_info["movie"]
|
744 |
sim_score = movie_info["similarity_score"]
|
|
|
|
|
|
|
|
|
745 |
inflation_factor = get_inflation_adjustment(movie["year"])
|
746 |
year_weight = get_year_weight(movie["year"])
|
747 |
-
adjusted_bo = movie["box_office"] * year_weight *
|
748 |
adjusted_box_offices.append(sim_score * adjusted_bo)
|
749 |
total_weight += sim_score * year_weight
|
750 |
|
@@ -765,7 +769,7 @@ def predict_box_office(movie_description, similar_movies):
|
|
765 |
genre_movies = [m for m in movie_knowledge_base if genre in m["genre"] and m["budget"] > 0]
|
766 |
if not genre_movies:
|
767 |
continue
|
768 |
-
success_rate = sum(1 for m in genre_movies if (m["box_office"] / m["budget"]) >=
|
769 |
rois = [m["box_office"] / m["budget"] for m in genre_movies]
|
770 |
roi_median = np.median(rois)
|
771 |
revenues = [m["box_office"] for m in genre_movies]
|
@@ -789,7 +793,6 @@ def predict_box_office(movie_description, similar_movies):
|
|
789 |
|
790 |
return simulations
|
791 |
|
792 |
-
|
793 |
def predict_awards(movie_description, similar_movies):
|
794 |
"""Predict potential awards based on similar movies."""
|
795 |
# Count awards in similar movies and recommend the most common ones
|
@@ -845,8 +848,17 @@ def get_similar_movies(movie_description: str):
|
|
845 |
@log_function_tool(logger)
|
846 |
def get_box_office_prediction(movie_description: str):
|
847 |
"""Predict the box office revenue for a movie based on its description."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
848 |
similar_movies = find_similar_movies(movie_description, top_n=3)
|
849 |
-
simulations = predict_box_office(movie_description, similar_movies)
|
850 |
baseline_prediction = np.median(simulations)
|
851 |
lower_bound = np.percentile(simulations, 25)
|
852 |
upper_bound = np.percentile(simulations, 75)
|
@@ -857,8 +869,8 @@ def get_box_office_prediction(movie_description: str):
|
|
857 |
else:
|
858 |
avg_budget = 1
|
859 |
|
860 |
-
threshold_exceed = 3 * avg_budget
|
861 |
-
threshold_below = 2 * avg_budget
|
862 |
prob_exceed = float(np.mean(simulations > threshold_exceed))
|
863 |
prob_below = float(np.mean(simulations < threshold_below))
|
864 |
|
@@ -882,7 +894,6 @@ def get_box_office_prediction(movie_description: str):
|
|
882 |
},
|
883 |
"similar_movies": similar_movie_info
|
884 |
}
|
885 |
-
|
886 |
@function_tool
|
887 |
@log_function_tool(logger)
|
888 |
def get_award_predictions(movie_description: str):
|
|
|
727 |
return movie_kb.find_similar_movies(query_description, top_n)
|
728 |
|
729 |
# Original functions for analysis
|
730 |
+
def predict_box_office(movie_description, similar_movies, target_budget):
|
731 |
|
732 |
import numpy as np
|
733 |
|
|
|
742 |
for movie_info in similar_movies:
|
743 |
movie = movie_info["movie"]
|
744 |
sim_score = movie_info["similarity_score"]
|
745 |
+
if movie["budget"] > 0:
|
746 |
+
budget_ratio = target_budget / movie["budget"]
|
747 |
+
else:
|
748 |
+
budget_ratio = 1
|
749 |
inflation_factor = get_inflation_adjustment(movie["year"])
|
750 |
year_weight = get_year_weight(movie["year"])
|
751 |
+
adjusted_bo = movie["box_office"] * year_weight * budget_ratio
|
752 |
adjusted_box_offices.append(sim_score * adjusted_bo)
|
753 |
total_weight += sim_score * year_weight
|
754 |
|
|
|
769 |
genre_movies = [m for m in movie_knowledge_base if genre in m["genre"] and m["budget"] > 0]
|
770 |
if not genre_movies:
|
771 |
continue
|
772 |
+
success_rate = sum(1 for m in genre_movies if (m["box_office"] / m["budget"]) >= 3) / len(genre_movies)
|
773 |
rois = [m["box_office"] / m["budget"] for m in genre_movies]
|
774 |
roi_median = np.median(rois)
|
775 |
revenues = [m["box_office"] for m in genre_movies]
|
|
|
793 |
|
794 |
return simulations
|
795 |
|
|
|
796 |
def predict_awards(movie_description, similar_movies):
|
797 |
"""Predict potential awards based on similar movies."""
|
798 |
# Count awards in similar movies and recommend the most common ones
|
|
|
848 |
@log_function_tool(logger)
|
849 |
def get_box_office_prediction(movie_description: str):
|
850 |
"""Predict the box office revenue for a movie based on its description."""
|
851 |
+
target_budget = None
|
852 |
+
import re
|
853 |
+
match = re.search(r"Budget:\s*:\s*(\d+)", movie_description, re.IGNORECASE)
|
854 |
+
if match:
|
855 |
+
target_budget = float(match.group(1))
|
856 |
+
else:
|
857 |
+
target_budget = 10000000
|
858 |
+
# print(f"the movie budget is {target_budget}")
|
859 |
+
|
860 |
similar_movies = find_similar_movies(movie_description, top_n=3)
|
861 |
+
simulations = predict_box_office(movie_description, similar_movies, target_budget)
|
862 |
baseline_prediction = np.median(simulations)
|
863 |
lower_bound = np.percentile(simulations, 25)
|
864 |
upper_bound = np.percentile(simulations, 75)
|
|
|
869 |
else:
|
870 |
avg_budget = 1
|
871 |
|
872 |
+
threshold_exceed = 3 * target_budget
|
873 |
+
threshold_below = 2 * target_budget
|
874 |
prob_exceed = float(np.mean(simulations > threshold_exceed))
|
875 |
prob_below = float(np.mean(simulations < threshold_below))
|
876 |
|
|
|
894 |
},
|
895 |
"similar_movies": similar_movie_info
|
896 |
}
|
|
|
897 |
@function_tool
|
898 |
@log_function_tool(logger)
|
899 |
def get_award_predictions(movie_description: str):
|