Spaces:
Sleeping
Sleeping
File size: 6,538 Bytes
e522499 cdf0803 a1d5478 cdf0803 e522499 cdf0803 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 |
import streamlit as st
import pandas as pd
import numpy as np
# 设置页面布局为宽屏模式,并设置页面标题
st.set_page_config(layout="wide", page_title="影城效率分析 - 最终版")
def clean_movie_title(title):
"""
清理并规范化电影标题,移除版本、语言等标识以便合并统计。
"""
if not isinstance(title, str):
return title
suffixes_to_remove = [
'2D', '3D', '4D', '4K', 'IMAX', 'CINITY', '杜比', '巨幕',
'国语', '英语', '粤语', '日语', '原版', '修复版',
'(国)', '(英)', '(粤)'
]
parts = title.split()
cleaned_parts = [p for p in parts if p.upper() not in [s.upper() for s in suffixes_to_remove]]
if not cleaned_parts:
return title
return ' '.join(cleaned_parts).strip()
def style_efficiency(row):
"""
根据效率值高亮特定行。
如果座次效率或场次效率 < 0.5 或 > 1.5,则高亮为淡黄色。
"""
highlight = 'background-color: #FFFFE0;'
default = ''
seat_efficiency = row.get('座次效率', 0)
session_efficiency = row.get('场次效率', 0)
if (seat_efficiency < 0.5 or seat_efficiency > 1.5 or
session_efficiency < 0.5 or session_efficiency > 1.5):
return [highlight] * len(row)
return [default] * len(row)
def process_and_analyze_data(df):
"""
核心数据处理与分析函数。
"""
if df.empty:
return pd.DataFrame()
analysis_df = df.groupby('影片名称_清理后').agg(
座位数=('座位数', 'sum'),
场次=('影片名称_清理后', 'size'),
票房=('总收入', 'sum'),
人次=('总人次', 'sum')
).reset_index()
analysis_df.rename(columns={'影片名称_清理后': '影片'}, inplace=True)
analysis_df = analysis_df.sort_values(by='票房', ascending=False).reset_index(drop=True)
total_seats = analysis_df['座位数'].sum()
total_sessions = analysis_df['场次'].sum()
total_revenue = analysis_df['票房'].sum()
analysis_df['均价'] = np.divide(analysis_df['票房'], analysis_df['人次']).fillna(0)
analysis_df['座次比'] = np.divide(analysis_df['座位数'], total_seats).fillna(0)
analysis_df['场次比'] = np.divide(analysis_df['场次'], total_sessions).fillna(0)
analysis_df['票房比'] = np.divide(analysis_df['票房'], total_revenue).fillna(0)
analysis_df['座次效率'] = np.divide(analysis_df['票房比'], analysis_df['座次比']).fillna(0)
analysis_df['场次效率'] = np.divide(analysis_df['票房比'], analysis_df['场次比']).fillna(0)
# **优化1:移除“序号”列的定义**
final_columns = [
'影片', '座位数', '场次', '票房', '人次', '均价',
'座次比', '场次比', '票房比', '座次效率', '场次效率'
]
analysis_df = analysis_df[final_columns]
return analysis_df
# --- Streamlit 用户界面 ---
st.title('排片效率分析')
st.write("上传 `影片映出日累计报表.xlsx` 文件。")
uploaded_file = st.file_uploader("请在此处上传 Excel 文件", type=['xlsx', 'xls', 'csv'])
if uploaded_file is not None:
try:
df = pd.read_excel(uploaded_file, skiprows=3, header=None)
df.rename(columns={
0: '影片名称', 2: '放映时间', 5: '总人次', 6: '总收入', 7: '座位数'
}, inplace=True)
required_cols = ['影片名称', '放映时间', '座位数', '总收入', '总人次']
df = df[required_cols]
df.dropna(subset=['影片名称', '放映时间'], inplace=True)
for col in ['座位数', '总收入', '总人次']:
df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
df['放映时间'] = pd.to_datetime(df['放映时间'], format='%H:%M:%S', errors='coerce').dt.time
df.dropna(subset=['放映时间'], inplace=True)
df['影片名称_清理后'] = df['影片名称'].apply(clean_movie_title)
st.toast("文件上传成功,数据已按规则处理!", icon="🎉")
format_config = {
'座位数': '{:,.0f}', '场次': '{:,.0f}', '人次': '{:,.0f}',
'票房': '{:,.2f}', '均价': '{:.2f}', '座次比': '{:.2%}', '场次比': '{:.2%}',
'票房比': '{:.2%}', '座次效率': '{:.2f}', '场次效率': '{:.2f}',
}
# --- 1. 全天数据分析 ---
st.header("全天排片效率分析")
full_day_analysis = process_and_analyze_data(df.copy())
if not full_day_analysis.empty:
table_height = (len(full_day_analysis) + 1) * 35 + 3
# **优化2:使用 .hide(axis="index") 隐藏默认序号列**
st.dataframe(
full_day_analysis.style.format(format_config).apply(style_efficiency, axis=1).hide(axis="index"),
height=table_height,
use_container_width=True
)
else:
st.warning("全天数据不足,无法生成分析报告。")
# --- 2. 黄金时段数据分析 ---
st.header("黄金时段 (14:00 - 21:00) 排片效率分析")
start_time = pd.to_datetime('14:00:00').time()
end_time = pd.to_datetime('21:00:00').time()
prime_time_df = df[df['放映时间'].between(start_time, end_time)]
prime_time_analysis = process_and_analyze_data(prime_time_df.copy())
if not prime_time_analysis.empty:
table_height_prime = (len(prime_time_analysis) + 1) * 35 + 3
# **优化2:同样隐藏黄金时段表格的默认序号**
st.dataframe(
prime_time_analysis.style.format(format_config).apply(style_efficiency, axis=1).hide(axis="index"),
height=table_height_prime,
use_container_width=True
)
else:
st.warning("黄金时段内没有有效场次数据,无法生成分析报告。")
# --- 3. 一键复制影片列表 ---
if not full_day_analysis.empty:
st.header("复制当日影片列表")
movie_titles = full_day_analysis['影片'].tolist()
formatted_titles = ''.join([f'《{title}》' for title in movie_titles])
st.code(formatted_titles, language='text')
except Exception as e:
st.error(f"处理文件时出错: {e}")
st.warning("请确保上传的文件是'影片映出日累计报表.xlsx',并且格式正确。")
else:
st.info("请上传文件以开始分析。") |