jiguang

Running

App Files Files Community

jiguang / app.py

Ethscriptions

Update app.py

a1a37cc verified about 1 month ago

raw

history blame

2.51 kB

	import streamlit as st
	import pandas as pd
	import re

	# Page-level configuration: switch Streamlit to the wide layout before any
	# other element is rendered.
	st.set_page_config(
	    layout="wide",
	)

	# Heading shown at the top of the page.
	st.title(
	    '影片放映时间表分析'
	)

	# Step 1: input widgets.
	# `uploaded_file` is None until the user provides an .xlsx workbook.
	uploaded_file = st.file_uploader(
	    "上传“影片放映时间表.xlsx”文件",
	    type=['xlsx'],
	)
	# Minutes of advertising attributed to each screening; defaults to 9 and
	# cannot go below 0.
	ad_duration = st.number_input(
	    '输入每个广告的时长（分钟）',
	    min_value=0,
	    value=9,
	)

	# Captures the hall number from raw labels containing "【<n>号".
	_HALL_NUMBER_IN_BRACKET = re.compile(r'【(\d+)号')
	# First run of digits anywhere in a label; used only for column ordering.
	_FIRST_NUMBER = re.compile(r'\d+')


	def clean_hall_name(name):
	    """Normalise a raw hall label to the short form "<n>号厅".

	    Args:
	        name: A cell value taken from the "影厅" column.

	    Returns:
	        "<n>号厅" when ``name`` is a string containing "【<n>号";
	        otherwise ``name`` itself, unchanged (this includes non-string
	        values, which are passed through as-is).
	    """
	    if isinstance(name, str):
	        match = _HALL_NUMBER_IN_BRACKET.search(name)
	        if match:
	            return f"{match.group(1)}号厅"
	    return name


	def _hall_sort_key(hall):
	    """Return a key that orders hall labels by their number.

	    Plain string ordering places "10号厅" before "2号厅". This key compares
	    the first number found in the label numerically instead. Labels
	    without any digits are placed after all numbered halls and ordered by
	    their text.
	    """
	    label = str(hall)
	    found = _FIRST_NUMBER.search(label)
	    if found:
	        return (0, int(found.group()), label)
	    return (1, 0, label)


	if uploaded_file is not None:
	    try:
	        # Read the workbook; header=3 makes the fourth row the column names.
	        df = pd.read_excel(uploaded_file, header=3)

	        # Force the film-title column to strings to avoid mixed-type errors.
	        df['影片'] = df['影片'].astype(str)

	        st.subheader('上传的原始数据')
	        st.dataframe(df)

	        # Step 2: cleaning.
	        # Shorten hall labels to "<n>号厅" where the raw label allows it.
	        df['影厅'] = df['影厅'].apply(clean_hall_name)

	        # Parse the screening date and derive the month/day label that is
	        # used as the row key of the final table.
	        df['放映日期'] = pd.to_datetime(df['放映日期'])
	        df['日期'] = df['放映日期'].dt.strftime('%m月%d日')

	        # Discard rows that lack a hall or a running time.
	        df.dropna(subset=['影厅', '片长'], inplace=True)

	        # Step 3: per day and hall, count screenings and total running time.
	        summary = df.groupby(['日期', '影厅']).agg(
	            影片数量=('影片', 'count'),
	            影片播放时长=('片长', 'sum'),
	        ).reset_index()

	        # Advertising time = number of screenings x minutes per ad slot.
	        summary['广告时长'] = summary['影片数量'] * ad_duration

	        # Step 4: reshape to one row per date and one column group per hall.
	        # Each (date, hall) pair occurs once in `summary`, so the default
	        # aggregation simply carries the value through.
	        pivot_table = summary.pivot_table(
	            index='日期',
	            columns='影厅',
	            values=['广告时长', '影片播放时长'],
	        )

	        # Date/hall combinations with no screenings show 0 instead of NaN,
	        # which also lets the whole table be cast to integers.
	        pivot_table = pivot_table.fillna(0)
	        pivot_table = pivot_table.astype(int)

	        if not pivot_table.empty:
	            # Put the hall on the outer column level, then order columns by
	            # hall number (numerically, so hall 2 precedes hall 10) and by
	            # metric name within each hall.
	            pivot_table = pivot_table.swaplevel(0, 1, axis=1)
	            columns = list(pivot_table.columns)
	            column_order = sorted(
	                range(len(columns)),
	                key=lambda pos: (
	                    _hall_sort_key(columns[pos][0]),
	                    columns[pos][1],
	                ),
	            )
	            pivot_table = pivot_table.iloc[:, column_order]

	        st.subheader('影厅播放统计')
	        st.dataframe(pivot_table)

	    except Exception as e:
	        # Top-level UI boundary: report any failure to the user instead of
	        # letting the app crash with a traceback.
	        st.error(f"处理文件时出错: {e}")