diff --git a/blindbox_analysis.py b/blindbox_analysis.py index 0425f40..b95cce8 100644 --- a/blindbox_analysis.py +++ b/blindbox_analysis.py @@ -50,6 +50,31 @@ blindbox_config: dict[str, BlindboxInfo] = { } } }, + "幸运盲盒": { + "price": 50, + "items": { + "幸运泡泡": { + "price": 15, + "probability": 49.6 + }, + "星光铃铛": { + "price": 52, + "probability": 42.2 + }, + "梦雾纸签": { + "price": 100, + "probability": 5 + }, + "福灵小兽": { + "price": 200, + "probability": 2.4 + }, + "星愿花园": { + "price": 600, + "probability": 0.8 + } + } + }, "心动盲盒": { "price": 150, # 请根据实际盲盒价格填写 "items": { @@ -250,13 +275,33 @@ def profit_statistic(df: pd.DataFrame) -> dict[str, dict[str, Any]]: def run_statistics(df: pd.DataFrame) -> dict[str, dict[str, Any]]: """运行统计分析""" stats = {} + # 统计整体数据 total_stats = profit_statistic(df) stats["整体"] = total_stats + + # 统计去掉每个用户每天第一次开盲盒的数据 + def remove_first_record_per_group(group): + """去掉每组的第一条记录""" + return group.iloc[1:] # 从第二条开始返回 + + # 确保时间列是datetime格式 + if 'time' in df.columns: + df['time'] = pd.to_datetime(df['time']) + # 提取日期部分(去掉时间) + df['date'] = df['time'].dt.date + # 按日期、用户ID、盲盒名称分组 + grouped = df.groupby(['date', 'username', 'blindbox_name']) + # 去掉每组的第一条数据 + df_filtered = grouped.apply(remove_first_record_per_group).reset_index(drop=True) + filtered_stats = profit_statistic(df_filtered) + stats["去除保底"] = filtered_stats + # 统计周五以外的数据 non_friday_df = df[pd.to_datetime(df["time"]).dt.weekday != 4] non_friday_stats = profit_statistic(non_friday_df) stats["非周五"] = non_friday_stats + # 统计周五的数据 friday_df = df[pd.to_datetime(df["time"]).dt.weekday == 4] friday_stats = profit_statistic(friday_df) @@ -336,4 +381,6 @@ if __name__ == "__main__": # 运行统计分析 stats = run_statistics(result_df) print("\n统计结果:") - print_tree(stats) \ No newline at end of file + print_tree(stats) + else: + print("未找到任何盲盒数据") \ No newline at end of file diff --git a/blindbox_simulation.py b/blindbox_simulation.py new file mode 100644 index 0000000..76123f6 --- /dev/null +++ 
b/blindbox_simulation.py
@@ -0,0 +1,101 @@
+import math
+import warnings
+
+import numpy as np
+import pandas as pd
+
+from blindbox_analysis import blindbox_config, print_tree, profit_statistic
+
+# Column order of the generated frame; passing it explicitly keeps the schema
+# intact even when no records are generated.
+RECORD_COLUMNS = [
+    'uid',
+    'username',
+    'blindbox_name',
+    'blindbox_price',
+    'item_name',
+    'item_price',
+    'profit',
+    'time',
+]
+
+
+def generate_random_data(blindbox_name: str, num_records: int, seed: int = 42) -> pd.DataFrame:
+    """Simulate ``num_records`` openings of one blind box.
+
+    Each record draws a user id in [1000, 2000) and one item from
+    ``blindbox_config[blindbox_name]['items']``, weighted by each item's
+    ``probability`` field (a percentage).
+
+    Args:
+        blindbox_name: Key of the box in ``blindbox_config``.
+        num_records: Number of openings to simulate.
+        seed: Seed of the private random generator; the same seed always
+            yields the same draws.
+
+    Returns:
+        A DataFrame with the columns in ``RECORD_COLUMNS``, one row per
+        simulated opening (zero rows when ``num_records`` is 0).
+
+    Raises:
+        KeyError: If ``blindbox_name`` is not in ``blindbox_config``.
+        ValueError: If the box has no items, or its probabilities are
+            negative or do not sum to a positive number.
+    """
+    blindbox_info = blindbox_config[blindbox_name]
+    box_price = blindbox_info['price']
+    items = list(blindbox_info['items'].items())
+    if not items:
+        raise ValueError("未能选中任何物品")
+
+    weights = np.array([info['probability'] for _, info in items], dtype=float)
+    total = weights.sum()
+    if (weights < 0).any() or not total > 0:
+        raise ValueError(f"盲盒 {blindbox_name} 的概率配置无效")
+    if not math.isclose(total, 100.0, abs_tol=1e-6):
+        # Rescale rather than fail so a slightly-off total still simulates.
+        warnings.warn(
+            f"盲盒 {blindbox_name} 的概率总和为 {total},已按比例归一化",
+            stacklevel=2,
+        )
+
+    # Upper bounds of the half-open intervals [lower, upper) owned by each
+    # item. Pinning the last bound to exactly 1.0 guarantees every draw from
+    # [0, 1) lands in an interval despite floating-point rounding.
+    upper_bounds = np.cumsum(weights) / total
+    upper_bounds[-1] = 1.0
+
+    # A private generator keeps results reproducible without reseeding
+    # NumPy's global random state for the rest of the process.
+    rng = np.random.RandomState(seed)
+
+    dataset = []
+    for _ in range(num_records):
+        uid = rng.randint(1000, 2000)
+
+        # side='right' sends a draw equal to a bound to the next item, so a
+        # draw of exactly 0.0 picks the first item rather than none.
+        item_index = int(np.searchsorted(upper_bounds, rng.rand(), side='right'))
+        item_name, item_info = items[item_index]
+
+        dataset.append({
+            'uid': uid,
+            'username': f'user_{uid}',
+            'blindbox_name': blindbox_name,
+            'blindbox_price': box_price,
+            'item_name': item_name,
+            'item_price': item_info['price'],
+            'profit': item_info['price'] - box_price,
+            'time': pd.Timestamp.now(),
+        })
+
+    return pd.DataFrame(dataset, columns=RECORD_COLUMNS)
+
+
+if __name__ == "__main__":
+    # Simulate every configured box and print its profit statistics.
+    stats = {
+        box_name: profit_statistic(generate_random_data(box_name, 10000))
+        for box_name in blindbox_config
+    }
+    print_tree(stats)
\ No newline at end of file