天猫复购数据分析

create database Tmall_data;
Query OK, 1 row affected (0.01 sec)

mysql> use Tmall_data;
Database changed
mysql> create table purchase_data(user_id int,merchant_id int,prob float);
Query OK, 0 rows affected (0.04 sec)

mysql> load data infile “D:\sample_submission.csv” into table purchase_data fields terminated by ‘,’ enclosed by ‘"’ lines terminated by ‘\n’ ;
Query OK, 261478 rows affected, 3 warnings (2.51 sec)
Records: 261478 Deleted: 0 Skipped: 0 Warnings: 3

mysql> select count() from purchase_data;
±---------+
| count(
) |
±---------+
| 261478 |
±---------+
1 row in set (0.27 sec)
#构建用户分层体系,为精准营销提供数据支撑
select user_id,count(merchant_id) as purchase_frequency,avg(prob) as avg_purchase_prob,ntile(5) over(order by count(merchant_id)) as frequency_segment,ntile(5) over(order by avg(prob)) as value_segment from purchase_data group by user_id;
#商家层面的复购特征
SELECT
merchant_id,
COUNT(DISTINCT user_id) as total_customers,
AVG(ProB) as avg_repurchase_prob,
SUM(CASE WHEN ProB > 0.7 THEN 1 ELSE 0 END)1.0/COUNT() as high_prob_ratio
FROM purchase_ data
GROUP BY merchant_id
ORDER BY avg_repurchase_prob DESC;
#用户跨商家购买模式
SELECT
a.merchant_id as merchant_A,
b.merchant_id as merchant_B,
COUNT(DISTINCT a.user_id) as co_occurrence_count
FROM purchase_data a
JOIN data_table b ON a.user_id = b.user_id
WHERE a.merchant_id != b.merchant_id
GROUP BY a.merchant_id, b.merchant_id
HAVING COUNT(DISTINCT a.user_id) > 100
ORDER BY co_occurrence_count DESC;