数据库实例应用
【电商数据处理案例】
目标需求:将某电商脱敏后数据导入数据库进行加工处理,使用加工好的数据分析业务问题数据获取
- 客户相关:UserInfo.csv:用户主表、RegionInfo.csv:区域表、UserAddress.csv:用户地址表
- 商品相关:GoodsInfo.csv:商品主表、GoodsBrand.csv:商品品牌表、GoodsColor.csv:商品颜色表、GoodsSize.csv:商品尺码
- 订单相关文件:OrderInfo.csv:订单主表、OrderDetail.csv:订单详情表
SQL数据处理:
数据清洗→数据筛选→数据透视→数据排序
表结构一览:
表之间的关联关系:
数据准备:
新建数据库
create database ds;
use ds;
建表并导入数据
-- UserInfo table
create table userinfo(
userid varchar(6) not null default '-',
username varchar(20) not null default '-',
userpassword varchar(100) not null default '-',
sex int not null default 0,
usermoney int not null default 0,
frozenmoney int not null default 0,
addressid varchar(20) not null default '-',
regtime varchar(20) not null default '-',
lastlogin varchar(20) not null default '-',
lasttime date not null
);
#导入数据
load data infile "C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/UserInfo.csv"
into table userinfo
fields terminated by ','
ignore 1 lines;
-- regioninfo
create table regioninfo(
regionid varchar(4) not null default '-',
parentid varchar(4) not null default '-',
regionname varchar(20) not null default '-',
regiontype int not null default 0,
agencyid int not null default 0,
pt varchar(11) not null default '-'
);
#导入数据
load data infile "C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/RegionInfo.csv"
into table regioninfo
fields terminated by ','
ignore 1 lines;
-- UserAddress
create table useraddress(
addressid varchar(5) not null default '-',
userid varchar(6) not null default '-',
consignee varchar(50) not null default '-',
country varchar(1) not null default '-',
province varchar(2) not null default '-',
city varchar(4) not null default '-',
district varchar(4) not null default '-',
address varchar(200) not null default '-',
pt varchar(11) not null default '-'
);
#导入数据
load data infile "C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/UserAddress.csv"
into table useraddress
fields terminated by ','
ignore 1 lines;
-- GoodsInfo
create table goodsinfo(
goodsid varchar(6) not null default '-',
typeid varchar(3) not null default '-',
markid varchar(4) not null default '-',
goodstag varchar(100) not null default '-',
brandtag varchar(100) not null default '-',
customtag varchar(100) not null default '-',
goodsname varchar(100) not null default '-',
clickcount int not null default 0,
clickcr int not null default 0,
goodsnumber int not null default 0,
goodsweight int not null default 0,
marketprice double not null default 0,
shopprice double not null default 0,
addtime varchar(20) not null default 0,
isonsale int not null default 0,
sales int not null default 0,
realsales int not null default 0,
extraprice double not null default 0,
goodsno varchar(10) not null default '-'
);
#导入数据
load data infile "C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/goodsinfo.csv"
into table goodsinfo
fields terminated by ','
ignore 1 lines;
-- GoodsBrand
create table goodsbrand(
SupplierID varchar(4) not null default '-',
BrandType varchar(100) not null default '-',
pt varchar(11) not null default '-'
);
#导入数据
load data infile "C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/GoodsBrand.csv"
into table goodsbrand
fields terminated by ','
ignore 1 lines;
-- GoodsColor
create table goodscolor(
ColorID varchar(4) not null default '-',
ColorNote varchar(20) not null default '-',
ColorSort int not null default 0,
pt varchar(11) not null default '-'
);
#导入数据
load data infile "C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/GoodsColor.csv"
into table goodscolor
fields terminated by ','
ignore 1 lines;
-- GoodsSize
create table goodssize(
SizeID varchar(4) not null default '-',
SizeNote varchar(100) not null default '-',
SizeSort int not null default 0,
pt varchar(11) not null default '-'
);
#导入数据
load data infile "C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/GoodsSize.csv"
into table goodssize
fields terminated by ','
ignore 1 lines;
-- OrderInfo
create table OrderInfo(
OrderID varchar(6) not null default '-',
UserID varchar(10) not null default '-',
OrderState int not null default 0,
PayState int not null default 0,
AllotStatus int not null default 0,
Consignee varchar(100) not null default '-',
Country int not null default 0,
Province int not null default 0,
City int not null default 0,
District int not null default 0,
Address varchar(100) not null default '-',
GoodsAmount double not null default 0,
OrderAmount double not null default 0,
ShippingFee int not null default 0,
RealShippingFee int not null default 0,
PayTool int not null default 0,
IsBalancePay int not null default 0,
BalancePay double not null default 0,
OtherPay double not null default 0,
PayTime varchar(20),
AddTime varchar(20) not null default '-'
);
#导入数据
load data infile "C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/orderinfo.csv"
into table OrderInfo
fields terminated by ','
ignore 1 lines;
-- OrderDetail
create table OrderDetail(
RecID varchar(7) not null default '-',
OrderID varchar(6) not null default '-',
UserID varchar(6) not null default '-',
SpecialID varchar(6) not null default '-',
GoodsID varchar(6) not null default '-',
GoodsPrice double not null default 0,
ColorID varchar(4) not null default '-',
SizeID varchar(4) not null default '-',
Amount int not null default 0
);
#导入数据
load data infile "C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/OrderDetail.csv"
into table OrderDetail
fields terminated by ','
ignore 1 lines;
-- 查询导入表的行数
select count(*) from userinfo; -- 1000
select count(*) from RegionInfo; -- 3415
select count(*) from useraddress; -- 10000
select count(*) from goodsinfo; -- 10000
select count(*) from goodsbrand; -- 64
select count(*) from goodscolor; -- 2641
select count(*) from goodssize; -- 289
select count(*) from orderinfo; -- 3711
select count(*) from orderdetail; -- 10000
实践题
表数据调整
用户信息表
select * from userinfo;
-- 时间戳转换为标准的日期时间型格式
set sql_safe_updates=0; -- 设置数据库的安全权限
update userinfo set regtime = from_unixtime(regtime);
-- 执行报错 :Error Code: 1406. Data too long for column 'regtime'
-- 因为日期时间型不能直接放到文本格式内
alter table userinfo modify regtime datetime; -- 表中列有数据的情况下不能直接修改数据类型
-- 在表中添加一个新字段“regtime_new”,类型为日期时间型
alter table userinfo add regtime_new datetime;
-- 这个时候可以去更新新字段“regtime_new”的值,将时间戳转换为标准日期时间格式
update userinfo set regtime_new = from_unixtime(regtime);
-- 同样的方法,设置lastlogin(最后登录时间)
alter table userinfo add lastlogin_new datetime;
update userinfo set lastlogin_new = from_unixtime(lastlogin);
区域信息表
select * from RegionInfo;
-- 需要将“pt”字段由文本型转换为日期型
-- 首先需要提取文本中的日期信息(中间8个字符)
-- 然后将提取的信息转换为日期型格式(同样使用添加一列的方式)
alter table RegionInfo add pt_new date;
update RegionInfo set pt_new=mid(pt,2,8);
其他表的调整
select * from useraddress;
alter table useraddress add pt_new date;
update useraddress set pt_new=mid(pt,2,8);
select * from goodsinfo;
alter table goodsinfo add addtime_new datetime;
update goodsinfo set addtime_new = from_unixtime(addtime);
select * from goodsbrand;
alter table goodsbrand add pt_new date;
update goodsbrand set pt_new=mid(pt,2,8);
select * from goodscolor;
alter table goodscolor add pt_new date;
update goodscolor set pt_new=mid(pt,2,8);
select * from goodssize;
alter table goodssize add pt_new date;
update goodssize set pt_new=mid(pt,2,8);
select * from orderinfo;
alter table orderinfo add addtime_new datetime;
update orderinfo set addtime_new = from_unixtime(addtime);
-- paytime字段是支付时间,0的数据不能直接通过from_unixtime()转换(否则会变成1970-01-01 00:00:00),需要先转换成null
alter table orderinfo add paytime_new datetime;
update orderinfo set paytime_new = from_unixtime(paytime) where paytime <> '0';;
select * from orderdetail;
-- orderdetail表不需要调整列数据字段格式
【实际查询案例】
-- 1、不同时段的登陆用户数
-- 按照时间分组,统计每个小时有多少用户登录
-- 针对字段“lastlogin_new”提取小时
select hour(lastlogin_new) 时段,count(userid) 登录用户数
from userinfo
group by hour(lastlogin_new)
order by 时段;
-- 在实际情况中最后登录时间会是不同的日期,我们就可以用来统计最近7天或者30天登录的用户数,用来统计潜在流失用户
-- 2、不同时段的下单数量
-- 在订单信息表orderinfo中,orderid是唯一的
select hour(addtime_new) 时段,count(orderid) 下单数量
from orderinfo
group by hour(addtime_new)
order by 时段;
-- 不同时段的累计下单数量
select hour(addtime_new) 时段,count(orderid) 下单数量,
sum(count(orderid)) over( order by hour(addtime_new) )累计下单数量
from orderinfo
group by hour(addtime_new);
-- 上面的例子指定了分区内的排序, 默认就是统计滑动窗口第一行到当前行的订单数量
-- 3、当日GMV(未付款订单金额0+待发货订单金额1+已发货订单金额2+已取消订单金额3)
-- GMV(Gross Merchandise Volume)即商品交易总额,是一段时间内的成交总额
-- 按照订单状态进行orderstate分组
select orderstate,sum(orderamount) 订单金额
from orderinfo
group by orderstate
with rollup; -- with rollup 对分组之后的聚合值进行求和
-- 4、各省市消费金额(orderinfo-orderamount)
-- orderinfo表中的省份字段province 城市字段city
-- RegionInfo表中都存放在regionid字段
-- select * from RegionInfo;
select r1.regionname 省份,r2.regionname 城市,round(sum(orderamount),2) 消费金额
from orderinfo
join RegionInfo as r1 on province = r1.regionid
join RegionInfo as r2 on city = r2.regionid
group by province,city
order by province,city;
-- 5、不同支付方式的订单量
-- 支付工具 paytool 用户提交订单后会进入支付界面,选择支付方式,不管支付成功与否,都会产生支付方式
select paytool 支付方式,count(orderid) 订单量
from orderinfo
group by paytool
order by paytool;
-- 6、哪种支付方式可能导致用户支付不成功而取消订单
-- 查询因为支付不成功而取消的订单数量
-- 订单状态 orderstate = 3 支付失败 paystate = 0
select t1.paytool, 未支付取消的订单量,每个支付工具订单总量,
ifnull(未支付取消的订单量/每个支付工具订单总量,0) 占比
from
(select paytool,count(orderid) 每个支付工具订单总量
from orderinfo
group by paytool) t1
join
(select paytool,count(orderid) 未支付取消的订单量
from orderinfo
where orderstate = '3' and paystate = '0'
group by paytool) t2
on t1.paytool = t2.paytool;
-- 7、当日不同品牌的总销量
-- 销量在表orderdetail 品牌在表goodsbrand
-- 通过中间表商品表goodsinfo进行三表连接
-- 会存在有的商品品牌没有在品牌表中存在的情况
select goodsinfo.typeid 品牌ID,brandtype 品牌名称,sum(amount) 销量
from orderdetail
left join goodsinfo on orderdetail.goodsid = goodsinfo.goodsid
left join goodsbrand on goodsinfo.typeid = goodsbrand.supplierid
group by goodsinfo.typeid;
-- 8、当日不同品牌的复购用户数
select t.品牌ID,brandtype,count(t.userid) 复购用户数
from
(select goodsinfo.typeid 品牌ID,brandtype,userid,count(distinct orderid) 购买次数
from orderdetail
left join goodsinfo on orderdetail.goodsid = goodsinfo.goodsid
left join goodsbrand on goodsinfo.typeid = goodsbrand.supplierid
group by goodsinfo.typeid,userid
having count(distinct orderid) > 1) t
group by t.品牌ID;
-- 9、查询结果保存为表,用于后续重复使用
create table pro_amount as
select r1.regionname 省份,r2.regionname 城市,round(sum(orderamount),2) 消费金额
from orderinfo
join RegionInfo as r1 on province = r1.regionid
join RegionInfo as r2 on city = r2.regionid
group by province,city
order by province,city;
select * from pro_amount;
-- 10、将查询结果导出到指定的路径中
-- 主要需要使用以下路径 "C:/ProgramData/MySQL/MySQL Server 8.0/Uploads"
select r1.regionname 省份,r2.regionname 城市,round(sum(orderamount),2) 消费金额
from orderinfo
join RegionInfo as r1 on province = r1.regionid
join RegionInfo as r2 on city = r2.regionid
group by province,city
order by province,city
into outfile "C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/pro_amount.csv";
相关文章
暂无评论...