2021_lg_01.sql_某招聘网站岗位数据分析案例(sql清洗,分析案例,纯代码)
库名:recruitment;1.新建库名#CREATE DATABASE IF NOT EXISTS recruitment DEFAULT CHARSET utf8 COLLATE utf8_general_ci;CREATE DATABASE recruitment CHARSET utf8;2.导入数据语句:mysql -uroot -p recruitment< [recruitm
·
库名:
recruitment;
1.新建库名
#CREATE DATABASE IF NOT EXISTS recruitment DEFAULT CHARSET utf8 COLLATE utf8_general_ci;
CREATE DATABASE recruitment CHARSET utf8;
2.导入数据语句:
mysql -uroot -p recruitment< [recruitment.sql(文件路径)]
/*
1.使用数据库
*/
use recruitment;
/*
2.查看data数据表列名
*/
desc data;
/*
3.去除数据缺失
*/
DROP view if EXISTS v_data_clean_null;
CREATE VIEW v_data_clean_null as
SELECT * FROM data
WHERE
job_href is not null and job_href != '' and
job_name is not null and job_name != '' and
company_href is not null and company_href != '' and
company_name is not null and company_name != '' and
providesalary_text is not null and providesalary_text != '' and
workarea is not null and workarea != '' and
workarea_text is not null and workarea_text != '' and
companytype_text is not null and companytype_text != '' and
degreefrom is not null and degreefrom != '' and
workyear is not null and workyear != '' and
updatedate is not null and updatedate != '' and
issuedate is not null and issuedate != '' and
parse2_job_detail is not null and parse2_job_detail != '';
/*
4.查看条数
*/
SELECT count(1) FROM v_data_clean_null;
SELECT count(1) FROM data;
/*
5.数据去重 运用窗口函数排序去重
*/
DROP view if EXISTS v_data_clean_distinct;
create view v_data_clean_distinct
as
with t1 as
(
SELECT
*,
ROW_NUMBER()
over
(PARTITION by company_name,job_name ORDER BY issuedate desc)
as
row_distinct
FROM
v_data_clean_null
)
SELECT * FROM t1 WHERE t1.row_distinct=1
;
/*
6.北上广深筛选
*/
DROP view if EXISTS v_data_clean_workplace;
create view v_data_clean_workplace
as
with p as
(
SELECT
*,
(
case
when workarea_text LIKE '%北京%' then '北京'
when workarea_text LIKE '%上海%' then '上海'
when workarea_text LIKE '%广州%' then '广州'
when workarea_text LIKE '%深圳%' then '深圳'
end
) as work_place
FROM v_data_clean_distinct
)
SELECT * FROM p WHERE p.work_place is not null;
/*
7.数据分析师筛选
*/
DROP view if EXISTS v_data_clean_data;
create view v_data_clean_data
as
SELECT
*
FROM
v_data_clean_workplace
WHERE
job_name
LIKE
'%数据%'
;
/*
8。清洗完成,建表
*/
DROP VIEW if EXISTS v_data_clean;
CREATE view v_data_clean
as
SELECT * FROM v_data_clean_data;
/*
9.岗位数量分析
*/
CREATE view v_data_market_damand as
SELECt
work_place '城市',
sum(degreefrom) '岗位总数量',
count(DISTINCT job_name) '职位数量'
FROM
v_data_clean
GROUP BY
work_place order by '岗位总数量' desc;
/*
10.企业类型岗位人数分布
*/
with p as
(
SELECT
companytype_text ,
sum(degreefrom) drg_sum
FROM
v_data_clean
GROUP BY
companytype_text
)
SELECT
companytype_text '企业类型',
drg_sum '招聘数量',
concat(ROUND(drg_sum/(SELECT sum(drg_sum) FROM p) ,4)*100,'%') as '招聘占比'
FROM p ORDER BY drg_sum desc;
/*
11.薪资分析
*/
drop view if EXISTS v_data_clean_unit;
CREATE VIEW v_data_clean_unit
AS
with p as
(
SELECT
*,
(
case
when providesalary_text LIKE '%万/月' then 10000
when providesalary_text LIKE '%千/月' then 1000
when providesalary_text LIKE '%万/年' then 833
end
)
unit,
(
case
when providesalary_text LIKE '%万/月' then '万/月'
when providesalary_text LIKE '%千/月' then '千/月'
when providesalary_text LIKE '%万/年' then '万/年'
end
)
unit_2
FROM v_data_clean
)
SELECT *,SUBSTRING_INDEX(providesalary_text,unit_2,1) fee
FROM p ;
drop view if EXISTS v_data_clean_salary;
CREATE VIEW v_data_clean_salary
AS
SELECT
*,
cast(
(
SUBSTRING_INDEX(fee,'-',1)
+
SUBSTRING_INDEX(fee,'-',-1)
)/2 as DECIMAL(6,2))*unit 'salary'
FROM v_data_clean_unit WHERE unit_2 is not null;
SELECT
workyear '工作年限' ,
AVG(salary) '平均薪资'
FROM
v_data_clean_salary
GROUP BY
workyear
ORDER BY
LENGTH(workyear),workyear
asc
;
create view v_data_companytype_salary as
select
companytype_text as '企业类型',
avg(salary) as '平均薪资'
from v_data_clean_salary
group by companytype_text
order by avg(salary) desc
;
select * from v_data_companytype_salary;
-- 第一步,获取前30名高频技能点及其出现频数
create view v_data_skill_quantity as
(with p as
(select
skill,
count(*) as quantity
from skill_table st
inner join v_data_clean v
on v.parse2_job_detail like concat('%',st.skill,'%')
group by st.skill)
select
*
from p
order by quantity desc
limit 30)
;
select * from v_data_skill_quantity;
-- 2
create view v_data_skill as
select
skill as '技能点',
quantity as '出现频数',
concat(cast(quantity / total_quantity * 100 as decimal
(4,2)),'%') as '出现频率'
from
v_data_skill_quantity,
(select count(*) as total_quantity from v_data_clean) as f
;
select * from v_data_skill
更多推荐
已为社区贡献2条内容
所有评论(0)