MapReduce项目案例4——乘用车辆和商用车辆销售数据分析

需求分析👇👇

ChenJieYaYa

6356人浏览 · 2022-06-27 16:41:05

ChenJieYaYa · 2022-06-27 16:41:05 发布

项目介绍

1.数据概况

本数据为上牌汽车的销售数据，分为乘用车辆和商用车辆
数据包含销售相关数据与汽车具体参数

2.数据项包括

省0，月1，市2，区县3，年4，车辆型号5，制造商6，品牌7，车辆类型8，所有权9，
使用性质10，数量11，发动机型号12，排量13，功率14，燃料种类15，车长16，车宽17，车高18，车厢长19，
车厢宽20，车厢高21，轴数22，轴距23，前轮距24，轮胎规格25，轮胎数26，总质量27，整备质量28，核定载质量29，
核定载客30，准牵引质量31，底盘企业32，底盘品牌33，底盘型号34，发动机企业35，车辆名称36，年龄37，性别38

3.输入数据

数据量太大，此处复制不方便，自行百度

需求分析👇👇

汽车行业市场分析

1.统计乘用车辆(非营运)和商用车辆(营运)的数量和销售额分布

CountMap

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Mapper for analysis 1.1: distribution of sales between passenger vehicles
 * (usage nature "非营运") and commercial vehicles (any other usage nature).
 *
 * <p>Each input line is split on commas. The usage nature is read from column 10
 * and the quantity from column 11, and one {@code (category, quantity)} pair is
 * emitted per well-formed record.
 *
 * <p>The output key is {@link Text} because the driver, combiner and reducer of
 * this analysis all declare {@code Text} keys; the earlier version emitted an
 * {@code IntWritable} month key, which does not match that declaration.
 */
public class CountMap extends Mapper<LongWritable, Text, Text, LongWritable> {
    /** Column index of the usage nature (使用性质). */
    private static final int USAGE_COLUMN = 10;
    /** Column index of the quantity (数量). */
    private static final int QUANTITY_COLUMN = 11;

    // NOTE(review): assumes the dataset spells non-commercial usage exactly as "非营运" — confirm.
    private static final String NON_COMMERCIAL_USAGE = "非营运";
    private static final String PASSENGER = "乘用车辆";
    private static final String COMMERCIAL = "商用车辆";

    // Reused across map() calls; context.write is invoked once per populated pair.
    private final Text category = new Text();
    private final LongWritable quantity = new LongWritable();

    /**
     * Emits the vehicle category and quantity of one record.
     *
     * @param key     input key supplied by the framework (unused)
     * @param value   one comma-separated input line
     * @param context task context used to emit output and count skipped records
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] fields = value.toString().trim().split(",");
        if (fields.length <= QUANTITY_COLUMN) {
            return;
        }
        String usage = fields[USAGE_COLUMN].trim();
        String rawQuantity = fields[QUANTITY_COLUMN].trim();
        if (usage.isEmpty() || rawQuantity.isEmpty()) {
            return;
        }
        try {
            quantity.set(Long.parseLong(rawQuantity));
        } catch (NumberFormatException e) {
            // Malformed quantity: skip the record but keep it visible in the job counters.
            context.getCounter("CountMap", "MALFORMED_QUANTITY").increment(1);
            return;
        }
        category.set(NON_COMMERCIAL_USAGE.equals(usage) ? PASSENGER : COMMERCIAL);
        // Write outside the try block so I/O failures propagate instead of being swallowed.
        context.write(category, quantity);
    }
}

CountCombine

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import java.util.logging.Logger;

/**
 * Combiner for analysis 1.1: sums the quantities that share a key and emits
 * the partial sum under the same key.
 *
 * <p>The constructor, {@code setup} and {@code cleanup} only log, so their
 * invocation timing can be observed in the task logs. Part of the Hadoop_MR
 * program; created 2022-06-21 23:28.
 */
public class CountCombine extends Reducer<Text, LongWritable, Text, LongWritable> {
    private Logger logger = Logger.getLogger(CountCombine.class.getName());

    // Reused output value, overwritten before every write.
    private LongWritable res = new LongWritable();

    public CountCombine() {
        // Author's note on the question in the log message: yes.
        logger.info("CountCombine的构造方法，是单例吗？");
    }

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        // Author's note: runs once, at the start.
        logger.info("CountCombine的setup执行时机");
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        // Author's note: runs once, at the end.
        logger.info("CountCombine的cleanup执行时机");
    }

    /**
     * Adds up all values of one key and writes the partial sum.
     *
     * @param key     the grouping key
     * @param values  quantities emitted for that key
     * @param context task context used to emit the partial sum
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
        // Primitive accumulator: avoids the deprecated Long constructor and per-iteration boxing.
        long sum = 0L;
        for (LongWritable val : values) {
            sum += val.get();
        }
        res.set(sum);
        logger.info("combine合并：" + key.toString() + ":" + res.get());
        context.write(key, res);
    }
}

CountReduce

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.logging.Logger;

/**
 * Reducer for analysis 1.1: totals the quantity per key, then in
 * {@code cleanup} writes each key with its total and its share of the grand total.
 *
 * <p>Totals are buffered in memory and the share is computed against the sum
 * of the keys seen by this reducer instance, so the ratios are relative to
 * that instance's input. Part of the Hadoop_MR program; created 2022-06-21 23:34.
 */
public class CountReduce extends Reducer<Text, LongWritable, Text, Text> {
    // Logger is named after this class (it previously used CountCombine's name).
    private Logger logger = Logger.getLogger(CountReduce.class.getName());

    /** Key -> total quantity for that key. */
    Map<String, Long> map = new HashMap<>();
    /** Running grand total over every key handled by this instance. */
    double all = 0;

    public CountReduce() {
        logger.info("CountReduce的构造方法，是单例吗？");
    }

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        logger.info("CountReduce的setup执行时机");
    }

    /**
     * Sums the values of one key and records the total; nothing is written here.
     *
     * @param key     the grouping key
     * @param values  (partial) quantities for that key
     * @param context task context (unused in this method)
     */
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
        long sum = 0;
        for (LongWritable val : values) {
            sum += val.get();
        }
        all += sum;
        map.put(key.toString(), sum);
        logger.info("CountReduce的reduce：" + key.toString() + ":" + sum);
    }

    /**
     * Writes one line per buffered key: the key, then total and ratio separated by a tab.
     *
     * @param context task context used to emit the results
     * @throws IOException          if writing an output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        logger.info("CountReduce的cleanup执行时机");
        // Iterate entries directly to avoid a second lookup per key.
        for (Map.Entry<String, Long> entry : map.entrySet()) {
            String key = entry.getKey();
            long value = entry.getValue();
            double percent = value / all;
            logger.info("CountReduce的cleanup：" + key + ":" + value + "\t" + percent);
            context.write(new Text(key), new Text(value + "\t" + percent));
        }
    }
}

App1

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * Driver for analysis 1.1: configures and submits the job built from
 * {@code CountMap}, {@code CountCombine} and {@code CountReduce}.
 *
 * <p>When exactly two arguments are given they are used as input and output
 * path; otherwise the built-in local paths are used. Part of the Hadoop_MR
 * program; created 2022-06-21 23:45.
 */
public class App1 {
    /**
     * Submits the job and exits with 0 on success, 1 on failure.
     *
     * @param args optional {@code [inputPath, outputPath]}
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Local defaults; both are replaced when exactly two arguments are supplied.
        Path inputDir = new Path("E:\\HadoopMRData\\input");
        Path outputDir = new Path("E:\\HadoopMRData\\output");
        boolean pathsGiven = args != null && args.length == 2;
        if (pathsGiven) {
            inputDir = new Path(args[0]);
            outputDir = new Path(args[1]);
        }

        // The author left two disabled options here: setting fs.defaultFS
        // (example value hdfs://node1:8020) and deleting an existing output
        // directory through FileSystem before submitting.
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration, "通过统计乘用车辆(非营运)和商用车辆(其他)的数量和销售额分布");
        job.setJarByClass(App1.class);

        // Processing stages.
        job.setMapperClass(CountMap.class);
        job.setCombinerClass(CountCombine.class);
        job.setReducerClass(CountReduce.class);

        // Intermediate (map output) types, then final output types.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, inputDir);
        FileOutputFormat.setOutputPath(job, outputDir);

        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}

2.统计山西省2013年每个月的汽车销售数量的比例,按月份排序

输出格式：月份、数量、比例（各字段以制表符分隔）
CountMap

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Mapper for the monthly sales analysis: emits {@code (month, quantity)} for
 * every well-formed record.
 *
 * <p>Each input line is split on commas; the month is read from column 1 and
 * the quantity from column 11.
 *
 * <p>NOTE(review): the analysis title restricts the data to Shanxi province in
 * 2013, but no province/year filter is applied here — presumably the input is
 * already limited to that subset; confirm against the dataset.
 */
public class CountMap extends Mapper<LongWritable, Text, IntWritable, LongWritable> {
    /** Column index of the month (月). */
    private static final int MONTH_COLUMN = 1;
    /** Column index of the quantity (数量). */
    private static final int QUANTITY_COLUMN = 11;

    // Reused across map() calls; both are set before every write.
    private final IntWritable month = new IntWritable();
    private final LongWritable quantity = new LongWritable();

    /**
     * Parses month and quantity from one record and emits them.
     *
     * @param key     input key supplied by the framework (unused)
     * @param value   one comma-separated input line
     * @param context task context used to emit output and count skipped records
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] fields = value.toString().trim().split(",");
        if (fields.length <= QUANTITY_COLUMN) {
            return;
        }
        // Trim before parsing: the emptiness check used to trim but the parse did not,
        // so padded numbers were rejected.
        String rawQuantity = fields[QUANTITY_COLUMN].trim();
        if (rawQuantity.isEmpty()) {
            return;
        }
        try {
            month.set(Integer.parseInt(fields[MONTH_COLUMN].trim()));
            quantity.set(Long.parseLong(rawQuantity));
        } catch (NumberFormatException e) {
            // Non-numeric month or quantity: skip the record but keep it visible in the job counters.
            context.getCounter("CountMap", "MALFORMED_RECORD").increment(1);
            return;
        }
        // Write outside the try block so I/O failures propagate instead of being swallowed.
        context.write(month, quantity);
    }
}

CountCombine

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Combiner for the monthly sales analysis: sums the quantities of one month
 * and emits the partial sum under the same month key.
 *
 * <p>Part of the Hadoop_MR program; created 2022-06-21 23:28.
 */
public class CountCombine extends Reducer<IntWritable, LongWritable, IntWritable, LongWritable> {
    // Reused output value, overwritten before every write.
    private LongWritable res = new LongWritable();

    /**
     * Adds up all values of one key and writes the partial sum.
     *
     * @param key     the month key
     * @param values  quantities emitted for that month
     * @param context task context used to emit the partial sum
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(IntWritable key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
        // Primitive accumulator: avoids the deprecated Long constructor and per-iteration boxing.
        long sum = 0L;
        for (LongWritable val : values) {
            sum += val.get();
        }
        res.set(sum);
        context.write(key, res);
    }
}

CountReduce

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

/**
 * Reducer for the monthly sales analysis: totals the quantity per month, then
 * in {@code cleanup} writes each month with its total and its share of the
 * grand total, in ascending month order.
 *
 * <p>Totals are buffered in memory and the share is computed against the sum
 * of the months seen by this reducer instance. Part of the Hadoop_MR program;
 * created 2022-06-21 23:34.
 */
public class CountReduce extends Reducer<IntWritable, LongWritable, IntWritable, Text> {
    // Sorted map: the analysis requires output ordered by month, which HashMap
    // iteration does not guarantee. Fully qualified so no extra import is needed.
    private Map<Integer, Long> map = new java.util.TreeMap<Integer, Long>();
    // Grand total of all quantities handled by this instance.
    private long all = 0L;
    // Holds the ratio of the entry currently being written.
    private DoubleWritable doubleWritable = new DoubleWritable();

    /**
     * Sums the values of one month and records the total; nothing is written here.
     *
     * @param key     the month key
     * @param values  (partial) quantities for that month
     * @param context task context (unused in this method)
     */
    @Override
    protected void reduce(IntWritable key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
        // Primitive accumulator avoids boxing on every addition.
        long sum = 0L;
        for (LongWritable val : values) {
            sum += val.get();
        }
        all += sum;
        map.put(key.get(), sum);
    }

    /**
     * Writes one line per month: the month, then total and ratio separated by a tab.
     *
     * @param context task context used to emit the results
     * @throws IOException          if writing an output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        for (Map.Entry<Integer, Long> entry : map.entrySet()) {
            long value = entry.getValue();
            double percent = value / (double) all;
            doubleWritable.set(percent);
            context.write(new IntWritable(entry.getKey()), new Text(value + "\t" + doubleWritable));
        }
    }
}

App2

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * Driver for the monthly sales analysis: configures and submits the job built
 * from {@code CountMap}, {@code CountCombine} and {@code CountReduce}.
 *
 * <p>When exactly two arguments are given they are used as input and output
 * path; otherwise the built-in local paths are used. Part of the Hadoop_MR
 * program; created 2022-06-21 23:45.
 */
public class App2 {
    /**
     * Submits the job and exits with 0 on success, 1 on failure.
     *
     * @param args optional {@code [inputPath, outputPath]}
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Start from the local defaults and override them only for a two-argument call.
        Path source = new Path("E:\\HadoopMRData\\input");
        Path target = new Path("E:\\HadoopMRData\\output");
        if (args != null) {
            if (args.length == 2) {
                source = new Path(args[0]);
                target = new Path(args[1]);
            }
        }

        // The author left two disabled options here: setting fs.defaultFS
        // (example value hdfs://node1:8020) and deleting an existing output
        // directory through FileSystem before submitting.
        Job job = Job.getInstance(new Configuration(), "统计山西省2013年每个月的汽车销售数量的比例,按月份排序");
        job.setJarByClass(App2.class);

        // Mapper and the types it emits.
        job.setMapperClass(CountMap.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(LongWritable.class);

        // Combiner, reducer and the final output types.
        job.setCombinerClass(CountCombine.class);
        job.setReducerClass(CountReduce.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, source);
        FileOutputFormat.setOutputPath(job, target);

        int exitCode = job.waitForCompletion(true) ? 0 : 1;
        System.exit(exitCode);
    }
}

3.统计安徽省2014年4月份各市区县的汽车销售的比例

没有安徽省

用户数据市场分析

1.统计买车的男女比例及男女对车的颜色的选择

没有颜色这个列
CountMap

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;


/**
 * Mapper for the buyer-gender analysis: emits {@code (gender, 1)} for every
 * record whose gender column (index 38) is "男性" or "女性".
 *
 * <p>Author's note: the number of map tasks is determined by the input splits,
 * and within one task the call order is setup, map (once per record), cleanup.
 */
public class CountMap extends Mapper<LongWritable, Text, Text, LongWritable> {
    /** Column index of the buyer's gender (性别). */
    private static final int GENDER_COLUMN = 38;

    // Reused across map() calls instead of allocating two Writables per record,
    // matching the other mappers in this project.
    private final Text gender = new Text();
    private final LongWritable one = new LongWritable(1);

    /**
     * Emits a count of one for a record with a recognised gender value.
     *
     * @param key     input key supplied by the framework (unused)
     * @param value   one comma-separated input line
     * @param context task context used to emit the output pair
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] fields = value.toString().trim().split(",");
        if (fields.length <= GENDER_COLUMN) {
            return;
        }
        // Trim so that padding around the field does not hide a valid value.
        String rawGender = fields[GENDER_COLUMN].trim();
        if ("男性".equals(rawGender) || "女性".equals(rawGender)) {
            gender.set(rawGender);
            context.write(gender, one);
        }
    }
}

CountCombine

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Combiner for the buyer-gender analysis: collapses the counts of one key
 * into a single partial sum emitted under the same key.
 *
 * <p>Part of the Hadoop_MR program; created 2022-06-21 23:28.
 */
public class CountCombine extends Reducer<Text, LongWritable, Text, LongWritable> {
    // Doubles as accumulator and output value; reset at the start of every reduce call.
    private LongWritable partialSum = new LongWritable();

    /**
     * Accumulates every value of the key and writes the result.
     *
     * @param key     the grouping key
     * @param values  counts emitted for that key
     * @param context task context used to emit the partial sum
     */
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
        partialSum.set(0L);
        for (LongWritable count : values) {
            partialSum.set(partialSum.get() + count.get());
        }
        context.write(key, partialSum);
    }
}

CountReduce

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

/**
 * Reducer for the buyer-gender analysis: totals the count per key and, once
 * all keys have been processed, writes each key with its total and its share
 * of the grand total.
 *
 * <p>The share is relative to the keys seen by this reducer instance. Part of
 * the Hadoop_MR program; created 2022-06-21 23:34.
 */
public class CountReduce extends Reducer<Text, LongWritable, Text, Text> {
    // Key -> total for that key, filled by reduce and drained by cleanup.
    private Map<String, Long> totalsByKey = new HashMap<String, Long>();
    // Sum of every per-key total.
    private long grandTotal = 0L;
    // Holds the ratio of the entry currently being written.
    private DoubleWritable ratio = new DoubleWritable();

    /**
     * Adds up the values of one key and stores the total for later output.
     *
     * @param key     the grouping key
     * @param values  (partial) counts for that key
     * @param context task context (unused in this method)
     */
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
        long keyTotal = 0L;
        for (LongWritable count : values) {
            keyTotal = keyTotal + count.get();
        }
        // The key is read after the values have been consumed, as before.
        totalsByKey.put(key.toString(), keyTotal);
        grandTotal += keyTotal;
    }

    /**
     * Writes one line per stored key: the key, then total and ratio separated by a tab.
     *
     * @param context task context used to emit the results
     */
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        for (Map.Entry<String, Long> entry : totalsByKey.entrySet()) {
            long keyTotal = entry.getValue();
            ratio.set(keyTotal / (double) grandTotal);
            String line = keyTotal + "\t" + ratio;
            context.write(new Text(entry.getKey()), new Text(line));
        }
    }
}

App3

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * Driver for the buyer-gender analysis: configures and submits the job built
 * from {@code CountMap}, {@code CountCombine} and {@code CountReduce}.
 *
 * <p>When exactly two arguments are given they are used as input and output
 * path; otherwise the built-in local paths are used. Part of the Hadoop_MR
 * program; created 2022-06-21 23:45.
 */
public class App3 {
    /**
     * Submits the job and exits with 0 on success, 1 on failure.
     *
     * @param args optional {@code [inputPath, outputPath]}
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Defaults for a local run; replaced below when two arguments are passed.
        Path inPath = new Path("E:\\HadoopMRData\\input");
        Path outPath = new Path("E:\\HadoopMRData\\output");
        boolean useArguments = args != null && args.length == 2;
        if (useArguments) {
            inPath = new Path(args[0]);
            outPath = new Path(args[1]);
        }

        // The author left two disabled options here: setting fs.defaultFS
        // (example value hdfs://node1:8020) and deleting an existing output
        // directory through FileSystem before submitting.
        Configuration jobConf = new Configuration();
        Job job = Job.getInstance(jobConf, "统计买车的男女比例及男女对车的颜色的选择");
        job.setJarByClass(App3.class);

        // Input and output locations.
        FileInputFormat.addInputPath(job, inPath);
        FileOutputFormat.setOutputPath(job, outPath);

        // Processing stages.
        job.setMapperClass(CountMap.class);
        job.setCombinerClass(CountCombine.class);
        job.setReducerClass(CountReduce.class);

        // Intermediate (map output) types, then final output types.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        if (job.waitForCompletion(true)) {
            System.exit(0);
        } else {
            System.exit(1);
        }
    }
}

2.统计不同所有权、型号和类型的汽车销售数及比例

CountMap

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;


/**
 * Mapper for the ownership / model / type analysis: emits
 * {@code ("ownership\tmodel\ttype", 1)} for every record in which all three
 * fields are non-empty.
 *
 * <p>Column indices follow the 0-based field list of the dataset description:
 * vehicle model = 5, vehicle type = 8, ownership = 9. The earlier version read
 * columns 6, 9 and 10 (the 1-based positions), which are manufacturer,
 * ownership and usage nature.
 *
 * <p>Author's note: the number of map tasks is determined by the input splits,
 * and within one task the call order is setup, map (once per record), cleanup.
 */
public class CountMap extends Mapper<LongWritable, Text, Text, LongWritable> {
    /** Column index of the vehicle model (车辆型号). */
    private static final int MODEL_COLUMN = 5;
    /** Column index of the vehicle type (车辆类型). */
    private static final int TYPE_COLUMN = 8;
    /** Column index of the ownership (所有权). */
    private static final int OWNERSHIP_COLUMN = 9;

    // Reused across map() calls instead of allocating two Writables per record.
    private final Text compositeKey = new Text();
    private final LongWritable one = new LongWritable(1);

    /**
     * Emits a count of one keyed by ownership, model and type.
     *
     * @param key     input key supplied by the framework (unused)
     * @param value   one comma-separated input line
     * @param context task context used to emit the output pair
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] fields = value.toString().trim().split(",");
        if (fields.length <= OWNERSHIP_COLUMN) {
            return;
        }
        String ownership = fields[OWNERSHIP_COLUMN];
        String model = fields[MODEL_COLUMN];
        String type = fields[TYPE_COLUMN];
        if (ownership.isEmpty() || model.isEmpty() || type.isEmpty()) {
            return;
        }
        compositeKey.set(ownership + "\t" + model + "\t" + type);
        context.write(compositeKey, one);
    }
}

CountReduce

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

/**
 * Reducer for the ownership / model / type analysis: totals the count per key
 * and, in {@code cleanup}, writes each key with its total and its share of
 * the grand total.
 *
 * <p>The share is relative to the keys seen by this reducer instance. Part of
 * the Hadoop_MR program; created 2022-06-21 23:34.
 */
public class CountReduce extends Reducer<Text, LongWritable, Text, Text> {
    // Key text -> number of sales recorded for it.
    private Map<String, Long> salesPerKey = new HashMap<String, Long>();
    // Number of sales over all keys.
    private long salesOverall = 0L;
    // Holds the proportion of the entry currently being written.
    private DoubleWritable proportion = new DoubleWritable();

    /**
     * Adds up the values of one key and stores the total for later output.
     *
     * @param key     the grouping key
     * @param values  counts for that key
     * @param context task context (unused in this method)
     */
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
        long subtotal = 0L;
        for (LongWritable each : values) {
            subtotal += each.get();
        }
        salesOverall = salesOverall + subtotal;
        // The key is read only after the values have been consumed, as before.
        salesPerKey.put(key.toString(), subtotal);
    }

    /**
     * Writes one line per stored key: the key, then total and proportion separated by a tab.
     *
     * @param context task context used to emit the results
     */
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        for (Map.Entry<String, Long> row : salesPerKey.entrySet()) {
            long subtotal = row.getValue();
            proportion.set(subtotal / (double) salesOverall);
            StringBuilder line = new StringBuilder();
            line.append(subtotal).append("\t").append(proportion);
            context.write(new Text(row.getKey()), new Text(line.toString()));
        }
    }
}

App4

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * Driver for the ownership / model / type analysis: configures and submits
 * the job built from {@code CountMap} and {@code CountReduce}, with
 * {@code Count10Group} as grouping comparator and no combiner.
 *
 * <p>When exactly two arguments are given they are used as input and output
 * path; otherwise the built-in local paths are used. Part of the Hadoop_MR
 * program; created 2022-06-21 23:45.
 */
public class App4 {
    /**
     * Submits the job and exits with 0 on success, 1 on failure.
     *
     * @param args optional {@code [inputPath, outputPath]}
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Local defaults; both are replaced when exactly two arguments are supplied.
        Path from = new Path("E:\\HadoopMRData\\input");
        Path to = new Path("E:\\HadoopMRData\\output");
        boolean twoArgs = args != null && args.length == 2;
        if (twoArgs) {
            from = new Path(args[0]);
            to = new Path(args[1]);
        }

        // The author left two disabled options here: setting fs.defaultFS
        // (example value hdfs://node1:8020) and deleting an existing output
        // directory through FileSystem before submitting.
        Configuration settings = new Configuration();
        Job job = Job.getInstance(settings, "统计的车的所有权、型号和类型");
        job.setJarByClass(App4.class);

        // Mapper, reducer and the comparator that groups keys on the reduce side.
        // Count10Group is not shown in this listing.
        job.setMapperClass(CountMap.class);
        job.setReducerClass(CountReduce.class);
        job.setGroupingComparatorClass(Count10Group.class);

        // Intermediate (map output) types, then final output types.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, from);
        FileOutputFormat.setOutputPath(job, to);

        boolean completed = job.waitForCompletion(true);
        System.exit(completed ? 0 : 1);
    }
}

3.每个类型车的用户做年龄和性别的统计

不同车型销售统计分析

1.统计不同类型车在一个月(对一段时间：如每个月或每年)的总销售量

2.通过不同类型(品牌)车销售情况，来统计发动机型号和燃料种类

3.统计价格相同而类型(品牌)不同车的销售量

针对某一品牌的竞争分析

1.统计一汽大众的每一年(每一个月)的销售量和增长率(趋势)

2.统计一汽大众在山西和安徽销售量及其价格的差异

腾讯云开发者社区

腾讯云面向开发者汇聚海量精品云计算使用和开发经验，营造开放的云计算技术生态圈。

更多推荐

自动化提示词生成工具盘点

腾讯云开发者社区

AI PPT免费使用技巧盘点：如何快速制作专业PPT？

腾讯云开发者社区

腾讯云架构师技术沙龙 · 长沙站圆满落幕，共话AI驱动下的技术架构与前沿应用

人工智能已成为推动技术创新与产业变革的重要引擎，开发者正身处一场前所未有的技术变革之中。通过本次腾讯云架构师技术沙龙，各位专家深入分享前沿技术洞察，探讨 AI 落地的应用路径与实践经验，为架构师的职业发展指明方向。腾讯云架构师长沙同盟和腾讯云架构师技术同盟长沙地区理事会正式成立。未来，腾讯云架构师长沙同盟将凝心聚力，打造属于本地架构师的学习与成长的家园，助力中国架构的蓬勃发展。未来已来，让我们携手