This script targets a YOLO-head network. It takes the raw outputs of the YOLO head (NHWC layout, with C = num_classes + 4 + 1 attributes per anchor), runs the post-processing, decodes the final bounding boxes, and draws them on the source images.
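
For orientation, the decode implemented below follows the standard YOLO formulation, applied per grid cell and anchor (scale_x_y, the strides, and the anchor sizes come from the config file at the end of this post):

bx = (scale_x_y * sigmoid(tx) - (scale_x_y - 1) / 2) * stride + grid_x
by = (scale_x_y * sigmoid(ty) - (scale_x_y - 1) / 2) * stride + grid_y
bw = anchor_w * exp(tw)
bh = anchor_h * exp(th)
conf = sigmoid(t_obj); per-class score = sigmoid(t_cls)   # independent sigmoids, not softmax

With num_classes = 8 and 3 anchors per scale, each output tensor therefore has (8 + 5) * 3 = 39 channels.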

Code

import numpy as np
import cv2
import torch
import os
import random
import argparse
from mmcv import Config, DictAction
from mmdet.core import multiclass_nms_no_bkg, multiclass_attr_nms_no_bkg
from mmdet.datasets import build_dataloader, build_dataset

def get_files(dir, suffix): 
    res = []
    for root, directory, files in os.walk(dir):
        for filename in files:
            name, suf = os.path.splitext(filename) 

            if suf in suffix:
                #res.append(filename)
                res.append(os.path.join(root, filename))
    return res
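
# Example (illustrative path): collect every .raw result file recursively:
#   raw_files = get_files('/path/to/SNPE_CDSP_result', ['.raw'])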

def bbox2result(bboxes, labels, num_classes):
    """Convert detection results to a list of numpy arrays.

    Args:
        bboxes (torch.Tensor | np.ndarray): shape (n, 5)
        labels (torch.Tensor | np.ndarray): shape (n, )
        num_classes (int): class number, including background class

    Returns:
        list(ndarray): bbox results of each class
    """
    if bboxes.shape[0] == 0:
        return [np.zeros((0, 5), dtype=np.float32) for i in range(num_classes)]
    else:
        if isinstance(bboxes, torch.Tensor):
            bboxes = bboxes.detach().cpu().numpy()
            labels = labels.detach().cpu().numpy()
        return [bboxes[labels == i, :] for i in range(num_classes)]
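
# With num_classes=8 this yields a list of eight (n_i, 5) arrays, one per
# class, each row holding [x1, y1, x2, y2, score]; classes with no detections
# get an empty (0, 5) array.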

def decode_label(cfg, gt_labels):
    valid_classes = np.arange(cfg.model.bbox_head.num_classes)
    gt_labels_origin = gt_labels.clone()
    for idx in range(cfg.model.bbox_head.num_classes):
        gt_labels[gt_labels_origin == idx] = valid_classes[idx]
    return gt_labels
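
# Note: valid_classes is simply arange(num_classes), so this mapping is the
# identity as written; it only has an effect if valid_classes is changed to
# remap a subset or reordering of the training labels.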

def _get_anchors_grid_xy(num_grid_h, num_grid_w, stride, device='cpu'):
    grid_x = torch.arange(num_grid_w, dtype=torch.float, device=device).repeat(num_grid_h, 1)
    grid_y = torch.arange(num_grid_h, dtype=torch.float, device=device).repeat(num_grid_w, 1)

    grid_x = grid_x.unsqueeze(0) * stride
    grid_y = grid_y.t().unsqueeze(0) * stride

    return grid_x, grid_y
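
# Worked example: num_grid_h=2, num_grid_w=3, stride=32 returns
#   grid_x = [[[ 0, 32, 64],
#              [ 0, 32, 64]]]   # shape (1, 2, 3): per-cell x offset in pixels
#   grid_y = [[[ 0,  0,  0],
#              [32, 32, 32]]]   # shape (1, 2, 3): per-cell y offset in pixels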

def _scale_position(pos,scale_x_y=1.0):
    return scale_x_y * pos - (scale_x_y - 1) / 2.0
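
# With scale_x_y=2.0 (as in the config), a sigmoid output in [0, 1] is
# stretched to [-0.5, 1.5] -- the YOLOv4-style "grid sensitivity" fix that
# lets predicted centers reach the borders of a grid cell.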
    
def get_bboxes_single(cfg, results_raw, scale_factor, crop_lefttop, rescale=False):
    num_scales = len(cfg.model.bbox_head.strides)
    num_anchors_per_scale = len(cfg.model.bbox_head.anchor_base_sizes[0])
    num_attrib = cfg.model.bbox_head.num_classes + 5
    assert len(results_raw) == num_scales
    multi_lvl_bboxes = []
    multi_lvl_cls_scores = []
    multi_lvl_conf_scores = []
    for i_scale in range(num_scales):
        result_raw = results_raw[i_scale]
        num_grid_h = result_raw.size(1)
        num_grid_w = result_raw.size(2)

        prediction_raw = result_raw.view(num_anchors_per_scale,
                                            num_attrib,
                                            num_grid_h,
                                            num_grid_w).permute(0, 2, 3, 1).contiguous()

        # grid x y offset, with stride step included
        stride = cfg.model.bbox_head.strides[i_scale]

        grid_x, grid_y = _get_anchors_grid_xy(num_grid_h, num_grid_w, stride, result_raw.device)

        # Get outputs x, y
        x_center_pred = _scale_position(torch.sigmoid(prediction_raw[..., 0]),cfg.model.bbox_head.scale_x_y) * stride\
                + grid_x  # Center x
        y_center_pred = _scale_position(torch.sigmoid(prediction_raw[..., 1]),cfg.model.bbox_head.scale_x_y) * stride\
                + grid_y  # Center y

        anchors = torch.tensor(cfg.model.bbox_head.anchor_base_sizes[i_scale], device=result_raw.device, dtype=torch.float32)

        anchor_w = anchors[:, 0:1].view((-1, 1, 1))
        anchor_h = anchors[:, 1:2].view((-1, 1, 1))

        w_pred = torch.exp(prediction_raw[..., 2]) * anchor_w  # Width
        h_pred = torch.exp(prediction_raw[..., 3]) * anchor_h  # Height

        x1_pred = x_center_pred - w_pred / 2
        y1_pred = y_center_pred - h_pred / 2

        x2_pred = x_center_pred + w_pred / 2
        y2_pred = y_center_pred + h_pred / 2

        # Corner format: x1, y1, x2, y2
        bbox_pred = torch.stack((x1_pred, y1_pred, x2_pred, y2_pred), dim=3).view(
            (-1, 4))
        # Conf 
        conf_pred = torch.sigmoid(prediction_raw[..., 4]).view(-1)
        # Per-class scores (independent sigmoids, not softmax).
        cls_pred = torch.sigmoid(prediction_raw[..., 5:]).view(-1, cfg.model.bbox_head.num_classes)

        conf_thr = cfg.test_cfg.get('conf_thr', -1)
        #conf_inds = conf_pred.ge(conf_thr).nonzero().flatten()
        conf_inds = torch.nonzero(conf_pred.ge(conf_thr), as_tuple=False).flatten()
        bbox_pred = bbox_pred[conf_inds, :]
        cls_pred = cls_pred[conf_inds, :]
        conf_pred = conf_pred[conf_inds]

        nms_pre = cfg.test_cfg.get('nms_pre', -1)
        if 0 < nms_pre < conf_pred.size(0):
            _, topk_inds = conf_pred.topk(nms_pre)
            bbox_pred = bbox_pred[topk_inds, :]
            cls_pred = cls_pred[topk_inds, :]
            conf_pred = conf_pred[topk_inds]
        multi_lvl_bboxes.append(bbox_pred)
        multi_lvl_cls_scores.append(cls_pred)
        multi_lvl_conf_scores.append(conf_pred)

    multi_lvl_bboxes = torch.cat(multi_lvl_bboxes)
    multi_lvl_cls_scores = torch.cat(multi_lvl_cls_scores)
    multi_lvl_conf_scores = torch.cat(multi_lvl_conf_scores)

    if multi_lvl_conf_scores.size(0) == 0:
        return torch.zeros((0, 5)), torch.zeros((0,))

    if rescale:
        multi_lvl_bboxes /= multi_lvl_bboxes.new_tensor(scale_factor)

    multi_lvl_bboxes += torch.tensor(crop_lefttop, device=result_raw.device, dtype=torch.float32).repeat(1, 2)

    # padding = multi_lvl_cls_scores.new_zeros(multi_lvl_cls_scores.shape[0], 1)
    # multi_lvl_cls_scores = torch.cat([padding, multi_lvl_cls_scores], dim=1)

    score_thr = cfg.test_cfg.get('score_thr', 0.05)
    nms = cfg.test_cfg.get('nms', 0.45)
    max_per_img = cfg.test_cfg.get('max_per_img', 100)

    det_bboxes, det_labels = multiclass_nms_no_bkg(multi_lvl_bboxes, multi_lvl_cls_scores,
                                                    score_thr, nms,
                                                    max_per_img, score_factors=multi_lvl_conf_scores)

    det_labels = decode_label(cfg,det_labels)
    return det_bboxes, det_labels
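
# Assuming multiclass_nms_no_bkg mirrors mmdet's multiclass_nms (minus the
# background class), det_bboxes is an (n, 5) tensor of [x1, y1, x2, y2, score]
# and det_labels an (n,) tensor of class indices -- exactly what bbox2result
# above expects.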


def get_bboxes(cfg, results_raw, img_metas, rescale=False):
    result_list = []
    for img_id in range(len(img_metas)):
        # One raw output per scale; derive the count from the config's strides.
        result_raw_list = [
            results_raw[i][img_id].detach()
            for i in range(len(cfg.model.bbox_head.strides))
        ]

        scale_factor = img_metas[img_id].get('scale_factor', np.array([1.0,1.0,1.0,1.0])) 
        crop_lefttop = img_metas[img_id].get('crop_lefttop', (0,0))      

        proposals = get_bboxes_single(cfg,result_raw_list, scale_factor, crop_lefttop, rescale)
        result_list.append(proposals)
    return result_list

def draw_rectangle(src_img_path, bbox_results, conf_thresh, save_path, thickness=4, font_scale=0.5):
    #img = cv2.clone(img)
    img = cv2.imread(src_img_path, 1)
    img = np.ascontiguousarray(img)
    for bbox_result in bbox_results:
        for single_bbox_result in bbox_result:
            if single_bbox_result[-1] < conf_thresh:
                continue
            else:
                # Copy the slice: a bare view would rescale bbox_results in
                # place, and the caller rescales the same boxes again later.
                single_bbox_result_int32 = single_bbox_result[:4].copy()
                # Map from the 512x192 network input back to the 1920x1080 source.
                single_bbox_result_int32[0] = single_bbox_result_int32[0] * 1920.0 / 512
                single_bbox_result_int32[2] = single_bbox_result_int32[2] * 1920.0 / 512
                # Crop-aware alternative (crop region spans y = 140..860):
                # single_bbox_result_int32[1] = single_bbox_result_int32[1] * (860 - 140) / 192
                # single_bbox_result_int32[3] = single_bbox_result_int32[3] * (860 - 140) / 192
                single_bbox_result_int32[1] = single_bbox_result_int32[1] * 1080.0 / 192
                single_bbox_result_int32[3] = single_bbox_result_int32[3] * 1080.0 / 192
                single_bbox_result_int32 = np.array(single_bbox_result_int32, dtype=np.int32)
                # left_top = (single_bbox_result_int32[0], single_bbox_result_int32[1]+140)
                # right_bottom = (single_bbox_result_int32[2], single_bbox_result_int32[3]+140)
                left_top = (single_bbox_result_int32[0], single_bbox_result_int32[1])
                right_bottom = (single_bbox_result_int32[2], single_bbox_result_int32[3])
                cv2.rectangle(img, left_top, right_bottom, (255,255,255), thickness=thickness)
    
    cv2.imwrite(save_path,img)
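
# The 1920/512 and 1080/192 factors are hard-coded for this project's 512x192
# network input and 1920x1080 source frames; the commented-out lines above show
# the crop-aware variant matching the UserSettingCrop region (y in 140..860)
# from the config. Adjust both pairs if your crop or input size differs.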


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='MMDet test (and eval) a model')
    parser.add_argument("--iseval", type=bool, default=True)
    parser.add_argument('--options', nargs='+', action=DictAction, help='arguments in dict')
    parser.add_argument('--eval', type=str, nargs='+', default='mAP',
                    help='evaluation metrics, which depends on the dataset, e.g., "bbox",'
                         ' "segm", "proposal" for COCO, and "mAP", "recall" for PASCAL VOC')
    parser.add_argument('--config', default='/home/szhang/project/git/mmdetection_qincao/mmdetection/tools/zs/8155testconfig.py', help='test config file path')
    args = parser.parse_args()
    cfg = Config.fromfile(args.config)

    width8, width16, width32 = [cfg.input_size[0] // i for i in [8,16,32]]
    height8, height16, height32 = [cfg.input_size[1] // i for i in [8,16,32]]
    channels = (cfg.model.bbox_head.num_classes + 5) * 3

    image_counts = 100
    raw_root = '/home/szhang/project/data/test/test100/detection_SNPE_CDSP_result/SNPE_CDSP_result'
    save_root = '/home/szhang/project/data/test/test100/8155test/8155test_img_result'
    outputs = []
    for i in range(image_counts):
        result8_path = raw_root + "/Result_{}/190.raw".format(i)
        result16_path = raw_root + "/Result_{}/labels.raw".format(i)
        result32_path = raw_root + "/Result_{}/boxes.raw".format(i)
            
        # result8_path = "/home/szhang/project/data/test/test100/8155test/8155test_img_result/8.raw"
        # result16_path = "/home/szhang/project/data/test/test100/8155test/8155test_img_result/16.raw"
        # result32_path = "/home/szhang/project/data/test/test100/8155test/8155test_img_result/32.raw"

        # type = img.dtype  # data type of the array, e.g. uint8 or uint16

        result8Data = np.fromfile(result8_path, dtype=np.float32)
        result16Data = np.fromfile(result16_path, dtype=np.float32)
        result32Data = np.fromfile(result32_path, dtype=np.float32)

        # result8Data = result8Data.reshape(1,height8, width8, channels)
        # result16Data = result16Data.reshape(1,height16, width16, channels)
        # result32Data = result32Data.reshape(1,height32, width32, channels)

        # Note whether the raw data is NCHW or NHWC: the layout here must match
        # the view/permute in get_bboxes_single:
        # prediction_raw = result_raw.view(num_anchors_per_scale, num_attrib, num_grid_h, num_grid_w).permute(0, 2, 3, 1).contiguous()
        result8Data = result8Data.reshape(height8, width8, channels).transpose(2, 0, 1)
        result16Data = result16Data.reshape(height16, width16, channels).transpose(2, 0, 1)
        result32Data = result32Data.reshape(height32, width32, channels).transpose(2, 0, 1)

        # Order must match cfg.model.bbox_head.strides = [32, 16, 8].
        resultData = [torch.from_numpy(result32Data), torch.from_numpy(result16Data), torch.from_numpy(result8Data)]
        # scale_factor is unused when rescale=False; pass 1.0 rather than a misleading 0.
        det_bboxes, det_labels = get_bboxes_single(cfg, resultData, 1.0, (0, 0))

        bbox_results = bbox2result(det_bboxes, det_labels, cfg.model.bbox_head.num_classes)
        # Read the matching line from the file list to locate the source image.
        src_txt_path = '/home/szhang/project/data/test/test100/detection_SNPE_CDSP_result/SNPE_CDSP_result/file_list.txt'
        with open(src_txt_path, 'r') as src_txt_file:
            txt_data_lists = src_txt_file.readlines()
        save_image_name = txt_data_lists[i].strip().split('/')[-1].split(".")[0]
        save_image_name = save_image_name + '.jpg'
        src_image_path = '/home/szhang/project/data/test/test100/8155test/JPEGImages_crop/'  + save_image_name
        save_path = save_root + "/Result_{}.jpg".format(i)
        draw_rectangle(src_image_path,bbox_results,0.3,save_path)
        # Rescale the evaluation outputs back to source-image coordinates
        # (same hard-coded factors as in draw_rectangle).
        for bbox_result in bbox_results:
            for single_bbox_result in bbox_result:
                single_bbox_result[0] = single_bbox_result[0] * 1920.0 / 512
                single_bbox_result[2] = single_bbox_result[2] * 1920.0 / 512
                single_bbox_result[1] = single_bbox_result[1] * 1080.0 / 192
                single_bbox_result[3] = single_bbox_result[3] * 1080.0 / 192
        outputs.append(bbox_results)
    if args.iseval:
        dataset = build_dataset(cfg.data.test)
        if args.eval:
            kwargs = {} if args.options is None else args.options
            dataset.evaluate(outputs, args.eval, **kwargs)
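
Assuming the script is saved as yolo_postprocess.py (the name is arbitrary), a typical invocation looks like:

python yolo_postprocess.py --config /path/to/8155testconfig.py --iseval true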

Config file:

input_size = (512, 192)
model = dict(
    type='YoloNet_BM',
    bbox_head=dict(
        type='YoloHead_BM',
        num_classes=8,
        in_channels=[512, 256, 128],
        out_channels=[512, 256, 128],
        strides=[32, 16, 8],
        anchor_base_sizes=[[[24.0, 18.0], [43.0, 32.0], [98.0, 75.0]],
                           [[10.0, 7.0], [17.0, 9.0], [12.0, 15.0]],
                           [[3.0, 7.0], [6.0, 5.0], [5.0, 11.0]]],
        scale_x_y=2.0))
test_cfg = dict(
    nms_pre=1000,
    min_bbox_size=0,
    score_thr=0.05,
    conf_thr=0.005,
    nms=dict(type='nms', iou_thr=0.45),
    max_per_img=100)
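# Two thresholds are used in get_bboxes_single: conf_thr (0.005) filters
# candidates by objectness before NMS, while score_thr (0.05) filters the
# final per-class scores inside multiclass_nms_no_bkg; max_per_img caps the
# number of detections kept per image.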
img_norm_cfg = dict(mean=[0, 0, 0], std=[255.0, 255.0, 255.0], to_rgb=True)
test_pipeline = [
    dict(type='LoadImageFromFile', to_float32=True),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(512, 192),
        flip=False,
        transforms=[
            dict(
                type='UserSettingCrop',
                rightbottom=(1920, 860),
                lefttop=(0, 140)),
            dict(type='Resize', keep_ratio=False),
            dict(
                type='Normalize',
                mean=[0, 0, 0],
                std=[255.0, 255.0, 255.0],
                to_rgb=True),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(
                type='Collect',
                keys=['img'],
                meta_keys=[
                    'filename', 'ori_shape', 'img_shape', 'pad_shape',
                    'scale_factor', 'img_norm_cfg', 'crop_lefttop',
                    'crop_rightbottom'
                ])
        ])
]
data = dict(
    test=dict(
        type='DDPDataset',
        ann_file='/home/szhang/project/data/test/test100/testshuffle100.txt',
        img_prefix=None,
        pipeline=[
            dict(type='LoadImageFromFile', to_float32=True),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(512, 192),
                flip=False,
                transforms=[
                    dict(
                        type='UserSettingCrop',
                        rightbottom=(1920, 860),
                        lefttop=(0, 140)),
                    dict(type='Resize', keep_ratio=False),
                    dict(
                        type='Normalize',
                        mean=[0, 0, 0],
                        std=[255.0, 255.0, 255.0],
                        to_rgb=True),
                    dict(type='Pad', size_divisor=32),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(
                        type='Collect',
                        keys=['img'],
                        meta_keys=[
                            'filename', 'ori_shape', 'img_shape', 'pad_shape',
                            'scale_factor', 'img_norm_cfg', 'crop_lefttop',
                            'crop_rightbottom'
                        ])
                ])
        ]))