A test script for the mmdetection framework (Python)
This script targets a YOLO-head network. It takes the raw outputs from the YOLO head (NHWC, with C = num_classes + 4 + 1 per anchor), post-processes them into final bounding boxes, and draws the boxes on the image.
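Each anchor predicts num_classes class scores, 4 box offsets, and 1 objectness score, and there are 3 anchors per scale, so the channel dimension of each exported feature map is 3 × (num_classes + 5), i.e. 39 for the 8-class config below. The per-cell decode applied in the script is the usual YOLOv3/v4 one; a minimal sketch (all values here are illustrative, not taken from a real image):

import torch

tx, ty, tw, th = 0.2, -0.1, 0.3, 0.5       # raw network outputs for one anchor
stride, grid_x, grid_y = 8.0, 16.0, 24.0   # grid offsets already multiplied by stride
anchor_w, anchor_h = 24.0, 18.0            # anchor base size for this scale
scale_x_y = 2.0                            # from the config below
cx = (scale_x_y * torch.sigmoid(torch.tensor(tx)) - (scale_x_y - 1) / 2) * stride + grid_x
cy = (scale_x_y * torch.sigmoid(torch.tensor(ty)) - (scale_x_y - 1) / 2) * stride + grid_y
w = anchor_w * torch.exp(torch.tensor(tw))
h = anchor_h * torch.exp(torch.tensor(th))
x1, y1, x2, y2 = cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2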
Code:
import numpy as np
import cv2
import torch
import os
import random
import argparse
from mmcv import Config, DictAction
from mmdet.core import multiclass_nms_no_bkg, multiclass_attr_nms_no_bkg
from mmdet.datasets import build_dataloader, build_dataset
def get_files(dir, suffix):
    res = []
    for root, directory, files in os.walk(dir):
        for filename in files:
            name, suf = os.path.splitext(filename)
            if suf in suffix:
                # res.append(filename)
                res.append(os.path.join(root, filename))
    return res
def bbox2result(bboxes, labels, num_classes):
    """Convert detection results to a list of numpy arrays.

    Args:
        bboxes (torch.Tensor | np.ndarray): shape (n, 5)
        labels (torch.Tensor | np.ndarray): shape (n, )
        num_classes (int): number of classes (no background class here,
            since the NMS used below is the no-background variant)

    Returns:
        list(ndarray): bbox results of each class
    """
    if bboxes.shape[0] == 0:
        return [np.zeros((0, 5), dtype=np.float32) for _ in range(num_classes)]
    else:
        if isinstance(bboxes, torch.Tensor):
            bboxes = bboxes.detach().cpu().numpy()
            labels = labels.detach().cpu().numpy()
        return [bboxes[labels == i, :] for i in range(num_classes)]
def decode_label(cfg, gt_labels):
    # Maps head label indices through valid_classes. With valid_classes =
    # arange(num_classes) this is an identity mapping; presumably kept as a
    # hook for remapping head outputs to dataset label ids.
    valid_classes = np.arange(cfg.model.bbox_head.num_classes)
    gt_labels_origin = gt_labels.clone()
    for idx in range(cfg.model.bbox_head.num_classes):
        gt_labels[gt_labels_origin == idx] = valid_classes[idx]
    return gt_labels
def _get_anchors_grid_xy(num_grid_h, num_grid_w, stride, device='cpu'):
    grid_x = torch.arange(num_grid_w, dtype=torch.float, device=device).repeat(num_grid_h, 1)
    grid_y = torch.arange(num_grid_h, dtype=torch.float, device=device).repeat(num_grid_w, 1)
    grid_x = grid_x.unsqueeze(0) * stride
    grid_y = grid_y.t().unsqueeze(0) * stride
    return grid_x, grid_y
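# For example, with num_grid_h=2, num_grid_w=4, stride=8 the grids (each of
# shape (1, 2, 4)) come out as:
#   grid_x[0] = [[0, 8, 16, 24], [0, 8, 16, 24]]   (x offset of each cell)
#   grid_y[0] = [[0, 0, 0, 0], [8, 8, 8, 8]]       (y offset of each cell)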
def _scale_position(pos, scale_x_y=1.0):
    return scale_x_y * pos - (scale_x_y - 1) / 2.0
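# Note on scale_x_y (the YOLOv4-style grid-sensitivity fix): with scale_x_y =
# 2.0 a sigmoid output in (0, 1) is stretched to (-0.5, 1.5), so predicted
# centers can actually reach the borders of a grid cell, e.g.
# _scale_position(0.0, 2.0) == -0.5 and _scale_position(1.0, 2.0) == 1.5.
# With scale_x_y = 1.0 the function is the identity.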
def get_bboxes_single(cfg, results_raw, scale_factor, crop_lefttop, rescale=False):
    num_scales = len(cfg.model.bbox_head.strides)
    num_anchors_per_scale = len(cfg.model.bbox_head.anchor_base_sizes[0])
    num_attrib = cfg.model.bbox_head.num_classes + 5
    assert len(results_raw) == num_scales
    multi_lvl_bboxes = []
    multi_lvl_cls_scores = []
    multi_lvl_conf_scores = []
    for i_scale in range(num_scales):
        result_raw = results_raw[i_scale]
        num_grid_h = result_raw.size(1)
        num_grid_w = result_raw.size(2)
        prediction_raw = result_raw.view(num_anchors_per_scale,
                                         num_attrib,
                                         num_grid_h,
                                         num_grid_w).permute(0, 2, 3, 1).contiguous()
        # grid x y offset, with stride step included
        stride = cfg.model.bbox_head.strides[i_scale]
        grid_x, grid_y = _get_anchors_grid_xy(num_grid_h, num_grid_w, stride, result_raw.device)
        # Get outputs x, y
        x_center_pred = _scale_position(torch.sigmoid(prediction_raw[..., 0]),
                                        cfg.model.bbox_head.scale_x_y) * stride + grid_x  # Center x
        y_center_pred = _scale_position(torch.sigmoid(prediction_raw[..., 1]),
                                        cfg.model.bbox_head.scale_x_y) * stride + grid_y  # Center y
        anchors = torch.tensor(cfg.model.bbox_head.anchor_base_sizes[i_scale],
                               device=result_raw.device, dtype=torch.float32)
        anchor_w = anchors[:, 0:1].view((-1, 1, 1))
        anchor_h = anchors[:, 1:2].view((-1, 1, 1))
        w_pred = torch.exp(prediction_raw[..., 2]) * anchor_w  # Width
        h_pred = torch.exp(prediction_raw[..., 3]) * anchor_h  # Height
        x1_pred = x_center_pred - w_pred / 2
        y1_pred = y_center_pred - h_pred / 2
        x2_pred = x_center_pred + w_pred / 2
        y2_pred = y_center_pred + h_pred / 2
        # Corner format (x1, y1, x2, y2)
        bbox_pred = torch.stack((x1_pred, y1_pred, x2_pred, y2_pred), dim=3).view((-1, 4))
        # Objectness confidence
        conf_pred = torch.sigmoid(prediction_raw[..., 4]).view(-1)
        # Per-class scores
        cls_pred = torch.sigmoid(prediction_raw[..., 5:]).view(-1, cfg.model.bbox_head.num_classes)
        conf_thr = cfg.test_cfg.get('conf_thr', -1)
        # conf_inds = conf_pred.ge(conf_thr).nonzero().flatten()
        conf_inds = torch.nonzero(conf_pred.ge(conf_thr), as_tuple=False).flatten()
        bbox_pred = bbox_pred[conf_inds, :]
        cls_pred = cls_pred[conf_inds, :]
        conf_pred = conf_pred[conf_inds]
        nms_pre = cfg.test_cfg.get('nms_pre', -1)
        if 0 < nms_pre < conf_pred.size(0):
            _, topk_inds = conf_pred.topk(nms_pre)
            bbox_pred = bbox_pred[topk_inds, :]
            cls_pred = cls_pred[topk_inds, :]
            conf_pred = conf_pred[topk_inds]
        multi_lvl_bboxes.append(bbox_pred)
        multi_lvl_cls_scores.append(cls_pred)
        multi_lvl_conf_scores.append(conf_pred)
    multi_lvl_bboxes = torch.cat(multi_lvl_bboxes)
    multi_lvl_cls_scores = torch.cat(multi_lvl_cls_scores)
    multi_lvl_conf_scores = torch.cat(multi_lvl_conf_scores)
    if multi_lvl_conf_scores.size(0) == 0:
        return torch.zeros((0, 5)), torch.zeros((0,))
    if rescale:
        # Map back to original image coordinates: undo the resize, then the crop.
        multi_lvl_bboxes /= multi_lvl_bboxes.new_tensor(scale_factor)
        multi_lvl_bboxes += torch.tensor(crop_lefttop, device=result_raw.device,
                                         dtype=torch.float32).repeat(1, 2)
    # padding = multi_lvl_cls_scores.new_zeros(multi_lvl_cls_scores.shape[0], 1)
    # multi_lvl_cls_scores = torch.cat([padding, multi_lvl_cls_scores], dim=1)
    score_thr = cfg.test_cfg.get('score_thr', 0.05)
    nms = cfg.test_cfg.get('nms', dict(type='nms', iou_thr=0.45))
    max_per_img = cfg.test_cfg.get('max_per_img', 100)
    det_bboxes, det_labels = multiclass_nms_no_bkg(multi_lvl_bboxes, multi_lvl_cls_scores,
                                                   score_thr, nms, max_per_img,
                                                   score_factors=multi_lvl_conf_scores)
    det_labels = decode_label(cfg, det_labels)
    return det_bboxes, det_labels
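# multiclass_nms_no_bkg is a fork-specific helper; presumably it mirrors
# mmdet's multiclass_nms and returns det_bboxes of shape (N, 5) as
# (x1, y1, x2, y2, score) plus det_labels of shape (N,), just without a
# background class slot.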
def get_bboxes(cfg, results_raw, img_metas, rescale=False):
    result_list = []
    for img_id in range(len(img_metas)):
        result_raw_list = [
            results_raw[i][img_id].detach() for i in range(cfg.num_scales)
        ]
        scale_factor = img_metas[img_id].get('scale_factor', np.array([1.0, 1.0, 1.0, 1.0]))
        crop_lefttop = img_metas[img_id].get('crop_lefttop', (0, 0))
        proposals = get_bboxes_single(cfg, result_raw_list, scale_factor, crop_lefttop, rescale)
        result_list.append(proposals)
    return result_list
def draw_rectangle(src_img_path, bbox_results, conf_thresh, save_path, thickness=4, font_scale=0.5):
    # img = cv2.clone(img)
    img = cv2.imread(src_img_path, 1)
    img = np.ascontiguousarray(img)
    for bbox_result in bbox_results:
        for single_bbox_result in bbox_result:
            if conf_thresh > single_bbox_result[-1]:
                continue
            else:
                # Copy so the scaling below does not mutate bbox_results in
                # place (the slice would otherwise be a view into it).
                box = single_bbox_result[:4].copy()
                # Map from the 512x192 network input back to the source frame.
                box[0] = box[0] * 1920.0 / 512
                box[2] = box[2] * 1920.0 / 512
                # box[1] = box[1] * (860 - 140) / 192
                # box[3] = box[3] * (860 - 140) / 192
                box[1] = box[1] * 1080.0 / 192
                box[3] = box[3] * 1080.0 / 192
                box = np.array(box, dtype=np.int32)
                # left_top = (box[0], box[1] + 140)
                # right_bottom = (box[2], box[3] + 140)
                left_top = (box[0], box[1])
                right_bottom = (box[2], box[3])
                cv2.rectangle(img, left_top, right_bottom, (255, 255, 255), thickness=thickness)
    cv2.imwrite(save_path, img)
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='MMDet test (and eval) a model')
    # Note: argparse's type=bool treats any non-empty string as True.
    parser.add_argument("--iseval", type=bool, default=True)
    parser.add_argument('--options', nargs='+', action=DictAction, help='arguments in dict')
    parser.add_argument('--eval', type=str, nargs='+', default='mAP',
                        help='evaluation metrics, which depends on the dataset, e.g., "bbox",'
                             ' "segm", "proposal" for COCO, and "mAP", "recall" for PASCAL VOC')
    parser.add_argument('--config',
                        default='/home/szhang/project/git/mmdetection_qincao/mmdetection/tools/zs/8155testconfig.py',
                        help='test config file path')
    args = parser.parse_args()
    cfg = Config.fromfile(args.config)
    width8, width16, width32 = [cfg.input_size[0] // i for i in [8, 16, 32]]
    height8, height16, height32 = [cfg.input_size[1] // i for i in [8, 16, 32]]
    channels = (cfg.model.bbox_head.num_classes + 5) * 3
    image_counts = 100
    raw_root = '/home/szhang/project/data/test/test100/detection_SNPE_CDSP_result/SNPE_CDSP_result'
    save_root = '/home/szhang/project/data/test/test100/8155test/8155test_img_result'
    outputs = []
    for i in range(image_counts):
        result8_path = raw_root + "/Result_{}/190.raw".format(i)
        result16_path = raw_root + "/Result_{}/labels.raw".format(i)
        result32_path = raw_root + "/Result_{}/boxes.raw".format(i)
        # result8_path = "/home/szhang/project/data/test/test100/8155test/8155test_img_result/8.raw"
        # result16_path = "/home/szhang/project/data/test/test100/8155test/8155test_img_result/16.raw"
        # result32_path = "/home/szhang/project/data/test/test100/8155test/8155test_img_result/32.raw"
        # type = img.dtype  # get the data type, e.g. uint8 or uint16
        result8Data = np.fromfile(result8_path, dtype=np.float32)
        result16Data = np.fromfile(result16_path, dtype=np.float32)
        result32Data = np.fromfile(result32_path, dtype=np.float32)
        # result8Data = result8Data.reshape(1, height8, width8, channels)
        # result16Data = result16Data.reshape(1, height16, width16, channels)
        # result32Data = result32Data.reshape(1, height32, width32, channels)
        # Mind whether the data is NCHW or NHWC: the layout here has to match
        # the view/permute in get_bboxes_single:
        # prediction_raw = result_raw.view(num_anchors_per_scale, num_attrib, num_grid_h, num_grid_w).permute(0, 2, 3, 1).contiguous()
        result8Data = result8Data.reshape(height8, width8, channels).transpose(2, 0, 1)
        result16Data = result16Data.reshape(height16, width16, channels).transpose(2, 0, 1)
        result32Data = result32Data.reshape(height32, width32, channels).transpose(2, 0, 1)
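        # Shape check for this config (num_classes = 8, 3 anchors per scale,
        # 512x192 input): the stride-8 map is (24, 64, 39) in HWC and becomes
        # (39, 24, 64) after the transpose; get_bboxes_single then views it as
        # (3, 13, 24, 64), i.e. (anchors, num_attrib, H, W).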
        resultData = [torch.from_numpy(result32Data), torch.from_numpy(result16Data), torch.from_numpy(result8Data)]
        det_bboxes, det_labels = get_bboxes_single(cfg, resultData, 0, (0, 0))
        bbox_results = bbox2result(det_bboxes, det_labels, cfg.model.bbox_head.num_classes)
        # Read the matching line of the file list to find the original image.
        src_txt_path = '/home/szhang/project/data/test/test100/detection_SNPE_CDSP_result/SNPE_CDSP_result/file_list.txt'
        with open(src_txt_path, 'r') as src_txt_data:
            txt_data_lists = src_txt_data.readlines()
        save_image_name = txt_data_lists[i].strip().split('/')[-1].split(".")[0]
        save_image_name = save_image_name + '.jpg'
        src_image_path = '/home/szhang/project/data/test/test100/8155test/JPEGImages_crop/' + save_image_name
        save_path = save_root + "/Result_{}.jpg".format(i)
        draw_rectangle(src_image_path, bbox_results, 0.3, save_path)
        # Scale the boxes back to source-frame coordinates before evaluation.
        for bbox_result in bbox_results:
            for single_bbox_result in bbox_result:
                single_bbox_result[0] = single_bbox_result[0] * 1920.0 / 512
                single_bbox_result[2] = single_bbox_result[2] * 1920.0 / 512
                # single_bbox_result[1] = single_bbox_result[1] * (860 - 140) / 192
                # single_bbox_result[3] = single_bbox_result[3] * (860 - 140) / 192
                single_bbox_result[1] = single_bbox_result[1] * 1080.0 / 192
                single_bbox_result[3] = single_bbox_result[3] * 1080.0 / 192
        outputs.append(bbox_results)
    if args.iseval:
        dataset = build_dataset(cfg.data.test)
        if args.eval:
            kwargs = {} if args.options is None else args.options
            dataset.evaluate(outputs, args.eval, **kwargs)
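Typical invocation (the script filename here is hypothetical; --eval must match the dataset's metrics):

python test_yolohead_raw.py --config tools/zs/8155testconfig.py --eval mAP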
Config file:
input_size = (512, 192)
model = dict(
    type='YoloNet_BM',
    bbox_head=dict(
        type='YoloHead_BM',
        num_classes=8,
        in_channels=[512, 256, 128],
        out_channels=[512, 256, 128],
        strides=[32, 16, 8],
        anchor_base_sizes=[[[24.0, 18.0], [43.0, 32.0], [98.0, 75.0]],
                           [[10.0, 7.0], [17.0, 9.0], [12.0, 15.0]],
                           [[3.0, 7.0], [6.0, 5.0], [5.0, 11.0]]],
        scale_x_y=2.0))
test_cfg = dict(
    nms_pre=1000,
    min_bbox_size=0,
    score_thr=0.05,
    conf_thr=0.005,
    nms=dict(type='nms', iou_thr=0.45),
    max_per_img=100)
img_norm_cfg = dict(mean=[0, 0, 0], std=[255.0, 255.0, 255.0], to_rgb=True)
test_pipeline = [
    dict(type='LoadImageFromFile', to_float32=True),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(512, 192),
        flip=False,
        transforms=[
            dict(
                type='UserSettingCrop',
                rightbottom=(1920, 860),
                lefttop=(0, 140)),
            dict(type='Resize', keep_ratio=False),
            dict(
                type='Normalize',
                mean=[0, 0, 0],
                std=[255.0, 255.0, 255.0],
                to_rgb=True),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(
                type='Collect',
                keys=['img'],
                meta_keys=[
                    'filename', 'ori_shape', 'img_shape', 'pad_shape',
                    'scale_factor', 'img_norm_cfg', 'crop_lefttop',
                    'crop_rightbottom'
                ])
        ])
]
data = dict(
    test=dict(
        type='DDPDataset',
        ann_file='/home/szhang/project/data/test/test100/testshuffle100.txt',
        img_prefix=None,
        pipeline=[
            dict(type='LoadImageFromFile', to_float32=True),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(512, 192),
                flip=False,
                transforms=[
                    dict(
                        type='UserSettingCrop',
                        rightbottom=(1920, 860),
                        lefttop=(0, 140)),
                    dict(type='Resize', keep_ratio=False),
                    dict(
                        type='Normalize',
                        mean=[0, 0, 0],
                        std=[255.0, 255.0, 255.0],
                        to_rgb=True),
                    dict(type='Pad', size_divisor=32),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(
                        type='Collect',
                        keys=['img'],
                        meta_keys=[
                            'filename', 'ori_shape', 'img_shape', 'pad_shape',
                            'scale_factor', 'img_norm_cfg', 'crop_lefttop',
                            'crop_rightbottom'
                        ])
                ])
        ]))
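Given this config, the expected size of each exported .raw buffer can be sanity-checked before decoding. A minimal sketch (assumes the 512x192 input and num_classes = 8 from the config above):

import numpy as np

input_w, input_h, num_classes = 512, 192, 8
channels = (num_classes + 5) * 3  # (classes + 4 box + 1 conf) per anchor, 3 anchors per scale
for stride in (8, 16, 32):
    h, w = input_h // stride, input_w // stride
    n_floats = h * w * channels
    # e.g. stride 8 -> 24 x 64 x 39 = 59904 float32 values (239616 bytes)
    print(stride, (h, w, channels), n_floats, n_floats * 4)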