马赛克(Mosaic)增强是 YOLO 系列中对训练十分有效的策略,对模型的泛化能力和最终性能都有很大提升。由于 YOLO 在做 mosaic 时还涉及缩放、随机图像占比等其他变换,这里只做最简单的实现:将 4 张图拼接成一张,同时更新对应的标签,适合想在单张图中加入多个目标的场景。

下面直接给出完整代码:
import os
import cv2
import numpy as np
import random
import shutil
'''
生成4合1代码
'''
def load_image_and_label(image_path, label_path):
    """Read an image and the raw lines of its label file.

    Args:
        image_path: Path of the image, read with ``cv2.imread``.
        label_path: Path of the text label file.

    Returns:
        A ``(img, labels)`` tuple: the array returned by ``cv2.imread`` and
        the list produced by ``readlines()`` on the label file (line endings
        are kept).

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not load the image.
    """
    img = cv2.imread(image_path)
    # cv2.imread reports failure by returning None instead of raising; fail
    # here with the offending path rather than handing None to the caller.
    if img is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    with open(label_path, 'r') as file:
        labels = file.readlines()
    return img, labels

def save_image_and_label(img, labels, image_output_path, label_output_path):
    """Write an image and its label lines to disk.

    Args:
        img: Image array passed to ``cv2.imwrite``.
        labels: Iterable of label strings, written as-is with
            ``writelines`` (no newline is appended).
        image_output_path: Destination path of the image.
        label_output_path: Destination path of the label file.

    Raises:
        OSError: If ``cv2.imwrite`` reports that the image was not written.
    """
    # cv2.imwrite returns False on failure instead of raising. Check it
    # before writing the labels so a failed image never leaves an orphan
    # label file behind.
    if not cv2.imwrite(image_output_path, img):
        raise OSError(f"Could not write image: {image_output_path}")

    with open(label_output_path, 'w') as file:
        file.writelines(labels)

def adjust_bbox_coordinates(bboxes, width_offset, height_offset, original_width, original_height, new_width, new_height):
    """Re-express normalized point labels of one tile in mosaic coordinates.

    Each label line has the form ``class x1 y1 x2 y2 ...`` with coordinates
    normalized to the source image. Every value after the class id is
    treated as part of an ``(x, y)`` point, so the width/height fields of a
    ``cx cy w h`` style box would be shifted like positions as well.

    Args:
        bboxes: Iterable of label lines for one source image.
        width_offset: Horizontal pixel offset of the tile inside the mosaic.
        height_offset: Vertical pixel offset of the tile inside the mosaic.
        original_width: Pixel width used to de-normalize x values.
        original_height: Pixel height used to de-normalize y values.
        new_width: Pixel width of the mosaic, used to re-normalize x values.
        new_height: Pixel height of the mosaic, used to re-normalize y values.

    Returns:
        A list of newline-terminated label lines in mosaic coordinates.
        Blank input lines are dropped.

    Raises:
        ValueError: If a line has an odd number of coordinate values or a
            field that cannot be parsed as a number.
    """
    adjusted_bboxes = []
    for bbox in bboxes:
        fields = bbox.split()
        if not fields:
            # Blank lines (e.g. a trailing empty line) carry no annotation.
            continue

        class_id, *coords = map(float, fields)
        if len(coords) % 2:
            raise ValueError(
                f"Label line has an odd number of coordinates: {bbox!r}"
            )

        new_coords = []
        for x, y in zip(coords[0::2], coords[1::2]):
            # Normalized -> source pixels -> shifted into the mosaic ->
            # normalized against the mosaic size.
            new_x = (x * original_width + width_offset) / new_width
            new_y = (y * original_height + height_offset) / new_height
            new_coords.extend([new_x, new_y])

        adjusted_bboxes.append(f"{int(class_id)} " + ' '.join(map(str, new_coords)) + '\n')

    return adjusted_bboxes

def create_mosaic(images, labels, output_image_path, output_label_path):
    """Stack four images into a 2x2 mosaic and save it with merged labels.

    Tile order is top-left, top-right, bottom-left, bottom-right. The first
    image defines the tile size; any image whose height/width differs is
    resized to that size. Because the labels are normalized, they stay
    valid for the resized tile.

    Args:
        images: Sequence of four image arrays.
        labels: Sequence of four lists of label lines, one list per image,
            in the same order as ``images``.
        output_image_path: Where the mosaic image is written.
        output_label_path: Where the merged label file is written.

    Raises:
        ValueError: If there are not exactly four images and four label
            lists, or if any image is ``None``.
    """
    if len(images) != 4 or len(labels) != 4:
        raise ValueError(
            f"create_mosaic needs exactly 4 images and 4 label lists, "
            f"got {len(images)} and {len(labels)}"
        )
    if any(img is None for img in images):
        raise ValueError("create_mosaic received an image that failed to load (None)")

    # The first image fixes the tile size used for every quadrant.
    img_height, img_width = images[0].shape[:2]

    # All labels are de-normalized with one tile size, so every tile must
    # really have that size; otherwise stacking fails or the labels end up
    # in the wrong place. cv2.resize takes its target size as (width, height).
    tiles = [
        img if img.shape[:2] == (img_height, img_width)
        else cv2.resize(img, (img_width, img_height))
        for img in images
    ]

    # Build the 2x2 mosaic: two rows of two tiles each.
    top = np.hstack((tiles[0], tiles[1]))
    bottom = np.hstack((tiles[2], tiles[3]))
    mosaic_image = np.vstack((top, bottom))

    new_height, new_width = mosaic_image.shape[:2]

    # Pixel offset (x, y) of each quadrant, in the same order as the tiles.
    offsets = [
        (0, 0),
        (img_width, 0),
        (0, img_height),
        (img_width, img_height),
    ]

    adjusted_labels = []
    for label, (width_offset, height_offset) in zip(labels, offsets):
        adjusted_labels.extend(
            adjust_bbox_coordinates(
                label, width_offset, height_offset,
                img_width, img_height, new_width, new_height,
            )
        )

    save_image_and_label(mosaic_image, adjusted_labels, output_image_path, output_label_path)


def _label_file_for(img_file):
    """Return the label file name for an image: same stem, ``.txt`` suffix."""
    # splitext swaps only the final extension; a chained str.replace would
    # also rewrite '.jpg' / '.png' appearing in the middle of the name.
    return os.path.splitext(img_file)[0] + '.txt'


def generate_mosaic_images(input_img_folder, input_label_folder, output_img_folder, output_label_folder, mosaic_count=500):
    """Build mosaics from random images and copy the unused ones as they are.

    ``mosaic_count * 4`` distinct images (``.jpg`` / ``.png``) are sampled
    from ``input_img_folder``. Each group of four is merged by
    ``create_mosaic`` and written to the output folders as
    ``mosaic_dmqr_fuza_<n>.jpg`` / ``.txt``. Every image that was not
    sampled is copied to the output folders together with its label file.

    Args:
        input_img_folder: Folder containing the source images.
        input_label_folder: Folder containing one ``.txt`` label file per
            image, named after the image's stem.
        output_img_folder: Existing folder that receives mosaics and copies.
        output_label_folder: Existing folder that receives the label files.
        mosaic_count: Number of mosaics to generate.

    Raises:
        ValueError: If the input folder holds fewer than
            ``mosaic_count * 4`` images.
        FileNotFoundError: If a sampled image cannot be read, or a label
            file is missing.
    """
    img_files = [f for f in os.listdir(input_img_folder) if f.endswith(('.jpg', '.png'))]

    # Each mosaic consumes four distinct images.
    needed = mosaic_count * 4
    if needed > len(img_files):
        raise ValueError(
            f"Need {needed} images for {mosaic_count} mosaics, "
            f"but only {len(img_files)} found in {input_img_folder}"
        )
    selected_files = random.sample(img_files, needed)

    for i in range(mosaic_count):
        images = []
        labels = []
        for img_file in selected_files[i * 4:(i + 1) * 4]:
            img_path = os.path.join(input_img_folder, img_file)
            label_path = os.path.join(input_label_folder, _label_file_for(img_file))
            # The helper closes the label file after reading it.
            img, label_lines = load_image_and_label(img_path, label_path)
            if img is None:
                raise FileNotFoundError(f"Could not read image: {img_path}")
            images.append(img)
            labels.append(label_lines)

        output_img_path = os.path.join(output_img_folder, f"mosaic_dmqr_fuza_{i + 1}.jpg")
        output_label_path = os.path.join(output_label_folder, f"mosaic_dmqr_fuza_{i + 1}.txt")
        create_mosaic(images, labels, output_img_path, output_label_path)

    # Copy every image that was not used in a mosaic, with its label file.
    # A set keeps the membership test O(1) per file.
    selected = set(selected_files)
    remaining_files = [f for f in img_files if f not in selected]

    for img_file in remaining_files:
        shutil.copy(os.path.join(input_img_folder, img_file), output_img_folder)
        shutil.copy(os.path.join(input_label_folder, _label_file_for(img_file)), output_label_folder)

    print(f"已完成 {mosaic_count} 张 mosaic 图像生成,剩余图像和标签已拷贝。")

if __name__ == "__main__":
    # Source dataset: images and their label files.
    input_img_folder = 'images/val'
    input_label_folder = 'labels/val'

    # Destination folders, created up front and reused if already present.
    output_img_folder = 'split/images'
    output_label_folder = 'split/labels'
    for folder in (output_img_folder, output_label_folder):
        os.makedirs(folder, exist_ok=True)

    # Generate the mosaics with the default mosaic count.
    generate_mosaic_images(
        input_img_folder,
        input_label_folder,
        output_img_folder,
        output_label_folder,
    )

  • 重点是检查坐标更新是否正确,所以实现后最好将标签绘制在图上,确认是否符合我们的预期。
  • 适用于四点坐标(class x1 y1 x2 y2 x3 y3 x4 y4)的标签;其他格式(如多点坐标,或 cx cy w h 形式的检测框,其宽高不应叠加偏移)需要对应修改代码。
Logo

腾讯云面向开发者汇聚海量精品云计算使用和开发经验,营造开放的云计算技术生态圈。

更多推荐