YoloV8实现简化版数据增强mosaic
马赛克(mosaic)增强是Yolo系列中对训练十分有效的策略,对模型的泛化能力和最终性能都有很大提升。由于Yolo的mosaic还涉及缩放、随机图像占比等其他变换,这里只实现最简单的版本:将4张图合并成一张,同时更新对应的标签,适合想要在单张图中加入多个目标的场景。
·
马赛克(mosaic)增强是Yolo系列中对训练十分有效的策略,对模型的泛化能力和最终性能都有很大提升。由于Yolo的mosaic还涉及缩放、随机图像占比等其他变换,这里只实现最简单的版本:将4张图合并成一张,同时更新对应的标签,适合想要在单张图中加入多个目标的场景。
直接完整代码
import os
import cv2
import numpy as np
import random
import shutil
'''
生成4合1代码
'''
def load_image_and_label(image_path, label_path):
    """Read one image together with the raw lines of its label file.

    Args:
        image_path: Path of the image, passed to ``cv2.imread``.
        label_path: Path of the text label file.

    Returns:
        A ``(image, label_lines)`` tuple: whatever ``cv2.imread`` returned
        and the list of lines read from the label file (line endings kept).
    """
    # Decode the image first, then pull in the annotation lines.
    image = cv2.imread(image_path)
    with open(label_path, 'r') as label_file:
        label_lines = list(label_file)
    return image, label_lines
def save_image_and_label(img, labels, image_output_path, label_output_path):
    """Write an image and its label lines to disk.

    Args:
        img: Image array handed to ``cv2.imwrite``.
        labels: Iterable of label strings, written as-is (no separator is
            added, so each entry should already end with a newline).
        image_output_path: Destination path of the image.
        label_output_path: Destination path of the label text file.
    """
    # Image goes out first, then the matching annotation file.
    cv2.imwrite(image_output_path, img)
    with open(label_output_path, 'w') as label_file:
        label_file.write(''.join(labels))
def adjust_bbox_coordinates(bboxes, width_offset, height_offset, original_width, original_height, new_width, new_height):
    """Re-map normalized label points from a source image onto a larger canvas.

    Each label line has the form ``class_id x1 y1 x2 y2 ...`` where the
    coordinates are normalized to the source image. Every ``(x, y)`` pair is
    converted to pixels, shifted by the given offsets and re-normalized by
    the canvas size.

    Note: every pair of values is treated as a point and shifted. A 4-value
    ``cx cy w h`` line would therefore get its width/height shifted as well,
    so that layout is not handled by this function.

    Args:
        bboxes: Iterable of label lines (strings).
        width_offset: Horizontal pixel offset of the source image in the canvas.
        height_offset: Vertical pixel offset of the source image in the canvas.
        original_width: Width of the source image in pixels.
        original_height: Height of the source image in pixels.
        new_width: Width of the canvas in pixels.
        new_height: Height of the canvas in pixels.

    Returns:
        A list of re-normalized label lines, each terminated by ``"\\n"``.

    Raises:
        ValueError: If a line contains a non-numeric field or an odd number
            of coordinate values.
    """
    adjusted_bboxes = []
    for bbox in bboxes:
        fields = bbox.split()
        if not fields:
            # Blank lines (e.g. a trailing empty line in the label file)
            # carry no annotation; skipping them avoids an unpacking crash.
            continue

        class_id, *coords = map(float, fields)
        if len(coords) % 2:
            raise ValueError(
                f"Label line has an odd number of coordinate values: {bbox!r}"
            )

        new_coords = []
        for x, y in zip(coords[0::2], coords[1::2]):
            # normalized -> source pixels -> shifted into canvas -> normalized
            new_coords.append((x * original_width + width_offset) / new_width)
            new_coords.append((y * original_height + height_offset) / new_height)

        adjusted_bboxes.append(f"{int(class_id)} " + ' '.join(map(str, new_coords)) + '\n')
    return adjusted_bboxes
def create_mosaic(images, labels, output_image_path, output_label_path):
    """Combine four images into a 2x2 mosaic and write it with merged labels.

    Tiles are placed in the order top-left, top-right, bottom-left,
    bottom-right. Each tile cell is as large as the largest input image;
    smaller images are pasted at the top-left corner of their cell and the
    rest of the cell stays zero-filled. When all four images share one size
    the result equals stacking them directly.

    Args:
        images: Sequence of exactly four image arrays (height, width[, ...]).
        labels: Sequence of exactly four lists of label lines, one list per
            image, in the same order as ``images``.
        output_image_path: Destination path of the mosaic image.
        output_label_path: Destination path of the merged label file.

    Raises:
        ValueError: If the number of images/label lists is not four, or if
            any image is ``None`` (e.g. a failed read).
    """
    if len(images) != 4 or len(labels) != 4:
        raise ValueError("create_mosaic expects exactly 4 images and 4 label lists")
    if any(img is None for img in images):
        raise ValueError("create_mosaic received an image that is None (failed to load?)")

    # One cell must be able to hold the largest of the four images.
    tile_height = max(img.shape[0] for img in images)
    tile_width = max(img.shape[1] for img in images)
    new_height, new_width = 2 * tile_height, 2 * tile_width

    # Keep any trailing dimensions (e.g. colour channels) of the inputs.
    canvas_shape = (new_height, new_width) + tuple(images[0].shape[2:])
    mosaic_image = np.zeros(canvas_shape, dtype=images[0].dtype)

    adjusted_labels = []
    for index, (img, label) in enumerate(zip(images, labels)):
        # index 0..3 -> (row, col) = (0,0), (0,1), (1,0), (1,1)
        row, col = divmod(index, 2)
        height_offset = row * tile_height
        width_offset = col * tile_width

        img_height, img_width = img.shape[:2]
        mosaic_image[height_offset:height_offset + img_height,
                     width_offset:width_offset + img_width] = img

        # Labels are normalized to their own image, so use that image's size.
        new_bboxes = adjust_bbox_coordinates(
            label, width_offset, height_offset,
            img_width, img_height, new_width, new_height,
        )
        adjusted_labels.extend(new_bboxes)

    save_image_and_label(mosaic_image, adjusted_labels, output_image_path, output_label_path)
def generate_mosaic_images(input_img_folder, input_label_folder, output_img_folder, output_label_folder, mosaic_count=500):
    """Build ``mosaic_count`` 2x2 mosaics from random images and copy the rest.

    ``mosaic_count * 4`` distinct ``.jpg``/``.png`` images are sampled from
    ``input_img_folder``; every group of four is merged by ``create_mosaic``
    and written to the output folders. All images that were not sampled are
    copied unchanged together with their label files. The label file of an
    image is expected in ``input_label_folder`` under the same base name
    with a ``.txt`` extension.

    Args:
        input_img_folder: Folder containing the source images.
        input_label_folder: Folder containing the source label files.
        output_img_folder: Folder receiving mosaics and copied images.
        output_label_folder: Folder receiving the corresponding labels.
        mosaic_count: Number of mosaic images to generate.

    Raises:
        FileNotFoundError: If the label folder or a label file is missing.
        ValueError: If there are fewer than ``mosaic_count * 4`` images, or
            an image cannot be read.
    """
    img_files = [f for f in os.listdir(input_img_folder) if f.endswith(('.jpg', '.png'))]
    if not os.path.isdir(input_label_folder):
        # Fail before writing any output, as listing the folder used to do.
        raise FileNotFoundError(f"Label folder not found: {input_label_folder}")

    needed = mosaic_count * 4
    if needed > len(img_files):
        raise ValueError(
            f"Need {needed} images for {mosaic_count} mosaics, "
            f"but only {len(img_files)} found in {input_img_folder!r}"
        )

    # Randomly pick mosaic_count * 4 distinct images for the mosaics.
    selected_files = random.sample(img_files, needed)

    def label_name_for(img_file):
        # Swap only the real extension; str.replace could also hit a
        # '.jpg'/'.png' occurring in the middle of the file name.
        return os.path.splitext(img_file)[0] + '.txt'

    for i in range(mosaic_count):
        images = []
        labels = []
        for img_file in selected_files[i * 4:(i + 1) * 4]:
            img_path = os.path.join(input_img_folder, img_file)
            label_path = os.path.join(input_label_folder, label_name_for(img_file))
            # The helper closes the label file, unlike a bare open().readlines().
            img, label_lines = load_image_and_label(img_path, label_path)
            if img is None:
                raise ValueError(f"Failed to read image: {img_path}")
            images.append(img)
            labels.append(label_lines)

        output_img_path = os.path.join(output_img_folder, f"mosaic_dmqr_fuza_{i + 1}.jpg")
        output_label_path = os.path.join(output_label_folder, f"mosaic_dmqr_fuza_{i + 1}.txt")
        create_mosaic(images, labels, output_img_path, output_label_path)

    # Copy every image that was not used in a mosaic, with its label file.
    selected_set = set(selected_files)  # O(1) membership tests
    for img_file in img_files:
        if img_file in selected_set:
            continue
        shutil.copy(os.path.join(input_img_folder, img_file), output_img_folder)
        shutil.copy(os.path.join(input_label_folder, label_name_for(img_file)), output_label_folder)

    print(f"已完成 {mosaic_count} 张 mosaic 图像生成,剩余图像和标签已拷贝。")
if __name__ == "__main__":
    # Source dataset split (images and their label files).
    input_img_folder = 'images/val'
    input_label_folder = 'labels/val'

    # Destination folders for mosaics and the copied leftovers.
    output_img_folder = 'split/images'
    output_label_folder = 'split/labels'
    for folder in (output_img_folder, output_label_folder):
        os.makedirs(folder, exist_ok=True)

    generate_mosaic_images(
        input_img_folder,
        input_label_folder,
        output_img_folder,
        output_label_folder,
    )
- 重点就是检查坐标更新是否正确,所以实现后最好将标签绘制在图上确定是否符合我们的预期
- 代码把标签中每两个数值当作一个 (x, y) 点来加偏移,适用于四点坐标(或其他由点组成)的标签;标准检测框格式 `cx cy w h` 中的宽高不应加偏移,使用该格式时需要对应修改代码
更多推荐
已为社区贡献1条内容
所有评论(0)