3D Point Cloud Classification Datasets: Converting ModelNet40 to modelnet40_normal_resampled

Research on point-cloud classification algorithms in 3D computer vision depends on high-quality, diverse datasets. ModelNet40, one of the field's most classic benchmarks, has been a standard evaluation dataset since its release in 2015, used by influential papers such as PointNet and PointCNN. This article takes a close look at the three main distribution formats of ModelNet40, offers guidance on choosing between them, and helps readers avoid common data-preprocessing pitfalls.


1. ModelNet40 Dataset Overview

1.1 Dataset Origins
ModelNet40 originates from Princeton University's ModelNet project (official website). Its original design goal was to provide 3D vision algorithms with:

40 categories of common objects (e.g., airplanes, cars, chairs)
a 10-category aligned subset (ModelNet10)
3D CAD models stored in multiple formats
The dataset debuted in the CVPR 2015 paper "3D ShapeNets" and helped establish the deep-learning paradigm for 3D shape analysis.

1.2 Core Value
Cross-domain compatibility: supports research in computer vision, robotics, graphics, and other fields
Consistent annotation: unified object category labels and a common spatial coordinate convention
Extensibility: includes the original CAD models, making it easy to regenerate experiment data at any point-cloud density (see the sketch below)
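
As a minimal sketch of that extensibility, the snippet below resamples a single CAD model at several densities with trimesh; the file path is a hypothetical example based on the ModelNet40.zip layout:

import trimesh

# Hypothetical path into an unpacked ModelNet40.zip release.
mesh = trimesh.load_mesh("ModelNet40/airplane/train/airplane_0001.off")

# Resample the same surface at increasing point densities.
for n in (1024, 2048, 10000):
    points, face_idx = trimesh.sample.sample_surface(mesh, n)
    print(n, points.shape)  # -> (n, 3) xyz coordinates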

2. In-Depth Look at the ModelNet40 Versions

| Dataset | modelnet40_normal_resampled.zip | modelnet40_ply_hdf5_2048.zip | ModelNet40.zip |
| --- | --- | --- | --- |
| File size | 1.71 GB | 435 MB | 2.04 GB |
| Content | per point: x, y, z, normal_x, normal_y, normal_z; 10k points per shape | per point: x, y, z, normal_x, normal_y, normal_z; 2048 points per shape | OFF-format CAD models (see here) |
| Train / test | 9843 / 2468 | 9840 / 2468 | 9844 / 2468 |
| Download | modelnet40_normal_resampled.zip | modelnet40_ply_hdf5_2048.zip | ModelNet40.zip |

* Reference: https://github.com/zhulf0804/3D-PointCloud/blob/master/Datasets.md
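
For orientation, here is a minimal loading sketch for the two point-cloud distributions; the file paths are hypothetical, and the HDF5 key names ('data', 'label') follow the convention of the PointNet-style release:

import numpy as np
import h5py

# modelnet40_normal_resampled: one comma-separated txt file per shape,
# each row holding x, y, z, normal_x, normal_y, normal_z.
pts = np.loadtxt("modelnet40_normal_resampled/airplane/airplane_0001.txt", delimiter=',')
xyz, normals = pts[:, :3], pts[:, 3:]

# modelnet40_ply_hdf5_2048: batched HDF5 files, 2048 points per shape.
with h5py.File("modelnet40_ply_hdf5_2048/ply_data_train0.h5", 'r') as f:
    data = f['data'][:]      # (N, 2048, 3) xyz coordinates
    labels = f['label'][:]   # (N, 1) integer class indices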

3. Converting ModelNet40 to modelnet40_normal_resampled

The original script below has been validated with a PointNet++ model: training on its output reaches the same accuracy as the official modelnet40_normal_resampled dataset:

import os
import glob
import numpy as np
import trimesh
from tqdm import tqdm
from concurrent.futures import ProcessPoolExecutor, as_completed


def normalize_point_cloud(pc):
    # Center the cloud at the origin, then scale it into the unit sphere.
    centroid = np.mean(pc, axis=0)
    pc -= centroid
    furthest_distance = np.max(np.linalg.norm(pc, axis=1))
    pc /= furthest_distance
    return pc


def sample_point_cloud_with_normals(mesh, num_points=10000):
    # Uniformly sample points on the mesh surface and attach the normal
    # of the face each point was drawn from.
    points, face_indices = trimesh.sample.sample_surface(mesh, num_points)
    normals = mesh.face_normals[face_indices]
    points = normalize_point_cloud(points)
    return np.hstack((points, normals))


def process_off_file_single_task(args):
    # Worker task: convert one OFF file to a comma-separated txt file.
    input_path, output_path, num_points = args
    try:
        mesh = trimesh.load_mesh(input_path)
        if not isinstance(mesh, trimesh.Trimesh):
            raise ValueError("not a valid Trimesh object")
        sampled_data = sample_point_cloud_with_normals(mesh, num_points)
        np.savetxt(output_path, sampled_data, fmt="%.6f", delimiter=',')
        return True, input_path
    except Exception as e:
        return False, f"{input_path}: {e}"


def convert_modelnet40_dataset(
        root_path="/workspace/pointcloud_classification/ModelNet40/ModelNet40/",
        out_path="/workspace/pointcloud_classification/ModelNet40/modelnet40_normal_resampled_me/",
        num_points=10000,
        num_workers=96):

    os.makedirs(out_path, exist_ok=True)
    all_classes = sorted([d for d in os.listdir(root_path) if os.path.isdir(os.path.join(root_path, d))])

    shape_names = []
    train_files = []
    test_files = []
    all_files = []

    task_list = []

    for cls in all_classes:
        cls_input_path = os.path.join(root_path, cls)
        cls_output_path = os.path.join(out_path, cls)
        os.makedirs(cls_output_path, exist_ok=True)
        shape_names.append(cls)

        for split in ['train', 'test']:
            split_path = os.path.join(cls_input_path, split)
            off_files = glob.glob(os.path.join(split_path, '*.off'))

            for off_file in off_files:
                base_name = os.path.splitext(os.path.basename(off_file))[0]
                output_txt = os.path.join(cls_output_path, base_name + ".txt")
                rel_path = os.path.join(cls, base_name + ".txt")
                task_list.append((off_file, output_txt, num_points))
                if split == 'train':
                    train_files.append(rel_path)
                else:
                    test_files.append(rel_path)
                all_files.append(rel_path)

    # Process all OFF files in parallel
    print(f"🚀 Launching {num_workers} workers to process {len(task_list)} OFF files")
    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        futures = [executor.submit(process_off_file_single_task, task) for task in task_list]
        for f in tqdm(as_completed(futures), total=len(futures), desc="Processing OFF files"):
            success, msg = f.result()
            if not success:
                print(f"❌ 失败: {msg}")

    # Write the shape-name list
    with open(os.path.join(out_path, "modelnet40_shape_names.txt"), 'w') as f:
        for name in shape_names:
            f.write(name + "\n")

    # Write the train/test/file lists
    def save_list(filelist, filename, strip_path=False, strip_ext=False):
        with open(os.path.join(out_path, filename), 'w') as f:
            for item in sorted(filelist):
                name = item
                if strip_path:
                    name = os.path.basename(name)
                if strip_ext:
                    name = os.path.splitext(name)[0]
                f.write(name + "\n")

    save_list(all_files, "filelist.txt")
    save_list(train_files, "modelnet40_train.txt", strip_path=True, strip_ext=True)
    save_list(test_files, "modelnet40_test.txt", strip_path=True, strip_ext=True)

    print(f"\n✅ 全部转换完成,结果保存在:{out_path}")


if __name__ == "__main__":
    convert_modelnet40_dataset()
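
As a quick sanity check on the output (a sketch assuming the default out_path above), each converted file should contain 10,000 rows of six comma-separated values, with xyz inside the unit sphere and unit-length normals:

import numpy as np

# Hypothetical sample from the script's default output directory.
pts = np.loadtxt("/workspace/pointcloud_classification/ModelNet40/"
                 "modelnet40_normal_resampled_me/airplane/airplane_0001.txt",
                 delimiter=',')
xyz, normals = pts[:, :3], pts[:, 3:]

assert pts.shape == (10000, 6)
assert np.linalg.norm(xyz, axis=1).max() <= 1.0 + 1e-6   # inside the unit sphere
assert np.allclose(np.linalg.norm(normals, axis=1), 1.0)  # unit face normals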