Skip to content

_cutlass_implicit_gemm_forward_logic with ConvBlock #10

@AIBluefisher

Description

@AIBluefisher

Hi, @chrischoy

I'm testing the MinkUNet model and encountered an issue:

CuPy allocator set to PyTorch memory pool.
[INFO] benchmark_cache.py:203 - Loaded benchmark cache v2.0: 3 total configurations
[INFO] sparse_conv.py:307 - Loaded 3 forward and 0 backward benchmark configurations from cache
coordinates shape: torch.Size([131072, 3])
features shape: torch.Size([131072, 3])
offsets shape: torch.Size([3])
points shape: torch.Size([2, 4, 128, 128, 3])
images shape: torch.Size([2, 4, 128, 128, 3])

Traceback (most recent call last):
  File "/home/chenyu/anaconda3/envs/dogs/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/home/chenyu/anaconda3/envs/dogs/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/home/chenyu/Projects/DOGE/conerf/model/backbone/warp_conv_minkunet.py", line 390, in <module>
    outputs = model(sparse_tensor)
  File "/home/chenyu/anaconda3/envs/dogs/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1739, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/home/chenyu/anaconda3/envs/dogs/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1750, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/chenyu/Projects/DOGE/conerf/model/backbone/warp_conv_minkunet.py", line 168, in forward
    out = self.conv1(out_p1)
  File "/home/chenyu/anaconda3/envs/dogs/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1739, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/home/chenyu/anaconda3/envs/dogs/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1750, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/chenyu/anaconda3/envs/dogs/lib/python3.10/site-packages/warpconvnet-0.3.5-py3.10-linux-x86_64.egg/warpconvnet/nn/modules/sequential.py", line 56, in forward
    x, in_sf = run_forward(module, x, in_sf)
  File "/home/chenyu/anaconda3/envs/dogs/lib/python3.10/site-packages/warpconvnet-0.3.5-py3.10-linux-x86_64.egg/warpconvnet/nn/modules/sequential.py", line 15, in run_forward
    return module(x), in_sf
  File "/home/chenyu/anaconda3/envs/dogs/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1739, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/home/chenyu/anaconda3/envs/dogs/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1750, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/chenyu/anaconda3/envs/dogs/lib/python3.10/site-packages/warpconvnet-0.3.5-py3.10-linux-x86_64.egg/warpconvnet/nn/modules/sparse_conv.py", line 200, in forward
    return spatially_sparse_conv(
  File "/home/chenyu/anaconda3/envs/dogs/lib/python3.10/site-packages/warpconvnet-0.3.5-py3.10-linux-x86_64.egg/warpconvnet/nn/functional/sparse_conv.py", line 1981, in spatially_sparse_conv
    out_feature_tensor = UnifiedSpatiallySparseConvFunction.apply(
  File "/home/chenyu/anaconda3/envs/dogs/lib/python3.10/site-packages/torch/autograd/function.py", line 575, in apply
    return super().apply(*args, **kwargs)  # type: ignore[misc]
  File "/home/chenyu/anaconda3/envs/dogs/lib/python3.10/site-packages/warpconvnet-0.3.5-py3.10-linux-x86_64.egg/warpconvnet/nn/functional/sparse_conv.py", line 1663, in forward
    raise RuntimeError(
RuntimeError: Error in _cutlass_implicit_gemm_forward_logic: Kernel execution failed

My code for testing:

import torch

from warpconvnet.geometry.types.points import Points
from warpconvnet.geometry.types.voxels import Voxels
from warpconvnet.nn.functional.point_pool import point_pool


def colored_pc_to_warp_conv_points(
    point_coordinates: torch.Tensor,
    point_features: torch.Tensor,
):
    """
    Flatten a dense colored point cloud into a batched warpconvnet ``Points``.

    Points whose absolute XYZ values sum to at most 1e-6 are treated as
    invalid and dropped. The surviving points of each batch element are
    concatenated, and cumulative offsets (starting at 0) mark where each
    batch element ends. Batch elements without any valid point are skipped
    entirely, so the offsets may describe fewer than B batch elements.

    Args:
        point_coordinates: (B, F, H, W, 3) tensor - XYZ
        point_features: (B, F, H, W, 3) tensor - RGB

    Returns:
        points: warpconvnet points built from the concatenated coordinates,
            features and offsets.

    Raises:
        RuntimeError: if no batch element contains a valid point.
    """
    device = point_coordinates.device
    B, F, H, W, _ = point_coordinates.shape
    num_points = F * H * W

    # 1. Reshape and filter valid points
    point_cloud = point_coordinates.reshape(B, num_points, 3)
    point_feats = point_features.reshape(B, num_points, 3)
    valid_mask = point_cloud.abs().sum(-1) > 1e-6  # XYZ not zero

    all_coordinates, all_features, offsets = [], [], [0]
    for b in range(B):
        batch_mask = valid_mask[b]
        if not batch_mask.any():
            # NOTE(review): an empty batch element gets no offsets entry, so
            # later batch indices shift down by one - confirm this is intended.
            continue

        batch_points = point_cloud[b][batch_mask]
        all_coordinates.append(batch_points)
        all_features.append(point_feats[b][batch_mask])
        offsets.append(offsets[-1] + batch_points.shape[0])

    if not all_coordinates:
        # torch.cat rejects an empty list with an opaque message; fail early
        # with the same exception type but a description of the actual cause.
        raise RuntimeError(
            "colored_pc_to_warp_conv_points: input contains no valid "
            "(non-zero) points"
        )

    # Combine all batches
    coordinates = torch.cat(all_coordinates)
    # Features may live on a different device than the coordinates.
    features = torch.cat(all_features).to(device)
    offsets_tensor = torch.tensor(offsets, device=device)
    print(f'coordinates shape: {coordinates.shape}')
    print(f'features shape: {features.shape}')
    print(f'offsets shape: {offsets_tensor.shape}')

    return Points(coordinates, features, offsets_tensor)


if __name__ == "__main__":
    # Seed the torch RNGs so the run is repeatable.
    seed = 100
    for seed_fn in (
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    ):
        seed_fn(seed)

    device = "cuda"
    model = MinkUNet18(in_channels=3, out_channels=64).to(device)

    # Inputs: dense point cloud + optional RGB.
    # Both tensors are sampled first and only then moved to the GPU.
    dense_shape = (2, 4, 128, 128, 3)  # [B, F, H, W, 3]
    points = torch.randn(dense_shape).to(device)
    images = torch.randn(dense_shape).to(device)

    point_cloud = colored_pc_to_warp_conv_points(points, images)

    print(f'points shape: {points.shape}')
    print(f'images shape: {images.shape}\n')

    # Pool the points into a voxel-type sparse tensor (return_type="voxel")
    # and also request the to_unique result alongside it.
    pool_options = dict(
        reduction="mean",
        downsample_voxel_size=0.02,
        return_type="voxel",
        return_to_unique=True,
    )
    sparse_tensor, to_unique = point_pool(point_cloud, **pool_options)

    outputs = model(sparse_tensor)

My test environment:

pytorch: '2.6.0+cu124'
nvcc: Build cuda_12.3.r12.3/compiler.33567101_0

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions