from __future__ import print_function

import torch
import torch.nn as nn
import torch.nn.functional as F

import PytorchBoot.stereotype as stereotype


@stereotype.module("pointnet_encoder")
class PointNetEncoder(nn.Module):
    def __init__(self, config: dict):
        super(PointNetEncoder, self).__init__()
        self.out_dim = config["out_dim"]
        self.in_dim = config["in_dim"]
        self.feature_transform = config.get("feature_transform", False)
        self.stn = STNkd(k=self.in_dim)
        self.conv1 = torch.nn.Conv1d(self.in_dim, 64, 1)
        self.conv2 = torch.nn.Conv1d(64, 128, 1)
        self.conv3 = torch.nn.Conv1d(128, 512, 1)
        self.conv4 = torch.nn.Conv1d(512, self.out_dim, 1)
        if self.feature_transform:
            self.f_stn = STNkd(k=64)

    def forward(self, x):
        # x: (B, in_dim, N) point cloud in channel-first layout.
        trans = self.stn(x)
        x = x.transpose(2, 1)
        x = torch.bmm(x, trans)  # apply the predicted input transform
        x = x.transpose(2, 1)
        x = F.relu(self.conv1(x))

        if self.feature_transform:
            trans_feat = self.f_stn(x)
            x = x.transpose(2, 1)
            x = torch.bmm(x, trans_feat)  # apply the predicted feature transform
            x = x.transpose(2, 1)

        point_feat = x  # per-point 64-dim features, kept for optional return
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = self.conv4(x)
        x = torch.max(x, 2, keepdim=True)[0]  # symmetric max pooling over points
        x = x.view(-1, self.out_dim)
        return x, point_feat

    def encode_points(self, pts, require_per_point_feat=False):
        # pts: (B, N, in_dim); Conv1d expects channel-first, so transpose.
        pts = pts.transpose(2, 1)
        global_pts_feature, per_point_feature = self(pts)
        if require_per_point_feat:
            return global_pts_feature, per_point_feature.transpose(2, 1)
        else:
            return global_pts_feature


class STNkd(nn.Module):
    """Spatial transformer that predicts a k x k alignment matrix."""

    def __init__(self, k=64):
        super(STNkd, self).__init__()
        self.conv1 = torch.nn.Conv1d(k, 64, 1)
        self.conv2 = torch.nn.Conv1d(64, 128, 1)
        self.conv3 = torch.nn.Conv1d(128, 1024, 1)
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, k * k)
        self.relu = nn.ReLU()
        self.k = k

    def forward(self, x):
        batchsize = x.size()[0]
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = torch.max(x, 2, keepdim=True)[0]
        x = x.view(-1, 1024)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        # Bias the prediction toward the identity matrix so the transform
        # starts close to a no-op.
        iden = (
            torch.eye(self.k, dtype=x.dtype, device=x.device)
            .flatten()
            .view(1, self.k * self.k)
            .repeat(batchsize, 1)
        )
        x = x + iden
        x = x.view(-1, self.k, self.k)
        return x


if __name__ == "__main__":
    sim_data = torch.rand(32, 2500, 3)
    config = {
        "in_dim": 3,
        "out_dim": 1024,
        "feature_transform": False,
    }
    pointnet = PointNetEncoder(config)
    out = pointnet.encode_points(sim_data)
    print("global feat", out.size())
    out, per_point_out = pointnet.encode_points(sim_data, require_per_point_feat=True)
    print("point feat", out.size())
    print("per point feat", per_point_out.size())