import torch
import torch.nn as nn
import torch.nn.functional as F

import PytorchBoot.stereotype as stereotype


@stereotype.module("pointnet_encoder")
class PointNetEncoder(nn.Module):
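    """PointNet-style global feature encoder.

    A shared per-point MLP (1x1 convolutions) followed by channel-wise max
    pooling yields one global feature vector per cloud, as in PointNet. An
    input STN aligns the raw points; an optional feature STN aligns the
    64-dim per-point features.
    """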
    def __init__(self, config: dict):
        super().__init__()

        self.out_dim = config["out_dim"]
        self.in_dim = config["in_dim"]
        self.feature_transform = config.get("feature_transform", False)
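        # Input spatial transformer plus a shared per-point MLP
        # (in_dim -> 64 -> 128 -> 512 -> out_dim, all 1x1 convs).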
        self.stn = STNkd(k=self.in_dim)
        self.conv1 = nn.Conv1d(self.in_dim, 64, 1)
        self.conv2 = nn.Conv1d(64, 128, 1)
        self.conv3 = nn.Conv1d(128, 512, 1)
        self.conv4 = nn.Conv1d(512, self.out_dim, 1)
        if self.feature_transform:
            self.f_stn = STNkd(k=64)

    def forward(self, x):
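        # Align the input with the predicted spatial transform:
        # (B, C, N) -> (B, N, C) for the batched matmul, then back.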
        trans = self.stn(x)
        x = x.transpose(2, 1)
        x = torch.bmm(x, trans)
        x = x.transpose(2, 1)
        x = F.relu(self.conv1(x))
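
        # Optionally align the 64-dim per-point features the same way.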
        if self.feature_transform:
            trans_feat = self.f_stn(x)
            x = x.transpose(2, 1)
            x = torch.bmm(x, trans_feat)
            x = x.transpose(2, 1)
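
        # Keep the per-point features before pooling; the global feature
        # is the channel-wise max over all points.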
        point_feat = x
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = self.conv4(x)
        x = torch.max(x, 2, keepdim=True)[0]
        x = x.view(-1, self.out_dim)
        return x, point_feat

    def encode_points(self, pts, require_per_point_feat=False):
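        """Encode a batch of point clouds given as (B, N, C) tensors.

        The network itself expects (B, C, N), so the input is transposed
        on the way in and the per-point features on the way out.
        """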
        pts = pts.transpose(2, 1)
        global_pts_feature, per_point_feature = self(pts)
        if require_per_point_feat:
            return global_pts_feature, per_point_feature.transpose(2, 1)
        else:
            return global_pts_feature


class STNkd(nn.Module):
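    """Spatial transformer network (T-Net) from PointNet.

    Regresses a k x k transformation matrix from the input features; the
    output is biased toward the identity so training starts from a
    near-identity alignment.
    """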

    def __init__(self, k=64):
        super().__init__()
        self.conv1 = nn.Conv1d(k, 64, 1)
        self.conv2 = nn.Conv1d(64, 128, 1)
        self.conv3 = nn.Conv1d(128, 1024, 1)
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, k * k)
        self.relu = nn.ReLU()

        self.k = k

    def forward(self, x):
        batchsize = x.size()[0]
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = torch.max(x, 2, keepdim=True)[0]
        x = x.view(-1, 1024)

        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
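
        # Add a flattened k x k identity so an untrained network regresses
        # a near-identity transform; build it directly on the input device.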
        iden = (
            torch.eye(self.k, device=x.device)
            .flatten()
            .view(1, self.k * self.k)
            .repeat(batchsize, 1)
        )
        x = x + iden
        x = x.view(-1, self.k, self.k)
        return x
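

# Smoke test: encode 32 random clouds of 2500 three-dimensional points
# and print the resulting feature shapes.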
if __name__ == "__main__":
    sim_data = torch.rand(32, 2500, 3)
    config = {
        "in_dim": 3,
        "out_dim": 1024,
        "feature_transform": False,
    }
    pointnet = PointNetEncoder(config)
    out = pointnet.encode_points(sim_data)

    print("global feat", out.size())

    out, per_point_out = pointnet.encode_points(sim_data, require_per_point_feat=True)
    print("global feat", out.size())
    print("per point feat", per_point_out.size())