nbv_reconstruction/modules/pointnet_encoder.py
2024-09-29 20:12:44 +08:00

108 lines
3.4 KiB
Python

from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.utils.data
from torch.autograd import Variable
import numpy as np
import torch.nn.functional as F
import PytorchBoot.stereotype as stereotype
@stereotype.module("pointnet_encoder")
class PointNetEncoder(nn.Module):
    """PointNet-style encoder producing a global feature and per-point features.

    Configuration keys read from ``config``:
        * ``"in_dim"``: channel count of each input point.
        * ``"out_dim"``: channel count of the global feature.
        * ``"feature_transform"`` (optional, default ``False``): when truthy,
          a second 64x64 learned transform is applied after the first
          convolution.
    """

    def __init__(self, config: dict):
        super().__init__()
        self.out_dim = config["out_dim"]
        self.in_dim = config["in_dim"]
        self.feature_transform = config.get("feature_transform", False)

        # Input-space alignment network followed by the shared per-point MLP
        # (implemented as kernel-size-1 convolutions).
        self.stn = STNkd(k=self.in_dim)
        self.conv1 = nn.Conv1d(self.in_dim, 64, 1)
        self.conv2 = nn.Conv1d(64, 128, 1)
        self.conv3 = nn.Conv1d(128, 512, 1)
        self.conv4 = nn.Conv1d(512, self.out_dim, 1)

        # The feature-space alignment network only exists when requested.
        if self.feature_transform:
            self.f_stn = STNkd(k=64)

    @staticmethod
    def _apply_transform(feat, matrix):
        """Right-multiply channel-first features by a per-sample matrix.

        ``feat`` is channel-first ``(batch, channels, points)``; it is moved
        to point-major layout for the batched matmul and moved back after.
        """
        point_major = feat.transpose(2, 1)
        aligned = torch.bmm(point_major, matrix)
        return aligned.transpose(2, 1)

    def forward(self, x):
        """Encode a channel-first point tensor.

        Args:
            x: tensor laid out as ``(batch, in_dim, num_points)``.

        Returns:
            A tuple ``(global_feat, point_feat)`` where ``global_feat`` is
            reshaped to ``(-1, out_dim)`` after max-pooling over points, and
            ``point_feat`` is the 64-channel activation after the first
            convolution (and the optional feature transform).
        """
        x = self._apply_transform(x, self.stn(x))
        x = F.relu(self.conv1(x))

        if self.feature_transform:
            x = self._apply_transform(x, self.f_stn(x))

        point_feat = x

        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = self.conv4(x)

        # Symmetric aggregation: max over the point axis.
        pooled = x.max(dim=2, keepdim=True).values
        return pooled.view(-1, self.out_dim), point_feat

    def encode_points(self, pts, require_per_point_feat=False):
        """Encode point-major input ``(batch, num_points, in_dim)``.

        Args:
            pts: point-major tensor; it is transposed to channel-first before
                being passed to ``forward``.
            require_per_point_feat: when true, also return the per-point
                features transposed back to point-major layout.

        Returns:
            The global feature alone, or ``(global_feature,
            per_point_feature)`` when ``require_per_point_feat`` is true.
        """
        global_pts_feature, per_point_feature = self(pts.transpose(2, 1))
        if not require_per_point_feat:
            return global_pts_feature
        return global_pts_feature, per_point_feature.transpose(2, 1)
class STNkd(nn.Module):
    """Transform network predicting one ``k x k`` matrix per batch element.

    The network regresses a residual that is added to the identity matrix,
    so a freshly zeroed output layer yields the identity transform.

    Args:
        k: channel count of the input and side length of the predicted
            square matrix.
    """

    def __init__(self, k=64):
        super(STNkd, self).__init__()
        self.conv1 = torch.nn.Conv1d(k, 64, 1)
        self.conv2 = torch.nn.Conv1d(64, 128, 1)
        self.conv3 = torch.nn.Conv1d(128, 1024, 1)
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, k * k)
        # Not used by forward(); retained so the attribute stays available to
        # any external code that references it.
        self.relu = nn.ReLU()
        self.k = k

    def forward(self, x):
        """Predict per-sample transforms.

        Args:
            x: channel-first tensor ``(batch, k, num_points)``.

        Returns:
            Tensor reshaped to ``(-1, k, k)``: the regressed residual plus the
            identity matrix, on the same device and with the same dtype as
            the network activations.
        """
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))

        # Max-pool over the point axis to obtain one 1024-d vector per sample.
        x = torch.max(x, 2, keepdim=True)[0]
        x = x.view(-1, 1024)

        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x).view(-1, self.k, self.k)

        # Build the identity directly with the activations' dtype and device
        # so the addition works on any backend and precision; broadcasting
        # applies it to every sample in the batch.
        identity = torch.eye(self.k, dtype=x.dtype, device=x.device)
        return x + identity
if __name__ == "__main__":
    # Smoke test: encode a random batch of 32 clouds with 2500 3-D points each
    # and print the sizes of the resulting features.
    # Plain tensors are used directly; wrapping them in the deprecated
    # autograd ``Variable`` is unnecessary.
    sim_data = torch.rand(32, 2500, 3)
    config = {
        "in_dim": 3,
        "out_dim": 1024,
        "feature_transform": False,
    }
    pointnet = PointNetEncoder(config)

    # Global feature only.
    out = pointnet.encode_points(sim_data)
    print("global feat", out.size())

    # Global feature together with per-point features.
    out, per_point_out = pointnet.encode_points(sim_data, require_per_point_feat=True)
    print("point feat", out.size())
    print("per point feat", per_point_out.size())