add STANet

LinoSun · LinoSun · commit 7f501b8a52e7 · 2021-10-29T17:17:13.000+08:00
diff --git a/README.md b/README.md
@@ -49,6 +49,8 @@ Please refer to local_test.py temporarily.
 
 - [x] UPerNet [[paper](https://arxiv.org/abs/1807.10221)]
 
+- [x] STANet [[paper](https://www.mdpi.com/2072-4292/12/10/1662)]
+
 #### Encoders <a name="encoders"></a>
 
 The following is a list of supported encoders in the CDP. Select the appropriate family of encoders and click to expand the table and select a specific encoder and its pre-trained weights (`encoder_name` and `encoder_weights` parameters).
diff --git a/change_detection_pytorch/stanet/decoder.py b/change_detection_pytorch/stanet/decoder.py
@@ -50,8 +50,9 @@ def __init__(
             sa_mode='PAM'
     ):
         super(STANetDecoder, self).__init__()
+        self.out_channel = f_c
         self.backbone_decoder = BackboneDecoder(f_c, nn.BatchNorm2d, encoder_out_channels)
-        self.netA = CDSA(in_c=64, ds=1, mode=sa_mode)
+        self.netA = CDSA(in_c=f_c, ds=1, mode=sa_mode)
 
     def forward(self, *features):
         # fetch feature maps
@@ -149,7 +150,6 @@ def forward(self, input):
         x = self.relu(x)
         return x
 
-
 # if __name__ == '__main__':
 #     from change_detection_pytorch.encoders import get_encoder
 #
diff --git a/change_detection_pytorch/stanet/model.py b/change_detection_pytorch/stanet/model.py
@@ -3,17 +3,42 @@
 from torch.nn import functional as F
 from ..encoders import get_encoder
 from .decoder import STANetDecoder
+from ..base import SegmentationHead
 
 
 class STANet(torch.nn.Module):
+    """
+    Args:
+        encoder_name: Name of the classification model that will be used as an encoder (a.k.a backbone)
+            to extract features of different spatial resolution
+        encoder_weights: One of **None** (random initialization), **"imagenet"** (pre-training on ImageNet) and
+            other pretrained weights (see table with available weights for each encoder_name)
+        in_channels: A number of input channels for the model, default is 3 (RGB images)
+        classes: A number of classes for output mask (or you can think as a number of channels of output mask)
+        activation: An activation function to apply after the final convolution layer.
+            Available options are **"sigmoid"**, **"softmax"**, **"logsoftmax"**, **"tanh"**, **"identity"**, **callable** and **None**.
+            Default is **None**
+        return_distance_map: If True, return the batch of distance maps computed between the feature maps of the two input images, resized to the input (Height, Width), instead of the segmentation masks. Default: False.
+
+    Returns:
+        ``torch.nn.Module``: STANet
+
+    .. _STANet:
+        https://www.mdpi.com/2072-4292/12/10/1662
+
+    """
     def __init__(
             self,
             encoder_name: str = "resnet",
             encoder_weights: Optional[str] = "imagenet",
             sa_mode: str = "PAM",
             in_channels: int = 3,
+            classes=2,
+            activation=None,
+            return_distance_map=False
     ):
         super(STANet, self).__init__()
+        self.return_distance_map = return_distance_map
         self.encoder = get_encoder(
             encoder_name,
             in_channels=in_channels,
@@ -24,11 +49,23 @@ def __init__(
             encoder_out_channels=self.encoder.out_channels,
             sa_mode=sa_mode
         )
+        self.segmentation_head = SegmentationHead(
+            in_channels=self.decoder.out_channel * 2,
+            out_channels=classes,
+            activation=activation,
+            kernel_size=3,
+        )
 
     def forward(self, x1, x2):
         # only support siam encoder
         features = self.encoder(x1), self.encoder(x2)
         features = self.decoder(*features)
-        dist = F.pairwise_distance(features[0], features[1],keepdim=True)
-        dist = F.interpolate(dist, x1.shape[2:], mode='bilinear', align_corners=True)
-        return dist
+        if self.return_distance_map:
+            dist = F.pairwise_distance(features[0], features[1], keepdim=True)
+            dist = F.interpolate(dist, x1.shape[2:], mode='bilinear', align_corners=True)
+            return dist
+        else:
+            decoder_output = torch.cat([features[0], features[1]], dim=1)
+            decoder_output = F.interpolate(decoder_output, x1.shape[2:], mode='bilinear', align_corners=True)
+            masks = self.segmentation_head(decoder_output)
+            return masks
diff --git a/lino_test.py b/lino_test.py
@@ -1,12 +1,20 @@
 import torch
-from change_detection_pytorch.stanet import STANet
+from torch.utils.data import DataLoader, Dataset
 
-if __name__ == '__main__':
+import change_detection_pytorch as cdp
+from change_detection_pytorch.datasets import LEVIR_CD_Dataset, SVCD_Dataset
+from change_detection_pytorch.utils.lr_scheduler import GradualWarmupScheduler
 
-    samples = torch.ones([1, 3, 256, 256])
-    model = STANet(
-        encoder_name='vgg16',
-        in_channels=3
-    )
-    dist = model(samples, samples)
-    print(dist.size())
+DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
+
+model = cdp.STANet(
+    encoder_name="resnet34",  # choose encoder, e.g. mobilenet_v2 or efficientnet-b7
+    encoder_weights="imagenet",  # use `imagenet` pre-trained weights for encoder initialization
+    in_channels=3,  # model input channels (1 for gray-scale images, 3 for RGB, etc.)
+    return_distance_map=False
+).to(DEVICE)
+
+sampel1 = torch.ones([1, 3, 256, 256]).to(DEVICE)
+sampel2 = torch.ones([1, 3, 256, 256]).to(DEVICE)
+preds = model(sampel1, sampel2)
+print(preds.size())