From 3f9084acb79a815d0b76b7da42d2a4687971d229 Mon Sep 17 00:00:00 2001
From: Jiawei-Li20
Date: Fri, 25 Dec 2020 12:07:20 +0800
Subject: [PATCH 1/8] add citeseer

---
 cogdl/options.py                   | 1 +
 cogdl/tasks/node_classification.py | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/cogdl/options.py b/cogdl/options.py
index 48507f37..d8cd76a3 100644
--- a/cogdl/options.py
+++ b/cogdl/options.py
@@ -24,6 +24,7 @@ def get_parser():
                         help='which GPU to use')
     parser.add_argument('--save-dir', default='.', type=str)
     parser.add_argument('--enhance', type=str, default=None, help='use prone or prone++ to enhance embedding')
+    parser.add_argument('--dropedge', default=0.0, help='the drop edge probability')
     # fmt: on
     return parser

diff --git a/cogdl/tasks/node_classification.py b/cogdl/tasks/node_classification.py
index 6bc215cc..30e0e516 100644
--- a/cogdl/tasks/node_classification.py
+++ b/cogdl/tasks/node_classification.py
@@ -97,6 +97,8 @@ def __init__(

         self.dataset = dataset
         self.data = dataset[0]
+        print(self.data.edge_index)
+        print(self.data.edge_index.shape)
         args.num_features = dataset.num_features
         args.num_classes = dataset.num_classes
         args.num_nodes = dataset.data.x.shape[0]

From 445260b091a43de794fe8ec84980fa3956af1cca Mon Sep 17 00:00:00 2001
From: Jiawei-Li20
Date: Fri, 25 Dec 2020 13:31:38 +0800
Subject: [PATCH 2/8] add dropedge

---
 cogdl/tasks/node_classification.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/cogdl/tasks/node_classification.py b/cogdl/tasks/node_classification.py
index 30e0e516..03dd08d1 100644
--- a/cogdl/tasks/node_classification.py
+++ b/cogdl/tasks/node_classification.py
@@ -97,8 +97,13 @@ def __init__(

         self.dataset = dataset
         self.data = dataset[0]
-        print(self.data.edge_index)
-        print(self.data.edge_index.shape)
+
+        # add dropedge args
+        self.dropedge = float(args.dropedge)
+        # store the original edge index
+        self.original_edge_idx = torch.tensor(self.data.edge_index)
+        self.original_edge_num = self.original_edge_idx.shape[1]
+
         args.num_features = dataset.num_features
         args.num_classes = dataset.num_classes
         args.num_nodes = dataset.data.x.shape[0]
@@ -170,6 +175,13 @@ def train(self):

     def _train_step(self):
         self.model.train()
         self.optimizer.zero_grad()
+
+        # drop the edge
+        remaining_edge_num = int((1 - self.dropedge) * self.original_edge_num)
+        perm = np.random.permutation(self.original_edge_num)
+        remaining_edge = perm[:remaining_edge_num]
+        self.data.edge_index = self.original_edge_idx[:, remaining_edge]
+
         self.model.node_classification_loss(self.data).backward()
         self.optimizer.step()

From 3502540345c5296e17b695e8132ab7e3aa626920 Mon Sep 17 00:00:00 2001
From: Jiawei-Li20
Date: Sat, 26 Dec 2020 21:05:07 +0800
Subject: [PATCH 3/8] add BingGeNormalization

---
 cogdl/models/nn/gcn.py | 97 ++++++++++++++++++++++++++++++++++++++++++
 match.yml              |  1 +
 2 files changed, 98 insertions(+)

diff --git a/cogdl/models/nn/gcn.py b/cogdl/models/nn/gcn.py
index 71b3e9e4..354c9b1d 100644
--- a/cogdl/models/nn/gcn.py
+++ b/cogdl/models/nn/gcn.py
@@ -49,6 +49,50 @@ def forward(self, input, edge_index, edge_attr=None):
     def __repr__(self):
         return self.__class__.__name__ + " (" + str(self.in_features) + " -> " + str(self.out_features) + ")"

+class GraphConvolutionBGNorm(nn.Module):
+    """
+    Simple GCN layer with BingGe Normalization, similar to https://arxiv.org/abs/1609.02907
+    """
+
+    def __init__(self, in_features, out_features, bias=True):
+        super(GraphConvolutionBGNorm, self).__init__()
+        self.in_features = in_features
+        self.out_features = out_features
+        self.weight = Parameter(torch.FloatTensor(in_features, out_features))
+        if bias:
+            self.bias = Parameter(torch.FloatTensor(out_features))
+        else:
+            self.register_parameter("bias", None)
+        self.reset_parameters()
+
+    def reset_parameters(self):
+        stdv = 1.0 / math.sqrt(self.weight.size(1))
+        self.weight.data.uniform_(-stdv, stdv)
+        if self.bias is not None:
+            self.bias.data.zero_()
+
+    def forward(self, input, edge_index, edge_attr=None):
+        if edge_attr is None:
+            edge_attr = torch.ones(edge_index.shape[1]).float().to(input.device)
+        normalization_attr = torch.ones(input.shape[0]).float().to(input.device)
+        normalization_idx = torch.arange(input.shape[0]).repeat(2,1).to(input.device)
+        edge_attr = torch.hstack((edge_attr, normalization_attr)).to(input.device)
+        edge_index = torch.hstack((edge_index, normalization_idx))
+        adj = torch.sparse_coo_tensor(
+            edge_index,
+            edge_attr,
+            (input.shape[0], input.shape[0]),
+        ).to(input.device)
+        support = torch.mm(input, self.weight)
+        output = torch.spmm(adj, support)
+        if self.bias is not None:
+            return output + self.bias
+        else:
+            return output
+
+    def __repr__(self):
+        return self.__class__.__name__ + " (" + str(self.in_features) + " -> " + str(self.out_features) + ")"
+

 @register_model("gcn")
 class TKipfGCN(BaseModel):
@@ -100,3 +144,56 @@ def forward(self, x, adj):

     def predict(self, data):
         return self.forward(data.x, data.edge_index)
+
+@register_model("gcnbg")
+class TKipfGCNBGNorm(BaseModel):
+    r"""The GCN model from the `"Semi-Supervised Classification with Graph Convolutional Networks"
+    <https://arxiv.org/abs/1609.02907>`_ paper
+
+    We implement this model with BingGe normalization
+
+    Args:
+        num_features (int) : Number of input features.
+        num_classes (int) : Number of classes.
+        hidden_size (int) : The dimension of node representation.
+        dropout (float) : Dropout rate for model training.
+    """
+
+    @staticmethod
+    def add_args(parser):
+        """Add model-specific arguments to the parser."""
+        # fmt: off
+        parser.add_argument("--num-features", type=int)
+        parser.add_argument("--num-classes", type=int)
+        parser.add_argument("--hidden-size", type=int, default=64)
+        parser.add_argument("--dropout", type=float, default=0.5)
+        # fmt: on
+
+    @classmethod
+    def build_model_from_args(cls, args):
+        return cls(args.num_features, args.hidden_size, args.num_classes, args.dropout)
+
+    def __init__(self, in_feats, hidden_size, out_feats, dropout):
+        super(TKipfGCNBGNorm, self).__init__()
+
+        self.gc1 = GraphConvolutionBGNorm(in_feats, hidden_size)
+        self.gc2 = GraphConvolutionBGNorm(hidden_size, out_feats)
+        self.dropout = dropout
+        # self.nonlinear = nn.SELU()
+
+    def forward(self, x, adj):
+        device = x.device
+        adj_values = torch.ones(adj.shape[1]).to(device)
+        adj, adj_values = add_remaining_self_loops(adj, adj_values, 1, x.shape[0])
+        deg = spmm(adj, adj_values, torch.ones(x.shape[0], 1).to(device)).squeeze()
+        deg_sqrt = deg.pow(-1 / 2)
+        adj_values = deg_sqrt[adj[1]] * adj_values * deg_sqrt[adj[0]]
+
+        x = F.dropout(x, self.dropout, training=self.training)
+        x = F.relu(self.gc1(x, adj, adj_values))
+        x = F.dropout(x, self.dropout, training=self.training)
+        x = self.gc2(x, adj, adj_values)
+        return x
+
+    def predict(self, data):
+        return self.forward(data.x, data.edge_index)
diff --git a/match.yml b/match.yml
index aa7658bb..8bebec0d 100644
--- a/match.yml
+++ b/match.yml
@@ -26,6 +26,7 @@ node_classification:
     - sgc
     - dropedge_gcn
     - gunet
+    - gcnbg
   dataset:
     - cora
     - citeseer

From 28ea40a75207a8e7dffec21375919b5bdbf851ca Mon Sep 17 00:00:00 2001
From: Jiawei-Li20
Date: Sun, 27 Dec 2020 13:56:11 +0800
Subject: [PATCH 4/8] add unit test

---
 cogdl/models/nn/gcn.py                  |  3 ++-
 tests/tasks/test_node_classification.py | 10 ++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/cogdl/models/nn/gcn.py b/cogdl/models/nn/gcn.py
index 354c9b1d..1b9220bd 100644
--- a/cogdl/models/nn/gcn.py
+++ b/cogdl/models/nn/gcn.py
@@ -125,6 +125,7 @@ def __init__(self, in_feats, hidden_size, out_feats, dropout):

         self.gc1 = GraphConvolution(in_feats, hidden_size)
         self.gc2 = GraphConvolution(hidden_size, out_feats)
+        self.bn = nn.BatchNorm1d(hidden_size)
         self.dropout = dropout
         # self.nonlinear = nn.SELU()

@@ -137,7 +138,7 @@ def forward(self, x, adj):
         adj_values = deg_sqrt[adj[1]] * adj_values * deg_sqrt[adj[0]]

         x = F.dropout(x, self.dropout, training=self.training)
-        x = F.relu(self.gc1(x, adj, adj_values))
+        x = F.relu(self.bn(self.gc1(x, adj, adj_values)))
         x = F.dropout(x, self.dropout, training=self.training)
         x = self.gc2(x, adj, adj_values)
         return x
diff --git a/tests/tasks/test_node_classification.py b/tests/tasks/test_node_classification.py
index c6639cf3..5bfb0df2 100644
--- a/tests/tasks/test_node_classification.py
+++ b/tests/tasks/test_node_classification.py
@@ -646,6 +646,15 @@ def test_dropedge_inceptiongcn_cora():
     ret = task.train()
     assert 0 <= ret["Acc"] <= 1

+def test_dropedge_gcnbg_citeseer():
+    args = get_default_args()
+    args.task = "node_classification"
+    args.dataset = "citeseer"
+    args.model = "gcnbg"
+    task = build_task(args)
+    ret = task.train()
+    assert 0 <= ret["Acc"] <= 1
+

 if __name__ == "__main__":
     test_gdc_gcn_cora()
@@ -685,3 +694,4 @@ def test_dropedge_inceptiongcn_cora():
     test_dropedge_inceptiongcn_cora()
     test_dropedge_densegcn_cora()
     test_unet_cora()
+    test_dropedge_gcnbg_citeseer()
\ No newline at end of file

From dc2eeb13c4455542ee6da920d16df22abc9122eb Mon Sep 17 00:00:00 2001
From: Jiawei-Li20
Date: Sun, 27 Dec 2020 14:20:12 +0800
Subject: [PATCH 5/8] fix typo

---
 cogdl/models/nn/gcn.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/cogdl/models/nn/gcn.py b/cogdl/models/nn/gcn.py
index 1b9220bd..9d28308a 100644
--- a/cogdl/models/nn/gcn.py
+++ b/cogdl/models/nn/gcn.py
@@ -125,7 +125,6 @@ def __init__(self, in_feats, hidden_size, out_feats, dropout):

         self.gc1 = GraphConvolution(in_feats, hidden_size)
         self.gc2 = GraphConvolution(hidden_size, out_feats)
-        self.bn = nn.BatchNorm1d(hidden_size)
         self.dropout = dropout
         # self.nonlinear = nn.SELU()

@@ -138,7 +137,7 @@ def forward(self, x, adj):
         adj_values = deg_sqrt[adj[1]] * adj_values * deg_sqrt[adj[0]]

         x = F.dropout(x, self.dropout, training=self.training)
-        x = F.relu(self.bn(self.gc1(x, adj, adj_values)))
+        x = F.relu(self.gc1(x, adj, adj_values))
         x = F.dropout(x, self.dropout, training=self.training)
         x = self.gc2(x, adj, adj_values)
         return x
@@ -179,6 +178,7 @@ def __init__(self, in_feats, hidden_size, out_feats, dropout):

         self.gc1 = GraphConvolutionBGNorm(in_feats, hidden_size)
         self.gc2 = GraphConvolutionBGNorm(hidden_size, out_feats)
+        self.bn = nn.BatchNorm1d(hidden_size)
         self.dropout = dropout
         # self.nonlinear = nn.SELU()

@@ -191,7 +191,7 @@ def forward(self, x, adj):
         adj_values = deg_sqrt[adj[1]] * adj_values * deg_sqrt[adj[0]]

         x = F.dropout(x, self.dropout, training=self.training)
-        x = F.relu(self.gc1(x, adj, adj_values))
+        x = F.relu(self.bn(self.gc1(x, adj, adj_values)))
         x = F.dropout(x, self.dropout, training=self.training)
         x = self.gc2(x, adj, adj_values)
         return x

From 4052b0b670166f86b37bb72becf0753b3bac95ac Mon Sep 17 00:00:00 2001
From: Jiawei-Li20
Date: Sun, 27 Dec 2020 14:38:43 +0800
Subject: [PATCH 6/8] fix unit test error

---
 cogdl/tasks/node_classification.py      | 5 ++++-
 tests/tasks/test_node_classification.py | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/cogdl/tasks/node_classification.py b/cogdl/tasks/node_classification.py
index 03dd08d1..07c88fab 100644
--- a/cogdl/tasks/node_classification.py
+++ b/cogdl/tasks/node_classification.py
@@ -99,7 +99,10 @@ def __init__(
         self.data = dataset[0]

         # add dropedge args
-        self.dropedge = float(args.dropedge)
+        try:
+            self.dropedge = float(args.dropedge)
+        except:
+            self.dropedge = 0.0
         # store the original edge index
         self.original_edge_idx = torch.tensor(self.data.edge_index)
         self.original_edge_num = self.original_edge_idx.shape[1]
diff --git a/tests/tasks/test_node_classification.py b/tests/tasks/test_node_classification.py
index 4053a6dc..8c802e1c 100644
--- a/tests/tasks/test_node_classification.py
+++ b/tests/tasks/test_node_classification.py
@@ -651,6 +651,7 @@ def test_dropedge_gcnbg_citeseer():
     args.task = "node_classification"
     args.dataset = "citeseer"
     args.model = "gcnbg"
+    args.dropedge = '0.05'
     task = build_task(args)
     ret = task.train()
     assert 0 <= ret["Acc"] <= 1

From faf23af1ccf00c4d300168d84183552b505678b8 Mon Sep 17 00:00:00 2001
From: Jiawei-Li20
Date: Sun, 27 Dec 2020 14:52:07 +0800
Subject: [PATCH 7/8] fix unit test err

---
 cogdl/tasks/node_classification.py      | 5 +----
 tests/tasks/test_node_classification.py | 3 ++-
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/cogdl/tasks/node_classification.py b/cogdl/tasks/node_classification.py
index 07c88fab..03dd08d1 100644
--- a/cogdl/tasks/node_classification.py
+++ b/cogdl/tasks/node_classification.py
@@ -99,10 +99,7 @@ def __init__(
         self.data = dataset[0]

         # add dropedge args
-        try:
-            self.dropedge = float(args.dropedge)
-        except:
-            self.dropedge = 0.0
+        self.dropedge = float(args.dropedge)
         # store the original edge index
         self.original_edge_idx = torch.tensor(self.data.edge_index)
         self.original_edge_num = self.original_edge_idx.shape[1]
diff --git a/tests/tasks/test_node_classification.py b/tests/tasks/test_node_classification.py
index 8c802e1c..a615dcca 100644
--- a/tests/tasks/test_node_classification.py
+++ b/tests/tasks/test_node_classification.py
@@ -22,6 +22,7 @@ def get_default_args():
         "missing_rate": -1,
         "task": "node_classification",
         "dataset": "cora",
+        "dropedge": 0.0,
     }
     return build_args_from_dict(default_dict)

@@ -651,7 +652,7 @@ def test_dropedge_gcnbg_citeseer():
     args.task = "node_classification"
     args.dataset = "citeseer"
     args.model = "gcnbg"
-    args.dropedge = '0.05'
+    args.dropedge = 0.05
     task = build_task(args)
     ret = task.train()
     assert 0 <= ret["Acc"] <= 1

From a5540e018661d8a62f40282f2bfdde87e766c76f Mon Sep 17 00:00:00 2001
From: Jiawei-Li20
Date: Sun, 27 Dec 2020 15:33:08 +0800
Subject: [PATCH 8/8] fix unit test error: using a function that does not exist in PyTorch 1.6.0

---
 cogdl/models/nn/gcn.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cogdl/models/nn/gcn.py b/cogdl/models/nn/gcn.py
index 9d28308a..a926afdd 100644
--- a/cogdl/models/nn/gcn.py
+++ b/cogdl/models/nn/gcn.py
@@ -76,8 +76,8 @@ def forward(self, input, edge_index, edge_attr=None):
             edge_attr = torch.ones(edge_index.shape[1]).float().to(input.device)
         normalization_attr = torch.ones(input.shape[0]).float().to(input.device)
         normalization_idx = torch.arange(input.shape[0]).repeat(2,1).to(input.device)
-        edge_attr = torch.hstack((edge_attr, normalization_attr)).to(input.device)
-        edge_index = torch.hstack((edge_index, normalization_idx))
+        edge_attr = torch.cat((edge_attr, normalization_attr)).to(input.device)
+        edge_index = torch.cat((edge_index, normalization_idx), dim=1)
         adj = torch.sparse_coo_tensor(
             edge_index,
             edge_attr,
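
The DropEdge step that patch 2 adds to _train_step can be read in isolation: before every optimizer step it permutes the stored edge list and keeps a (1 - dropedge) fraction of the columns of edge_index. The snippet below is a minimal standalone sketch of that sampling, not part of the patches themselves; the helper name drop_edge and the toy graph are illustrative, and it assumes edge_index is a 2 x E LongTensor as in cogdl's node-classification data.

import numpy as np
import torch


def drop_edge(original_edge_index, dropedge):
    # Keep a random (1 - dropedge) fraction of the edges, mirroring _train_step.
    num_edges = original_edge_index.shape[1]
    num_remaining = int((1 - dropedge) * num_edges)
    perm = np.random.permutation(num_edges)          # random edge order
    keep = torch.from_numpy(perm[:num_remaining])    # surviving edge positions
    return original_edge_index[:, keep]              # sample columns, i.e. edges


# Toy check: with dropedge=0.05 on a 4-edge graph, 3 of the 4 edges survive.
edge_index = torch.tensor([[0, 1, 2, 3], [1, 2, 3, 0]])
print(drop_edge(edge_index, dropedge=0.05).shape)    # torch.Size([2, 3])

Because the task keeps original_edge_idx and resamples from it on every call, edges dropped in one training step can reappear in the next.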
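
The layer added in patch 3, GraphConvolutionBGNorm, appends one weight-1 self-loop per node to whatever edge list it is given before building the sparse adjacency, so propagation effectively uses the normalized adjacency plus the identity; patch 8 then swaps torch.hstack for torch.cat so the same code also runs on PyTorch 1.6.0. Below is a small self-contained sketch of just that step, with toy shapes and weights that are illustrative rather than taken from the patches.

import torch

num_nodes, num_feats = 4, 8
x = torch.randn(num_nodes, num_feats)                  # node features
edge_index = torch.tensor([[0, 1, 2], [1, 2, 3]])      # three directed edges
edge_attr = torch.ones(edge_index.shape[1])            # edge weights (all 1 here)

# Append one self-loop per node with weight 1, as GraphConvolutionBGNorm.forward does.
loop_index = torch.arange(num_nodes).repeat(2, 1)      # shape (2, num_nodes): [[0..3], [0..3]]
loop_attr = torch.ones(num_nodes)
edge_index = torch.cat((edge_index, loop_index), dim=1)    # torch.cat instead of torch.hstack (patch 8)
edge_attr = torch.cat((edge_attr, loop_attr))

adj = torch.sparse_coo_tensor(edge_index, edge_attr, (num_nodes, num_nodes))
out = torch.spmm(adj, x)                               # aggregate neighbours plus self
print(out.shape)                                       # torch.Size([4, 8])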