green-code-initiative · cleophass · May 19, 2025 · May 19, 2025 · Jun 2, 2025 · Jun 2, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
+- Add rule GCI98 Require Usecols Argument in Pandas Read Functions
+
 ### Changed
 
 - compatibility updates for SonarQube 25.5.0

diff --git a/src/main/java/org/greencodeinitiative/creedengo/python/PythonRuleRepository.java b/src/main/java/org/greencodeinitiative/creedengo/python/PythonRuleRepository.java
@@ -40,7 +40,8 @@ public class PythonRuleRepository implements RulesDefinition, PythonCustomRuleRe
             AvoidFullSQLRequest.class,
             AvoidListComprehensionInIterations.class,
             DetectUnoptimizedImageFormat.class,
-            AvoidMultipleIfElseStatementCheck.class
+            AvoidMultipleIfElseStatementCheck.class,
+            RequireUsecolsArgument.class
     );
 
     public static final String LANGUAGE = "py";

diff --git a/src/main/java/org/greencodeinitiative/creedengo/python/checks/RequireUsecolsArgument.java b/src/main/java/org/greencodeinitiative/creedengo/python/checks/RequireUsecolsArgument.java
@@ -0,0 +1,77 @@
+/*
+ * creedengo - Python language - Provides rules to reduce the environmental footprint of your Python programs
+ * Copyright © 2024 Green Code Initiative (https://green-code-initiative.org)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package org.greencodeinitiative.creedengo.python.checks;
+
+import java.util.Arrays;
+import java.util.List;
+import org.sonar.check.Rule;
+import org.sonar.plugins.python.api.PythonSubscriptionCheck;
+import org.sonar.plugins.python.api.SubscriptionContext;
+import org.sonar.plugins.python.api.tree.Argument;
+import org.sonar.plugins.python.api.tree.CallExpression;
+import org.sonar.plugins.python.api.tree.Tree;
+import org.sonar.plugins.python.api.tree.Expression;
+import org.sonar.plugins.python.api.tree.QualifiedExpression;
+import org.sonar.plugins.python.api.tree.RegularArgument;
+import static org.sonar.plugins.python.api.tree.Tree.Kind.*;
+
+/**
+ * Reports DataFrame reader calls that do not restrict the columns being loaded.
+ *
+ * <p>The match is purely syntactic: every call whose callee is a qualified expression
+ * ({@code receiver.method(...)}) and whose method name is listed in {@link #READ_METHODS} is
+ * inspected, whatever the receiver is. Such a call is accepted when it passes a {@code usecols}
+ * or {@code columns} keyword argument, or when it forwards a {@code **mapping} whose keys cannot
+ * be inspected here.
+ */
+@Rule(key = "GCI98")
+public class RequireUsecolsArgument extends PythonSubscriptionCheck {
+
+    /** Message attached to every issue raised by this check. */
+    public static final String DESCRIPTION = "Specify 'usecols' or 'columns' when reading a DataFrame to load only necessary columns";
+
+    /**
+     * Method names treated as DataFrame readers.
+     * NOTE(review): pandas.read_json does not seem to accept 'usecols' or 'columns' - confirm
+     * whether it belongs in this list.
+     */
+    private static final List<String> READ_METHODS = Arrays.asList(
+            "read_csv", "read_parquet", "read_excel", "read_feather", "read_json"
+    );
+
+    /** Keyword argument names that count as a column selection. */
+    private static final List<String> COLUMN_KEYWORDS = Arrays.asList("usecols", "columns");
+
+    /**
+     * Subscribes this check to call expressions.
+     *
+     * @param context registration context supplied by the analyzer
+     */
+    @Override
+    public void initialize(Context context) {
+        context.registerSyntaxNodeConsumer(Tree.Kind.CALL_EXPR, this::visitCallExpression);
+    }
+
+    /**
+     * Raises an issue on the first token of a reader call that selects no columns.
+     *
+     * @param ctx subscription context whose syntax node is the visited call expression
+     */
+    public void visitCallExpression(SubscriptionContext ctx) {
+        CallExpression callExpression = (CallExpression) ctx.syntaxNode();
+        Expression callee = callExpression.callee();
+        if (!callee.is(Tree.Kind.QUALIFIED_EXPR)) {
+            // Only qualified callees (receiver.method) are inspected by this rule.
+            return;
+        }
+
+        String methodName = ((QualifiedExpression) callee).name().name();
+        if (READ_METHODS.contains(methodName) && !hasColumnsSpecified(callExpression)) {
+            ctx.addIssue(callExpression.firstToken(), DESCRIPTION);
+        }
+    }
+
+    /**
+     * Tells whether the call may already restrict the loaded columns.
+     *
+     * @param callExpression reader call to inspect
+     * @return {@code true} when a {@code usecols} or {@code columns} keyword argument is present,
+     *         or when a {@code **mapping} is forwarded (it may carry one of those keywords, so
+     *         the call is not reported); {@code false} otherwise
+     */
+    private boolean hasColumnsSpecified(CallExpression callExpression) {
+        for (Argument arg : callExpression.arguments()) {
+            if (arg.is(REGULAR_ARGUMENT)) {
+                if (isColumnKeyword((RegularArgument) arg)) {
+                    return true;
+                }
+            } else if (arg.is(UNPACKING_EXPR) && "**".equals(arg.firstToken().value())) {
+                // Keys of a forwarded mapping are unknown at this point; stay silent rather
+                // than report a call that may well pass 'usecols' or 'columns'.
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /** Tells whether the argument is passed by keyword under one of the column-selection names. */
+    private static boolean isColumnKeyword(RegularArgument argument) {
+        // keywordArgument() is null for positional arguments.
+        return argument.keywordArgument() != null
+                && COLUMN_KEYWORDS.contains(argument.keywordArgument().name());
+    }
+}
diff --git a/...test/java/org/greencodeinitiative/creedengo/python/checks/RequireUsecolsArgumentTest.java b/...test/java/org/greencodeinitiative/creedengo/python/checks/RequireUsecolsArgumentTest.java
@@ -0,0 +1,29 @@
+/*
+ * creedengo - Python language - Provides rules to reduce the environmental footprint of your Python programs
+ * Copyright © 2024 Green Code Initiative (https://green-code-initiative.org)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package org.greencodeinitiative.creedengo.python.checks;
+
+import org.junit.Test;
+import org.sonar.python.checks.utils.PythonCheckVerifier;
+
+public class RequireUsecolsArgumentTest {
+
+    @Test
+    public void test() {
+        PythonCheckVerifier.verify("src/test/resources/checks/requireUsecolsArgument.py", new RequireUsecolsArgument());
+    }
+}
diff --git a/src/test/resources/checks/requireUsecolsArgument.py b/src/test/resources/checks/requireUsecolsArgument.py
@@ -0,0 +1,23 @@
+import pandas as pd
+
+df1 = pd.read_csv('data.csv')  # Noncompliant {{Specify 'usecols' when reading a DataFrame to load only necessary columns}}
+df2 = pd.read_parquet('data.parquet')  # Noncompliant {{Specify 'usecols' when reading a DataFrame to load only necessary columns}}
+df3 = pd.read_excel('data.xlsx')  # Noncompliant {{Specify 'usecols' when reading a DataFrame to load only necessary columns}}
+df4 = pd.read_json('data.json')  # Noncompliant {{Specify 'usecols' when reading a DataFrame to load only necessary columns}}
+df5 = pd.read_feather('data.feather')  # Noncompliant {{Specify 'usecols' when reading a DataFrame to load only necessary columns}}
+
+df7 = pd.read_csv('data.csv', usecols=['col1', 'col2'])
+df8 = pd.read_parquet('data.parquet', columns=['col1', 'col2'])
+df9 = pd.read_excel('data.xlsx', usecols=[0, 1, 2])
+df10 = pd.read_json('data.json', columns=['col1', 'col2'])
+df11 = pd.read_feather('data.feather', columns=['col1', 'col2'])
+
+import pandas as pandas_alias
+df14 = pandas_alias.read_csv('data.csv')  # Noncompliant {{Specify 'usecols' when reading a DataFrame to load only necessary columns}}
+df15 = pandas_alias.read_csv('data.csv', usecols=['col1'])
+
+df16 = pd.read_csv('data.csv', sep=',', header=0)  # Noncompliant {{Specify 'usecols' when reading a DataFrame to load only necessary columns}}
+df17 = pd.read_csv('data.csv', sep=',', header=0, usecols=['col1', 'col2'])
+
+cols_to_use = ['col1', 'col2', 'col3']
+df18 = pd.read_parquet('data.parquet', columns=cols_to_use)