diff --git a/CHANGELOG.md b/CHANGELOG.md index f97f08d..3ac7408 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- Add rule GCI98 Require Usecols Argument in Pandas Read Functions + ### Changed - compatibility updates for SonarQube 25.5.0 diff --git a/src/main/java/org/greencodeinitiative/creedengo/python/PythonRuleRepository.java b/src/main/java/org/greencodeinitiative/creedengo/python/PythonRuleRepository.java index c385979..500325d 100644 --- a/src/main/java/org/greencodeinitiative/creedengo/python/PythonRuleRepository.java +++ b/src/main/java/org/greencodeinitiative/creedengo/python/PythonRuleRepository.java @@ -40,7 +40,8 @@ public class PythonRuleRepository implements RulesDefinition, PythonCustomRuleRe AvoidFullSQLRequest.class, AvoidListComprehensionInIterations.class, DetectUnoptimizedImageFormat.class, - AvoidMultipleIfElseStatementCheck.class + AvoidMultipleIfElseStatementCheck.class, + RequireUsecolsArgument.class ); public static final String LANGUAGE = "py"; diff --git a/src/main/java/org/greencodeinitiative/creedengo/python/checks/RequireUsecolsArgument.java b/src/main/java/org/greencodeinitiative/creedengo/python/checks/RequireUsecolsArgument.java new file mode 100644 index 0000000..097de47 --- /dev/null +++ b/src/main/java/org/greencodeinitiative/creedengo/python/checks/RequireUsecolsArgument.java @@ -0,0 +1,77 @@ +/* + * creedengo - Python language - Provides rules to reduce the environmental footprint of your Python programs + * Copyright © 2024 Green Code Initiative (https://green-code-initiative.org) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package org.greencodeinitiative.creedengo.python.checks; + +import java.util.Arrays; +import java.util.List; +import org.sonar.check.Rule; +import org.sonar.plugins.python.api.PythonSubscriptionCheck; +import org.sonar.plugins.python.api.SubscriptionContext; +import org.sonar.plugins.python.api.tree.Argument; +import org.sonar.plugins.python.api.tree.CallExpression; +import org.sonar.plugins.python.api.tree.Tree; +import org.sonar.plugins.python.api.tree.Expression; +import org.sonar.plugins.python.api.tree.QualifiedExpression; +import org.sonar.plugins.python.api.tree.RegularArgument; +import static org.sonar.plugins.python.api.tree.Tree.Kind.*; + +@Rule(key = "GCI98") +public class RequireUsecolsArgument extends PythonSubscriptionCheck { + + public static final String DESCRIPTION = "Specify 'usecols' or 'columns' when reading a DataFrame to load only necessary columns"; + private static final List READ_METHODS = Arrays.asList( + "read_csv", "read_parquet", "read_excel", "read_feather", "read_json" + ); + + @Override + public void initialize(Context context) { + context.registerSyntaxNodeConsumer(Tree.Kind.CALL_EXPR, this::visitCallExpression); + } + + public void visitCallExpression(SubscriptionContext ctx) { + CallExpression callExpression = (CallExpression) ctx.syntaxNode(); + Expression callee = callExpression.callee(); + + if (callee.is(Tree.Kind.QUALIFIED_EXPR)) { + QualifiedExpression qualifiedExpression = (QualifiedExpression) callee; + String methodName = qualifiedExpression.name().name(); + + if (READ_METHODS.contains(methodName)) { + + if (!hasColumnsSpecified(callExpression)) { + 
ctx.addIssue(callExpression.firstToken(), DESCRIPTION); + } + } + } + } + + private boolean hasColumnsSpecified(CallExpression callExpression) { + List arguments = callExpression.arguments(); + + for (Argument arg : arguments) { + if (arg.is(REGULAR_ARGUMENT)) { + RegularArgument regularArg = (RegularArgument) arg; + String paramName = regularArg.keywordArgument() != null ? regularArg.keywordArgument().name() : null; + if (paramName != null && (paramName.equals("usecols") || paramName.equals("columns"))) { + return true; + } + } + } + return false; + } +} \ No newline at end of file diff --git a/src/test/java/org/greencodeinitiative/creedengo/python/checks/RequireUsecolsArgumentTest.java b/src/test/java/org/greencodeinitiative/creedengo/python/checks/RequireUsecolsArgumentTest.java new file mode 100644 index 0000000..beee891 --- /dev/null +++ b/src/test/java/org/greencodeinitiative/creedengo/python/checks/RequireUsecolsArgumentTest.java @@ -0,0 +1,29 @@ +/* + * creedengo - Python language - Provides rules to reduce the environmental footprint of your Python programs + * Copyright © 2024 Green Code Initiative (https://green-code-initiative.org) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ +package org.greencodeinitiative.creedengo.python.checks; + +import org.junit.Test; +import org.sonar.python.checks.utils.PythonCheckVerifier; + +public class RequireUsecolsArgumentTest { + + @Test + public void test() { + PythonCheckVerifier.verify("src/test/resources/checks/requireUsecolsArgument.py", new RequireUsecolsArgument()); + } +} diff --git a/src/test/resources/checks/requireUsecolsArgument.py b/src/test/resources/checks/requireUsecolsArgument.py new file mode 100644 index 0000000..9ab8667 --- /dev/null +++ b/src/test/resources/checks/requireUsecolsArgument.py @@ -0,0 +1,23 @@ +import pandas as pd + +df1 = pd.read_csv('data.csv') # Noncompliant {{Specify 'usecols' when reading a DataFrame to load only necessary columns}} +df2 = pd.read_parquet('data.parquet') # Noncompliant {{Specify 'usecols' when reading a DataFrame to load only necessary columns}} +df3 = pd.read_excel('data.xlsx') # Noncompliant {{Specify 'usecols' when reading a DataFrame to load only necessary columns}} +df4 = pd.read_json('data.json') # Noncompliant {{Specify 'usecols' when reading a DataFrame to load only necessary columns}} +df5 = pd.read_feather('data.feather') # Noncompliant {{Specify 'usecols' when reading a DataFrame to load only necessary columns}} + +df7 = pd.read_csv('data.csv', usecols=['col1', 'col2']) +df8 = pd.read_parquet('data.parquet', columns=['col1', 'col2']) +df9 = pd.read_excel('data.xlsx', usecols=[0, 1, 2]) +df10 = pd.read_json('data.json', columns=['col1', 'col2']) +df11 = pd.read_feather('data.feather', columns=['col1', 'col2']) + +import pandas as pandas_alias +df14 = pandas_alias.read_csv('data.csv') # Noncompliant {{Specify 'usecols' when reading a DataFrame to load only necessary columns}} +df15 = pandas_alias.read_csv('data.csv', usecols=['col1']) + +df16 = pd.read_csv('data.csv', sep=',', header=0) # Noncompliant {{Specify 'usecols' when reading a DataFrame to load only necessary columns}} +df17 = pd.read_csv('data.csv', sep=',', header=0, 
# Fixture for rule GCI98 (RequireUsecolsArgument).
# The text inside {{...}} must match the message emitted by the check exactly.
import pandas as pd

# Reader calls with no column selection: an issue is expected on each line.
df1 = pd.read_csv('data.csv')  # Noncompliant {{Specify 'usecols' or 'columns' when reading a DataFrame to load only necessary columns}}
df2 = pd.read_parquet('data.parquet')  # Noncompliant {{Specify 'usecols' or 'columns' when reading a DataFrame to load only necessary columns}}
df3 = pd.read_excel('data.xlsx')  # Noncompliant {{Specify 'usecols' or 'columns' when reading a DataFrame to load only necessary columns}}
df4 = pd.read_json('data.json')  # Noncompliant {{Specify 'usecols' or 'columns' when reading a DataFrame to load only necessary columns}}
df5 = pd.read_feather('data.feather')  # Noncompliant {{Specify 'usecols' or 'columns' when reading a DataFrame to load only necessary columns}}

# Same readers with a 'usecols' or 'columns' keyword: no issue expected.
df7 = pd.read_csv('data.csv', usecols=['col1', 'col2'])
df8 = pd.read_parquet('data.parquet', columns=['col1', 'col2'])
df9 = pd.read_excel('data.xlsx', usecols=[0, 1, 2])
df10 = pd.read_json('data.json', columns=['col1', 'col2'])
df11 = pd.read_feather('data.feather', columns=['col1', 'col2'])

# The check matches on the method name, so a different module alias behaves the same.
import pandas as pandas_alias
df14 = pandas_alias.read_csv('data.csv')  # Noncompliant {{Specify 'usecols' or 'columns' when reading a DataFrame to load only necessary columns}}
df15 = pandas_alias.read_csv('data.csv', usecols=['col1'])

# Other keyword arguments do not count as a column selection.
df16 = pd.read_csv('data.csv', sep=',', header=0)  # Noncompliant {{Specify 'usecols' or 'columns' when reading a DataFrame to load only necessary columns}}
df17 = pd.read_csv('data.csv', sep=',', header=0,
                   usecols=['col1', 'col2'])

# The keyword's value may be any expression, including a variable.
cols_to_use = ['col1', 'col2', 'col3']
df18 = pd.read_parquet('data.parquet', columns=cols_to_use)