Skip to content

Commit a2e392e

Browse files
authored
Add a flush-projects management command for bulk deletion #1289 (#1291)
Signed-off-by: tdruez <tdruez@nexb.com>
1 parent 371199f commit a2e392e

File tree

5 files changed

+173
-0
lines changed

5 files changed

+173
-0
lines changed

CHANGELOG.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ v34.6.4 (unreleased)
77
- Add all "classify" plugin fields from scancode-toolkit on the CodebaseResource model.
88
https://github.com/nexB/scancode.io/issues/1275
99

10+
- Add a ``flush-projects`` management command, to delete all project data and their
11+
related work directories created more than a specified number of days ago.
12+
https://github.com/nexB/scancode.io/issues/1289
13+
1014
v34.6.3 (2024-06-21)
1115
--------------------
1216

docs/command-line-interface.rst

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,27 @@ Optional arguments:
298298
- ``--no-input`` Does not prompt the user for input of any kind.
299299

300300

301+
.. _cli_flush_projects:
302+
303+
`$ scanpipe flush-projects`
304+
---------------------------
305+
306+
Delete all project data and their related work directories created more than a
307+
specified number of days ago.
308+
309+
Optional arguments:
310+
311+
- ``--retain-days RETAIN_DAYS`` Specify the number of days to retain data.
312+
All data older than this number of days will be deleted.
313+
**Defaults to 0 (delete all data)**.
314+
315+
For example, to delete all projects created more than one week ago::
316+
317+
scanpipe flush-projects --retain-days 7
318+
319+
- ``--no-input`` Does not prompt the user for input of any kind.
320+
321+
301322
.. _cli_create_user:
302323

303324
`$ scanpipe create-user <username>`

docs/faq.rst

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,3 +249,32 @@ Note that only the HTTPS type of URL is supported::
249249
A GitHub repository URL example::
250250

251251
https://github.com/username/repository.git
252+
253+
How can I cleanup my ScanCode.io installation, removing all projects and related data?
254+
--------------------------------------------------------------------------------------
255+
256+
You can use the :ref:`cli_flush_projects` command to perform bulk deletion of projects
257+
and their associated data stored on disk::
258+
259+
$ scanpipe flush-projects
260+
261+
**Confirmation will be required before deletion.**
262+
263+
To automate this process, such as running it from a cron job, you can use the
264+
``--no-input`` option to skip confirmation prompts.
265+
266+
Additionally, you can retain specific projects and their data based on their
267+
creation date using the ``--retain-days`` option.
268+
269+
Here's an example of a crontab entry that runs daily and flushes all projects and
270+
data older than 7 days::
271+
272+
@daily scanpipe flush-projects --retain-days 7 --no-input
273+
274+
.. note:: If you are using Docker to run ScanCode.io, you can run the scanpipe
275+
``flush-projects`` command using::
276+
277+
docker compose run --rm web scanpipe flush-projects
278+
279+
See the :ref:`command_line_interface` chapter for more information about the scanpipe
280+
command.
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
#
3+
# http://nexb.com and https://github.com/nexB/scancode.io
4+
# The ScanCode.io software is licensed under the Apache License version 2.0.
5+
# Data generated with ScanCode.io is provided as-is without warranties.
6+
# ScanCode is a trademark of nexB Inc.
7+
#
8+
# You may not use this software except in compliance with the License.
9+
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
10+
# Unless required by applicable law or agreed to in writing, software distributed
11+
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12+
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
13+
# specific language governing permissions and limitations under the License.
14+
#
15+
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
16+
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
17+
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
18+
# for any legal advice.
19+
#
20+
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
21+
# Visit https://github.com/nexB/scancode.io for support and download.
22+
23+
import datetime
24+
import sys
25+
26+
from django.core.management.base import BaseCommand
from django.core.management.base import CommandError
27+
from django.template.defaultfilters import pluralize
28+
from django.utils import timezone
29+
30+
from scanpipe.models import Project
31+
32+
33+
class Command(BaseCommand):
    """
    Bulk-delete projects, optionally keeping the ones created recently.

    With the default ``--retain-days 0`` every project is selected. With a
    positive value, only projects whose ``created_date`` is older than that
    many days are selected. Unless ``--no-input`` is given, the user must type
    ``yes`` at a confirmation prompt before anything is deleted.
    """

    help = (
        "Delete all project data and their related work directories created more than "
        "a specified number of days ago."
    )

    def add_arguments(self, parser):
        """Register the ``--retain-days`` and ``--no-input`` options."""
        super().add_arguments(parser)
        parser.add_argument(
            "--retain-days",
            type=int,
            help=(
                "Optional. Specify the number of days to retain data. "
                "All data older than this number of days will be deleted. "
                "Defaults to 0 (delete all data)."
            ),
            default=0,
        )
        # ``store_false`` on ``dest="interactive"``: the option defaults to True
        # (prompt the user) and is switched off by passing ``--no-input``.
        parser.add_argument(
            "--no-input",
            action="store_false",
            dest="interactive",
            help="Do not prompt the user for input of any kind.",
        )

    def handle(self, *inputs, **options):
        """
        Select the projects to flush, ask for confirmation, and delete them.

        Exits the process with status 0 (``SystemExit``) when there is nothing
        to remove or when the user declines the confirmation prompt.

        Raises ``CommandError`` when ``--retain-days`` is negative.
        """
        verbosity = options["verbosity"]
        retain_days = options["retain_days"]

        # A negative value would move the cutoff date into the future and match
        # every project, silently turning a typo into a full wipe. Refuse it
        # before touching the database.
        if retain_days and retain_days < 0:
            raise CommandError("--retain-days must be zero or a positive integer.")

        projects = Project.objects.all()

        # 0 (the default) means "retain nothing": keep the unfiltered queryset.
        if retain_days:
            cutoff_date = timezone.now() - datetime.timedelta(days=retain_days)
            projects = projects.filter(created_date__lt=cutoff_date)

        projects_count = projects.count()
        if projects_count == 0:
            if verbosity > 0:
                self.stdout.write("No projects to remove.")
            # Nothing to do is not an error: terminate with a success status.
            sys.exit(0)

        if options["interactive"]:
            confirm = input(
                f"You have requested the deletion of {projects_count} "
                f"project{pluralize(projects_count)}.\n"
                "This will IRREVERSIBLY DESTROY all data related to those projects.\n"
                "Are you sure you want to do this?\n"
                "Type 'yes' to continue, or 'no' to cancel: "
            )
            # Only the exact string "yes" is accepted; anything else cancels.
            if confirm != "yes":
                if verbosity > 0:
                    self.stdout.write("Flush cancelled.")
                sys.exit(0)

        # Projects are deleted one instance at a time rather than through a
        # bulk queryset delete.
        # NOTE(review): presumably so that Project.delete() can also remove the
        # project work directory -- confirm against the model implementation.
        deletion_count = 0
        for project in projects:
            project.delete()
            deletion_count += 1

        if verbosity > 0:
            msg = (
                f"{deletion_count} project{pluralize(deletion_count)} and "
                f"{pluralize(deletion_count, 'its,their')} related data have been "
                f"removed."
            )
            self.stdout.write(msg, self.style.SUCCESS)

scanpipe/tests/test_commands.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -611,6 +611,28 @@ def test_scanpipe_management_command_reset_project(self):
611611
self.assertEqual(1, len(Project.get_root_content(project.input_path)))
612612
self.assertEqual(0, len(Project.get_root_content(project.codebase_path)))
613613

614+
def test_scanpipe_management_command_flush_projects(self):
    """
    Check ``flush-projects`` with and without the ``--retain-days`` option.

    First run: with a 7-day retention, only the project back-dated to 10 days
    ago is removed. Second run: without retention, all projects are removed.
    """
    recent_project = Project.objects.create(name="project1")
    stale_project = Project.objects.create(name="project2")
    # Back-date the second project so it falls outside the retention window.
    stale_project.update(created_date=timezone.now() - datetime.timedelta(days=10))

    stdout = StringIO()
    call_command(
        "flush-projects",
        "--retain-days",
        7,
        "--no-color",
        "--no-input",
        stdout=stdout,
    )
    self.assertEqual(
        "1 project and its related data have been removed.",
        stdout.getvalue().strip(),
    )
    # Only the recently created project must be left in the database.
    self.assertEqual(recent_project, Project.objects.get())

    # Re-create a second project, then flush everything without retention.
    Project.objects.create(name="project2")
    stdout = StringIO()
    call_command("flush-projects", "--no-color", "--no-input", stdout=stdout)
    self.assertEqual(
        "2 projects and their related data have been removed.",
        stdout.getvalue().strip(),
    )
635+
614636
def test_scanpipe_management_command_create_user(self):
615637
out = StringIO()
616638

0 commit comments

Comments
 (0)