Skip to content

Commit a5d65ad

Browse files
authored
Merge pull request #352 from DataRecce/feature/drc-519-enhancement-provide-a-recce-command-to-manage-the-state
[Draft] DRC-519 command to purge existing state file from cloud
2 parents 8ab38b0 + 291cdbb commit a5d65ad

File tree

4 files changed

+201
-53
lines changed

4 files changed

+201
-53
lines changed

recce/cli.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,9 @@ def server(host, port, state_file=None, **kwargs):
227227
@add_options(recce_options)
228228
@add_options(recce_cloud_options)
229229
def run(output, **kwargs):
230+
"""
231+
Run recce to generate the state file in CI/CD pipeline
232+
"""
230233
from rich.console import Console
231234
handle_debug_flag(**kwargs)
232235
console = Console()
@@ -267,6 +270,9 @@ def run(output, **kwargs):
267270
@add_options(recce_options)
268271
@add_options(recce_cloud_options)
269272
def summary(state_file, **kwargs):
273+
"""
274+
Generate a summary of the recce state file
275+
"""
270276
from rich.console import Console
271277
from .core import load_context
272278
handle_debug_flag(**kwargs)
@@ -301,6 +307,66 @@ def summary(state_file, **kwargs):
301307
print(output)
302308

303309

310+
@cli.command(cls=TrackCommand)
@click.option('--cloud-token', help='The token used by Recce Cloud.', type=click.STRING,
              envvar='GITHUB_TOKEN')
@click.option('--state-file-host', help='The host to fetch the state file from.', type=click.STRING,
              envvar='RECCE_STATE_FILE_HOST', default='cloud.datarecce.io', hidden=True)
@click.option('--force', '-f', help='Bypasses the confirmation prompt. Purge the state file directly.', is_flag=True)
@add_options(recce_options)
def purge_cloud_state(**kwargs):
    """
    Purge the state file from cloud
    """
    # NOTE: the docstring above is the command's help text, so it is kept short.
    #
    # Flow: load the cloud state for the current pull request, show where it
    # lives, ask for confirmation (unless --force) and purge it.
    # Returns 0 when the state was purged, nothing was found, or the user
    # declined; returns 1 when loading, verification or the purge itself failed.
    from rich.console import Console
    handle_debug_flag(**kwargs)
    console = Console()
    cloud_options = {
        'host': kwargs.get('state_file_host'),
        'token': kwargs.get('cloud_token'),
    }
    force_to_purge = kwargs.get('force', False)

    try:
        console.rule('Check Recce State from Cloud')
        recce_state = RecceStateLoader(review_mode=False, cloud_mode=True,
                                       state_file=None, cloud_options=cloud_options)
    except Exception as e:
        console.print("[[red]Error[/red]] Failed to load recce state file.")
        console.print(f" Reason: {e}")
        return 1

    if not recce_state.verify():
        error, hint = recce_state.error_and_hint
        console.print(f"[[red]Error[/red]] {error}")
        console.print(f"{hint}")
        return 1

    info = recce_state.info()
    if info is None:
        console.print("[[yellow]Skip[/yellow]] No state file found in cloud.")
        return 0

    # Describe what is about to be removed so the user can confirm it.
    pr_info = info.get('pull_request')
    console.print('[green]State File hosted by[/green]', info.get('source'))
    console.print('[green]GitHub Repository[/green]', pr_info.repository)
    console.print(f'[green]GitHub Pull Request[/green]\n{pr_info.title} #{pr_info.id}')
    console.print(f'Branch merged into [blue]{pr_info.base_branch}[/blue] from [blue]{pr_info.branch}[/blue]')
    console.print(pr_info.url)

    try:
        confirmed = force_to_purge or click.confirm('\nDo you want to purge the state file?')
    except click.exceptions.Abort:
        # Aborting at the prompt is treated the same as answering "no".
        return 0

    if not confirmed:
        return 0

    if recce_state.purge() is True:
        console.rule('Purged Successfully')
        return 0

    # A failed purge must not be reported as success.
    console.rule('Failed to Purge', style='red')
    console.print(f'Reason: {recce_state.error_message}')
    return 1
368+
369+
304370
@cli.group('github', short_help='GitHub related commands', hidden=True)
305371
def github(**kwargs):
306372
pass

recce/pull_request.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,25 @@
11
import json
22
import os
3-
from typing import Optional
3+
from typing import Optional, Union
44

55
import requests
6+
from pydantic import BaseModel
67

78
from recce.git import hosting_repo
89
from recce.github import recce_pr_information
9-
from recce.state import PullRequestInfo
10+
from recce.util.pydantic_model import pydantic_model_dump
11+
12+
13+
class PullRequestInfo(BaseModel):
    """Metadata describing a pull request; every field defaults to ``None``."""

    # Pull request identifier; accepted as either an int or a str.
    id: Optional[Union[int, str]] = None
    # Human-readable title of the pull request.
    title: Optional[str] = None
    # Link to the pull request.
    url: Optional[str] = None
    # Branch the pull request comes from.
    branch: Optional[str] = None
    # Branch the pull request is merged into.
    base_branch: Optional[str] = None
    # Repository the pull request belongs to.
    repository: Optional[str] = None

    def to_dict(self):
        """Return this model as a plain dict via ``pydantic_model_dump``."""
        return pydantic_model_dump(self)
1023

1124

1225
def fetch_pr_metadata(**kwargs):

recce/state.py

Lines changed: 98 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,14 @@
99
from typing import List, Optional, Dict, Union
1010

1111
import botocore.exceptions
12-
import pydantic.version
1312
from pydantic import BaseModel
1413
from pydantic import Field
1514

1615
from recce import get_version
1716
from recce.git import current_branch
1817
from recce.models.types import Run, Check
18+
from recce.pull_request import fetch_pr_metadata, PullRequestInfo
19+
from recce.util.pydantic_model import pydantic_model_json_dump, pydantic_model_dump
1920

2021
logger = logging.getLogger('uvicorn')
2122

@@ -40,26 +41,6 @@ def check_s3_bucket(bucket_name: str):
4041
return True, None
4142

4243

43-
def pydantic_model_json_dump(model: BaseModel):
44-
pydantic_version = pydantic.version.VERSION
45-
pydantic_major = pydantic_version.split(".")[0]
46-
47-
if pydantic_major == "1":
48-
return model.json(exclude_none=True)
49-
else:
50-
return model.model_dump_json(exclude_none=True)
51-
52-
53-
def pydantic_model_dump(model: BaseModel):
54-
pydantic_version = pydantic.version.VERSION
55-
pydantic_major = pydantic_version.split(".")[0]
56-
57-
if pydantic_major == "1":
58-
return model.dict()
59-
else:
60-
return model.model_dump()
61-
62-
6344
class GitRepoInfo(BaseModel):
6445
branch: Optional[str] = None
6546

@@ -75,18 +56,6 @@ def to_dict(self):
7556
return pydantic_model_dump(self)
7657

7758

78-
class PullRequestInfo(BaseModel):
79-
id: Optional[Union[int, str]] = None
80-
title: Optional[str] = None
81-
url: Optional[str] = None
82-
branch: Optional[str] = None
83-
base_branch: Optional[str] = None
84-
repository: Optional[str] = None
85-
86-
def to_dict(self):
87-
return pydantic_model_dump(self)
88-
89-
9059
class RecceStateMetadata(BaseModel):
9160
schema_version: str = 'v0'
9261
recce_version: str = Field(default_factory=lambda: get_version())
@@ -167,6 +136,13 @@ def __init__(self,
167136
self.hint_message = None
168137
self.state: RecceState | None = None
169138
self.state_lock = threading.Lock()
139+
self.pr_info = None
140+
141+
if self.cloud_mode:
142+
if self.cloud_options.get('token'):
143+
self.pr_info = fetch_pr_metadata(github_token=self.cloud_options.get('token'))
144+
else:
145+
raise Exception('No GitHub token is provided to access the pull request information.')
170146

171147
# Load the state
172148
self.load()
@@ -230,6 +206,44 @@ def refresh(self):
230206
new_state = self.load(refresh=True)
231207
return new_state
232208

209+
def info(self):
    """Describe where the loaded state comes from.

    Returns:
        ``None`` (and sets ``self.error_message``) when no state is loaded.
        Otherwise a dict with:
          * ``mode``: ``'cloud'`` or ``'local'``
          * ``source``: the ``s3://`` host, ``'Recce Cloud'``, or the local
            state file path
          * ``pull_request``: ``self.pr_info`` (cloud mode only)
    """
    if self.state is None:
        self.error_message = 'No state is loaded.'
        return None

    if not self.cloud_mode:
        return {'mode': 'local', 'source': self.state_file}

    # ``get('host', '')`` still yields None when the key is present with a
    # None value, so normalise before calling ``startswith``.
    host = self.cloud_options.get('host') or ''
    return {
        'mode': 'cloud',
        'source': host if host.startswith('s3://') else 'Recce Cloud',
        'pull_request': self.pr_info,
    }
227+
228+
def purge(self) -> bool:
    """Remove the persisted state.

    In cloud mode the work is delegated to the S3 or Recce Cloud purge
    helper, depending on whether the configured host starts with ``s3://``.
    In local mode the state file is removed from disk.

    Returns:
        ``True`` when the state was removed, ``False`` otherwise. For local
        failures ``self.error_message`` holds the reason.
    """
    if self.cloud_mode is True:
        # Guard against a host key that is present but set to None.
        host = self.cloud_options.get('host') or ''
        if host.startswith('s3://'):
            return self._purge_state_from_s3_bucket()
        return self._purge_state_from_cloud()

    if self.state_file is None:
        self.error_message = 'No state file is provided. Skip removing the state file.'
        return False

    try:
        os.remove(self.state_file)
    except Exception as e:
        self.error_message = f'Failed to remove the state file: {e}'
        return False
    # Report success explicitly; callers compare the result with ``is True``.
    return True
246+
233247
def _get_presigned_url(self, pr_info: PullRequestInfo, artifact_name: str, method: str = 'upload') -> str:
234248
import requests
235249
# Step 1: Get the token
@@ -254,23 +268,21 @@ def _load_state_from_file(self, file_path: Optional[str] = None) -> RecceState:
254268
return RecceState.from_file(file_path) if file_path else None
255269

256270
def _load_state_from_cloud(self) -> RecceState:
257-
from recce.pull_request import fetch_pr_metadata
258-
pr_info = fetch_pr_metadata(github_token=self.cloud_options.get('token'))
259-
if (pr_info.id is None) or (pr_info.repository is None):
271+
if (self.pr_info is None) or (self.pr_info.id is None) or (self.pr_info.repository is None):
260272
raise Exception('Cannot get the pull request information from GitHub.')
261273

262274
if self.cloud_options.get('host', '').startswith('s3://'):
263275
logger.debug('Fetching state from AWS S3 bucket...')
264-
return self._load_state_from_s3_bucket(pr_info)
276+
return self._load_state_from_s3_bucket()
265277
else:
266278
logger.debug('Fetching state from Recce Cloud...')
267-
return self._load_state_from_recce_cloud(pr_info)
279+
return self._load_state_from_recce_cloud()
268280

269-
def _load_state_from_recce_cloud(self, pr_info) -> Union[RecceState, None]:
281+
def _load_state_from_recce_cloud(self) -> Union[RecceState, None]:
270282
import tempfile
271283
import requests
272284

273-
presigned_url = self._get_presigned_url(pr_info, RECCE_STATE_COMPRESSED_FILE, method='download')
285+
presigned_url = self._get_presigned_url(self.pr_info, RECCE_STATE_COMPRESSED_FILE, method='download')
274286

275287
with tempfile.NamedTemporaryFile() as tmp:
276288
response = requests.get(presigned_url)
@@ -284,12 +296,12 @@ def _load_state_from_recce_cloud(self, pr_info) -> Union[RecceState, None]:
284296
f.write(response.content)
285297
return RecceState.from_file(tmp.name, compressed=True)
286298

287-
def _load_state_from_s3_bucket(self, pr_info) -> Union[RecceState, None]:
299+
def _load_state_from_s3_bucket(self) -> Union[RecceState, None]:
288300
import boto3
289301
import tempfile
290302
s3_client = boto3.client('s3')
291303
s3_bucket_name = self.cloud_options.get('host').replace('s3://', '')
292-
s3_bucket_key = f'github/{pr_info.repository}/pulls/{pr_info.id}/{RECCE_STATE_COMPRESSED_FILE}'
304+
s3_bucket_key = f'github/{self.pr_info.repository}/pulls/{self.pr_info.id}/{RECCE_STATE_COMPRESSED_FILE}'
293305

294306
rc, error_message = check_s3_bucket(s3_bucket_name)
295307
if rc is False:
@@ -308,23 +320,21 @@ def _load_state_from_s3_bucket(self, pr_info) -> Union[RecceState, None]:
308320
return RecceState.from_file(tmp.name, compressed=True)
309321

310322
def _export_state_to_cloud(self) -> Union[str, None]:
311-
from recce.pull_request import fetch_pr_metadata
312-
pr_info = fetch_pr_metadata(github_token=self.cloud_options.get('token'))
313-
if (pr_info.id is None) or (pr_info.repository is None):
323+
if (self.pr_info is None) or (self.pr_info.id is None) or (self.pr_info.repository is None):
314324
raise Exception('Cannot get the pull request information from GitHub.')
315325

316326
if self.cloud_options.get('host', '').startswith('s3://'):
317327
logger.info("Store recce state to AWS S3 bucket")
318-
return self._export_state_to_s3_bucket(pr_info)
328+
return self._export_state_to_s3_bucket()
319329
else:
320330
logger.info("Store recce state to Recce Cloud")
321-
return self._export_state_to_recce_cloud(pr_info)
331+
return self._export_state_to_recce_cloud()
322332

323-
def _export_state_to_recce_cloud(self, pr_info) -> Union[str, None]:
333+
def _export_state_to_recce_cloud(self) -> Union[str, None]:
324334
import tempfile
325335
import requests
326336

327-
presigned_url = self._get_presigned_url(pr_info, RECCE_STATE_COMPRESSED_FILE, method='upload')
337+
presigned_url = self._get_presigned_url(self.pr_info, RECCE_STATE_COMPRESSED_FILE, method='upload')
328338
with tempfile.NamedTemporaryFile() as tmp:
329339
self._export_state_to_file(tmp.name, compress=True)
330340
response = requests.put(presigned_url, data=open(tmp.name, 'rb').read())
@@ -333,12 +343,12 @@ def _export_state_to_recce_cloud(self, pr_info) -> Union[str, None]:
333343
return 'Failed to upload the state file to Recce Cloud.'
334344
return 'The state file is uploaded to Recce Cloud.'
335345

336-
def _export_state_to_s3_bucket(self, pr_info) -> Union[str, None]:
346+
def _export_state_to_s3_bucket(self) -> Union[str, None]:
337347
import boto3
338348
import tempfile
339349
s3_client = boto3.client('s3')
340350
s3_bucket_name = self.cloud_options.get('host').replace('s3://', '')
341-
s3_bucket_key = f'github/{pr_info.repository}/pulls/{pr_info.id}/{RECCE_STATE_COMPRESSED_FILE}'
351+
s3_bucket_key = f'github/{self.pr_info.repository}/pulls/{self.pr_info.id}/{RECCE_STATE_COMPRESSED_FILE}'
342352

343353
rc, error_message = check_s3_bucket(s3_bucket_name)
344354
if rc is False:
@@ -364,3 +374,40 @@ def _export_state_to_file(self, file_path: Optional[str] = None, compress: bool
364374
with open(file_path, 'w') as f:
365375
f.write(json_data)
366376
return f'The state file is stored at \'{file_path}\''
377+
378+
def _purge_state_from_cloud(self) -> bool:
    """Delete the pull request's artifacts through the Recce Cloud API.

    Sends an authenticated ``DELETE`` to the artifacts endpoint of the
    pull request held in ``self.pr_info``.

    Returns:
        ``True`` when the API answers ``204``. Otherwise ``False`` with
        ``self.error_message`` set to the response body, the unexpected
        status code, or the network error.
    """
    import requests

    # Without a timeout a stalled connection would block the CLI forever.
    request_timeout_seconds = 30

    logger.debug('Purging the state from Recce Cloud...')
    token = self.cloud_options.get('token')
    api_url = f'{RECCE_CLOUD_API_HOST}/api/v1/{self.pr_info.repository}/pulls/{self.pr_info.id}/artifacts'
    headers = {
        'Authorization': f'Bearer {token}'
    }

    try:
        response = requests.delete(api_url, headers=headers, timeout=request_timeout_seconds)
    except requests.RequestException as e:
        # Keep the bool/error_message contract instead of leaking a traceback.
        self.error_message = f'Failed to reach Recce Cloud: {e}'
        return False

    if response.status_code != 204:
        self.error_message = response.text or f'Unexpected response status: {response.status_code}'
        return False
    return True
391+
392+
def _purge_state_from_s3_bucket(self) -> bool:
    """Delete every object stored for the pull request in the S3 bucket.

    Objects are looked up under ``github/<repository>/pulls/<id>/`` in the
    bucket named by the ``s3://`` host and removed page by page.

    Returns:
        ``True`` when at least one object was found and all deletions
        succeeded. Otherwise ``False`` with ``self.error_message`` set.
    """
    import boto3
    from rich.console import Console

    console = Console()
    logger.debug('Purging the state from AWS S3 bucket...')
    s3_client = boto3.client('s3')
    s3_bucket_name = self.cloud_options.get('host').replace('s3://', '')
    s3_key_prefix = f'github/{self.pr_info.repository}/pulls/{self.pr_info.id}/'

    found_any = False
    try:
        # Paginate so that prefixes holding more keys than one listing
        # response returns are still purged completely.
        paginator = s3_client.get_paginator('list_objects_v2')
        for page in paginator.paginate(Bucket=s3_bucket_name, Prefix=s3_key_prefix):
            delete_objects = [{'Key': obj['Key']} for obj in page.get('Contents', [])]
            if not delete_objects:
                continue
            found_any = True

            delete_response = s3_client.delete_objects(Bucket=s3_bucket_name,
                                                       Delete={'Objects': delete_objects})
            # Only report keys that S3 confirms were deleted.
            for deleted in delete_response.get('Deleted', []):
                console.print(f"[green]Deleted[/green]: {deleted['Key']}")

            errors = delete_response.get('Errors')
            if errors:
                failed_keys = ', '.join(str(err.get('Key')) for err in errors)
                self.error_message = f'Failed to delete objects from the S3 bucket: {failed_keys}'
                return False
            if 'Deleted' not in delete_response:
                self.error_message = 'The S3 bucket did not confirm any deletion.'
                return False
    except (botocore.exceptions.BotoCoreError, botocore.exceptions.ClientError) as e:
        self.error_message = f'Failed to purge the state from the S3 bucket: {e}'
        return False

    if not found_any:
        self.error_message = f'No state file found under s3://{s3_bucket_name}/{s3_key_prefix}'
        return False
    return True

recce/util/pydantic_model.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import pydantic
2+
from pydantic import BaseModel
3+
4+
5+
def pydantic_model_json_dump(model: BaseModel):
    """Serialize *model* to a JSON string, leaving out fields that are ``None``.

    The call used depends on the major version of the installed pydantic:
    ``model.json`` for 1.x, ``model.model_dump_json`` for anything else.
    """
    major_version = pydantic.version.VERSION.split(".")[0]

    if major_version != "1":
        return model.model_dump_json(exclude_none=True)
    return model.json(exclude_none=True)
13+
14+
15+
def pydantic_model_dump(model: BaseModel):
    """Convert *model* to a plain dict.

    The call used depends on the major version of the installed pydantic:
    ``model.dict`` for 1.x, ``model.model_dump`` for anything else.
    """
    major_version = pydantic.version.VERSION.split(".")[0]

    if major_version != "1":
        return model.model_dump()
    return model.dict()

0 commit comments

Comments
 (0)