
Commit 66a763c

Author: Diego Ardila
Message: Add readme + ability to deselect slow tests relying on stepfunctions
Parent: acd98ec

File tree: 3 files changed, 51 additions, 8 deletions


README.md

Lines changed: 45 additions & 8 deletions
@@ -6,15 +6,13 @@ Aggregate metrics in ML are not good enough. To improve production ML, you need
 
 Scale Nucleus helps you:
 
-* Visualize your data
-* Curate interesting slices within your dataset
-* Review and manage annotations
-* Measure and debug your model performance
+- Visualize your data
+- Curate interesting slices within your dataset
+- Review and manage annotations
+- Measure and debug your model performance
 
 Nucleus is a new way—the right way—to develop ML models, helping us move away from the concept of one dataset and towards a paradigm of collections of scenarios.
 
-
-
 ## Installation
 
 `$ pip install scale-nucleus`
@@ -26,65 +24,83 @@ The client abstractions serves to authenticate the user and act as the gateway
 for users to interact with their datasets, models, and model runs.
 
 ### Create a client object
+
 ```python
 import nucleus
 client = nucleus.NucleusClient("YOUR_API_KEY_HERE")
 ```
 
 ### Create Dataset
+
 ```python
 dataset = client.create_dataset("My Dataset")
 ```
 
 ### List Datasets
+
 ```python
 datasets = client.list_datasets()
 ```
 
 ### Delete a Dataset
+
 By specifying target dataset id.
 A response code of 200 indicates successful deletion.
+
 ```python
 client.delete_dataset("YOUR_DATASET_ID")
 ```
 
 ### Append Items to a Dataset
+
 You can append both local images and images from the web. Simply specify the location and Nucleus will automatically infer if it's remote or a local file.
+
 ```python
 dataset_item_1 = DatasetItem(image_location="./1.jpeg", reference_id="1", metadata={"key": "value"})
 dataset_item_2 = DatasetItem(image_location="s3://srikanth-nucleus/9-1.jpg", reference_id="2", metadata={"key": "value"})
 ```
 
 The append function expects a list of `DatasetItem` objects to upload, like this:
+
 ```python
 response = dataset.append([dataset_item_1, dataset_item_2])
 ```
 
 ### Get Dataset Info
+
 Tells us the dataset name, number of dataset items, model_runs, and slice_ids.
+
 ```python
 dataset.info
 ```
 
 ### Access Dataset Items
+
 There are three methods to access individual Dataset Items:
 
 (1) Dataset Items are accessible by reference id
+
 ```python
 item = dataset.refloc("my_img_001.png")
 ```
+
 (2) Dataset Items are accessible by index
+
 ```python
 item = dataset.iloc(0)
 ```
+
 (3) Dataset Items are accessible by the dataset_item_id assigned internally
+
 ```python
 item = dataset.loc("dataset_item_id")
 ```
 
 ### Add Annotations
+
 Upload groundtruth annotations for the items in your dataset.
 Box2DAnnotation has same format as https://dashboard.scale.com/nucleus/docs/api#add-ground-truth
+
 ```python
 annotation_1 = BoxAnnotation(reference_id="1", label="label", x=0, y=0, width=10, height=10, annotation_id="ann_1", metadata={})
 annotation_2 = BoxAnnotation(reference_id="2", label="label", x=0, y=0, width=10, height=10, annotation_id="ann_2", metadata={})
@@ -94,6 +110,7 @@ response = dataset.annotate([annotation_1, annotation_2])
 For particularly large payloads, please reference the accompanying scripts in **references**
 
 ### Add Model
+
 The model abstraction is intended to represent a unique architecture.
 Models are independent of any dataset.
 
@@ -102,10 +119,12 @@ model = client.add_model(name="My Model", reference_id="newest-cnn-its-new", met
 ```
 
 ### Upload Predictions to ModelRun
+
 This method populates the model_run object with predictions. `ModelRun` objects need to reference a `Dataset` that has been created.
 Returns the associated model_id, human-readable name of the run, status, and user specified metadata.
 Takes a list of Box2DPredictions within the payload, where Box2DPrediction
 is formulated as in https://dashboard.scale.com/nucleus/docs/api#upload-model-outputs
+
 ```python
 prediction_1 = BoxPrediction(reference_id="1", label="label", x=0, y=0, width=10, height=10, annotation_id="pred_1", confidence=0.9)
 prediction_2 = BoxPrediction(reference_id="2", label="label", x=0, y=0, width=10, height=10, annotation_id="pred_2", confidence=0.2)
@@ -114,39 +133,51 @@ model_run = model.create_run(name="My Model Run", metadata={"timestamp": "121012
 ```
 
 ### Commit ModelRun
+
 The commit action indicates that the user is finished uploading predictions associated
-with this model run. Committing a model run kicks off Nucleus internal processes
+with this model run. Committing a model run kicks off Nucleus internal processes
 to calculate performance metrics like IoU. After being committed, a ModelRun object becomes immutable.
+
 ```python
 model_run.commit()
 ```
 
 ### Get ModelRun Info
+
 Returns the associated model_id, human-readable name of the run, status, and user specified metadata.
+
 ```python
 model_run.info
 ```
 
 ### Accessing ModelRun Predictions
+
 You can access the modelRun predictions for an individual dataset_item through three methods:
 
 (1) user specified reference_id
+
 ```python
 model_run.refloc("my_img_001.png")
 ```
+
 (2) Index
+
 ```python
 model_run.iloc(0)
 ```
+
 (3) Internally maintained dataset_item_id
+
 ```python
 model_run.loc("dataset_item_id")
 ```
 
 ### Delete ModelRun
+
 Delete a model run using the target model_run_id.
 
 A response code of 200 indicates successful deletion.
+
 ```python
 client.delete_model_run("model_run_id")
 ```
@@ -163,14 +194,20 @@ poetry install
 ```
 
 Please install the pre-commit hooks by running the following command:
+
 ```python
 poetry run pre-commit install
 ```
 
 **Best practices for testing:**
 (1). Please run pytest from the root directory of the repo, i.e.
+
 ```
-poetry pytest tests/test_dataset.py
+poetry run pytest tests/test_dataset.py
 ```
 
+(2) To skip slow integration tests that have to wait for an async job to start.
 
+```
+poetry run pytest -m "not integration"
+```
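Note on the new `-m "not integration"` deselection: it relies on pytest recognizing the custom `integration` marker applied in the test files below. A minimal sketch of how such a marker could be registered, assuming a `conftest.py` at the repo root (this registration is not shown in the commit; `pytest_configure` and `config.addinivalue_line` are standard pytest hooks):

```python
# conftest.py (hypothetical sketch; not part of this commit)
# Registering the custom "integration" marker lets
# `pytest -m "not integration"` deselect the slow stepfunctions-backed
# tests without emitting unknown-marker warnings.
def pytest_configure(config):
    config.addinivalue_line(
        "markers",
        "integration: slow tests that wait on asynchronous jobs",
    )
```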

tests/test_dataset.py

Lines changed: 4 additions & 0 deletions
@@ -144,6 +144,7 @@ def test_dataset_append_local(CLIENT, dataset):
     assert ERROR_PAYLOAD not in resp_json
 
 
+@pytest.mark.integration
 def test_dataset_append_async(dataset: Dataset):
     job = dataset.append(make_dataset_items(), asynchronous=True)
     job.sleep_until_complete()

@@ -173,6 +174,7 @@ def test_dataset_append_async_with_local_path(dataset: Dataset):
     dataset.append(ds_items, asynchronous=True)
 
 
+@pytest.mark.integration
 def test_dataset_append_async_with_1_bad_url(dataset: Dataset):
     ds_items = make_dataset_items()
     ds_items[0].image_location = "https://looks.ok.but.is.not.accessible"

@@ -248,6 +250,7 @@ def test_dataset_export_autotag_scores(CLIENT):
     assert len(scores[column]) > 0
 
 
+@pytest.mark.integration
 def test_annotate_async(dataset: Dataset):
     dataset.append(make_dataset_items())
     semseg = SegmentationAnnotation.from_json(TEST_SEGMENTATION_ANNOTATIONS[0])

@@ -281,6 +284,7 @@ def test_annotate_async(dataset: Dataset):
     }
 
 
+@pytest.mark.integration
 def test_annotate_async_with_error(dataset: Dataset):
     dataset.append(make_dataset_items())
     semseg = SegmentationAnnotation.from_json(TEST_SEGMENTATION_ANNOTATIONS[0])

tests/test_prediction.py

Lines changed: 2 additions & 0 deletions
@@ -271,6 +271,7 @@ def test_mixed_pred_upload(model_run):
     )
 
 
+@pytest.mark.integration
 def test_mixed_pred_upload_async(model_run: ModelRun):
     prediction_semseg = SegmentationPrediction.from_json(
         TEST_SEGMENTATION_PREDICTIONS[0]

@@ -307,6 +308,7 @@ def test_mixed_pred_upload_async(model_run: ModelRun):
     }
 
 
+@pytest.mark.integration
 def test_mixed_pred_upload_async_with_error(model_run: ModelRun):
     prediction_semseg = SegmentationPrediction.from_json(
         TEST_SEGMENTATION_PREDICTIONS[0]
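Conversely, to run only the stepfunctions-backed tests marked above, pytest's standard marker selection applies (a usage note, not part of this commit):

```
poetry run pytest -m integration
```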
