From b437cf0d82c77d8e0e2b34da46d60ec8cecd5a7f Mon Sep 17 00:00:00 2001 From: glopezdiest Date: Thu, 6 Mar 2025 09:54:19 +0100 Subject: [PATCH 1/2] Added LB2.1 --- .gitmodules | 8 + .../cluster-autoscaler-autodiscover.yaml | 162 ++++++ leaderboard_2.1/config/fluentd.yaml | 131 +++++ .../config/leaderboard-cluster.yaml | 243 +++++++++ .../docker/aws_uploader/Dockerfile | 17 + .../docker/aws_uploader/make_docker.sh | 36 ++ .../docker/aws_uploader/run_uploader.sh | 67 +++ leaderboard_2.1/docker/carla/Dockerfile | 18 + leaderboard_2.1/docker/carla/make_docker.sh | 51 ++ leaderboard_2.1/docker/carla/run_carla.sh | 113 ++++ leaderboard_2.1/docker/leaderboard/Dockerfile | 25 + .../leaderboard/gpu_utils/get_gpu_device.sh | 11 + .../leaderboard/gpu_utils/get_gpu_uuid.sh | 3 + .../docker/leaderboard/make_docker.sh | 77 +++ .../docker/leaderboard/run_leaderboard.sh | 204 ++++++++ .../docker/leaderboard/submodules/leaderboard | 1 + .../leaderboard/submodules/scenario_runner | 1 + leaderboard_2.1/docker/monitor/Dockerfile | 24 + .../monitor/evalai/generate_metadata.py | 20 + .../docker/monitor/evalai/generate_results.py | 71 +++ .../docker/monitor/evalai/generate_stdout.py | 123 +++++ leaderboard_2.1/docker/monitor/make_docker.sh | 36 ++ leaderboard_2.1/docker/monitor/run_monitor.sh | 168 ++++++ leaderboard_2.1/experiments/README.md | 39 ++ leaderboard_2.1/experiments/exp0.yaml | 126 +++++ leaderboard_2.1/experiments/exp1.yaml | 269 ++++++++++ leaderboard_2.1/experiments/exp2-agent.yaml | 119 +++++ .../experiments/exp2-simulator.yaml | 61 +++ leaderboard_2.1/experiments/exp3.yaml | 495 ++++++++++++++++++ leaderboard_2.1/jobs/monitor.yaml | 94 ++++ leaderboard_2.1/jobs/submission.yaml | 169 ++++++ leaderboard_2.1/tests/test-carla-server.yaml | 47 ++ leaderboard_2.1/tests/test-gpu-job.yaml | 19 + 33 files changed, 3048 insertions(+) create mode 100644 leaderboard_2.1/config/cluster-autoscaler-autodiscover.yaml create mode 100644 leaderboard_2.1/config/fluentd.yaml create mode 
100644 leaderboard_2.1/config/leaderboard-cluster.yaml create mode 100644 leaderboard_2.1/docker/aws_uploader/Dockerfile create mode 100755 leaderboard_2.1/docker/aws_uploader/make_docker.sh create mode 100755 leaderboard_2.1/docker/aws_uploader/run_uploader.sh create mode 100644 leaderboard_2.1/docker/carla/Dockerfile create mode 100755 leaderboard_2.1/docker/carla/make_docker.sh create mode 100755 leaderboard_2.1/docker/carla/run_carla.sh create mode 100644 leaderboard_2.1/docker/leaderboard/Dockerfile create mode 100755 leaderboard_2.1/docker/leaderboard/gpu_utils/get_gpu_device.sh create mode 100755 leaderboard_2.1/docker/leaderboard/gpu_utils/get_gpu_uuid.sh create mode 100755 leaderboard_2.1/docker/leaderboard/make_docker.sh create mode 100755 leaderboard_2.1/docker/leaderboard/run_leaderboard.sh create mode 160000 leaderboard_2.1/docker/leaderboard/submodules/leaderboard create mode 160000 leaderboard_2.1/docker/leaderboard/submodules/scenario_runner create mode 100644 leaderboard_2.1/docker/monitor/Dockerfile create mode 100644 leaderboard_2.1/docker/monitor/evalai/generate_metadata.py create mode 100644 leaderboard_2.1/docker/monitor/evalai/generate_results.py create mode 100644 leaderboard_2.1/docker/monitor/evalai/generate_stdout.py create mode 100755 leaderboard_2.1/docker/monitor/make_docker.sh create mode 100755 leaderboard_2.1/docker/monitor/run_monitor.sh create mode 100644 leaderboard_2.1/experiments/README.md create mode 100644 leaderboard_2.1/experiments/exp0.yaml create mode 100644 leaderboard_2.1/experiments/exp1.yaml create mode 100644 leaderboard_2.1/experiments/exp2-agent.yaml create mode 100644 leaderboard_2.1/experiments/exp2-simulator.yaml create mode 100644 leaderboard_2.1/experiments/exp3.yaml create mode 100644 leaderboard_2.1/jobs/monitor.yaml create mode 100644 leaderboard_2.1/jobs/submission.yaml create mode 100644 leaderboard_2.1/tests/test-carla-server.yaml create mode 100644 leaderboard_2.1/tests/test-gpu-job.yaml diff --git 
a/.gitmodules b/.gitmodules index 9af7e01..3e39bcb 100644 --- a/.gitmodules +++ b/.gitmodules @@ -14,3 +14,11 @@ path = leaderboard_2.0/docker/leaderboard/submodules/scenario_runner url = https://github.com/carla-simulator/scenario_runner branch = leaderboard-2.0 +[submodule "leaderboard_2.1/docker/leaderboard/submodules/leaderboard"] + path = leaderboard_2.1/docker/leaderboard/submodules/leaderboard + url = https://github.com/carla-simulator/leaderboard.git + branch = leaderboard-2.1 +[submodule "leaderboard_2.1/docker/leaderboard/submodules/scenario_runner"] + path = leaderboard_2.1/docker/leaderboard/submodules/scenario_runner + url = https://github.com/carla-simulator/scenario_runner.git + branch = leaderboard-2.1 diff --git a/leaderboard_2.1/config/cluster-autoscaler-autodiscover.yaml b/leaderboard_2.1/config/cluster-autoscaler-autodiscover.yaml new file mode 100644 index 0000000..ef14441 --- /dev/null +++ b/leaderboard_2.1/config/cluster-autoscaler-autodiscover.yaml @@ -0,0 +1,162 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: cluster-autoscaler + labels: + k8s-addon: cluster-autoscaler.addons.k8s.io + k8s-app: cluster-autoscaler +rules: + - apiGroups: [""] + resources: ["events", "endpoints"] + verbs: ["create", "patch"] + - apiGroups: [""] + resources: ["pods/eviction"] + verbs: ["create"] + - apiGroups: [""] + resources: ["pods/status"] + verbs: ["update"] + - apiGroups: [""] + resources: ["endpoints"] + resourceNames: ["cluster-autoscaler"] + verbs: ["get", "update"] + - apiGroups: [""] + resources: ["nodes"] + verbs: ["watch", "list", "get", "update"] + - apiGroups: [""] + resources: + - "namespaces" + - "pods" + - "services" + - "replicationcontrollers" + - "persistentvolumeclaims" + - "persistentvolumes" + verbs: ["watch", "list", "get"] + - apiGroups: ["extensions"] + resources: ["replicasets", "daemonsets"] + verbs: ["watch", "list", "get"] + - apiGroups: ["policy"] + resources: ["poddisruptionbudgets"] + verbs: 
["watch", "list"] + - apiGroups: ["apps"] + resources: ["statefulsets", "replicasets", "daemonsets"] + verbs: ["watch", "list", "get"] + - apiGroups: ["storage.k8s.io"] + resources: ["storageclasses", "csinodes", "csidrivers", "csistoragecapacities"] + verbs: ["watch", "list", "get"] + - apiGroups: ["batch", "extensions"] + resources: ["jobs"] + verbs: ["get", "list", "watch", "patch"] + - apiGroups: ["coordination.k8s.io"] + resources: ["leases"] + verbs: ["create"] + - apiGroups: ["coordination.k8s.io"] + resourceNames: ["cluster-autoscaler"] + resources: ["leases"] + verbs: ["get", "update"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: cluster-autoscaler + namespace: kube-system + labels: + k8s-addon: cluster-autoscaler.addons.k8s.io + k8s-app: cluster-autoscaler +rules: + - apiGroups: [""] + resources: ["configmaps"] + verbs: ["create","list","watch"] + - apiGroups: [""] + resources: ["configmaps"] + resourceNames: ["cluster-autoscaler-status", "cluster-autoscaler-priority-expander"] + verbs: ["delete", "get", "update", "watch"] + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: cluster-autoscaler + labels: + k8s-addon: cluster-autoscaler.addons.k8s.io + k8s-app: cluster-autoscaler +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: cluster-autoscaler +subjects: + - kind: ServiceAccount + name: cluster-autoscaler + namespace: kube-system + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: cluster-autoscaler + namespace: kube-system + labels: + k8s-addon: cluster-autoscaler.addons.k8s.io + k8s-app: cluster-autoscaler +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: cluster-autoscaler +subjects: + - kind: ServiceAccount + name: cluster-autoscaler + namespace: kube-system + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: cluster-autoscaler + namespace: kube-system + labels: + app: 
cluster-autoscaler +spec: + replicas: 1 + selector: + matchLabels: + app: cluster-autoscaler + template: + metadata: + labels: + app: cluster-autoscaler + annotations: + prometheus.io/scrape: 'true' + prometheus.io/port: '8085' + spec: + priorityClassName: system-cluster-critical + securityContext: + runAsNonRoot: true + runAsUser: 65534 + fsGroup: 65534 + serviceAccountName: cluster-autoscaler + containers: + - image: registry.k8s.io/autoscaling/cluster-autoscaler:v1.22.2 + name: cluster-autoscaler + resources: + limits: + cpu: 100m + memory: 600Mi + requests: + cpu: 100m + memory: 600Mi + command: + - ./cluster-autoscaler + - --v=4 + - --stderrthreshold=info + - --cloud-provider=aws + - --skip-nodes-with-local-storage=false + - --expander=least-waste + - --node-group-auto-discovery=asg:tag=k8s.io/cluster-autoscaler/enabled,k8s.io/cluster-autoscaler/leaderboard-20 + volumeMounts: + - name: ssl-certs + mountPath: /etc/ssl/certs/ca-certificates.crt #/etc/ssl/certs/ca-bundle.crt for Amazon Linux Worker Nodes + readOnly: true + imagePullPolicy: "Always" + volumes: + - name: ssl-certs + hostPath: + path: "/etc/ssl/certs/ca-bundle.crt" diff --git a/leaderboard_2.1/config/fluentd.yaml b/leaderboard_2.1/config/fluentd.yaml new file mode 100644 index 0000000..e87c796 --- /dev/null +++ b/leaderboard_2.1/config/fluentd.yaml @@ -0,0 +1,131 @@ +#@include file-fluent.conffluentd +apiVersion: v1 +kind: ConfigMap +metadata: + name: fluentd-config + namespace: kube-system + labels: + k8s-app: fluentd +data: + fluent.conf: | + + @type tail + @id in_tail_container_logs + path /var/log/containers/*simulator*.log, /var/log/containers/*agent*.log, /var/log/containers/*uploader*.log, /var/log/containers/*monitor*.log, /var/log/containers/*private-contents*.log + pos_file "/var/log/fluentd-containers.log.pos" + tag "kubernetes.*" + read_from_head true + + @type regexp + expression /^(? 
+ + + + @type kubernetes_metadata + @id filter_kube_metadata + + + + @type cloudwatch_logs + @id out_cloudwatch_logs + region "#{ENV['AWS_REGION']}" + log_group_name "#{ENV['LOG_GROUP_NAME']}" + aws_instance_profile_credentials_retries 5 + auto_create_stream true + use_tag_as_stream true + json_handler yajl + log_rejected_request true + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: fluentd-role +rules: + - apiGroups: + - "" + resources: + - namespaces + - pods + - nodes + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: fluentd-role-binding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: fluentd-role +subjects: + - kind: ServiceAccount + name: fluentd + namespace: kube-system +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: fluentd + namespace: kube-system + labels: + k8s-app: fluentd + version: v1 + kubernetes.io/cluster-service: "true" +spec: + selector: + matchLabels: + k8s-app: fluentd + version: v1 + kubernetes.io/cluster-service: "true" + template: + metadata: + labels: + k8s-app: fluentd + version: v1 + kubernetes.io/cluster-service: "true" + spec: + serviceAccount: fluentd + serviceAccountName: fluentd + tolerations: + - key: node-role.kubernetes.io/master + effect: NoSchedule + containers: + - name: fluentd + image: fluent/fluentd-kubernetes-daemonset:v1.16.1-debian-cloudwatch-1.2 + env: + - name: K8S_NODE_NAME + value: leaderboard-20 + - name: AWS_REGION + value: us-west-2 + - name: LOG_GROUP_NAME + value: leaderboard-20 + resources: + limits: + memory: 200Mi + requests: + cpu: 500m + memory: 200Mi + volumeMounts: + - name: varlog + mountPath: /var/log + - name: varlibdockercontainers + mountPath: /var/lib/docker/containers + readOnly: true + - name: fluentd-config + mountPath: /fluentd/etc + terminationGracePeriodSeconds: 30 + volumes: + - name: varlog + hostPath: + path: /var/log + - name: 
varlibdockercontainers + hostPath: + path: /var/lib/docker/containers + - name: fluentd-config + configMap: + name: fluentd-config diff --git a/leaderboard_2.1/config/leaderboard-cluster.yaml b/leaderboard_2.1/config/leaderboard-cluster.yaml new file mode 100644 index 0000000..90b17cc --- /dev/null +++ b/leaderboard_2.1/config/leaderboard-cluster.yaml @@ -0,0 +1,243 @@ +apiVersion: eksctl.io/v1alpha5 +kind: ClusterConfig + +metadata: + name: leaderboard-20 + region: us-west-2 + version: "1.29" + +iamIdentityMappings: + - arn: arn:aws:iam::342236305043:role/LB2-eks-admin + groups: + - system:masters + username: admin + noDuplicateARNs: true # prevents shadowing of ARNs + - arn: arn:aws:iam::342236305043:role/LeaderboardStepFunctionRole + groups: + - system:masters + username: admin + noDuplicateARNs: true # prevents shadowing of ARNs + +iam: + withOIDC: true + serviceAccounts: + - metadata: + name: fluentd + namespace: kube-system + attachPolicy: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + - "logs:CreateLogStream" + - "logs:CreateLogGroup" + - "logs:PutLogEvents" + - "logs:DescribeLogGroups" + - "logs:DescribeLogStreams" + Resource: "arn:aws:logs:*:*:*" + - metadata: + name: cluster-autoscaler + namespace: kube-system + wellKnownPolicies: + autoScaler: true + - metadata: + name: submission-worker + attachPolicy: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + # Based on AWSAppRunnerServicePolicyForECRAccess, grants access to download the ECR docker images + - "ecr:GetDownloadUrlForLayer" + - "ecr:BatchGetImage" + - "ecr:DescribeImages" + - "ecr:GetAuthorizationToken" + - "ecr:BatchCheckLayerAvailability" + # Based on AWSS3FullAccess, grant read + write access to S3 + - "s3:*" + - "s3-object-lambda:*" + # DynamoDB + - "dynamodb:GetItem" + - "dynamodb:DeleteItem" + - "dynamodb:PutItem" + - "dynamodb:UpdateItem" + Resource: + # All ECR repositories and a specific S3 bucket + - "arn:aws:ecr:*:342236305043:repository/*" + - 
"arn:aws:s3:::leaderboard-20*" + - "arn:aws:dynamodb:*:342236305043:table/leaderboard-20" + - "arn:aws:dynamodb:*:342236305043:table/leaderboard-21" + +nodeGroups: + - name: basic-worker + amiFamily: AmazonLinux2 + instanceType: t3.large + desiredCapacity: 1 + volumeSize: 100 + labels: + role: basic-worker + + - name: monitor-worker + instanceType: t3.medium + amiFamily: AmazonLinux2 + desiredCapacity: 0 + minSize: 0 + maxSize: 4 + volumeSize: 100 + tags: + k8s.io/cluster-autoscaler/enabled: "true" + k8s.io/cluster-autoscaler/leaderboard-20: "owned" + labels: + role: submission-generic-worker + + - name: submission-worker-g5x12 + instanceType: g5.12xlarge + amiFamily: Ubuntu2004 + ami: ami-00254c198912bd2a1 + desiredCapacity: 0 + minSize: 0 + maxSize: 4 + volumeSize: 400 + tags: + k8s.io/cluster-autoscaler/enabled: "true" + k8s.io/cluster-autoscaler/leaderboard-20: "owned" + labels: + role: submission-generic-worker + ssh: + publicKey: "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDkL6oBOlOqWp4BgOIsQnHQkaPCEGQjdqwWPy1WXLPEnjMLQ3iFGK+zMJ3VNhYujhemn2Yxja8Yw+a0MWv0OfV9TTcW6gsjsBuZyBA0g7OkaFFrAiEi42gajqqnBCEpbEL8/+MYnOHSYCqIXi7yyzHwDGuUzBsyTTsbAmdvuQ8o7sh7QH0Ncw5Z7605RTQI1MxP2zAQdl/UdZipFH9Q3pCidwWLJ3WFYTvKkhpEjiUyrf2sfPya89yFQdfLytpX4mW/YRsvLIoBElJYDkcAkyGPU6N0o+CoXyFg1ezvB9rXFsW1XgRf4ZR3nKxiM9yi1N1Z0/rf5hUWseNRt6/Xl0pn" + overrideBootstrapCommand: | + #!/bin/bash + /etc/eks/bootstrap.sh leaderboard-20 + sudo bash -c "echo 'version = 2 + [plugins] + [plugins.\"io.containerd.grpc.v1.cri\"] + [plugins.\"io.containerd.grpc.v1.cri\".containerd] + default_runtime_name = \"nvidia\" + + [plugins.\"io.containerd.grpc.v1.cri\".containerd.runtimes] + [plugins.\"io.containerd.grpc.v1.cri\".containerd.runtimes.nvidia] + privileged_without_host_devices = false + runtime_engine = \"\" + runtime_root = \"\" + runtime_type = \"io.containerd.runc.v2\" + [plugins.\"io.containerd.grpc.v1.cri\".containerd.runtimes.nvidia.options] + BinaryName = \"/usr/bin/nvidia-container-runtime\"' \ + > 
/etc/containerd/config.toml" + sudo systemctl restart containerd + preBootstrapCommands: + - "sudo nvidia-xconfig --preserve-busid -a --virtual=1280x1024" + - "sudo X :0&" + + - name: submission-worker-g5x + instanceType: g5.xlarge + amiFamily: Ubuntu2004 + ami: ami-00254c198912bd2a1 + desiredCapacity: 0 + minSize: 0 + maxSize: 4 + volumeSize: 400 + tags: + k8s.io/cluster-autoscaler/enabled: "true" + k8s.io/cluster-autoscaler/leaderboard-20: "owned" + labels: + role: submission-generic-worker + ssh: + publicKey: "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDkL6oBOlOqWp4BgOIsQnHQkaPCEGQjdqwWPy1WXLPEnjMLQ3iFGK+zMJ3VNhYujhemn2Yxja8Yw+a0MWv0OfV9TTcW6gsjsBuZyBA0g7OkaFFrAiEi42gajqqnBCEpbEL8/+MYnOHSYCqIXi7yyzHwDGuUzBsyTTsbAmdvuQ8o7sh7QH0Ncw5Z7605RTQI1MxP2zAQdl/UdZipFH9Q3pCidwWLJ3WFYTvKkhpEjiUyrf2sfPya89yFQdfLytpX4mW/YRsvLIoBElJYDkcAkyGPU6N0o+CoXyFg1ezvB9rXFsW1XgRf4ZR3nKxiM9yi1N1Z0/rf5hUWseNRt6/Xl0pn" + overrideBootstrapCommand: | + #!/bin/bash + /etc/eks/bootstrap.sh leaderboard-20 + sudo bash -c "echo 'version = 2 + [plugins] + [plugins.\"io.containerd.grpc.v1.cri\"] + [plugins.\"io.containerd.grpc.v1.cri\".containerd] + default_runtime_name = \"nvidia\" + + [plugins.\"io.containerd.grpc.v1.cri\".containerd.runtimes] + [plugins.\"io.containerd.grpc.v1.cri\".containerd.runtimes.nvidia] + privileged_without_host_devices = false + runtime_engine = \"\" + runtime_root = \"\" + runtime_type = \"io.containerd.runc.v2\" + [plugins.\"io.containerd.grpc.v1.cri\".containerd.runtimes.nvidia.options] + BinaryName = \"/usr/bin/nvidia-container-runtime\"' \ + > /etc/containerd/config.toml" + sudo systemctl restart containerd + preBootstrapCommands: + - "sudo nvidia-xconfig --preserve-busid -a --virtual=1280x1024" + - "sudo X :0&" + + - name: submission-worker-g5x2 + instanceType: g5.2xlarge + amiFamily: Ubuntu2004 + ami: ami-00254c198912bd2a1 + desiredCapacity: 0 + minSize: 0 + maxSize: 4 + volumeSize: 400 + tags: + k8s.io/cluster-autoscaler/enabled: "true" + 
k8s.io/cluster-autoscaler/leaderboard-20: "owned" + labels: + role: submission-generic-worker + ssh: + publicKey: "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDkL6oBOlOqWp4BgOIsQnHQkaPCEGQjdqwWPy1WXLPEnjMLQ3iFGK+zMJ3VNhYujhemn2Yxja8Yw+a0MWv0OfV9TTcW6gsjsBuZyBA0g7OkaFFrAiEi42gajqqnBCEpbEL8/+MYnOHSYCqIXi7yyzHwDGuUzBsyTTsbAmdvuQ8o7sh7QH0Ncw5Z7605RTQI1MxP2zAQdl/UdZipFH9Q3pCidwWLJ3WFYTvKkhpEjiUyrf2sfPya89yFQdfLytpX4mW/YRsvLIoBElJYDkcAkyGPU6N0o+CoXyFg1ezvB9rXFsW1XgRf4ZR3nKxiM9yi1N1Z0/rf5hUWseNRt6/Xl0pn" + overrideBootstrapCommand: | + #!/bin/bash + /etc/eks/bootstrap.sh leaderboard-20 + sudo bash -c "echo 'version = 2 + [plugins] + [plugins.\"io.containerd.grpc.v1.cri\"] + [plugins.\"io.containerd.grpc.v1.cri\".containerd] + default_runtime_name = \"nvidia\" + + [plugins.\"io.containerd.grpc.v1.cri\".containerd.runtimes] + [plugins.\"io.containerd.grpc.v1.cri\".containerd.runtimes.nvidia] + privileged_without_host_devices = false + runtime_engine = \"\" + runtime_root = \"\" + runtime_type = \"io.containerd.runc.v2\" + [plugins.\"io.containerd.grpc.v1.cri\".containerd.runtimes.nvidia.options] + BinaryName = \"/usr/bin/nvidia-container-runtime\"' \ + > /etc/containerd/config.toml" + sudo systemctl restart containerd + preBootstrapCommands: + - "sudo nvidia-xconfig --preserve-busid -a --virtual=1280x1024" + - "sudo X :0&" + + - name: submission-worker-g5x4 + instanceType: g5.4xlarge + amiFamily: Ubuntu2004 + ami: ami-00254c198912bd2a1 + desiredCapacity: 0 + minSize: 0 + maxSize: 4 + volumeSize: 400 + tags: + k8s.io/cluster-autoscaler/enabled: "true" + k8s.io/cluster-autoscaler/leaderboard-20: "owned" + labels: + role: submission-generic-worker + ssh: + publicKey: "ssh-rsa 
AAAAB3NzaC1yc2EAAAADAQABAAABAQDkL6oBOlOqWp4BgOIsQnHQkaPCEGQjdqwWPy1WXLPEnjMLQ3iFGK+zMJ3VNhYujhemn2Yxja8Yw+a0MWv0OfV9TTcW6gsjsBuZyBA0g7OkaFFrAiEi42gajqqnBCEpbEL8/+MYnOHSYCqIXi7yyzHwDGuUzBsyTTsbAmdvuQ8o7sh7QH0Ncw5Z7605RTQI1MxP2zAQdl/UdZipFH9Q3pCidwWLJ3WFYTvKkhpEjiUyrf2sfPya89yFQdfLytpX4mW/YRsvLIoBElJYDkcAkyGPU6N0o+CoXyFg1ezvB9rXFsW1XgRf4ZR3nKxiM9yi1N1Z0/rf5hUWseNRt6/Xl0pn" + overrideBootstrapCommand: | + #!/bin/bash + /etc/eks/bootstrap.sh leaderboard-20 + sudo bash -c "echo 'version = 2 + [plugins] + [plugins.\"io.containerd.grpc.v1.cri\"] + [plugins.\"io.containerd.grpc.v1.cri\".containerd] + default_runtime_name = \"nvidia\" + + [plugins.\"io.containerd.grpc.v1.cri\".containerd.runtimes] + [plugins.\"io.containerd.grpc.v1.cri\".containerd.runtimes.nvidia] + privileged_without_host_devices = false + runtime_engine = \"\" + runtime_root = \"\" + runtime_type = \"io.containerd.runc.v2\" + [plugins.\"io.containerd.grpc.v1.cri\".containerd.runtimes.nvidia.options] + BinaryName = \"/usr/bin/nvidia-container-runtime\"' \ + > /etc/containerd/config.toml" + sudo systemctl restart containerd + preBootstrapCommands: + - "sudo nvidia-xconfig --preserve-busid -a --virtual=1280x1024" + - "sudo X :0&" diff --git a/leaderboard_2.1/docker/aws_uploader/Dockerfile b/leaderboard_2.1/docker/aws_uploader/Dockerfile new file mode 100644 index 0000000..f8d1287 --- /dev/null +++ b/leaderboard_2.1/docker/aws_uploader/Dockerfile @@ -0,0 +1,17 @@ +FROM python:3.7 + +WORKDIR /workspace + +RUN apt-get update \ + && apt-get install -y --no-install-recommends jq \ + && rm -rf /var/lib/apt/lists/* + +# Install aws cli v2 +RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" \ + && unzip awscliv2.zip \ + && ./aws/install \ + && rm -rf awscliv2.zip + +COPY run_uploader.sh /workspace/run_uploader.sh + +CMD ["/bin/bash"] diff --git a/leaderboard_2.1/docker/aws_uploader/make_docker.sh b/leaderboard_2.1/docker/aws_uploader/make_docker.sh new file mode 100755 index 
0000000..fd8a358 --- /dev/null +++ b/leaderboard_2.1/docker/aws_uploader/make_docker.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +DOC_STRING="Build aws_uploader docker image." + +USAGE_STRING=$(cat <<- END +Usage: $0 [-h|--help] [-t|--target-name TARGET] + +The default target name is "uploader-20" +END +) + +usage() { echo "${DOC_STRING}"; echo "${USAGE_STRING}"; exit 1; } + +# Defaults +TARGET_NAME="uploader-20" + +while [[ $# -gt 0 ]]; do + case "$1" in + -t | --target-name ) + TARGET_NAME="${2:?missing value for $1}" # abort here: a failed 'shift 2' would leave the loop spinning forever + shift 2 ;; + -h | --help ) + usage + ;; + * ) + shift ;; + esac +done + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# Build docker image (paths are quoted so directories containing spaces work) +echo "Building uploader docker" +docker build --force-rm \ + -t "${TARGET_NAME}" \ + -f "${SCRIPT_DIR}/Dockerfile" "${SCRIPT_DIR}" diff --git a/leaderboard_2.1/docker/aws_uploader/run_uploader.sh b/leaderboard_2.1/docker/aws_uploader/run_uploader.sh new file mode 100755 index 0000000..4c4edde --- /dev/null +++ b/leaderboard_2.1/docker/aws_uploader/run_uploader.sh @@ -0,0 +1,67 @@ +#!/bin/bash + +####################### +## DEFAULT VARIABLES ## +####################### +ID="$WORKER_ID" + +# The existence of this file notifies the monitor node that this worker has started the evaluation. 
+WORKER_START_FILE="/logs/containers-status/worker-$ID.start" + +UPLOADER_LOGS="/logs/logs/uploader-$ID.log" +UPLOADER_DONE_FILE="/logs/containers-status/uploader-$ID.done" + +SIMULATION_CANCEL_FILE="/logs/containers-status/simulation-$ID.cancel" + +########### +## UTILS ## +########### +push_to_s3() { + aws s3 sync /logs "s3://${S3_BUCKET}/${SUBMISSION_ID}" --no-progress + aws s3 sync /ros/logs "s3://${S3_BUCKET}/${SUBMISSION_ID}/ros/worker-$WORKER_ID" --delete --no-progress +} + +######################### +## UPLOADER PARAMETERS ## +######################### +[[ -z "${UPLOADER_PERIOD}" ]] && export UPLOADER_PERIOD="30" +[ -f "$SIMULATION_CANCEL_FILE" ] && rm "$SIMULATION_CANCEL_FILE" # drop a stale cancel marker left by a previous attempt + +# Save all the outputs into a file, which will be sent to s3 +exec > >(tee -a "$UPLOADER_LOGS") 2>&1 + +if [ -f "$UPLOADER_LOGS" ]; then + echo "" + echo "Found partial uploader logs" +fi + +touch "$WORKER_START_FILE" + +while sleep "${UPLOADER_PERIOD}" ; do + echo "" + echo "[$(date +"%Y-%m-%d %T")] Starting loop uploader" + + echo "> Pushing to S3" + push_to_s3 + + echo "> Checking if the submission has been cancelled" + aws s3api head-object --bucket "${S3_BUCKET}" --key "${SUBMISSION_ID}/containers-status/simulation.cancel" > /dev/null 2>&1 && SIMULATION_CANCELLED=true + if [ "$SIMULATION_CANCELLED" = true ]; then # explicit comparison: a bare [ \$VAR ] is true for any non-empty value + echo "Detected that the submission has been cancelled. Stopping..." + touch "$SIMULATION_CANCEL_FILE" + push_to_s3 + break + fi + + echo "> Checking end condition" + DONE_FILES=$(find /logs/containers-status -name '*.done*' | wc -l) # pattern quoted so find, not the shell, expands it + if [ "$DONE_FILES" -ge 2 ]; then + echo "Detected that all containers have finished. Stopping..." + touch "$UPLOADER_DONE_FILE" + push_to_s3 + break + else + echo "Detected that only $DONE_FILES out of the 2 containers have finished. Waiting..." 
+ fi + +done diff --git a/leaderboard_2.1/docker/carla/Dockerfile b/leaderboard_2.1/docker/carla/Dockerfile new file mode 100644 index 0000000..4499373 --- /dev/null +++ b/leaderboard_2.1/docker/carla/Dockerfile @@ -0,0 +1,18 @@ + +FROM ubuntu:20.04 + +RUN packages='libsdl2-2.0 xserver-xorg libvulkan1 libomp5' && apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y $packages --no-install-recommends && rm -rf /var/lib/apt/lists/* + +RUN useradd -m carla + +COPY --chown=carla:carla . /home/carla + +USER carla +WORKDIR /home/carla + +ENV OMP_PROC_BIND="FALSE" +ENV OMP_NUM_THREADS="48" +ENV SDL_VIDEODRIVER="x11" +ENV NVIDIA_DRIVER_CAPABILITIES="all" + +CMD /bin/bash CarlaUE4.sh -RenderOffScreen -nosound diff --git a/leaderboard_2.1/docker/carla/make_docker.sh b/leaderboard_2.1/docker/carla/make_docker.sh new file mode 100755 index 0000000..438c692 --- /dev/null +++ b/leaderboard_2.1/docker/carla/make_docker.sh @@ -0,0 +1,51 @@ +#!/bin/bash + +DOC_STRING="Build CARLA docker image." + +USAGE_STRING=$(cat <<- END +Usage: $0 [-h|--help] [-t|--target-name TARGET] + +The default target name is "carla-20" + +The following env variables are mandatory: + * CARLA_ROOT +END +) + +usage() { echo "${DOC_STRING}"; echo "${USAGE_STRING}"; exit 1; } + +# Defaults +TARGET_NAME="carla-20" + +while [[ $# -gt 0 ]]; do + case "$1" in + -t | --target-name ) + TARGET_NAME="${2:?missing value for $1}" # abort here: a failed 'shift 2' would leave the loop spinning forever + shift 2 ;; + -h | --help ) + usage + ;; + * ) + shift ;; + esac +done + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +if [ -z "$CARLA_ROOT" ] +then + echo "Error \$CARLA_ROOT is empty. Set \$CARLA_ROOT as an environment variable first." 
+ exit 1 +fi + +# Temporarily copy the run_carla script into ${CARLA_ROOT} so it is part of the docker build context +cp "${SCRIPT_DIR}/run_carla.sh" "${CARLA_ROOT}" + +# Build docker image +echo "Building CARLA docker" +echo "Using CARLA version: ${CARLA_ROOT}" +docker build --force-rm \ + -t "${TARGET_NAME}" \ + -f "${SCRIPT_DIR}/Dockerfile" "${CARLA_ROOT}" + +rm -fr "${CARLA_ROOT}/run_carla.sh" \ No newline at end of file diff --git a/leaderboard_2.1/docker/carla/run_carla.sh b/leaderboard_2.1/docker/carla/run_carla.sh new file mode 100755 index 0000000..2b23454 --- /dev/null +++ b/leaderboard_2.1/docker/carla/run_carla.sh @@ -0,0 +1,113 @@ +#!/bin/bash + +# Get the file names of this attempt (the number of existing crash files is used as the attempt suffix) +ID="$WORKER_ID" +CRASH_ID=$(find /tmp/status -name "*simulator-$ID.crash*" | wc -l) + +SIMULATOR_LOGS="/tmp/logs/simulator-$ID.log" +SIMULATOR_START_FILE="/tmp/status/simulator-$ID.start$CRASH_ID" +SIMULATOR_DONE_FILE="/tmp/status/simulator-$ID.done" +SIMULATOR_CRASH_FILE="/tmp/status/simulator-$ID.crash$CRASH_ID" + +AGENT_DONE_FILE="/tmp/status/agent-$ID.done" +AGENT_CRASH_FILE="/tmp/status/agent-$ID.crash$CRASH_ID" + +SIMULATION_CANCEL_FILE="/tmp/status/simulation-$ID.cancel" + +GPU_DEVICE_FILE="/gpu/uuid.txt$CRASH_ID" + +# Ending function before exiting the container +kill_all_processes() { + # Avoid exiting on error + pkill -9 'CarlaUE4' || true +} + +kill_and_wait_for_agent () { + kill_all_processes + + if [ "$1" = "crash" ]; then + echo "Creating the simulator crash file" + touch "$SIMULATOR_CRASH_FILE" + else + echo "Creating the simulator done file" + touch "$SIMULATOR_DONE_FILE" + fi + + echo "Waiting for the agent to finish..." + for ((i=1;i<=60;i++)); do + [ -f "$AGENT_CRASH_FILE" ] && break + [ -f "$AGENT_DONE_FILE" ] && break + sleep 10 + done + + if [ "$1" = "crash" ]; then + echo "Detected that the agent has finished. Exiting with crash..." + else + echo "Detected that the agent has finished. Exiting with success..." 
+ fi +} + +# Save all the outputs into a file, which will be sent to s3 +exec > >(tee -a "$SIMULATOR_LOGS") 2>&1 + +if [ -f "$SIMULATOR_LOGS" ]; then + echo "" + echo "Found partial simulator logs" +fi + +echo "Waiting for a GPU to be assigned..." +MAX_RETRIES=120 # wait 1h maximum +for ((i=1;i<=$MAX_RETRIES;i++)); do + if [ -f "$GPU_DEVICE_FILE" ]; then + echo "" + echo "Detected that a GPU has been assigned" + break + fi + sleep 30 +done + +if ! [ -f "$GPU_DEVICE_FILE" ]; then + echo "No GPU assigned. Stopping..." + kill_and_wait_for_agent crash + exit 1 +fi + +echo "" +export NVIDIA_VISIBLE_DEVICES=$(/gpu/get_gpu_device.sh "${GPU_DEVICE_FILE}") +UUID=$(cat "${GPU_DEVICE_FILE}") +echo "Using GPU: ${UUID} (${NVIDIA_VISIBLE_DEVICES})" + +echo "Starting CARLA server" +./CarlaUE4.sh -vulkan -RenderOffScreen -nosound "-ini:[/Script/Engine.RendererSettings]:r.GraphicsAdapter=${NVIDIA_VISIBLE_DEVICES}" & + +echo "Sleeping a bit to ensure CARLA is ready" +sleep 60 + +touch "$SIMULATOR_START_FILE" + +while sleep 5 ; do + if [ -f "$AGENT_CRASH_FILE" ]; then + echo "" + echo "Detected that the Leaderboard has failed. Stopping the server..." + kill_and_wait_for_agent crash + exit 1 + fi + if [ -f "$AGENT_DONE_FILE" ]; then + echo "" + echo "Detected that the Leaderboard has finished. Stopping the server..." + kill_and_wait_for_agent + exit 0 + fi + if [ -z "$(pgrep -f CarlaUE4)" ]; then + echo "" + echo "Detected that the server has crashed" + kill_and_wait_for_agent crash + exit 1 + fi + if [ -f "$SIMULATION_CANCEL_FILE" ]; then + echo "" + echo "Detected that the submission has been cancelled. Stopping..." 
+ kill_all_processes + exit 0 + fi +done diff --git a/leaderboard_2.1/docker/leaderboard/Dockerfile b/leaderboard_2.1/docker/leaderboard/Dockerfile new file mode 100644 index 0000000..acf96b1 --- /dev/null +++ b/leaderboard_2.1/docker/leaderboard/Dockerfile @@ -0,0 +1,25 @@ +FROM ubuntu:20.04 + +WORKDIR /workspace + +RUN apt-get update \ + && apt-get install -y --no-install-recommends ca-certificates curl unzip \ + && rm -rf /var/lib/apt/lists/* + +# Install aws cli v2 (the archive and the extracted installer are removed afterwards) +RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" \ + && unzip awscliv2.zip \ + && ./aws/install \ + && rm -rf awscliv2.zip aws + +ENV CARLA_PYTHON_API_ROOT="/workspace/CARLA" +ENV SCENARIO_RUNNER_ROOT="/workspace/scenario_runner" +ENV LEADERBOARD_ROOT="/workspace/leaderboard" +ENV GPU_UTILS_ROOT="/workspace/gpu_utils" + +COPY PythonAPI ${CARLA_PYTHON_API_ROOT}/PythonAPI +COPY scenario_runner ${SCENARIO_RUNNER_ROOT} +COPY leaderboard ${LEADERBOARD_ROOT} +COPY gpu_utils ${GPU_UTILS_ROOT} + +CMD ["/bin/bash"] diff --git a/leaderboard_2.1/docker/leaderboard/gpu_utils/get_gpu_device.sh b/leaderboard_2.1/docker/leaderboard/gpu_utils/get_gpu_device.sh new file mode 100755 index 0000000..1d16f09 --- /dev/null +++ b/leaderboard_2.1/docker/leaderboard/gpu_utils/get_gpu_device.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +# Print the nvidia-smi index of the GPU whose UUID is stored in the file given as first argument +GPU_DEVICE_FILE=${1} + +uuid=$(cat "${GPU_DEVICE_FILE}") + +readarray -t ALL_GPUS < <(nvidia-smi --query-gpu=index,uuid --format=csv | grep GPU) +for gpu in "${ALL_GPUS[@]}"; do if [[ "$gpu" == *"$uuid"* ]]; then DEVICE=$(cut -d , -f 1 <<< "$gpu") && break ; fi; done + +echo "$DEVICE" diff --git a/leaderboard_2.1/docker/leaderboard/gpu_utils/get_gpu_uuid.sh b/leaderboard_2.1/docker/leaderboard/gpu_utils/get_gpu_uuid.sh new file mode 100755 index 0000000..601d766 --- /dev/null +++ b/leaderboard_2.1/docker/leaderboard/gpu_utils/get_gpu_uuid.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +nvidia-smi --query-gpu=uuid --format=csv | grep GPU diff --git 
a/leaderboard_2.1/docker/leaderboard/make_docker.sh b/leaderboard_2.1/docker/leaderboard/make_docker.sh new file mode 100755 index 0000000..46fa180 --- /dev/null +++ b/leaderboard_2.1/docker/leaderboard/make_docker.sh @@ -0,0 +1,77 @@ +#!/bin/bash + +DOC_STRING="Build leaderboard docker image." + +USAGE_STRING=$(cat <<- END +Usage: $0 [-h|--help] [-t|--target-name TARGET] + +The default target name is "leaderboard-20" + +The following env variables are mandatory: + * CARLA_ROOT + * CHALLENGE_CONTENTS_ROOT +END +) + +usage() { echo "${DOC_STRING}"; echo "${USAGE_STRING}"; exit 1; } + +# Defaults +TARGET_NAME="leaderboard-20" + +while [[ $# -gt 0 ]]; do + case "$1" in + -t | --target-name ) + TARGET_NAME="${2:?missing value for $1}" # abort here: a failed 'shift 2' would leave the loop spinning forever + shift 2 ;; + -h | --help ) + usage + ;; + * ) + shift ;; + esac +done + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +if [ -z "$CARLA_ROOT" ] +then + echo "Error \$CARLA_ROOT is empty. Set \$CARLA_ROOT as an environment variable first." + exit 1 +fi +echo "Using CARLA version: '$CARLA_ROOT'" + +if [ -z "$CHALLENGE_CONTENTS_ROOT" ] +then echo "Error \$CHALLENGE_CONTENTS_ROOT is empty. Set \$CHALLENGE_CONTENTS_ROOT as an environment variable first." 
+ exit 1 +fi + +rm -fr .lbtmp +mkdir .lbtmp +mkdir -p .lbtmp/team_code + +echo "Copying CARLA Python API" +cp -fr ${CARLA_ROOT}/PythonAPI .lbtmp +mv .lbtmp/PythonAPI/carla/dist/carla*-py2*.egg .lbtmp/PythonAPI/carla/dist/carla-leaderboard-py2.7.egg +mv .lbtmp/PythonAPI/carla/dist/carla*-py3*.egg .lbtmp/PythonAPI/carla/dist/carla-leaderboard-py3x.egg + +echo "Copying Scenario Runner" +cp -fr ${SCRIPT_DIR}/submodules/scenario_runner .lbtmp +rm -fr .lbtmp/scenario_runner/.git + +echo "Copying Leaderboard" +cp -fr ${SCRIPT_DIR}/submodules/leaderboard .lbtmp +rm -fr .lbtmp/leaderboard/.git + +echo "Copying CARLA's private data" +cp ${CHALLENGE_CONTENTS_ROOT}/src/leaderboard_20/data/* .lbtmp/leaderboard/data +cp ${CHALLENGE_CONTENTS_ROOT}/src/leaderboard_20/data/parked_vehicles.py .lbtmp/leaderboard/leaderboard/utils/parked_vehicles.py +cp ${SCRIPT_DIR}/run_leaderboard.sh .lbtmp/leaderboard/ + +echo "Copying GPU utils" +cp -fr ${SCRIPT_DIR}/gpu_utils .lbtmp + +# build docker image +echo "Building docker" +docker build --force-rm -t ${TARGET_NAME} -f ${SCRIPT_DIR}/Dockerfile .lbtmp + +rm -fr .lbtmp diff --git a/leaderboard_2.1/docker/leaderboard/run_leaderboard.sh b/leaderboard_2.1/docker/leaderboard/run_leaderboard.sh new file mode 100755 index 0000000..5f418b5 --- /dev/null +++ b/leaderboard_2.1/docker/leaderboard/run_leaderboard.sh @@ -0,0 +1,204 @@ +#!/bin/bash + +# Get the file names of this attempt +ID="$WORKER_ID" +CRASH_ID=$(find /tmp/status -name *agent-$ID.crash* | wc -l) + +AGENT_LOGS="/tmp/logs/agent-$ID.log" +AGENT_START_FILE="/tmp/status/agent-$ID.start$CRASH_ID" +AGENT_DONE_FILE="/tmp/status/agent-$ID.done" +AGENT_CRASH_FILE="/tmp/status/agent-$ID.crash$CRASH_ID" + +SIMULATOR_START_FILE="/tmp/status/simulator-$ID.start$CRASH_ID" +SIMULATOR_DONE_FILE="/tmp/status/simulator-$ID.done" +SIMULATOR_CRASH_FILE="/tmp/status/simulator-$ID.crash$CRASH_ID" + +SIMULATION_CANCEL_FILE="/tmp/status/simulation-$ID.cancel" + 
+AGENT_RESULTS="/tmp/agent/partial_agent_results$ID.json" + +GPU_DEVICE_FILE="/gpu/uuid.txt$CRASH_ID" + +MAX_IDLE=800 + +####################### +## DEFAULT VARIABLES ## +####################### +export CARLA_ROOT="/workspace/CARLA" +export SCENARIO_RUNNER_ROOT="/workspace/scenario_runner" +export LEADERBOARD_ROOT="/workspace/leaderboard" +export PYTHONPATH="${CARLA_ROOT}/PythonAPI/carla/dist/$(ls ${CARLA_ROOT}/PythonAPI/carla/dist | grep py3.):${SCENARIO_RUNNER_ROOT}":"${LEADERBOARD_ROOT}":${PYTHONPATH} + +############################ +## LEADERBOARD PARAMETERS ## +############################ +[[ -z "${CHALLENGE_TRACK_CODENAME}" ]] && export CHALLENGE_TRACK_CODENAME="SENSORS" +export ROUTES="/workspace/leaderboard/data/routes_testing.xml" +if [[ "$CHALLENGE_TRACK_CODENAME" == *"QUALIFIER"* ]]; then + export ROUTES="/workspace/leaderboard/data/routes_qualifier.xml" +fi +[[ -z "${REPETITIONS}" ]] && export REPETITIONS="1" +[[ -z "${RESUME}" ]] && export RESUME="" + +export CHECKPOINT_ENDPOINT=$AGENT_RESULTS +export RECORD_PATH="/home/carla/recorder" + +export DEBUG_CHALLENGE="0" +export DEBUG_CHECKPOINT_ENDPOINT="/workspace/leaderboard/live_results.txt" + +############################ +## LEADERBOARD EXECUTION ## +############################ + +# Save all the outpus into a file, which will be sent to s3 +exec > >(tee -a "$AGENT_LOGS") 2>&1 + +if [ -f "$AGENT_LOGS" ]; then + echo "" + echo "Found partial agent logs" +fi + +# GPU assignment +bash /gpu/get_gpu_uuid.sh > $GPU_DEVICE_FILE + +echo "" +export NVIDIA_VISIBLE_DEVICES=$(/gpu/get_gpu_device.sh ${GPU_DEVICE_FILE}) +UUID=$(cat ${GPU_DEVICE_FILE}) +echo "Using GPU: ${UUID} (${NVIDIA_VISIBLE_DEVICES})" +echo "" + +# Check for any previous trial. If so resume +if [ $CRASH_ID -gt 0 ]; then + PREVIOUS_AGENT_CRASH_FILE="/tmp/status/agent-$ID.crash$(($CRASH_ID - 1))" + if [ -f $PREVIOUS_AGENT_CRASH_FILE ]; then + echo "Found the agent failure file. Resuming..." 
# Ending function before exiting the container.
#
# Stops all the agent processes, creates the agent status file and waits for
# the simulator to create its own done / crash file, polling every 10 seconds
# for 60 attempts at most.
#
# Arguments:
#   $1 - "crash" to create the agent crash file. Any other value (or none)
#        creates the agent done file.
kill_and_wait_for_simulator () {
    local simulator_finished=false
    local outcome="success"
    local attempt

    kill_all_processes

    if [ "$1" = "crash" ]; then
        outcome="crash"
        echo "Creating the agent crash file"
        touch "$AGENT_CRASH_FILE"
    else
        echo "Creating the agent done file"
        touch "$AGENT_DONE_FILE"
    fi

    echo "Waiting for the simulator to finish..."
    for ((attempt=1; attempt<=60; attempt++)); do
        if [ -f "$SIMULATOR_CRASH_FILE" ] || [ -f "$SIMULATOR_DONE_FILE" ]; then
            simulator_finished=true
            break
        fi
        sleep 10
    done

    # Only claim that the simulator has finished when one of its files was seen
    if [ "$simulator_finished" = true ]; then
        echo "Detected that the simulator has finished. Exiting with ${outcome}..."
    else
        echo "Timed out waiting for the simulator to finish. Exiting with ${outcome}..."
    fi
}
+ kill_and_wait_for_simulator crash + exit 1 +fi + +echo "Starting the Leaderboard" + +# To ensure the Leaderboard never blocks, run it in the background (Done using the '&' at the end) +# while monitoring the changes to the results to know when it has finished. +python3 -u ${LEADERBOARD_ROOT}/leaderboard/leaderboard_evaluator.py \ + --agent=${TEAM_AGENT} \ + --agent-config=${TEAM_CONFIG} \ + --track=${CHALLENGE_TRACK_CODENAME} \ + --routes=${ROUTES} \ + --routes-subset=${ROUTES_SUBSET} \ + --repetitions=${REPETITIONS} \ + --resume=${RESUME} \ + --checkpoint=${CHECKPOINT_ENDPOINT} \ + --record=${RECORD_PATH} \ + --debug-checkpoint=${DEBUG_CHECKPOINT_ENDPOINT} \ + --debug=${DEBUG_CHALLENGE} & + +touch $AGENT_START_FILE + +while sleep 5 ; do + if [ "$(file_age $AGENT_LOGS)" -gt "$MAX_IDLE" ]; then + echo "" + echo "Detected no new outputs for $AGENT_LOGS during $MAX_IDLE seconds. Stopping..." + break + fi + if ! pgrep -f leaderboard_evaluator | egrep -q -v '^1$'; then + echo "" + echo "Detected that the leaderboard has finished" + break + fi + if [ -f $SIMULATOR_CRASH_FILE ]; then + echo "" + echo "Detected that the simulator has crashed. Stopping..." + break + fi + if [ -f $SIMULATION_CANCEL_FILE ]; then + echo "" + echo "Detected that the submission has been cancelled. Stopping..." + kill_all_processes + exit 0 + fi +done + +sleep 5 + +echo "" +echo "Validating the Leaderboard results..." +if ! 
[ -f $AGENT_RESULTS ] || grep -wq "global_record\": {}" $AGENT_RESULTS; then + echo "Detected missing global records" + kill_and_wait_for_simulator crash + exit 1 +else + echo "Detected correct global records" + kill_and_wait_for_simulator + exit 0 +fi diff --git a/leaderboard_2.1/docker/leaderboard/submodules/leaderboard b/leaderboard_2.1/docker/leaderboard/submodules/leaderboard new file mode 160000 index 0000000..edaa85e --- /dev/null +++ b/leaderboard_2.1/docker/leaderboard/submodules/leaderboard @@ -0,0 +1 @@ +Subproject commit edaa85e81225a78ce9f468df1293df79bfc5e377 diff --git a/leaderboard_2.1/docker/leaderboard/submodules/scenario_runner b/leaderboard_2.1/docker/leaderboard/submodules/scenario_runner new file mode 160000 index 0000000..d7bcaf0 --- /dev/null +++ b/leaderboard_2.1/docker/leaderboard/submodules/scenario_runner @@ -0,0 +1 @@ +Subproject commit d7bcaf0dee05fdfd720bda45740b8c9f11ee478a diff --git a/leaderboard_2.1/docker/monitor/Dockerfile b/leaderboard_2.1/docker/monitor/Dockerfile new file mode 100644 index 0000000..83aff6c --- /dev/null +++ b/leaderboard_2.1/docker/monitor/Dockerfile @@ -0,0 +1,24 @@ +FROM python:3.7 + +WORKDIR /workspace + +RUN apt-get update \ + && apt-get install -y --no-install-recommends jq \ + && rm -rf /var/lib/apt/lists/* + +# Install aws cli v2 +RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" \ + && unzip awscliv2.zip \ + && ./aws/install \ + && rm -rf awscliv2.zip + +RUN pip3 install \ + requests \ + dictor + +COPY run_monitor.sh /workspace/run_monitor.sh +COPY evalai/generate_results.py /workspace/evalai/generate_results.py +COPY evalai/generate_stdout.py /workspace/evalai/generate_stdout.py +COPY evalai/generate_metadata.py /workspace/evalai/generate_metadata.py + +CMD ["/bin/bash"] diff --git a/leaderboard_2.1/docker/monitor/evalai/generate_metadata.py b/leaderboard_2.1/docker/monitor/evalai/generate_metadata.py new file mode 100644 index 0000000..06baae2 --- /dev/null +++ 
def main(argv=None):
    """
    Generate the EvalAI metadata, which is just a copy of the agent results.

    The file given by '--file-path' is copied to '--endpoint', creating the
    folder of the endpoint if needed. If the input file doesn't exist, a
    message is printed and nothing is written.

    :param argv: list of command line arguments. Defaults to the ones of the process
    """
    argparser = argparse.ArgumentParser()
    argparser.add_argument('-f', '--file-path', required=True, help='path to the file containing the agent results')
    argparser.add_argument('-e', '--endpoint', required=True, help='path to the file where the metadata will be saved')
    args = argparser.parse_args(argv)

    # isfile() instead of exists(), as copyfile() cannot copy a folder
    if not os.path.isfile(args.file_path):
        print(f"Couldn't generate the metadata, missing input file '{args.file_path}'")
        return

    # copyfile() doesn't create the missing folders of the destination
    endpoint_folder = os.path.dirname(args.endpoint)
    if endpoint_folder:
        os.makedirs(endpoint_folder, exist_ok=True)

    shutil.copyfile(args.file_path, args.endpoint)
infractions": 0, + "Scenario timeouts": 0, + "Min speed infractions": 0, + } + } + ] + + data = fetch_dict(args.file_path) + if '_checkpoint' in data: + global_records = data['_checkpoint']['global_record'] + if global_records: + output = [ + { + "split": "leaderboard", + "show_to_participant": True, + "accuracies": { + "Driving score": global_records['scores_mean']['score_composed'], + "Route completion": global_records['scores_mean']['score_route'], + "Infraction penalty": global_records['scores_mean']['score_penalty'], + "Collisions pedestrians": global_records['infractions']['collisions_pedestrian'], + "Collisions vehicles": global_records['infractions']['collisions_vehicle'], + "Collisions layout": global_records['infractions']['collisions_layout'], + "Red light infractions": global_records['infractions']['red_light'], + "Stop sign infractions": global_records['infractions']['stop_infraction'], + "Off-road infractions": global_records['infractions']['outside_route_lanes'], + "Route deviations": global_records['infractions']['route_dev'], + "Route timeouts": global_records['infractions']['route_timeout'], + "Agent blocked": global_records['infractions']['vehicle_blocked'], + "Yield emergency vehicle infractions": global_records['infractions']['yield_emergency_vehicle_infractions'], + "Scenario timeouts": global_records['infractions']['scenario_timeouts'], + "Min speed infractions": global_records['infractions']['min_speed_infractions'], + } + } + ] + + save_dict(args.endpoint, output) + +if __name__ == '__main__': + main() diff --git a/leaderboard_2.1/docker/monitor/evalai/generate_stdout.py b/leaderboard_2.1/docker/monitor/evalai/generate_stdout.py new file mode 100644 index 0000000..21bb2c4 --- /dev/null +++ b/leaderboard_2.1/docker/monitor/evalai/generate_stdout.py @@ -0,0 +1,123 @@ +import argparse + +from leaderboard.utils.checkpoint_tools import fetch_dict +import sys + +PRETTY_SENSORS = { + 'carla_camera': 'RGB Camera', + 'carla_lidar': 'LIDAR', + 
# (label, section, key) of the global record values shown in the summary
_GLOBAL_RECORD_FIELDS = (
    ('Driving score', 'scores_mean', 'score_composed'),
    ('Route completion', 'scores_mean', 'score_route'),
    ('Infraction penalty', 'scores_mean', 'score_penalty'),
    ('Collisions pedestrians', 'infractions', 'collisions_pedestrian'),
    ('Collisions vehicles', 'infractions', 'collisions_vehicle'),
    ('Collisions layout', 'infractions', 'collisions_layout'),
    ('Red light infractions', 'infractions', 'red_light'),
    ('Stop sign infractions', 'infractions', 'stop_infraction'),
    ('Off-road infractions', 'infractions', 'outside_route_lanes'),
    ('Route deviations', 'infractions', 'route_dev'),
    ('Route timeouts', 'infractions', 'route_timeout'),
    ('Agent blocked', 'infractions', 'vehicle_blocked'),
    ('Yield emergency vehicle infractions', 'infractions', 'yield_emergency_vehicle_infractions'),
    ('Scenario timeouts', 'infractions', 'scenario_timeouts'),
    ('Min speed infractions', 'infractions', 'min_speed_infractions'),
)


def main():
    """
    Extract some global and route records into readable format

    Reads the results from '--file-path' and writes a text summary to
    '--endpoint'. While the results lack the sensors, the progress or the
    route records, a placeholder message is written and the process exits with 0.
    """
    argparser = argparse.ArgumentParser()
    argparser.add_argument('-f', '--file-path', required=True, help='path to the file containing the agent results')
    argparser.add_argument('-e', '--endpoint', required=True, help='path to the file where the readable summary will be saved')
    args = argparser.parse_args()

    data = fetch_dict(args.file_path)
    checkpoint = data.get('_checkpoint') if data else None

    if not data or 'sensors' not in data or not checkpoint \
            or 'progress' not in checkpoint or 'records' not in checkpoint:
        pretty_output = "Initializing the submission, no data available yet.\n"
        pretty_output += "More information will be found here once the submission starts running.\n"
        with open(args.endpoint, 'w') as fd:
            fd.write(pretty_output)
        sys.exit(0)

    # The global record is not part of the check above, so don't assume it exists
    global_record = checkpoint.get('global_record')

    lines = []
    if global_record:
        lines.append("Here is a summary of the submission's results\n\n")
    else:
        lines.append("Here is a summary of the submission's current results\n\n")
    lines.append("General information:\n")

    # Sensors
    lines.append("- Sensors:\n")
    sensors = {}
    for sensor in data['sensors']:
        # Sensors missing from PRETTY_SENSORS are shown with their raw name
        pretty_sensor = PRETTY_SENSORS.get(sensor, sensor)
        sensors[pretty_sensor] = sensors.get(pretty_sensor, 0) + 1
    for sensor_type, sensor_number in sensors.items():
        lines.append(f" - {sensor_number} {sensor_type}\n")

    # Completed routes
    completed_routes, total_routes = checkpoint['progress']
    if completed_routes == total_routes:
        lines.append(f"- All {total_routes} routes have been completed\n")
    else:
        lines.append(f"- Completed {completed_routes} out of the {total_routes} routes\n")

    # Routes data
    # NOTE(review): presumably 20 is the FPS of the simulation when the ratio is 1 - confirm
    nominal_fps = 20
    total_duration_game = 0
    total_duration_system = 0
    route_records = []
    for record in checkpoint['records']:
        duration_game = record['meta']['duration_game']
        duration_system = record['meta']['duration_system']
        ratio = 0 if duration_system == 0 else duration_game / duration_system
        route_records.append({
            "route_id": record['route_id'],
            "index": record['index'],
            "status": record['status'],
            "ratio": round(ratio, ROUND_RATIO_DIGITS),
            "fps": round(nominal_fps * ratio, ROUND_RATIO_DIGITS),
        })

        total_duration_game += duration_game
        total_duration_system += duration_system

    # General duration. The FPS is computed from the already rounded ratio
    ratio = round(0.0 if total_duration_system == 0 else total_duration_game / total_duration_system, ROUND_RATIO_DIGITS)
    fps = round(nominal_fps * ratio, ROUND_RATIO_DIGITS)
    lines.append(f"- Submission ratio of {ratio}x\n")
    lines.append(f"- Submission FPS of {fps}\n")

    if global_record:
        lines.append("- Results:\n")
        for label, section, key in _GLOBAL_RECORD_FIELDS:
            lines.append(f" - {label}: {global_record[section][key]}\n")

    lines.append("\n")

    # Route data
    if route_records:
        lines.append("Glossary of each route:\n")

    for route in route_records:
        lines.append("\n")
        lines.append(f"- Index: {route['index']}\n")
        lines.append(f" - Route ID: {route['route_id']}\n")
        lines.append(f" - Status: {route['status']}\n")
        lines.append(f" - Ratio: {route['ratio']}x\n")
        lines.append(f" - FPS: {route['fps']}\n")

    with open(args.endpoint, 'w') as fd:
        fd.write("".join(lines))
+ +USAGE_STRING=$(cat <<- END +Usage: $0 [-h|--help] [-t|--target-name TARGET] + +The default target name is "monitor-20" +END +) + +usage() { echo "${DOC_STRING}"; echo "${USAGE_STRING}"; exit 1; } + +# Defaults +TARGET_NAME="monitor-20" + +while [[ $# -gt 0 ]]; do + case "$1" in + -t | --target-name ) + TARGET_NAME=$2 + shift 2 ;; + -h | --help ) + usage + ;; + * ) + shift ;; + esac +done + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# Build docker image +echo "Building monitor docker" +docker build --force-rm \ + -t ${TARGET_NAME} \ + -f ${SCRIPT_DIR}/Dockerfile ${SCRIPT_DIR} diff --git a/leaderboard_2.1/docker/monitor/run_monitor.sh b/leaderboard_2.1/docker/monitor/run_monitor.sh new file mode 100755 index 0000000..070fa75 --- /dev/null +++ b/leaderboard_2.1/docker/monitor/run_monitor.sh @@ -0,0 +1,168 @@ +#!/bin/bash + +####################### +## DEFAULT VARIABLES ## +####################### +export SCENARIO_RUNNER_ROOT="/utils/scenario_runner" +export LEADERBOARD_ROOT="/utils/leaderboard" +export PYTHONPATH="${SCENARIO_RUNNER_ROOT}":"${LEADERBOARD_ROOT}":${PYTHONPATH} + +MONITOR_LOGS="/logs/logs/monitor.log" +MONITOR_DONE_FILE="/logs/containers-status/monitor.done" +SIMULATION_CANCEL_FILE="/logs/containers-status/simulation.cancel" + +AGENT_RESULTS_FILE="/logs/agent_results.json" + +EVALAI_FOLDER="/logs/evalai" +EVALAI_RESULTS_FILE="$EVALAI_FOLDER/results.json" +EVALAI_STDOUT_FILE="$EVALAI_FOLDER/stdout.txt" +EVALAI_METADATA_FILE="$EVALAI_FOLDER/metadata.json" + +PARTIAL_FOLDER="/partial/logs" +PARTIAL_AGENT_FOLDER="$PARTIAL_FOLDER/agent" +PARTIAL_AGENT_RESULTS_FILES=$(for (( i=1; i<=$SUBMISSION_WORKERS; i++ )); do printf "$PARTIAL_AGENT_FOLDER/partial_agent_results%d.json " $i; done) + +PARTIAL_CONTAINERS_STATUS="$PARTIAL_FOLDER/containers-status" + +########### +## UTILS ## +########### +update_database() { + START_TIME=$(date +"%Y-%m-%d %T %Z") + aws dynamodb update-item \ + --table-name "$DYNAMODB_SUBMISSIONS_TABLE" \ + --key 
# Print the status of the submission as reported by the EvalAI API.
# Nothing is printed if the request fails or the answer has no status.
get_submission_status() {
    local addr="$EVALAI_API_SERVER/api/jobs/submission/$SUBMISSION_ID"
    local header="Authorization: Bearer $EVALAI_AUTH_TOKEN"
    local status

    # 'jq -r' prints the raw string, so the quotes don't have to be stripped by
    # hand (which turned a 'null' status into 'ul'). '// empty' prints nothing
    # when the status is missing.
    status=$(curl --max-time 600 --silent --location --request GET "$addr" --header "$header" | jq -r '.status // empty')
    echo "$status"
}
########################
## MONITOR PARAMETERS ##
########################
[[ -z "${MONITOR_PERIOD}" ]] && export MONITOR_PERIOD="30"
[ -f "$SIMULATION_CANCEL_FILE" ] && rm "$SIMULATION_CANCEL_FILE"

# Look for the logs of a previous run BEFORE redirecting the output. 'tee -a'
# creates the log file, so checking afterwards races with tee and cannot tell
# a fresh run from a resumed one.
FOUND_PARTIAL_LOGS=false
[ -f "$MONITOR_LOGS" ] && FOUND_PARTIAL_LOGS=true

# Save all the outputs into a file, which will be sent to s3
exec > >(tee -a "$MONITOR_LOGS") 2>&1

if [ "$FOUND_PARTIAL_LOGS" = true ]; then
    echo ""
    echo "Found partial monitor logs"
fi

update_database

while sleep "${MONITOR_PERIOD}" ; do
    echo ""
    echo "[$(date +"%Y-%m-%d %T")] Starting monitor loop"

    echo "> Pulling containers status"
    pull_from_s3_containers_status

    echo "> Checking start condition"
    # The patterns given to find are quoted so that the shell doesn't expand
    # them against the files of the current directory
    START_FILES=$(find "$PARTIAL_CONTAINERS_STATUS" -name '*.start*' | wc -l)
    if [ "$START_FILES" -gt 0 ]; then
        echo "Detected that $START_FILES out of the $SUBMISSION_WORKERS submission workers have started."
    else
        echo "Detected that no submission workers have started. Waiting..."
        continue
    fi

    echo "> Pulling partial agent results"
    pull_from_s3_partial_agent_results

    echo "> Merging statistics"
    merge_statistics

    echo "> Generating EvalAI files"
    generate_evalai_files

    echo "> Pushing to S3"
    push_to_s3

    echo "> Checking if the submission has been cancelled"
    if [[ $(get_submission_status) == "cancelled" ]] ; then
        echo "Detected that the submission has been cancelled. Stopping..."
        touch "$SIMULATION_CANCEL_FILE"
        push_to_s3
        break
    fi

    echo "> Updating partial submission status"
    update_partial_submission_status

    echo "> Checking end condition"
    DONE_FILES=$(find "$PARTIAL_CONTAINERS_STATUS" -name '*.done*' | wc -l)
    CRASH_FILES=$(find "$PARTIAL_CONTAINERS_STATUS" -name '*.crash*' | wc -l)

    # NOTE(review): presumably each finished worker leaves 3 done files - confirm
    DONE_WORKERS=$(($DONE_FILES / 3))
    CRASH_WORKERS=$(($CRASH_FILES / 4)) # 4 is the backofflimit of the submission_worker

    FINISHED_WORKERS=$(($DONE_WORKERS+$CRASH_WORKERS))

    if [ "$FINISHED_WORKERS" -ge "$SUBMISSION_WORKERS" ]; then
        echo "Detected that all containers have finished. Stopping..."
        touch "$MONITOR_DONE_FILE"
        merge_statistics
        generate_evalai_files
        push_to_s3
        break
    else
        echo "Detected that only $FINISHED_WORKERS out of the $SUBMISSION_WORKERS submission workers have finished. Waiting..."
    fi

done
+ +Experiment | Instance type +--- | --- +2 | `g4dn.12xlarge` + + +## Results + +Experiment | Ratio | Real time [s] | Game time [s] | Route completion +--- | --- | --- | --- |--- | +0 | 0.26 | - | - | - +1.1 | 0.296 | - | - | - +1.2 | 0.222 | - | - | - +2 | 0.29 | - | - | - diff --git a/leaderboard_2.1/experiments/exp0.yaml b/leaderboard_2.1/experiments/exp0.yaml new file mode 100644 index 0000000..041abcc --- /dev/null +++ b/leaderboard_2.1/experiments/exp0.yaml @@ -0,0 +1,126 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: submission +spec: + template: + spec: + serviceAccountName: submission-worker + nodeSelector: + node.kubernetes.io/instance-type: g5.4xlarge + initContainers: + - name: leaderboard-copy + image: 342236305043.dkr.ecr.us-west-2.amazonaws.com/leaderboard-20-testing:latest + command: [ "sh", "-c", "cp -r /workspace/leaderboard/* /tmp/leaderboard-master/"] + volumeMounts: + - mountPath: /tmp/leaderboard-master + name: leaderboard-master + - name: scenario-copy + image: 342236305043.dkr.ecr.us-west-2.amazonaws.com/leaderboard-20-testing:latest + command: [ "sh", "-c", "cp -r /workspace/scenario_runner/* /tmp/scenario-runner-master/"] + volumeMounts: + - mountPath: /tmp/scenario-runner-master + name: scenario-runner-master + - name: carla-root-copy + image: 342236305043.dkr.ecr.us-west-2.amazonaws.com/leaderboard-20-testing:latest + command: [ "sh", "-c", "cp -r /workspace/CARLA/* /tmp/carla-root-master/"] + volumeMounts: + - mountPath: /tmp/carla-root-master + name: carla-root-master + containers: + - name: simulator + image: 342236305043.dkr.ecr.us-west-2.amazonaws.com/leaderboard-20-simulator-testing:latest + command: ["/bin/bash", "-c"] + args: + - | + echo "Starting CARLA server" + ./CarlaUE4.sh -vulkan -RenderOffScreen -nosound -gpucrashdebugging + echo "Stopped CARLA server" + sleep 20000 + exit 0 + env: + - name: DISPLAY + value: ":0.0" + - name: OMP_PROC_BIND + value: "FALSE" + - name: OMP_NUM_THREADS + value: "64" + - name: 
SDL_VIDEODRIVER + value: "x11" + volumeMounts: + - mountPath: /tmp/.X11-unix + name: x11 + securityContext: + privileged: true + resources: + limits: + nvidia.com/gpu: 1 + - name: agent + image: 342236305043.dkr.ecr.us-west-2.amazonaws.com/bm-586a194d-team-177:54a1db1b-d340-4de8-a25b-67aa756db0c6 + command: ["/bin/bash", "-c"] + args: + - | + echo "Sleeping a bit to ensure CARLA is ready" + sleep 60 + echo "Starting agent" + bash /workspace/leaderboard/scripts/run_evaluation.sh + echo "Finished agent" + sleep 86400 + exit 0 + volumeMounts: + - mountPath: /workspace/leaderboard + name: leaderboard-master + - mountPath: /workspace/scenario_runner + name: scenario-runner-master + - mountPath: /workspace/CARLA + name: carla-root-master + env: + - name: OMP_PROC_BIND + value: "FALSE" + - name: NVIDIA_DRIVER_CAPABILITIES + value: "all" + - name: OMP_NUM_THREADS + value: "64" + - name: HTTP_PROXY + value: "" + - name: HTTPS_PROXY + value: "" + - name: LEADERBOARD_ROOT + value: "/workspace/leaderboard" + - name: CARLA_ROOT + value: "/workspace/CARLA" + - name: SCENARIO_RUNNER_ROOT + value: "/workspace/scenario_runner" + - name: ROUTES + value: "/workspace/leaderboard/data/routes_devtest.xml" + - name: ROUTES_SUBSET + value: "0" + - name: REPETITIONS + value: "1" + - name: CHALLENGE_TRACK_CODENAME + value: "SENSORS" + - name: CHECKPOINT_ENDPOINT + value: "/workspace/leaderboard/results.json" + - name: DEBUG_CHECKPOINT_ENDPOINT + value: "/workspace/leaderboard/live_results.txt" + - name: DEBUG_CHALLENGE + value: "0" + - name: RECORD_PATH + value: "/home/carla" + - name: RESUME + value: "" + # resources: + # limits: + # nvidia.com/gpu: 1 + restartPolicy: Never + volumes: + - name: leaderboard-master + emptyDir: {} + - name: scenario-runner-master + emptyDir: {} + - name: carla-root-master + emptyDir: {} + - name: x11 + hostPath: + path: /tmp/.X11-unix + backoffLimit: 0 diff --git a/leaderboard_2.1/experiments/exp1.yaml b/leaderboard_2.1/experiments/exp1.yaml new file mode 
100644 index 0000000..56e0d30 --- /dev/null +++ b/leaderboard_2.1/experiments/exp1.yaml @@ -0,0 +1,269 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: submission-parallel +spec: + template: + spec: + serviceAccountName: submission-worker + nodeSelector: + node.kubernetes.io/instance-type: g5.12xlarge + initContainers: + - name: leaderboard-copy + image: 342236305043.dkr.ecr.us-west-2.amazonaws.com/leaderboard-20-testing:latest + command: [ "sh", "-c", "cp -r /workspace/leaderboard/* /tmp/leaderboard-master/"] + volumeMounts: + - mountPath: /tmp/leaderboard-master + name: leaderboard-master + - name: scenario-copy + image: 342236305043.dkr.ecr.us-west-2.amazonaws.com/leaderboard-20-testing:latest + command: [ "sh", "-c", "cp -r /workspace/scenario_runner/* /tmp/scenario-runner-master/"] + volumeMounts: + - mountPath: /tmp/scenario-runner-master + name: scenario-runner-master + - name: carla-root-copy + image: 342236305043.dkr.ecr.us-west-2.amazonaws.com/leaderboard-20-testing:latest + command: [ "sh", "-c", "cp -r /workspace/CARLA/* /tmp/carla-root-master/"] + volumeMounts: + - mountPath: /tmp/carla-root-master + name: carla-root-master + containers: + # First set of simulator + agent + - name: simulator + image: 342236305043.dkr.ecr.us-west-2.amazonaws.com/leaderboard-20-simulator-testing:latest + command: ["/bin/bash", "-c"] + args: + - | + echo "Starting CARLA server" + ./CarlaUE4.sh -vulkan -carla-rpc-port=2000 -RenderOffScreen -nosound -ini:[/Script/Engine.RendererSettings]:r.GraphicsAdapter=0 + echo "Stopped CARLA server" + sleep 86400 + exit 0 + env: + - name: DISPLAY + value: ":0" + - name: OMP_PROC_BIND + value: "FALSE" + - name: OMP_NUM_THREADS + value: "64" + - name: SDL_VIDEODRIVER + value: "x11" + - name: NVIDIA_DRIVER_CAPABILITIES + value: "all" + - name: NVIDIA_VISIBLE_DEVICES + value: "0" + volumeMounts: + - mountPath: /tmp/.X11-unix + name: x11 + securityContext: + privileged: true + resources: + limits: + nvidia.com/gpu: 1 + #cpu: 12.0 # 
1000m is equal to 1 VCPU core + #cpu: "12000m" # 1000m is equal to 1 VCPU core + - name: agent + image: 342236305043.dkr.ecr.us-west-2.amazonaws.com/bm-586a194d-team-177:54a1db1b-d340-4de8-a25b-67aa756db0c6 + command: ["/bin/bash", "-c"] + args: + - | + echo "Sleeping a bit to ensure CARLA is ready" + sleep 30 + echo "Starting agent" + + echo "python3 ${LEADERBOARD_ROOT}/leaderboard/leaderboard_evaluator.py \ + --routes=${ROUTES} \ + --routes-subset=${ROUTES_SUBSET} \ + --repetitions=${REPETITIONS} \ + --track=${CHALLENGE_TRACK_CODENAME} \ + --checkpoint=${CHECKPOINT_ENDPOINT} \ + --debug-checkpoint=${DEBUG_CHECKPOINT_ENDPOINT} \ + --agent=${TEAM_AGENT} \ + --agent-config=${TEAM_CONFIG} \ + --debug=${DEBUG_CHALLENGE} \ + --record=${RECORD_PATH} \ + --resume=${RESUME} \ + --port=${CARLA_PORT}" \ + --traffic-manager-port=${CARLA_TM_PORT}> /workspace/leaderboard/scripts/run_evaluation.sh + + bash /workspace/leaderboard/scripts/run_evaluation.sh + echo "Finished agent" + sleep 86400 + exit 0 + volumeMounts: + - mountPath: /workspace/leaderboard + name: leaderboard-master + - mountPath: /workspace/scenario_runner + name: scenario-runner-master + - mountPath: /workspace/CARLA + name: carla-root-master + env: + - name: CARLA_PORT + value: "2000" + - name: CARLA_TM_PORT + value: "8000" + - name: OMP_PROC_BIND + value: "FALSE" + - name: NVIDIA_DRIVER_CAPABILITIES + value: "all" + - name: NVIDIA_VISIBLE_DEVICES + value: "2" + - name: OMP_NUM_THREADS + value: "64" + - name: HTTP_PROXY + value: "" + - name: HTTPS_PROXY + value: "" + - name: LEADERBOARD_ROOT + value: "/workspace/leaderboard" + - name: CARLA_ROOT + value: "/workspace/CARLA" + - name: SCENARIO_RUNNER_ROOT + value: "/workspace/scenario_runner" + - name: ROUTES + value: "/workspace/leaderboard/data/routes_devtest.xml" + - name: ROUTES_SUBSET + value: "0" + - name: REPETITIONS + value: "1" + - name: CHALLENGE_TRACK_CODENAME + value: "SENSORS" + - name: CHECKPOINT_ENDPOINT + value: 
"/workspace/leaderboard/results.json" + - name: DEBUG_CHECKPOINT_ENDPOINT + value: "/workspace/leaderboard/live_results.txt" + - name: DEBUG_CHALLENGE + value: "0" + - name: RECORD_PATH + value: "/home/carla" + - name: RESUME + value: "" + resources: + limits: + nvidia.com/gpu: 1 + #cpu: 12.0 # 1000m is equal to 1 VCPU core + # Second set of simulator + agent + - name: simulator-2 + image: 342236305043.dkr.ecr.us-west-2.amazonaws.com/leaderboard-20-simulator-testing:latest + command: ["/bin/bash", "-c"] + args: + - | + echo "Starting CARLA server" + ./CarlaUE4.sh -vulkan -carla-rpc-port=3000 -RenderOffScreen -nosound -ini:[/Script/Engine.RendererSettings]:r.GraphicsAdapter=1 + echo "Stopped CARLA server" + sleep 86400 + exit 0 + env: + - name: DISPLAY + value: ":0" + - name: OMP_PROC_BIND + value: "FALSE" + - name: OMP_NUM_THREADS + value: "64" + - name: SDL_VIDEODRIVER + value: "x11" + - name: NVIDIA_DRIVER_CAPABILITIES + value: "all" + - name: NVIDIA_VISIBLE_DEVICES + value: "1" + volumeMounts: + - mountPath: /tmp/.X11-unix + name: x11 + securityContext: + privileged: true + resources: + limits: + nvidia.com/gpu: 1 + #cpu: 12.0 # 1000m is equal to 1 VCPU core + - name: agent-2 + image: 342236305043.dkr.ecr.us-west-2.amazonaws.com/bm-586a194d-team-177:54a1db1b-d340-4de8-a25b-67aa756db0c6 + command: ["/bin/bash", "-c"] + args: + - | + echo "Sleeping a bit to ensure CARLA is ready" + sleep 30 + echo "Starting agent" + + echo "python3 ${LEADERBOARD_ROOT}/leaderboard/leaderboard_evaluator.py \ + --routes=${ROUTES} \ + --routes-subset=${ROUTES_SUBSET} \ + --repetitions=${REPETITIONS} \ + --track=${CHALLENGE_TRACK_CODENAME} \ + --checkpoint=${CHECKPOINT_ENDPOINT} \ + --debug-checkpoint=${DEBUG_CHECKPOINT_ENDPOINT} \ + --agent=${TEAM_AGENT} \ + --agent-config=${TEAM_CONFIG} \ + --debug=${DEBUG_CHALLENGE} \ + --record=${RECORD_PATH} \ + --resume=${RESUME} \ + --port=${CARLA_PORT}" \ + --traffic-manager-port=${CARLA_TM_PORT} > 
/workspace/leaderboard/scripts/run_evaluation.sh + + bash /workspace/leaderboard/scripts/run_evaluation.sh + echo "Finished agent" + sleep 86400 + exit 0 + volumeMounts: + - mountPath: /workspace/leaderboard + name: leaderboard-master + - mountPath: /workspace/scenario_runner + name: scenario-runner-master + - mountPath: /workspace/CARLA + name: carla-root-master + env: + - name: CARLA_PORT + value: "3000" + - name: CARLA_TM_PORT + value: "9000" + - name: OMP_PROC_BIND + value: "FALSE" + - name: NVIDIA_DRIVER_CAPABILITIES + value: "all" + - name: NVIDIA_VISIBLE_DEVICES + value: "3" + - name: OMP_NUM_THREADS + value: "64" + - name: HTTP_PROXY + value: "" + - name: HTTPS_PROXY + value: "" + - name: LEADERBOARD_ROOT + value: "/workspace/leaderboard" + - name: CARLA_ROOT + value: "/workspace/CARLA" + - name: SCENARIO_RUNNER_ROOT + value: "/workspace/scenario_runner" + - name: ROUTES + value: "/workspace/leaderboard/data/routes_devtest.xml" + - name: ROUTES_SUBSET + value: "0" + - name: REPETITIONS + value: "1" + - name: CHALLENGE_TRACK_CODENAME + value: "SENSORS" + - name: CHECKPOINT_ENDPOINT + value: "/workspace/leaderboard/results.json" + - name: DEBUG_CHECKPOINT_ENDPOINT + value: "/workspace/leaderboard/live_results.txt" + - name: DEBUG_CHALLENGE + value: "0" + - name: RECORD_PATH + value: "/home/carla" + - name: RESUME + value: "" + resources: + limits: + nvidia.com/gpu: 1 + #cpu: 11 # 1000m is equal to 1 VCPU core + restartPolicy: Never + volumes: + - name: leaderboard-master + emptyDir: {} + - name: scenario-runner-master + emptyDir: {} + - name: carla-root-master + emptyDir: {} + - name: x11 + hostPath: + path: /tmp/.X11-unix + backoffLimit: 0 diff --git a/leaderboard_2.1/experiments/exp2-agent.yaml b/leaderboard_2.1/experiments/exp2-agent.yaml new file mode 100644 index 0000000..719f775 --- /dev/null +++ b/leaderboard_2.1/experiments/exp2-agent.yaml @@ -0,0 +1,119 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: agent-a92f57-exp2 + labels: + app: carla + 
session: a92f57-exp2 +spec: + template: + spec: + nodeSelector: + node.kubernetes.io/instance-type: g5.4xlarge + initContainers: + - name: leaderboard-copy + image: 342236305043.dkr.ecr.us-west-2.amazonaws.com/leaderboard-20-testing:latest + command: [ "sh", "-c", "cp -r /workspace/leaderboard/* /tmp/leaderboard-master/"] + volumeMounts: + - mountPath: /tmp/leaderboard-master + name: leaderboard-master + - name: scenario-copy + image: 342236305043.dkr.ecr.us-west-2.amazonaws.com/leaderboard-20-testing:latest + command: [ "sh", "-c", "cp -r /workspace/scenario_runner/* /tmp/scenario-runner-master/"] + volumeMounts: + - mountPath: /tmp/scenario-runner-master + name: scenario-runner-master + - name: carla-root-copy + image: 342236305043.dkr.ecr.us-west-2.amazonaws.com/leaderboard-20-testing:latest + command: [ "sh", "-c", "cp -r /workspace/CARLA/* /tmp/carla-root-master/"] + volumeMounts: + - mountPath: /tmp/carla-root-master + name: carla-root-master + containers: + - name: agent + image: 342236305043.dkr.ecr.us-west-2.amazonaws.com/bm-586a194d-team-177:54a1db1b-d340-4de8-a25b-67aa756db0c6 + command: ["/bin/bash", "-c"] + args: + - | + echo "Sleeping a bit to ensure CARLA is ready" + sleep 1800 + echo "Starting agent" + + echo "python3 ${LEADERBOARD_ROOT}/leaderboard/leaderboard_evaluator.py \ + --routes=${ROUTES} \ + --routes-subset=${ROUTES_SUBSET} \ + --repetitions=${REPETITIONS} \ + --track=${CHALLENGE_TRACK_CODENAME} \ + --checkpoint=${CHECKPOINT_ENDPOINT} \ + --debug-checkpoint=${DEBUG_CHECKPOINT_ENDPOINT} \ + --agent=${TEAM_AGENT} \ + --agent-config=${TEAM_CONFIG} \ + --debug=${DEBUG_CHALLENGE} \ + --record=${RECORD_PATH} \ + --resume=${RESUME} \ + --host=${CARLA_AGENT_HOST} \ + --port=${CARLA_AGENT_PORT}" > /workspace/leaderboard/scripts/run_evaluation.sh + + bash /workspace/leaderboard/scripts/run_evaluation.sh + + echo "Finished agent" + sleep 86400 + exit 0 + volumeMounts: + - mountPath: /workspace/leaderboard + name: leaderboard-master + - mountPath: 
/workspace/scenario_runner + name: scenario-runner-master + - mountPath: /workspace/CARLA + name: carla-root-master + env: + - name: CARLA_AGENT_HOST + value: "simulator-a92f57-exp2" # TODO Requires automation + - name: CARLA_AGENT_PORT + value: "2000" + - name: OMP_PROC_BIND + value: "FALSE" + - name: NVIDIA_DRIVER_CAPABILITIES + value: "all" + - name: OMP_NUM_THREADS + value: "64" + - name: HTTP_PROXY + value: "" + - name: HTTPS_PROXY + value: "" + - name: LEADERBOARD_ROOT + value: "/workspace/leaderboard" + - name: CARLA_ROOT + value: "/workspace/CARLA" + - name: SCENARIO_RUNNER_ROOT + value: "/workspace/scenario_runner" + - name: ROUTES + value: "/workspace/leaderboard/data/routes_devtest.xml" + - name: ROUTES_SUBSET + value: "0" + - name: REPETITIONS + value: "1" + - name: CHALLENGE_TRACK_CODENAME + value: "SENSORS" + - name: CHECKPOINT_ENDPOINT + value: "/workspace/leaderboard/results.json" + - name: DEBUG_CHECKPOINT_ENDPOINT + value: "/workspace/leaderboard/live_results.txt" + - name: DEBUG_CHALLENGE + value: "0" + - name: RECORD_PATH + value: "/home/carla" + - name: RESUME + value: "" + resources: + limits: + nvidia.com/gpu: "1" + restartPolicy: Never + volumes: + - name: leaderboard-master + emptyDir: {} + - name: scenario-runner-master + emptyDir: {} + - name: carla-root-master + emptyDir: {} + backoffLimit: 0 diff --git a/leaderboard_2.1/experiments/exp2-simulator.yaml b/leaderboard_2.1/experiments/exp2-simulator.yaml new file mode 100644 index 0000000..c38eabc --- /dev/null +++ b/leaderboard_2.1/experiments/exp2-simulator.yaml @@ -0,0 +1,61 @@ +apiVersion: v1 +kind: Pod +metadata: + name: simulator-a92f57-exp2 + labels: + app: carla + session: a92f57-exp2 +spec: + nodeSelector: + node.kubernetes.io/instance-type: g5.4xlarge + containers: + - name: simulator + image: 342236305043.dkr.ecr.us-west-2.amazonaws.com/leaderboard-20-simulator-testing:latest + command: ["/bin/bash", "-c"] + args: + - | + echo "Starting CARLA server" + ./CarlaUE4.sh -vulkan 
-RenderOffScreen -nosound + echo "Stopped CARLA server" + env: + - name: DISPLAY + value: ":0.0" + - name: OMP_PROC_BIND + value: "FALSE" + - name: OMP_NUM_THREADS + value: "64" + - name: SDL_VIDEODRIVER + value: "x11" + volumeMounts: + - mountPath: /tmp/.X11-unix + name: x11 + resources: + limits: + nvidia.com/gpu: "1" + securityContext: + privileged: true + volumes: + - name: x11 + hostPath: + path: /tmp/.X11-unix +--- +apiVersion: v1 +kind: Service +metadata: + name: simulator-a92f57-exp2 + labels: + app: carla + session: a92f57-exp2 +spec: + selector: + app: carla + session: a92f57-exp2 + ports: + - name: up + protocol: TCP + port: 2000 + targetPort: 2000 + - name: down + protocol: TCP + port: 2001 + targetPort: 2001 diff --git a/leaderboard_2.1/experiments/exp3.yaml b/leaderboard_2.1/experiments/exp3.yaml new file mode 100644 index 0000000..53f45f5 --- /dev/null +++ b/leaderboard_2.1/experiments/exp3.yaml @@ -0,0 +1,495 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: submission-parallel +spec: + template: + spec: + serviceAccountName: submission-worker + nodeSelector: + node.kubernetes.io/instance-type: g5.12xlarge + initContainers: + - name: leaderboard-copy + image: 342236305043.dkr.ecr.us-west-2.amazonaws.com/leaderboard-20-development:latest + command: [ "sh", "-c", "cp -r /workspace/leaderboard/* /tmp/leaderboard-master/"] + volumeMounts: + - mountPath: /tmp/leaderboard-master + name: leaderboard-master + - name: scenario-copy + image: 342236305043.dkr.ecr.us-west-2.amazonaws.com/leaderboard-20-development:latest + command: [ "sh", "-c", "cp -r /workspace/scenario_runner/* /tmp/scenario-runner-master/"] + volumeMounts: + - mountPath: /tmp/scenario-runner-master + name: scenario-runner-master + - name: carla-root-copy + image: 342236305043.dkr.ecr.us-west-2.amazonaws.com/leaderboard-20-development:latest + command: [ "sh", "-c", "cp -r /workspace/CARLA/* /tmp/carla-root-master/"] + volumeMounts: + - mountPath: /tmp/carla-root-master + name: 
carla-root-master + containers: + # First set of simulator + agent + - name: simulator-1 + image: 342236305043.dkr.ecr.us-west-2.amazonaws.com/leaderboard-20-simulator-development:latest + command: ["/bin/bash", "-c"] + args: + - | + echo "Starting CARLA server" + ./CarlaUE4.sh -vulkan -carla-rpc-port=2000 -RenderOffScreen -nosound -ini:[/Script/Engine.RendererSettings]:r.GraphicsAdapter=0 + echo "Stopped CARLA server" + sleep 86400 + exit 0 + env: + - name: DISPLAY + value: ":0" + - name: OMP_PROC_BIND + value: "FALSE" + - name: OMP_NUM_THREADS + value: "64" + - name: SDL_VIDEODRIVER + value: "x11" + - name: NVIDIA_DRIVER_CAPABILITIES + value: "all" + - name: NVIDIA_VISIBLE_DEVICES + value: "0" + volumeMounts: + - mountPath: /tmp/.X11-unix + name: x11 + securityContext: + privileged: true + resources: + limits: + nvidia.com/gpu: 1 + #cpu: 12.0 # 1000m is equal to 1 VCPU core + #cpu: "12000m" # 1000m is equal to 1 VCPU core + - name: agent-1 + image: 342236305043.dkr.ecr.us-west-2.amazonaws.com/bm-586a194d-team-177:54a1db1b-d340-4de8-a25b-67aa756db0c6 + command: ["/bin/bash", "-c"] + args: + - | + echo "Sleeping a bit to ensure CARLA is ready" + sleep 30 + echo "Starting agent" + + echo "python3 ${LEADERBOARD_ROOT}/leaderboard/leaderboard_evaluator.py \ + --routes=${ROUTES} \ + --routes-subset=${ROUTES_SUBSET} \ + --repetitions=${REPETITIONS} \ + --track=${CHALLENGE_TRACK_CODENAME} \ + --checkpoint=${CHECKPOINT_ENDPOINT} \ + --debug-checkpoint=${DEBUG_CHECKPOINT_ENDPOINT} \ + --agent=${TEAM_AGENT} \ + --agent-config=${TEAM_CONFIG} \ + --debug=${DEBUG_CHALLENGE} \ + --record=${RECORD_PATH} \ + --resume=${RESUME} \ + --port=${CARLA_PORT}" \ + --traffic-manager-port=${CARLA_TM_PORT}> /workspace/leaderboard/scripts/run_evaluation.sh + + bash /workspace/leaderboard/scripts/run_evaluation.sh + echo "Finished agent" + sleep 86400 + exit 0 + volumeMounts: + - mountPath: /workspace/leaderboard + name: leaderboard-master + - mountPath: /workspace/scenario_runner + name: 
scenario-runner-master + - mountPath: /workspace/CARLA + name: carla-root-master + env: + - name: CARLA_PORT + value: "2000" + - name: CARLA_TM_PORT + value: "8000" + - name: OMP_PROC_BIND + value: "FALSE" + - name: NVIDIA_DRIVER_CAPABILITIES + value: "all" + - name: NVIDIA_VISIBLE_DEVICES + value: "0" + - name: OMP_NUM_THREADS + value: "64" + - name: HTTP_PROXY + value: "" + - name: HTTPS_PROXY + value: "" + - name: LEADERBOARD_ROOT + value: "/workspace/leaderboard" + - name: CARLA_ROOT + value: "/workspace/CARLA" + - name: SCENARIO_RUNNER_ROOT + value: "/workspace/scenario_runner" + - name: ROUTES + value: "/workspace/leaderboard/data/routes_devtest.xml" + - name: ROUTES_SUBSET + value: "0" + - name: REPETITIONS + value: "1" + - name: CHALLENGE_TRACK_CODENAME + value: "SENSORS" + - name: CHECKPOINT_ENDPOINT + value: "/workspace/leaderboard/results.json" + - name: DEBUG_CHECKPOINT_ENDPOINT + value: "/workspace/leaderboard/live_results.txt" + - name: DEBUG_CHALLENGE + value: "0" + - name: RECORD_PATH + value: "/home/carla" + - name: RESUME + value: "" + # resources: + # limits: + # nvidia.com/gpu: 1 + #cpu: 12.0 # 1000m is equal to 1 VCPU core + # Second set of simulator + agent + - name: simulator-2 + image: 342236305043.dkr.ecr.us-west-2.amazonaws.com/leaderboard-20-simulator-development:latest + command: ["/bin/bash", "-c"] + args: + - | + echo "Starting CARLA server" + ./CarlaUE4.sh -vulkan -carla-rpc-port=3000 -RenderOffScreen -nosound -ini:[/Script/Engine.RendererSettings]:r.GraphicsAdapter=1 + echo "Stopped CARLA server" + sleep 86400 + exit 0 + env: + - name: DISPLAY + value: ":0" + - name: OMP_PROC_BIND + value: "FALSE" + - name: OMP_NUM_THREADS + value: "64" + - name: SDL_VIDEODRIVER + value: "x11" + - name: NVIDIA_DRIVER_CAPABILITIES + value: "all" + - name: NVIDIA_VISIBLE_DEVICES + value: "1" + volumeMounts: + - mountPath: /tmp/.X11-unix + name: x11 + securityContext: + privileged: true + resources: + limits: + nvidia.com/gpu: 1 + #cpu: 12.0 # 1000m is 
equal to 1 VCPU core + - name: agent-2 + image: 342236305043.dkr.ecr.us-west-2.amazonaws.com/bm-586a194d-team-177:54a1db1b-d340-4de8-a25b-67aa756db0c6 + command: ["/bin/bash", "-c"] + args: + - | + echo "Sleeping a bit to ensure CARLA is ready" + sleep 30 + echo "Starting agent" + + echo "python3 ${LEADERBOARD_ROOT}/leaderboard/leaderboard_evaluator.py \ + --routes=${ROUTES} \ + --routes-subset=${ROUTES_SUBSET} \ + --repetitions=${REPETITIONS} \ + --track=${CHALLENGE_TRACK_CODENAME} \ + --checkpoint=${CHECKPOINT_ENDPOINT} \ + --debug-checkpoint=${DEBUG_CHECKPOINT_ENDPOINT} \ + --agent=${TEAM_AGENT} \ + --agent-config=${TEAM_CONFIG} \ + --debug=${DEBUG_CHALLENGE} \ + --record=${RECORD_PATH} \ + --resume=${RESUME} \ + --port=${CARLA_PORT}" \ + --traffic-manager-port=${CARLA_TM_PORT} > /workspace/leaderboard/scripts/run_evaluation.sh + + bash /workspace/leaderboard/scripts/run_evaluation.sh + echo "Finished agent" + sleep 86400 + exit 0 + volumeMounts: + - mountPath: /workspace/leaderboard + name: leaderboard-master + - mountPath: /workspace/scenario_runner + name: scenario-runner-master + - mountPath: /workspace/CARLA + name: carla-root-master + env: + - name: CARLA_PORT + value: "3000" + - name: CARLA_TM_PORT + value: "9000" + - name: OMP_PROC_BIND + value: "FALSE" + - name: NVIDIA_DRIVER_CAPABILITIES + value: "all" + - name: NVIDIA_VISIBLE_DEVICES + value: "1" + - name: OMP_NUM_THREADS + value: "64" + - name: HTTP_PROXY + value: "" + - name: HTTPS_PROXY + value: "" + - name: LEADERBOARD_ROOT + value: "/workspace/leaderboard" + - name: CARLA_ROOT + value: "/workspace/CARLA" + - name: SCENARIO_RUNNER_ROOT + value: "/workspace/scenario_runner" + - name: ROUTES + value: "/workspace/leaderboard/data/routes_devtest.xml" + - name: ROUTES_SUBSET + value: "0" + - name: REPETITIONS + value: "1" + - name: CHALLENGE_TRACK_CODENAME + value: "SENSORS" + - name: CHECKPOINT_ENDPOINT + value: "/workspace/leaderboard/results.json" + - name: DEBUG_CHECKPOINT_ENDPOINT + value: 
"/workspace/leaderboard/live_results.txt" + - name: DEBUG_CHALLENGE + value: "0" + - name: RECORD_PATH + value: "/home/carla" + - name: RESUME + value: "" + # resources: + # limits: + # nvidia.com/gpu: 1 + #cpu: 11 # 1000m is equal to 1 VCPU core + # Third set of simulator + agent + - name: simulator-3 + image: 342236305043.dkr.ecr.us-west-2.amazonaws.com/leaderboard-20-simulator-development:latest + command: ["/bin/bash", "-c"] + args: + - | + echo "Starting CARLA server" + ./CarlaUE4.sh -vulkan -carla-rpc-port=4000 -RenderOffScreen -nosound -ini:[/Script/Engine.RendererSettings]:r.GraphicsAdapter=2 + echo "Stopped CARLA server" + sleep 86400 + exit 0 + env: + - name: DISPLAY + value: ":0" + - name: OMP_PROC_BIND + value: "FALSE" + - name: OMP_NUM_THREADS + value: "64" + - name: SDL_VIDEODRIVER + value: "x11" + - name: NVIDIA_DRIVER_CAPABILITIES + value: "all" + - name: NVIDIA_VISIBLE_DEVICES + value: "2" + volumeMounts: + - mountPath: /tmp/.X11-unix + name: x11 + securityContext: + privileged: true + resources: + limits: + nvidia.com/gpu: 1 + #cpu: 12.0 # 1000m is equal to 1 VCPU core + - name: agent-3 + image: 342236305043.dkr.ecr.us-west-2.amazonaws.com/bm-586a194d-team-177:54a1db1b-d340-4de8-a25b-67aa756db0c6 + command: ["/bin/bash", "-c"] + args: + - | + echo "Sleeping a bit to ensure CARLA is ready" + sleep 30 + echo "Starting agent" + + echo "python3 ${LEADERBOARD_ROOT}/leaderboard/leaderboard_evaluator.py \ + --routes=${ROUTES} \ + --routes-subset=${ROUTES_SUBSET} \ + --repetitions=${REPETITIONS} \ + --track=${CHALLENGE_TRACK_CODENAME} \ + --checkpoint=${CHECKPOINT_ENDPOINT} \ + --debug-checkpoint=${DEBUG_CHECKPOINT_ENDPOINT} \ + --agent=${TEAM_AGENT} \ + --agent-config=${TEAM_CONFIG} \ + --debug=${DEBUG_CHALLENGE} \ + --record=${RECORD_PATH} \ + --resume=${RESUME} \ + --port=${CARLA_PORT}" \ + --traffic-manager-port=${CARLA_TM_PORT} > /workspace/leaderboard/scripts/run_evaluation.sh + + bash /workspace/leaderboard/scripts/run_evaluation.sh + echo 
"Finished agent" + sleep 86400 + exit 0 + volumeMounts: + - mountPath: /workspace/leaderboard + name: leaderboard-master + - mountPath: /workspace/scenario_runner + name: scenario-runner-master + - mountPath: /workspace/CARLA + name: carla-root-master + env: + - name: CARLA_PORT + value: "4000" + - name: CARLA_TM_PORT + value: "10000" + - name: OMP_PROC_BIND + value: "FALSE" + - name: NVIDIA_DRIVER_CAPABILITIES + value: "all" + - name: NVIDIA_VISIBLE_DEVICES + value: "2" + - name: OMP_NUM_THREADS + value: "64" + - name: HTTP_PROXY + value: "" + - name: HTTPS_PROXY + value: "" + - name: LEADERBOARD_ROOT + value: "/workspace/leaderboard" + - name: CARLA_ROOT + value: "/workspace/CARLA" + - name: SCENARIO_RUNNER_ROOT + value: "/workspace/scenario_runner" + - name: ROUTES + value: "/workspace/leaderboard/data/routes_devtest.xml" + - name: ROUTES_SUBSET + value: "0" + - name: REPETITIONS + value: "1" + - name: CHALLENGE_TRACK_CODENAME + value: "SENSORS" + - name: CHECKPOINT_ENDPOINT + value: "/workspace/leaderboard/results.json" + - name: DEBUG_CHECKPOINT_ENDPOINT + value: "/workspace/leaderboard/live_results.txt" + - name: DEBUG_CHALLENGE + value: "0" + - name: RECORD_PATH + value: "/home/carla" + - name: RESUME + value: "" + # resources: + # limits: + # nvidia.com/gpu: 1 + #cpu: 11 # 1000m is equal to 1 VCPU core + # Fourth set of simulator + agent + - name: simulator-4 + image: 342236305043.dkr.ecr.us-west-2.amazonaws.com/leaderboard-20-simulator-development:latest + command: ["/bin/bash", "-c"] + args: + - | + echo "Starting CARLA server" + ./CarlaUE4.sh -vulkan -carla-rpc-port=5000 -RenderOffScreen -nosound -ini:[/Script/Engine.RendererSettings]:r.GraphicsAdapter=3 + echo "Stopped CARLA server" + sleep 86400 + exit 0 + env: + - name: DISPLAY + value: ":0" + - name: OMP_PROC_BIND + value: "FALSE" + - name: OMP_NUM_THREADS + value: "64" + - name: SDL_VIDEODRIVER + value: "x11" + - name: NVIDIA_DRIVER_CAPABILITIES + value: "all" + - name: NVIDIA_VISIBLE_DEVICES + value:
"3" + volumeMounts: + - mountPath: /tmp/.X11-unix + name: x11 + securityContext: + privileged: true + resources: + limits: + nvidia.com/gpu: 1 + #cpu: 12.0 # 1000m is equal to 1 VCPU core + - name: agent-4 + image: 342236305043.dkr.ecr.us-west-2.amazonaws.com/bm-586a194d-team-177:54a1db1b-d340-4de8-a25b-67aa756db0c6 + command: ["/bin/bash", "-c"] + args: + - | + echo "Sleeping a bit to ensure CARLA is ready" + sleep 30 + echo "Starting agent" + + echo "python3 ${LEADERBOARD_ROOT}/leaderboard/leaderboard_evaluator.py \ + --routes=${ROUTES} \ + --routes-subset=${ROUTES_SUBSET} \ + --repetitions=${REPETITIONS} \ + --track=${CHALLENGE_TRACK_CODENAME} \ + --checkpoint=${CHECKPOINT_ENDPOINT} \ + --debug-checkpoint=${DEBUG_CHECKPOINT_ENDPOINT} \ + --agent=${TEAM_AGENT} \ + --agent-config=${TEAM_CONFIG} \ + --debug=${DEBUG_CHALLENGE} \ + --record=${RECORD_PATH} \ + --resume=${RESUME} \ + --port=${CARLA_PORT}" \ + --traffic-manager-port=${CARLA_TM_PORT} > /workspace/leaderboard/scripts/run_evaluation.sh + + bash /workspace/leaderboard/scripts/run_evaluation.sh + echo "Finished agent" + sleep 86400 + exit 0 + volumeMounts: + - mountPath: /workspace/leaderboard + name: leaderboard-master + - mountPath: /workspace/scenario_runner + name: scenario-runner-master + - mountPath: /workspace/CARLA + name: carla-root-master + env: + - name: CARLA_PORT + value: "5000" + - name: CARLA_TM_PORT + value: "11000" + - name: OMP_PROC_BIND + value: "FALSE" + - name: NVIDIA_DRIVER_CAPABILITIES + value: "all" + - name: NVIDIA_VISIBLE_DEVICES + value: "3" + - name: OMP_NUM_THREADS + value: "64" + - name: HTTP_PROXY + value: "" + - name: HTTPS_PROXY + value: "" + - name: LEADERBOARD_ROOT + value: "/workspace/leaderboard" + - name: CARLA_ROOT + value: "/workspace/CARLA" + - name: SCENARIO_RUNNER_ROOT + value: "/workspace/scenario_runner" + - name: ROUTES + value: "/workspace/leaderboard/data/routes_devtest.xml" + - name: ROUTES_SUBSET + value: "0" + - name: REPETITIONS + value: "1" + - name: 
CHALLENGE_TRACK_CODENAME + value: "SENSORS" + - name: CHECKPOINT_ENDPOINT + value: "/workspace/leaderboard/results.json" + - name: DEBUG_CHECKPOINT_ENDPOINT + value: "/workspace/leaderboard/live_results.txt" + - name: DEBUG_CHALLENGE + value: "0" + - name: RECORD_PATH + value: "/home/carla" + - name: RESUME + value: "" + # resources: + # limits: + # nvidia.com/gpu: 1 + #cpu: 11 # 1000m is equal to 1 VCPU core + restartPolicy: Never + volumes: + - name: leaderboard-master + emptyDir: {} + - name: scenario-runner-master + emptyDir: {} + - name: carla-root-master + emptyDir: {} + - name: x11 + hostPath: + path: /tmp/.X11-unix + backoffLimit: 0 diff --git a/leaderboard_2.1/jobs/monitor.yaml b/leaderboard_2.1/jobs/monitor.yaml new file mode 100644 index 0000000..28d7975 --- /dev/null +++ b/leaderboard_2.1/jobs/monitor.yaml @@ -0,0 +1,94 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name.$: $.submission.name +spec: + backoffLimit: 4 + template: + spec: + serviceAccountName: submission-worker + nodeSelector: + node.kubernetes.io/instance-type.$: $.cluster.instance_type + + initContainers: + - name: private-contents + image.$: $.cluster.leaderboard_image + command: ["/bin/bash", "-c"] + args: + - | + cp -r ${LEADERBOARD_ROOT}/* /tmp/leaderboard/ + cp -r ${SCENARIO_RUNNER_ROOT}/* /tmp/scenario_runner/ + + echo "Creating log files" + mkdir -m 0777 -p /tmp/logs/logs + mkdir -m 0777 -p /tmp/logs/containers-status + mkdir -m 0777 -p /tmp/logs/agent + mkdir -m 0777 -p /tmp/logs/evalai + + echo "Downloading existent files from S3, and removing the container status data" + aws s3 rm s3://${S3_BUCKET}/${SUBMISSION_ID}/containers-status --recursive --exclude "*" --include "monitor.*" + aws s3 cp s3://${S3_BUCKET}/${SUBMISSION_ID}/logs /tmp/logs/logs --recursive --exclude "*" --include "monitor.log" + + chmod 0777 -R /tmp/logs + + if [ ! -z $RESUME ]; then + echo "Detected a resume initiated by the user. Removing simulation cancel file if needed." 
+ aws s3 rm s3://${S3_BUCKET}/${SUBMISSION_ID}/containers-status --recursive --exclude "*" --include "simulation.cancel" + fi + env: + - name: SUBMISSION_ID + value.$: $.submission.submission_id + - name: S3_BUCKET + value.$: "$.aws.s3_bucket" + - name: RESUME + value.$: $.submission.resume + volumeMounts: + - mountPath: /tmp/leaderboard + name: leaderboard + - mountPath: /tmp/scenario_runner + name: scenario-runner + - mountPath: /tmp/logs + name: logs + + containers: + - name: monitor + image.$: $.cluster.monitor_image + command: ["/bin/bash", "-c"] + args: + - | + bash /workspace/run_monitor.sh + env: + - name: SUBMISSION_ID + value.$: $.submission.submission_id + - name: SUBMISSION_WORKERS + value.$: $.cluster.parallelization_workers + - name: CHALLENGE_ID + value.$: $.submission.challenge_id + - name: TRACK_ID + value.$: $.submission.track_id + - name: TEAM_ID + value.$: $.submission.team_id + - name: S3_BUCKET + value.$: "$.aws.s3_bucket" + - name: DYNAMODB_SUBMISSIONS_TABLE + value.$: "$.aws.dynamodb_submissions_table" + - name: EVALAI_AUTH_TOKEN + value.$: "$.evalai.auth_token" + - name: EVALAI_API_SERVER + value.$: "$.evalai.api_server" + volumeMounts: + - mountPath: /utils/leaderboard + name: leaderboard + - mountPath: /utils/scenario_runner + name: scenario-runner + - mountPath: /logs + name: logs + restartPolicy: OnFailure + volumes: + - name: leaderboard + emptyDir: {} + - name: scenario-runner + emptyDir: {} + - name: logs + emptyDir: {} + diff --git a/leaderboard_2.1/jobs/submission.yaml b/leaderboard_2.1/jobs/submission.yaml new file mode 100644 index 0000000..82d9a02 --- /dev/null +++ b/leaderboard_2.1/jobs/submission.yaml @@ -0,0 +1,169 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name.$: $.submission.name +spec: + backoffLimit: 4 + template: + spec: + serviceAccountName: submission-worker + nodeSelector: + node.kubernetes.io/instance-type.$: $.cluster.instance_type + + initContainers: + - name: private-contents + image.$: 
$.cluster.leaderboard_image + command: ["/bin/bash", "-c"] + args: + - | + cp -r ${LEADERBOARD_ROOT}/* /tmp/leaderboard/ + cp -r ${SCENARIO_RUNNER_ROOT}/* /tmp/scenario_runner/ + cp -r ${CARLA_PYTHON_API_ROOT}/* /tmp/CARLA/ + cp -r ${GPU_UTILS_ROOT}/* /tmp/gpu_utils + + echo "Creating log files" + mkdir -m 0777 -p /tmp/logs/logs + mkdir -m 0777 -p /tmp/logs/containers-status + mkdir -m 0777 -p /tmp/logs/agent + mkdir -m 0777 -p /tmp/logs/recorder + mkdir -m 0777 -p /tmp/logs/evalai + + echo "Downloading existent files from S3, and removing the container status data" + aws s3 rm s3://${S3_BUCKET}/${SUBMISSION_ID}/containers-status --recursive --exclude "*" --include "*-${WORKER_ID}.*" + aws s3 cp s3://${S3_BUCKET}/${SUBMISSION_ID}/agent /tmp/logs/agent --recursive --exclude "*" --include "*${WORKER_ID}.json" + aws s3 cp s3://${S3_BUCKET}/${SUBMISSION_ID}/logs /tmp/logs/logs --recursive --exclude "*" --include "*-${WORKER_ID}.log" + + chmod 0777 -R /tmp/logs + + if [ ! -z $RESUME ]; then + echo "Detected a resume initiated by the user." 
+ aws s3 rm s3://${S3_BUCKET}/${SUBMISSION_ID}/containers-status --recursive --exclude "*" --include "simulation.cancel" + fi + env: + - name: SUBMISSION_ID + value.$: $.submission.submission_id + - name: WORKER_ID + value.$: $.parallelization.worker_id + - name: S3_BUCKET + value.$: "$.aws.s3_bucket" + - name: RESUME + value.$: $.submission.resume + volumeMounts: + - mountPath: /tmp/leaderboard + name: leaderboard + - mountPath: /tmp/scenario_runner + name: scenario-runner + - mountPath: /tmp/CARLA + name: carla-python-api + - mountPath: /tmp/gpu_utils + name: gpu-utils + - mountPath: /tmp/logs + name: logs + + containers: + + - name: simulator + image.$: $.cluster.simulator_image + command: ["/bin/bash", "-c"] + args: + - | + bash /home/carla/run_carla.sh + env: + - name: WORKER_ID + value.$: $.parallelization.worker_id + - name: DISPLAY + value: ":0" + volumeMounts: + - mountPath: /tmp/.X11-unix + name: x11 + - mountPath: /gpu + name: gpu-utils + - mountPath: /tmp/logs + name: logs + subPath: logs + - mountPath: /home/carla/recorder + name: logs + subPath: recorder + - mountPath: /tmp/status + name: logs + subPath: containers-status + + - name: agent + image.$: $.submission.submitted_image_uri + command: ["/bin/bash", "-c"] + args: + - | + bash /workspace/leaderboard/run_leaderboard.sh + env: + - name: WORKER_ID + value.$: $.parallelization.worker_id + - name: CHALLENGE_TRACK_CODENAME + value.$: $.submission.track_codename + - name: ROUTES_SUBSET + value.$: $.submission.subset + - name: REPETITIONS + value: "1" + - name: RESUME + value.$: $.submission.resume + volumeMounts: + - mountPath: /workspace/leaderboard + name: leaderboard + - mountPath: /workspace/scenario_runner + name: scenario-runner + - mountPath: /workspace/CARLA + name: carla-python-api + - mountPath: /gpu + name: gpu-utils + - mountPath: /tmp/logs + name: logs + subPath: logs + - mountPath: /tmp/agent + name: logs + subPath: agent + - mountPath: /tmp/status + name: logs + subPath: 
containers-status + - mountPath: /workspace/log/ros + name: ros-logs + # Set the resource gpu limit at the agent container. In this way, agents will only see one available GPU + # and we avoid users sending models to a different GPU from the assigned one. + resources: + limits: + nvidia.com/gpu: 1 + + - name: aws-uploader + image.$: $.cluster.uploader_image + command: ["/bin/bash", "-c"] + args: + - | + bash /workspace/run_uploader.sh + env: + - name: WORKER_ID + value.$: $.parallelization.worker_id + - name: SUBMISSION_ID + value.$: $.submission.submission_id + - name: S3_BUCKET + value.$: "$.aws.s3_bucket" + volumeMounts: + - mountPath: /logs + name: logs + - mountPath: /ros/logs + name: ros-logs + + restartPolicy: OnFailure + volumes: + - name: leaderboard + emptyDir: {} + - name: scenario-runner + emptyDir: {} + - name: carla-python-api + emptyDir: {} + - name: gpu-utils + emptyDir: {} + - name: logs + emptyDir: {} + - name: ros-logs + emptyDir: {} + - name: x11 + hostPath: + path: /tmp/.X11-unix diff --git a/leaderboard_2.1/tests/test-carla-server.yaml b/leaderboard_2.1/tests/test-carla-server.yaml new file mode 100644 index 0000000..34df545 --- /dev/null +++ b/leaderboard_2.1/tests/test-carla-server.yaml @@ -0,0 +1,47 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: submission-worker + namespace: default +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: submission +spec: + template: + spec: + serviceAccountName: submission-worker + containers: + - name: simulator + image: 342236305043.dkr.ecr.us-west-2.amazonaws.com/leaderboard-20-simulator:latest + command: ["/bin/bash", "-c"] + args: + - | + echo Starting the CARLA server + ./CarlaUE4.sh --vulkan + echo Stopped the CARLA server + env: + - name: DISPLAY + value: ":0.0" + - name: OMP_PROC_BIND + value: "FALSE" + - name: OMP_NUM_THREADS + value: "64" + - name: SDL_VIDEODRIVER + value: "x11" + volumeMounts: + - mountPath: /tmp/.X11-unix + name: x11 + resources: + limits: + nvidia.com/gpu:
"1" + securityContext: + privileged: true + restartPolicy: Never + volumes: + - name: x11 + hostPath: + path: /tmp/.X11-unix + backoffLimit: 0 \ No newline at end of file diff --git a/leaderboard_2.1/tests/test-gpu-job.yaml b/leaderboard_2.1/tests/test-gpu-job.yaml new file mode 100644 index 0000000..8b07b24 --- /dev/null +++ b/leaderboard_2.1/tests/test-gpu-job.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: Pod +metadata: + name: gpu-pod +spec: + nodeSelector: + node.kubernetes.io/instance-type: g5.12xlarge + + restartPolicy: Never + containers: + - name: cuda-container + image: nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda10.2 + resources: + limits: + nvidia.com/gpu: 1 # requesting 1 GPU + tolerations: + - key: nvidia.com/gpu + operator: Exists + effect: NoSchedule \ No newline at end of file From fe93daee901f2729670743c9dc56e3c76cfb3a41 Mon Sep 17 00:00:00 2001 From: glopezdiest Date: Thu, 6 Mar 2025 10:15:54 +0100 Subject: [PATCH 2/2] Updated LB submodule --- leaderboard_2.1/docker/leaderboard/submodules/leaderboard | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/leaderboard_2.1/docker/leaderboard/submodules/leaderboard b/leaderboard_2.1/docker/leaderboard/submodules/leaderboard index edaa85e..cfecdc8 160000 --- a/leaderboard_2.1/docker/leaderboard/submodules/leaderboard +++ b/leaderboard_2.1/docker/leaderboard/submodules/leaderboard @@ -1 +1 @@ -Subproject commit edaa85e81225a78ce9f468df1293df79bfc5e377 +Subproject commit cfecdc8d99f933a09b27176829ce7b8d63d66d8d