Skip to content

Commit 8f73200

Browse files
dhananjay-ngYashwantGohokar
authored andcommitted
Added fix to check length of consistent device paths available before attempting read
1 parent 5695840 commit 8f73200

File tree

3 files changed

+75
-1
lines changed

3 files changed

+75
-1
lines changed

pkg/oci/client/client_test.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,22 @@ func (c *mockComputeClient) DetachVolume(ctx context.Context, request core.Detac
173173
}
174174

175175
func (c *mockComputeClient) ListInstanceDevices(ctx context.Context, request core.ListInstanceDevicesRequest) (response core.ListInstanceDevicesResponse, err error) {
176+
devicePath := "/dev/oracleoci/oraclevdac"
177+
178+
if *request.InstanceId == "ocid1.device-path-returns-error" {
179+
return core.ListInstanceDevicesResponse{}, errNotFound
180+
} else if *request.InstanceId == "ocid1.device-path-not-available" {
181+
return core.ListInstanceDevicesResponse{
182+
Items: []core.Device{},
183+
}, nil
184+
} else if *request.InstanceId == "ocid1.one-device-path-available" {
185+
return core.ListInstanceDevicesResponse{
186+
Items: []core.Device{{
187+
Name: &devicePath,
188+
},
189+
},
190+
}, nil
191+
}
176192
return core.ListInstanceDevicesResponse{}, nil
177193
}
178194

pkg/oci/client/volume_attachment.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ package client
1616

1717
import (
1818
"context"
19+
"fmt"
20+
"math/rand"
1921
"time"
2022

2123
"github.com/oracle/oci-cloud-controller-manager/pkg/util"
@@ -181,7 +183,14 @@ func (c *client) getDevicePath(ctx context.Context, instanceID string) (*string,
181183
return nil, errors.WithStack(err)
182184
}
183185

184-
device := listInstanceDevicesResp.Items[0].Name
186+
//When more than 32 pods get scheduled at same time on same node, some attach operations will reach this condition where no device is left
187+
if len(listInstanceDevicesResp.Items) == 0 {
188+
c.logger.With("service", "compute", "verb", listVerb, "resource", instanceResource).
189+
With("instanceID", instanceID).Warn("No consistent device paths available for worker node.")
190+
return nil, fmt.Errorf("Max number of volumes are already attached to instance %s. Please schedule workload on different node.", instanceID)
191+
}
192+
//Picks device path from available path randomly so that 2 volume attachments don't get same path when operations happen concurrently resulting in failure of one of them.
193+
device := listInstanceDevicesResp.Items[rand.Intn(len(listInstanceDevicesResp.Items))].Name
185194

186195
return device, nil
187196
}
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
package client
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"strings"
7+
"testing"
8+
9+
"go.uber.org/zap"
10+
)
11+
12+
func Test_getDevicePath(t *testing.T) {
13+
var tests = map[string]struct {
14+
instanceID string
15+
want string
16+
wantErr error
17+
}{
18+
"getDevicePathNoDeviceAvailable": {
19+
instanceID: "ocid1.device-path-not-available",
20+
wantErr: fmt.Errorf("Max number of volumes are already attached to instance %s. Please schedule workload on different node.", "ocid1.device-path-not-available"),
21+
22+
},
23+
"getDevicePathOneDeviceAvailable": {
24+
instanceID: "ocid1.one-device-path-available",
25+
want: "/dev/oracleoci/oraclevdac",
26+
},
27+
"getDevicePathReturnsError": {
28+
instanceID: "ocid1.device-path-returns-error",
29+
wantErr: errNotFound,
30+
},
31+
}
32+
33+
vaClient := &client{
34+
compute: &mockComputeClient{},
35+
logger: zap.S(),
36+
}
37+
38+
for name, tc := range tests {
39+
t.Run(name, func(t *testing.T) {
40+
result, err := vaClient.getDevicePath(context.Background(), tc.instanceID)
41+
if tc.wantErr != nil && !strings.EqualFold(tc.wantErr.Error(), err.Error()) {
42+
t.Errorf("getDevicePath() = %v, want %v", err.Error(), tc.wantErr.Error())
43+
}
44+
if tc.want != "" && !strings.EqualFold(tc.want, *result) {
45+
t.Errorf("getDevicePath() = %v, want %v", *result, tc.want)
46+
}
47+
})
48+
}
49+
}

0 commit comments

Comments
 (0)