Skip to content

Commit 9bf3481

Browse files
authored
Zero touch switch config for dev environments (#3576)
1 parent 200f4ad commit 9bf3481

27 files changed

+554
-338
lines changed

.github/buildomat/jobs/deploy.sh

Lines changed: 25 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,9 @@ _exit_trap() {
3939
pfexec netstat -rncva
4040
pfexec netstat -anu
4141
pfexec arp -an
42-
pfexec ./out/softnpu/scadm \
43-
--server /opt/oxide/softnpu/stuff/server \
44-
--client /opt/oxide/softnpu/stuff/client \
42+
pfexec zlogin sidecar_softnpu /softnpu/scadm \
43+
--server /softnpu/server \
44+
--client /softnpu/client \
4545
standalone \
4646
dump-state
4747
pfexec /opt/oxide/opte/bin/opteadm list-ports
@@ -73,7 +73,7 @@ _exit_trap() {
7373
pfexec zlogin "$z" arp -an
7474
done
7575

76-
pfexec zlogin softnpu cat /softnpu.log
76+
pfexec zlogin sidecar_softnpu cat /var/log/softnpu.log
7777

7878
exit $status
7979
}
@@ -135,6 +135,20 @@ for p in /input/ci-tools/work/end-to-end-tests/*.gz; do
135135
chmod a+x "tests/$(basename "${p%.gz}")"
136136
done
137137

138+
# Lab gateway ip
139+
export GATEWAY_IP=192.168.1.199
140+
# Proxy arp settings.
141+
export PXA_START="192.168.1.50"
142+
export PXA_END="192.168.1.90"
143+
144+
# Nexus (and any instances using the above IP pool) are configured to use external
145+
# IPs from a fixed subnet (192.168.1.0/24). OPTE/SoftNPU/Boundary Services take care
146+
# of NATing between the private VPC networks and this "external network".
147+
# We create a static IP in this subnet in the global zone and configure the switch
148+
# to use it as the default gateway.
149+
# NOTE: Keep in sync with $[SERVICE_]IP_POOL_{START,END}
150+
pfexec ipadm create-addr -T static -a $GATEWAY_IP/24 igb0/sidehatch
151+
138152
pfexec zpool create -f scratch c1t1d0 c2t1d0
139153
ZPOOL_VDEV_DIR=/scratch ptime -m pfexec ./tools/create_virtual_hardware.sh
140154

@@ -186,7 +200,6 @@ rmdir pkg
186200
# will end up once installed.
187201
E2E_TLS_CERT="/opt/oxide/sled-agent/pkg/initial-tls-cert.pem"
188202

189-
190203
#
191204
# Image-related tests use images served by catacomb. The lab network is
192205
# IPv4-only; the propolis zones are IPv6-only. These steps set up tcpproxy
@@ -222,46 +235,23 @@ do
222235
retry=$((retry + 1))
223236
done
224237

225-
# Nexus (and any instances using the above IP pool) are configured to use external
226-
# IPs from a fixed subnet (192.168.1.0/24). OPTE/SoftNPU/Boundary Services take care
227-
# of NATing between the private VPC networks and this "external network".
228-
# We create a static IP in this subnet in the global zone and configure the switch
229-
# to use it as the default gateway.
230-
# NOTE: Keep in sync with $[SERVICE_]IP_POOL_{START,END}
231-
export GATEWAY_IP=192.168.1.199
232-
pfexec ipadm create-addr -T static -a $GATEWAY_IP/24 igb0/sidehatch
233-
234-
# NOTE: this script configures softnpu's "rack network" settings using swadm
235-
./tools/scrimlet/softnpu-init.sh
236-
237-
# NOTE: this command configures proxy arp for softnpu. This is needed if you want to be
238-
# able to reach instances from the same L2 network segment.
239-
# Keep consistent with `get_system_ip_pool` in `end-to-end-tests`.
240-
IP_POOL_START="192.168.1.50"
241-
IP_POOL_END="192.168.1.90"
242-
# `dladm` won't return leading zeroes but `scadm` expects them, use sed to add any missing zeroes
243-
SOFTNPU_MAC=$(dladm show-vnic sc0_1 -p -o macaddress | sed -E 's/[ :]/&0/g; s/0([^:]{2}(:|$))/\1/g')
244-
pfexec ./out/softnpu/scadm \
245-
--server /opt/oxide/softnpu/stuff/server \
246-
--client /opt/oxide/softnpu/stuff/client \
247-
standalone \
248-
add-proxy-arp $IP_POOL_START $IP_POOL_END $SOFTNPU_MAC
249238

250239
# We also need to configure proxy arp for any services which use OPTE for external connectivity (e.g. Nexus)
251240
tar xf out/omicron-sled-agent.tar pkg/config-rss.toml
241+
SOFTNPU_MAC=$(dladm show-vnic sc0_1 -p -o macaddress | sed -E 's/[ :]/&0/g; s/0([^:]{2}(:|$))/\1/g')
252242
SERVICE_IP_POOL_START="$(sed -n 's/^first = "\(.*\)"/\1/p' pkg/config-rss.toml)"
253243
SERVICE_IP_POOL_END="$(sed -n 's/^last = "\(.*\)"/\1/p' pkg/config-rss.toml)"
254244
rm -r pkg
255245

256-
pfexec ./out/softnpu/scadm \
257-
--server /opt/oxide/softnpu/stuff/server \
258-
--client /opt/oxide/softnpu/stuff/client \
246+
pfexec zlogin sidecar_softnpu /softnpu/scadm \
247+
--server /softnpu/server \
248+
--client /softnpu/client \
259249
standalone \
260250
add-proxy-arp $SERVICE_IP_POOL_START $SERVICE_IP_POOL_END $SOFTNPU_MAC
261251

262-
pfexec ./out/softnpu/scadm \
263-
--server /opt/oxide/softnpu/stuff/server \
264-
--client /opt/oxide/softnpu/stuff/client \
252+
pfexec zlogin sidecar_softnpu /softnpu/scadm \
253+
--server /softnpu/server \
254+
--client /softnpu/client \
265255
standalone \
266256
dump-state
267257

.github/buildomat/jobs/package.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,12 @@ ptime -m cargo run --locked --release --bin omicron-package -- \
5555
files=(
5656
out/*.tar
5757
out/target/test
58-
out/softnpu/*
58+
out/npuzone/*
5959
package-manifest.toml
6060
smf/sled-agent/non-gimlet/config.toml
6161
target/release/omicron-package
6262
tools/create_virtual_hardware.sh
63+
tools/virtual_hardware.sh
6364
tools/scrimlet/*
6465
)
6566

docs/how-to-run.adoc

Lines changed: 11 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -176,13 +176,19 @@ The Sled Agent supports operation on both:
176176

177177
This script also sets up a "softnpu" zone to implement Boundary Services. SoftNPU simulates the Tofino device that's used in real systems. Just like Tofino, it can implement sled-to-sled networking, but that's beyond the scope of this doc.
178178

179-
If you're running on a PC and using an existing network as your external network, you can usually just run:
179+
If you're running on a PC and using an existing network as your external network, you can usually just run this script with a few environment variables set. These environment variables tell SoftNPU about your local network. You'll need to carve out part of your network for the Oxide platform, making sure that the range you specify below is not occupied by other hosts on your network.
180180

181+
[source,bash]
181182
----
183+
export PHYSICAL_LINK=igb0 # The physical link for your local network.
184+
export GATEWAY_IP=192.168.1.199 # The gateway IP address for your local network.
185+
export PXA_START=192.168.1.2 # The first IP address your Oxide cluster can use.
186+
export PXA_END=192.168.1.100 # The last IP address your Oxide cluster can use.
187+
182188
$ pfexec ./tools/create_virtual_hardware.sh
183189
----
184190

185-
If above you made up a new "external" network only accessible within the Sled, then you'll want to override PHYSICAL_LINK:
191+
If above you made up a new "external" network only accessible within the Sled, then you'll want to override PHYSICAL_LINK as follows:
186192

187193
----
188194
$ PHYSICAL_LINK=fake_external_stub0 pfexec ./tools/create_virtual_hardware.sh
@@ -350,46 +356,6 @@ $ zoneadm list -cnv
350356
$ pfexec tail -f $(pfexec svcs -z oxz_nexus_<UUID> -L nexus)
351357
----
352358

353-
=== SoftNPU Configuration
354-
355-
After installing omicron with `omicron-package install`, you can run the `softnpu-init.sh` script to configure SoftNPU. If your external network is the one that your global zone is already on (i.e., an existing network), then you can likely just run:
356-
357-
[source,console]
358-
----
359-
$ ./tools/scrimlet/softnpu-init.sh
360-
----
361-
362-
In this case, `softnpu-init.sh` determines a network gateway by looking at your system's default gateway. Again, the assumption for this use case is that your rack gets external connectivity the same way your system does because it's part of the same network.
363-
364-
If your external network is one you made up above that's local to your own system, this won't be right. In that case, you'll want to override the gateway IP to be the address of the global zone on your made-up network. In our example:
365-
366-
[source,console]
367-
----
368-
$ GATEWAY_IP=192.168.1.199 ./tools/scrimlet/softnpu-init.sh
369-
----
370-
371-
In other configurations, you might use a different GATEWAY_IP. You can also override GATEWAY_MAC if needed, but that shouldn't be necessary for the configurations described here.
372-
373-
=== SoftNPU Proxy ARP Setup
374-
375-
Services that require external connectivity (e.g. Nexus, Boundary NTP, External DNS) do so via OPTE using addresses from the services IP pool. When using SoftNPU, we'll need to configure Proxy ARP for those addresses.
376-
377-
In this snippet, `$SERVICE_IP_POOL_START` and `$SERVICE_IP_POOL_END` are the addresses you put into `config-rss.toml` above for `internal_services_ip_pool_ranges`:
378-
379-
[source,console]
380-
----
381-
# dladm won't return leading zeroes but `scadm` expects them
382-
$ SOFTNPU_MAC=$(dladm show-vnic sc0_1 -p -o macaddress | gsed 's/\b\(\w\)\b/0\1/g')
383-
$ pfexec /opt/oxide/softnpu/stuff/scadm \
384-
--server /opt/oxide/softnpu/stuff/server \
385-
--client /opt/oxide/softnpu/stuff/client \
386-
standalone \
387-
add-proxy-arp \
388-
$SERVICE_IP_POOL_START \
389-
$SERVICE_IP_POOL_END \
390-
$SOFTNPU_MAC
391-
----
392-
393359
== Using Omicron
394360

395361
At this point, the system should be up and running! You should be able to reach the external API and web console from your external network. But how? The URL for the API and console will be:
@@ -440,9 +406,9 @@ With SoftNPU you will generally also need to configure Proxy ARP. Below, `IP_PO
440406
----
441407
# dladm won't return leading zeroes but `scadm` expects them
442408
$ SOFTNPU_MAC=$(dladm show-vnic sc0_1 -p -o macaddress | gsed 's/\b\(\w\)\b/0\1/g')
443-
$ pfexec /opt/oxide/softnpu/stuff/scadm \
444-
--server /opt/oxide/softnpu/stuff/server \
445-
--client /opt/oxide/softnpu/stuff/client \
409+
$ pfexec zlogin sidecar_softnpu /softnpu/scadm \
410+
--server /softnpu/server \
411+
--client /softnpu/client \
446412
standalone \
447413
add-proxy-arp \
448414
$IP_POOL_START \

illumos-utils/src/dladm.rs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,14 @@ pub struct GetVnicError {
8686
err: ExecutionError,
8787
}
8888

89+
/// Errors returned from [`Dladm::get_simulated_tfports`].
90+
#[derive(thiserror::Error, Debug)]
91+
#[error("Failed to get simnets: {err}")]
92+
pub struct GetSimnetError {
93+
#[source]
94+
err: ExecutionError,
95+
}
96+
8997
/// Errors returned from [`Dladm::delete_vnic`].
9098
#[derive(thiserror::Error, Debug)]
9199
#[error("Failed to delete vnic {name}: {err}")]
@@ -416,6 +424,25 @@ impl Dladm {
416424
Ok(vnics)
417425
}
418426

427+
/// Returns simnet links masquerading as tfport devices
428+
pub fn get_simulated_tfports() -> Result<Vec<String>, GetSimnetError> {
429+
let mut command = std::process::Command::new(PFEXEC);
430+
let cmd = command.args(&[DLADM, "show-simnet", "-p", "-o", "LINK"]);
431+
let output = execute(cmd).map_err(|err| GetSimnetError { err })?;
432+
433+
let tfports = String::from_utf8_lossy(&output.stdout)
434+
.lines()
435+
.filter_map(|name| {
436+
if name.starts_with("tfport") {
437+
Some(name.to_owned())
438+
} else {
439+
None
440+
}
441+
})
442+
.collect();
443+
Ok(tfports)
444+
}
445+
419446
/// Remove a vnic from the sled.
420447
pub fn delete_vnic(name: &str) -> Result<(), DeleteVnicError> {
421448
let mut command = std::process::Command::new(PFEXEC);

illumos-utils/src/running_zone.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1104,6 +1104,7 @@ impl InstalledZone {
11041104
unique_name: Option<Uuid>,
11051105
datasets: &[zone::Dataset],
11061106
filesystems: &[zone::Fs],
1107+
data_links: &[String],
11071108
devices: &[zone::Device],
11081109
opte_ports: Vec<(Port, PortTicket)>,
11091110
bootstrap_vnic: Option<Link>,
@@ -1140,14 +1141,21 @@ impl InstalledZone {
11401141
.collect(),
11411142
})?;
11421143

1143-
let net_device_names: Vec<String> = opte_ports
1144+
let mut net_device_names: Vec<String> = opte_ports
11441145
.iter()
11451146
.map(|(port, _)| port.vnic_name().to_string())
11461147
.chain(std::iter::once(control_vnic.name().to_string()))
11471148
.chain(bootstrap_vnic.as_ref().map(|vnic| vnic.name().to_string()))
11481149
.chain(links.iter().map(|nic| nic.name().to_string()))
1150+
.chain(data_links.iter().map(|x| x.to_string()))
11491151
.collect();
11501152

1153+
// There are many sources for device names. In some cases they can
1154+
// overlap, depending on the contents of user defined config files. This
1155+
// can cause zones to fail to start if duplicate data links are given.
1156+
net_device_names.sort();
1157+
net_device_names.dedup();
1158+
11511159
Zones::install_omicron_zone(
11521160
log,
11531161
&zone_root_path,

installinator/src/bootstrap.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,12 @@ const MG_DDM_MANIFEST_PATH: &str = "/opt/oxide/mg-ddm/pkg/ddm/manifest.xml";
2626
// TODO-cleanup The implementation of this function is heavily derived from
2727
// `sled_agent::bootstrap::server::Server::start()`; consider whether we could
2828
// find a way for them to share it.
29-
pub(crate) async fn bootstrap_sled(log: Logger) -> Result<()> {
29+
pub(crate) async fn bootstrap_sled(
30+
data_links: &[String; 2],
31+
log: Logger,
32+
) -> Result<()> {
3033
// Find address objects to pass to maghemite.
31-
let links = underlay::find_chelsio_links()
34+
let links = underlay::find_chelsio_links(data_links)
3235
.context("failed to find chelsio links")?;
3336
ensure!(
3437
!links.is_empty(),

installinator/src/dispatch.rs

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,18 @@ struct InstallOpts {
151151
#[clap(long)]
152152
install_on_gimlet: bool,
153153

154+
//TODO(ry) this probably needs to get plumbed somewhere instead of relying
155+
//on a default.
156+
/// The first gimlet data link to use.
157+
#[clap(long, default_value = "cxgbe0")]
158+
data_link0: String,
159+
160+
//TODO(ry) this probably needs to get plumbed somewhere instead of relying
161+
//on a default.
162+
/// The second gimlet data link to use.
163+
#[clap(long, default_value = "cxgbe1")]
164+
data_link1: String,
165+
154166
// TODO: checksum?
155167

156168
// The destination to write to.
@@ -164,7 +176,8 @@ struct InstallOpts {
164176
impl InstallOpts {
165177
async fn exec(self, log: &slog::Logger) -> Result<()> {
166178
if self.bootstrap_sled {
167-
crate::bootstrap::bootstrap_sled(log.clone()).await?;
179+
let data_links = [self.data_link0.clone(), self.data_link1.clone()];
180+
crate::bootstrap::bootstrap_sled(&data_links, log.clone()).await?;
168181
}
169182

170183
let image_id = self.artifact_ids.resolve()?;

internal-dns/src/resolver.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ impl Resolver {
7676
// The underlay is IPv6 only, so this helps avoid needless lookups of
7777
// the IPv4 variant.
7878
opts.ip_strategy = LookupIpStrategy::Ipv6Only;
79+
opts.negative_max_ttl = Some(std::time::Duration::from_secs(15));
7980
let resolver = TokioAsyncResolver::tokio(rc, opts)?;
8081

8182
Ok(Self { log, resolver })

sled-agent/src/bootstrap/server.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,10 @@ impl Server {
5555
}
5656

5757
// Find address objects to pass to maghemite.
58-
let mg_addr_objs = underlay::find_nics().map_err(|err| {
59-
format!("Failed to find address objects for maghemite: {err}")
60-
})?;
58+
let mg_addr_objs = underlay::find_nics(&sled_config.data_links)
59+
.map_err(|err| {
60+
format!("Failed to find address objects for maghemite: {err}")
61+
})?;
6162
if mg_addr_objs.is_empty() {
6263
return Err(
6364
"underlay::find_nics() returned 0 address objects".to_string()

sled-agent/src/config.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,10 @@ pub struct Config {
7171
/// systems.
7272
pub data_link: Option<PhysicalLink>,
7373

74+
/// The data links that sled-agent will treat as the real gimlet cxgbe0/cxgbe1
75+
/// links.
76+
pub data_links: [String; 2],
77+
7478
#[serde(default)]
7579
pub updates: ConfigUpdates,
7680

0 commit comments

Comments
 (0)