Skip to content

Commit f929217

Browse files
authored
fix(hermes): reconnect on wh connection termination (#1488)
* fix(hermes): reconnect on wh connection termination

  `tokio::select!` disables the branch that runs the wh (Wormhole) connection if it returns `Ok`, and that branch is never checked again. This change makes `run` never return `Ok`.

* refactor(hermes): use `Result<!>` in pythnet network listener thread
1 parent 1b13bf6 commit f929217

File tree

4 files changed

+60
-55
lines changed

4 files changed

+60
-55
lines changed

apps/hermes/Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

apps/hermes/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "hermes"
3-
version = "0.5.5"
3+
version = "0.5.6"
44
description = "Hermes is an agent that provides Verified Prices from the Pythnet Pyth Oracle."
55
edition = "2021"
66

apps/hermes/src/network/pythnet.rs

Lines changed: 45 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -139,7 +139,7 @@ async fn fetch_bridge_data(
139139
}
140140
}
141141

142-
pub async fn run(store: Arc<State>, pythnet_ws_endpoint: String) -> Result<()> {
142+
pub async fn run(store: Arc<State>, pythnet_ws_endpoint: String) -> Result<!> {
143143
let client = PubsubClient::new(pythnet_ws_endpoint.as_ref()).await?;
144144

145145
let config = RpcProgramAccountsConfig {
@@ -160,59 +160,54 @@ pub async fn run(store: Arc<State>, pythnet_ws_endpoint: String) -> Result<()> {
160160
.program_subscribe(&system_program::id(), Some(config))
161161
.await?;
162162

163-
loop {
164-
match notif.next().await {
165-
Some(update) => {
166-
let account: Account = match update.value.account.decode() {
167-
Some(account) => account,
168-
None => {
169-
tracing::error!(?update, "Failed to decode account from update.");
170-
continue;
171-
}
172-
};
173-
174-
let accumulator_messages = AccumulatorMessages::try_from_slice(&account.data);
175-
match accumulator_messages {
176-
Ok(accumulator_messages) => {
177-
let (candidate, _) = Pubkey::find_program_address(
178-
&[
179-
b"AccumulatorState",
180-
&accumulator_messages.ring_index().to_be_bytes(),
181-
],
182-
&system_program::id(),
183-
);
184-
185-
if candidate.to_string() == update.value.pubkey {
186-
let store = store.clone();
187-
tokio::spawn(async move {
188-
if let Err(err) = Aggregates::store_update(
189-
&*store,
190-
Update::AccumulatorMessages(accumulator_messages),
191-
)
192-
.await
193-
{
194-
tracing::error!(error = ?err, "Failed to store accumulator messages.");
195-
}
196-
});
197-
} else {
198-
tracing::error!(
199-
?candidate,
200-
?update.value.pubkey,
201-
"Failed to verify message public keys.",
202-
);
203-
}
204-
}
163+
while let Some(update) = notif.next().await {
164+
let account: Account = match update.value.account.decode() {
165+
Some(account) => account,
166+
None => {
167+
tracing::error!(?update, "Failed to decode account from update.");
168+
continue;
169+
}
170+
};
171+
172+
let accumulator_messages = AccumulatorMessages::try_from_slice(&account.data);
173+
match accumulator_messages {
174+
Ok(accumulator_messages) => {
175+
let (candidate, _) = Pubkey::find_program_address(
176+
&[
177+
b"AccumulatorState",
178+
&accumulator_messages.ring_index().to_be_bytes(),
179+
],
180+
&system_program::id(),
181+
);
205182

206-
Err(err) => {
207-
tracing::error!(error = ?err, "Failed to parse AccumulatorMessages.");
208-
}
209-
};
183+
if candidate.to_string() == update.value.pubkey {
184+
let store = store.clone();
185+
tokio::spawn(async move {
186+
if let Err(err) = Aggregates::store_update(
187+
&*store,
188+
Update::AccumulatorMessages(accumulator_messages),
189+
)
190+
.await
191+
{
192+
tracing::error!(error = ?err, "Failed to store accumulator messages.");
193+
}
194+
});
195+
} else {
196+
tracing::error!(
197+
?candidate,
198+
?update.value.pubkey,
199+
"Failed to verify message public keys.",
200+
);
201+
}
210202
}
211-
None => {
212-
return Err(anyhow!("Pythnet network listener terminated"));
203+
204+
Err(err) => {
205+
tracing::error!(error = ?err, "Failed to parse AccumulatorMessages.");
213206
}
214-
}
207+
};
215208
}
209+
210+
Err(anyhow!("Pythnet network listener connection terminated"))
216211
}
217212

218213
/// Fetch existing GuardianSet accounts from Wormhole.

apps/hermes/src/network/wormhole.rs

Lines changed: 13 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,11 @@ use {
4949
Digest,
5050
Keccak256,
5151
},
52-
std::sync::Arc,
52+
std::{
53+
sync::Arc,
54+
time::Duration,
55+
},
56+
tokio::time::Instant,
5357
tonic::Request,
5458
wormhole_sdk::{
5559
vaa::{
@@ -158,10 +162,16 @@ mod proto {
158162
pub async fn spawn(opts: RunOptions, state: Arc<State>) -> Result<()> {
159163
let mut exit = crate::EXIT.subscribe();
160164
loop {
165+
let current_time = Instant::now();
161166
tokio::select! {
162167
_ = exit.changed() => break,
163168
Err(err) = run(opts.clone(), state.clone()) => {
164169
tracing::error!(error = ?err, "Wormhole gRPC service failed.");
170+
171+
if current_time.elapsed() < Duration::from_secs(30) {
172+
tracing::error!("Wormhole listener restarting too quickly. Sleep 1s.");
173+
tokio::time::sleep(Duration::from_secs(1)).await;
174+
}
165175
}
166176
}
167177
}
@@ -170,7 +180,7 @@ pub async fn spawn(opts: RunOptions, state: Arc<State>) -> Result<()> {
170180
}
171181

172182
#[tracing::instrument(skip(opts, state))]
173-
async fn run(opts: RunOptions, state: Arc<State>) -> Result<()> {
183+
async fn run(opts: RunOptions, state: Arc<State>) -> Result<!> {
174184
let mut client = SpyRpcServiceClient::connect(opts.wormhole.spy_rpc_addr).await?;
175185
let mut stream = client
176186
.subscribe_signed_vaa(Request::new(SubscribeSignedVaaRequest {
@@ -190,7 +200,7 @@ async fn run(opts: RunOptions, state: Arc<State>) -> Result<()> {
190200
}
191201
}
192202

193-
Ok(())
203+
Err(anyhow!("Wormhole gRPC stream terminated."))
194204
}
195205

196206
/// Process a message received via a Wormhole gRPC connection.

0 commit comments

Comments
 (0)