Commit 31b6696

lntest: properly handle shutdown error
This commit removes the panic used when checking the shutdown log. Instead, the error is returned and asserted in `shutdownAllNodes`, which makes it easier to tell which node failed in which test. We also catch all the errors returned from the `StopDaemon` call to properly assess the shutdown behavior.
1 parent 73574d9 · commit 31b6696
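Read on its own, the pattern the commit message describes is sketched below: the shutdown-log check reports an error instead of panicking, and the caller collects the failures and asserts once with `require.NoError`, so the test output names the node that failed. The helper names (`checkShutdownLog`, `shutdownAll`) are illustrative stand-ins, not the harness APIs; the actual changes follow in the diffs.

```go
package sketch

import (
	"fmt"
	"testing"

	"github.com/stretchr/testify/require"
)

// checkShutdownLog stands in for the log check that previously panicked;
// it now hands the problem back to the caller as an error.
func checkShutdownLog(nodeName string) error {
	// ... scan the node's log file here ...
	return fmt.Errorf("node %s: shutdown marker not found in log", nodeName)
}

// shutdownAll attempts every node, remembers the last failure, and fails
// the test once at the end so the report names the offending node.
func shutdownAll(t *testing.T, nodes []string) {
	var err error
	for _, name := range nodes {
		if e := checkShutdownLog(name); e != nil {
			t.Logf("unable to shutdown %s, got err: %v", name, e)
			err = e
		}
	}
	require.NoError(t, err, "failed to shutdown all nodes")
}
```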

File tree: 3 files changed, +56 -49 lines

lntest/harness.go

Lines changed: 17 additions & 15 deletions
@@ -332,7 +332,7 @@ func (h *HarnessTest) Subtest(t *testing.T) *HarnessTest {
 		// Don't bother run the cleanups if the test is failed.
 		if st.Failed() {
 			st.Log("test failed, skipped cleanup")
-			st.shutdownAllNodes()
+			st.shutdownNodesNoAssert()
 			return
 		}
 
@@ -402,16 +402,20 @@ func (h *HarnessTest) checkAndLimitBlocksMined(startHeight int32) {
 		"50 blocks in one test")
 }
 
+// shutdownNodesNoAssert will shutdown all running nodes without assertions.
+// This is used when the test has already failed, we don't want to log more
+// errors but focusing on the original error.
+func (h *HarnessTest) shutdownNodesNoAssert() {
+	for _, node := range h.manager.activeNodes {
+		_ = h.manager.shutdownNode(node)
+	}
+}
+
 // shutdownAllNodes will shutdown all running nodes.
 func (h *HarnessTest) shutdownAllNodes() {
+	var err error
	for _, node := range h.manager.activeNodes {
-		// The process may not be in a state to always shutdown
-		// immediately, so we'll retry up to a hard limit to ensure we
-		// eventually shutdown.
-		err := wait.NoError(func() error {
-			return h.manager.shutdownNode(node)
-		}, DefaultTimeout)
-
+		err = h.manager.shutdownNode(node)
 		if err == nil {
 			continue
 		}
@@ -421,6 +425,8 @@ func (h *HarnessTest) shutdownAllNodes() {
 		// processes.
 		h.Logf("unable to shutdown %s, got err: %v", node.Name(), err)
 	}
+
+	require.NoError(h, err, "failed to shutdown all nodes")
 }
 
 // cleanupStandbyNode is a function should be called with defer whenever a
@@ -516,12 +522,7 @@ func (h *HarnessTest) NewNodeWithCoins(name string,
 
 // Shutdown shuts down the given node and asserts that no errors occur.
 func (h *HarnessTest) Shutdown(node *node.HarnessNode) {
-	// The process may not be in a state to always shutdown immediately, so
-	// we'll retry up to a hard limit to ensure we eventually shutdown.
-	err := wait.NoError(func() error {
-		return h.manager.shutdownNode(node)
-	}, DefaultTimeout)
-
+	err := h.manager.shutdownNode(node)
 	require.NoErrorf(h, err, "unable to shutdown %v in %v", node.Name(),
 		h.manager.currentTestCase)
 }
@@ -764,9 +765,10 @@ func (h *HarnessTest) NewNodeRemoteSigner(name string, extraArgs []string,
 
 // KillNode kills the node and waits for the node process to stop.
 func (h *HarnessTest) KillNode(hn *node.HarnessNode) {
+	delete(h.manager.activeNodes, hn.Cfg.NodeID)
+
 	h.Logf("Manually killing the node %s", hn.Name())
 	require.NoErrorf(h, hn.KillAndWait(), "%s: kill got error", hn.Name())
-	delete(h.manager.activeNodes, hn.Cfg.NodeID)
 }
 
 // SetFeeEstimate sets a fee rate to be returned from fee estimator.
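For context on the wrapper removed in the two shutdown hunks above: `wait.NoError` polls a callback until it returns nil or a timeout expires. The node-manager change below removes the node from `activeNodes` before the attempt, so a failed shutdown is not retried and the polling no longer buys anything. A rough standalone equivalent of such a polling helper is sketched here; the name `noErrorUntil` and the poll interval are assumptions, not the `lntest/wait` implementation.

```go
package sketch

import (
	"fmt"
	"time"
)

// noErrorUntil keeps calling f until it returns nil or the timeout expires,
// mirroring the retry-until-clean behaviour the removed wrapper provided.
func noErrorUntil(f func() error, timeout, interval time.Duration) error {
	deadline := time.Now().Add(timeout)
	for {
		err := f()
		if err == nil {
			return nil
		}
		if time.Now().After(deadline) {
			// Wrap the last failure so the caller sees both the
			// timeout and the underlying reason.
			return fmt.Errorf("timeout after %v: %w", timeout, err)
		}
		time.Sleep(interval)
	}
}
```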
lntest/harness_node_manager.go

Lines changed: 4 additions & 1 deletion
@@ -112,11 +112,14 @@ func (nm *nodeManager) registerNode(node *node.HarnessNode) {
 // ShutdownNode stops an active lnd process and returns when the process has
 // exited and any temporary directories have been cleaned up.
 func (nm *nodeManager) shutdownNode(node *node.HarnessNode) error {
+	// Remove the node from the active nodes map even if the shutdown
+	// fails as the shutdown cannot be retried in that case.
+	delete(nm.activeNodes, node.Cfg.NodeID)
+
 	if err := node.Shutdown(); err != nil {
 		return err
 	}
 
-	delete(nm.activeNodes, node.Cfg.NodeID)
 	return nil
 }
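The reordering above leans on `delete` being a no-op for a key that is not in the map, so taking the node out of `activeNodes` before attempting the shutdown stays safe even if the same node is later shut down or killed again. A minimal standalone illustration with made-up entries:

```go
package main

import "fmt"

func main() {
	activeNodes := map[int]string{1: "alice", 2: "bob"}

	// Removing the entry first means a failed shutdown is never retried
	// for this node: it is already gone from the bookkeeping map.
	delete(activeNodes, 2)

	// Deleting a key that is no longer present is a harmless no-op, so a
	// second shutdown or kill of the same node cannot corrupt the map.
	delete(activeNodes, 2)

	fmt.Println(activeNodes) // map[1:alice]
}
```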

lntest/node/harness_node.go

Lines changed: 35 additions & 33 deletions
@@ -636,12 +636,11 @@ func (hn *HarnessNode) cleanup() error {
 // waitForProcessExit Launch a new goroutine which that bubbles up any
 // potential fatal process errors to the goroutine running the tests.
 func (hn *HarnessNode) WaitForProcessExit() error {
-	var err error
+	var errReturned error
 
 	errChan := make(chan error, 1)
 	go func() {
-		err = hn.cmd.Wait()
-		errChan <- err
+		errChan <- hn.cmd.Wait()
 	}()
 
 	select {
@@ -656,24 +655,36 @@ func (hn *HarnessNode) WaitForProcessExit() error {
 			return nil
 		}
 
+		// The process may have already been killed in the test, in
+		// that case we will skip the error and continue processing
+		// the logs.
+		if strings.Contains(err.Error(), "signal: killed") {
+			break
+		}
+
 		// Otherwise, we print the error, break the select and save
 		// logs.
 		hn.printErrf("wait process exit got err: %v", err)
-
-		break
+		errReturned = err
 
 	case <-time.After(wait.DefaultTimeout):
 		hn.printErrf("timeout waiting for process to exit")
 	}
 
 	// Make sure log file is closed and renamed if necessary.
-	finalizeLogfile(hn)
+	filename := finalizeLogfile(hn)
 
-	// Rename the etcd.log file if the node was running on embedded
-	// etcd.
+	// Assert the node has shut down from the log file.
+	err1 := assertNodeShutdown(filename)
+	if err1 != nil {
+		return fmt.Errorf("[%s]: assert shutdown failed in log[%s]: %w",
+			hn.Name(), filename, err1)
+	}
+
+	// Rename the etcd.log file if the node was running on embedded etcd.
 	finalizeEtcdLog(hn)
 
-	return err
+	return errReturned
 }
 
 // Stop attempts to stop the active lnd process.
@@ -700,30 +711,29 @@ func (hn *HarnessNode) Stop() error {
 
 	err := wait.NoError(func() error {
 		_, err := hn.RPC.LN.StopDaemon(ctxt, &req)
-
-		switch {
-		case err == nil:
+		if err == nil {
 			return nil
+		}
 
-		// Try again if a recovery/rescan is in progress.
-		case strings.Contains(
-			err.Error(), "recovery in progress",
-		):
-			return err
-
-		default:
+		// If the connection is already closed, we can exit
+		// early as the node has already been shut down in the
+		// test, e.g., in etcd leader health check test.
+		if strings.Contains(err.Error(), "connection refused") {
 			return nil
 		}
+
+		return err
 	}, wait.DefaultTimeout)
 	if err != nil {
-		return err
+		return fmt.Errorf("shutdown timeout: %w", err)
 	}
 
 	// Wait for goroutines to be finished.
 	done := make(chan struct{})
 	go func() {
 		hn.Watcher.wg.Wait()
 		close(done)
+		hn.Watcher = nil
 	}()
 
 	// If the goroutines fail to finish before timeout, we'll print
@@ -966,31 +976,23 @@ func getFinalizedLogFilePrefix(hn *HarnessNode) string {
 
 // finalizeLogfile makes sure the log file cleanup function is initialized,
 // even if no log file is created.
-func finalizeLogfile(hn *HarnessNode) {
+func finalizeLogfile(hn *HarnessNode) string {
 	// Exit early if there's no log file.
 	if hn.logFile == nil {
-		return
+		return ""
 	}
 
 	hn.logFile.Close()
 
 	// If logoutput flag is not set, return early.
 	if !*logOutput {
-		return
+		return ""
 	}
 
-	newFileName := fmt.Sprintf("%v.log",
-		getFinalizedLogFilePrefix(hn),
-	)
+	newFileName := fmt.Sprintf("%v.log", getFinalizedLogFilePrefix(hn))
 	renameFile(hn.filename, newFileName)
 
-	// Assert the node has shut down from the log file.
-	err := assertNodeShutdown(newFileName)
-	if err != nil {
-		err := fmt.Errorf("[%s]: assert shutdown failed in log[%s]: %w",
-			hn.Name(), newFileName, err)
-		panic(err)
-	}
+	return newFileName
 }
 
 // assertNodeShutdown asserts that the node has shut down properly by checking

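`assertNodeShutdown` itself is not part of this diff; per its doc comment it verifies the shutdown by inspecting the finalized log file returned by `finalizeLogfile`. Purely as an illustration of that kind of check, and assuming a hypothetical marker string rather than lnd's actual log line, such a helper could be sketched as:

```go
package sketch

import (
	"bufio"
	"fmt"
	"os"
	"strings"
)

// assertShutdownMarker reports an error unless the given log file contains
// the marker line. The marker value is an assumption for illustration; the
// real assertNodeShutdown may look for a different line.
func assertShutdownMarker(logPath, marker string) error {
	f, err := os.Open(logPath)
	if err != nil {
		return fmt.Errorf("open log %s: %w", logPath, err)
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		if strings.Contains(scanner.Text(), marker) {
			return nil
		}
	}
	if err := scanner.Err(); err != nil {
		return fmt.Errorf("read log %s: %w", logPath, err)
	}

	return fmt.Errorf("shutdown marker %q not found in %s", marker, logPath)
}
```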