diff --git a/tests/e2e/faultinjection/duplicate_node_id.go b/tests/e2e/faultinjection/duplicate_node_id.go index 7e6489160e4c..c2021d710491 100644 --- a/tests/e2e/faultinjection/duplicate_node_id.go +++ b/tests/e2e/faultinjection/duplicate_node_id.go @@ -44,7 +44,7 @@ var _ = ginkgo.Describe("Duplicate node handling", func() { _ = e2e.AddEphemeralNode(tc, network, node2) tc.By("checking that the second new node fails to become healthy before timeout") - err := tmpnet.WaitForHealthy(tc.DefaultContext(), node2) + err := node2.WaitForHealthy(tc.DefaultContext()) require.ErrorIs(err, context.DeadlineExceeded) tc.By("stopping the first new node") diff --git a/tests/fixture/e2e/helpers.go b/tests/fixture/e2e/helpers.go index ac1e97d32915..b954f99d73fd 100644 --- a/tests/fixture/e2e/helpers.go +++ b/tests/fixture/e2e/helpers.go @@ -154,7 +154,7 @@ func WaitForHealthy(t require.TestingT, node *tmpnet.Node) { // Need to use explicit context (vs DefaultContext()) to support use with DeferCleanup ctx, cancel := context.WithTimeout(context.Background(), DefaultTimeout) defer cancel() - require.NoError(t, tmpnet.WaitForHealthy(ctx, node)) + require.NoError(t, node.WaitForHealthy(ctx)) } // Sends an eth transaction and waits for the transaction receipt from the @@ -248,7 +248,7 @@ func CheckBootstrapIsPossible(tc tests.TestContext, network *tmpnet.Network) *tm }) // Check that the node becomes healthy within timeout - require.NoError(tmpnet.WaitForHealthy(tc.DefaultContext(), node)) + require.NoError(node.WaitForHealthy(tc.DefaultContext())) // Ensure that the primary validators are still healthy for _, node := range network.Nodes { diff --git a/tests/fixture/tmpnet/network.go b/tests/fixture/tmpnet/network.go index 6c01dd548958..4bf234b63824 100644 --- a/tests/fixture/tmpnet/network.go +++ b/tests/fixture/tmpnet/network.go @@ -507,7 +507,7 @@ func (n *Network) RestartNode(ctx context.Context, node *Node) error { n.log.Info("waiting for node to report healthy", zap.Stringer("nodeID", node.NodeID), ) - return WaitForHealthy(ctx, node) + return node.WaitForHealthy(ctx) } // Stops all nodes in the network. diff --git a/tests/fixture/tmpnet/node.go b/tests/fixture/tmpnet/node.go index 64844946a4ae..f48affdf56cc 100644 --- a/tests/fixture/tmpnet/node.go +++ b/tests/fixture/tmpnet/node.go @@ -6,6 +6,7 @@ package tmpnet import ( "context" "encoding/base64" + "errors" "fmt" "io" "net" @@ -14,6 +15,8 @@ import ( "strings" "time" + "go.uber.org/zap" + "github.com/ava-labs/avalanchego/config" "github.com/ava-labs/avalanchego/ids" "github.com/ava-labs/avalanchego/staking" @@ -335,3 +338,34 @@ func (n *Node) SaveAPIPort() error { n.Flags[config.HTTPPortKey] = port return nil } + +// WaitForHealthy blocks until node health is true or an error (including context timeout) is observed. +func (n *Node) WaitForHealthy(ctx context.Context) error { + if _, ok := ctx.Deadline(); !ok { + return fmt.Errorf("unable to wait for health for node %q with a context without a deadline", n.NodeID) + } + ticker := time.NewTicker(DefaultNodeTickerInterval) + defer ticker.Stop() + + for { + healthy, err := n.IsHealthy(ctx) + switch { + case errors.Is(err, ErrUnrecoverableNodeHealthCheck): + return fmt.Errorf("%w for node %q", err, n.NodeID) + case err != nil: + n.network.log.Verbo("failed to query node health", + zap.Stringer("nodeID", n.NodeID), + zap.Error(err), + ) + continue + case healthy: + return nil + } + + select { + case <-ctx.Done(): + return fmt.Errorf("failed to wait for health of node %q before timeout: %w", n.NodeID, ctx.Err()) + case <-ticker.C: + } + } +} diff --git a/tests/fixture/tmpnet/utils.go b/tests/fixture/tmpnet/utils.go index 29b36af1330e..e02fa9f618c3 100644 --- a/tests/fixture/tmpnet/utils.go +++ b/tests/fixture/tmpnet/utils.go @@ -47,35 +47,6 @@ func CheckNodeHealth(ctx context.Context, uri string) (*health.APIReply, error) return nil, fmt.Errorf("%w: %w", ErrUnrecoverableNodeHealthCheck, err) } -// WaitForHealthy blocks until Node.IsHealthy returns true or an error (including context timeout) is observed. -func WaitForHealthy(ctx context.Context, node *Node) error { - if _, ok := ctx.Deadline(); !ok { - return fmt.Errorf("unable to wait for health for node %q with a context without a deadline", node.NodeID) - } - ticker := time.NewTicker(DefaultNodeTickerInterval) - defer ticker.Stop() - - for { - healthy, err := node.IsHealthy(ctx) - switch { - case errors.Is(err, ErrUnrecoverableNodeHealthCheck): - return fmt.Errorf("%w for node %q", err, node.NodeID) - case err != nil: - // Error is recoverable - // TODO(marun) Log the error to aid in troubleshooting once a logger is available - continue - case healthy: - return nil - } - - select { - case <-ctx.Done(): - return fmt.Errorf("failed to wait for health of node %q before timeout: %w", node.NodeID, ctx.Err()) - case <-ticker.C: - } - } -} - // NodeURI associates a node ID with its API URI. type NodeURI struct { NodeID ids.NodeID