Skip to content

[tmpnet] Move WaitForHealthy from a function to a tmpnet.Node method #3896

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion tests/e2e/faultinjection/duplicate_node_id.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ var _ = ginkgo.Describe("Duplicate node handling", func() {
_ = e2e.AddEphemeralNode(tc, network, node2)

tc.By("checking that the second new node fails to become healthy before timeout")
err := tmpnet.WaitForHealthy(tc.DefaultContext(), node2)
err := node2.WaitForHealthy(tc.DefaultContext())
require.ErrorIs(err, context.DeadlineExceeded)

tc.By("stopping the first new node")
Expand Down
4 changes: 2 additions & 2 deletions tests/fixture/e2e/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ func WaitForHealthy(t require.TestingT, node *tmpnet.Node) {
// Need to use explicit context (vs DefaultContext()) to support use with DeferCleanup
ctx, cancel := context.WithTimeout(context.Background(), DefaultTimeout)
defer cancel()
require.NoError(t, tmpnet.WaitForHealthy(ctx, node))
require.NoError(t, node.WaitForHealthy(ctx))
}

// Sends an eth transaction and waits for the transaction receipt from the
Expand Down Expand Up @@ -248,7 +248,7 @@ func CheckBootstrapIsPossible(tc tests.TestContext, network *tmpnet.Network) *tm
})

// Check that the node becomes healthy within timeout
require.NoError(tmpnet.WaitForHealthy(tc.DefaultContext(), node))
require.NoError(node.WaitForHealthy(tc.DefaultContext()))

// Ensure that the primary validators are still healthy
for _, node := range network.Nodes {
Expand Down
2 changes: 1 addition & 1 deletion tests/fixture/tmpnet/network.go
Original file line number Diff line number Diff line change
Expand Up @@ -507,7 +507,7 @@ func (n *Network) RestartNode(ctx context.Context, node *Node) error {
n.log.Info("waiting for node to report healthy",
zap.Stringer("nodeID", node.NodeID),
)
return WaitForHealthy(ctx, node)
return node.WaitForHealthy(ctx)
}

// Stops all nodes in the network.
Expand Down
34 changes: 34 additions & 0 deletions tests/fixture/tmpnet/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package tmpnet
import (
"context"
"encoding/base64"
"errors"
"fmt"
"io"
"net"
Expand All @@ -14,6 +15,8 @@ import (
"strings"
"time"

"go.uber.org/zap"

"github.com/ava-labs/avalanchego/config"
"github.com/ava-labs/avalanchego/ids"
"github.com/ava-labs/avalanchego/staking"
Expand Down Expand Up @@ -335,3 +338,34 @@ func (n *Node) SaveAPIPort() error {
n.Flags[config.HTTPPortKey] = port
return nil
}

// WaitForHealthy blocks until the node reports healthy or an error
// (including context timeout) is observed.
//
// ctx must carry a deadline; a context without one is rejected immediately
// so that the polling loop below is always bounded. Health is queried once
// up front and then again on every tick of DefaultNodeTickerInterval.
//
// It returns nil as soon as IsHealthy reports true. It returns an error
// wrapping ErrUnrecoverableNodeHealthCheck when IsHealthy fails with that
// sentinel, and an error wrapping ctx.Err() when the context is done before
// the node becomes healthy. Any other IsHealthy error is logged and retried.
func (n *Node) WaitForHealthy(ctx context.Context) error {
	if _, ok := ctx.Deadline(); !ok {
		return fmt.Errorf("unable to wait for health for node %q with a context without a deadline", n.NodeID)
	}
	ticker := time.NewTicker(DefaultNodeTickerInterval)
	defer ticker.Stop()

	for {
		healthy, err := n.IsHealthy(ctx)
		switch {
		case errors.Is(err, ErrUnrecoverableNodeHealthCheck):
			return fmt.Errorf("%w for node %q", err, n.NodeID)
		case err != nil:
			// Any other error is treated as recoverable: log it and leave the
			// switch so the retry is paced by the ticker below. A `continue`
			// here would skip the select, retrying in a tight loop and never
			// observing ctx.Done() on this path.
			n.network.log.Verbo("failed to query node health",
				zap.Stringer("nodeID", n.NodeID),
				zap.Error(err),
			)
		case healthy:
			return nil
		}

		// Wait for the next tick, or give up once the context is done.
		select {
		case <-ctx.Done():
			return fmt.Errorf("failed to wait for health of node %q before timeout: %w", n.NodeID, ctx.Err())
		case <-ticker.C:
		}
	}
}
29 changes: 0 additions & 29 deletions tests/fixture/tmpnet/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,35 +47,6 @@ func CheckNodeHealth(ctx context.Context, uri string) (*health.APIReply, error)
return nil, fmt.Errorf("%w: %w", ErrUnrecoverableNodeHealthCheck, err)
}

// WaitForHealthy blocks until Node.IsHealthy returns true or an error
// (including context timeout) is observed.
//
// ctx must carry a deadline; a context without one is rejected immediately
// so that the polling loop below is always bounded. Health is queried once
// up front and then again on every tick of DefaultNodeTickerInterval.
//
// It returns nil as soon as IsHealthy reports true. It returns an error
// wrapping ErrUnrecoverableNodeHealthCheck when IsHealthy fails with that
// sentinel, and an error wrapping ctx.Err() when the context is done before
// the node becomes healthy. Any other IsHealthy error is retried.
func WaitForHealthy(ctx context.Context, node *Node) error {
	if _, ok := ctx.Deadline(); !ok {
		return fmt.Errorf("unable to wait for health for node %q with a context without a deadline", node.NodeID)
	}
	ticker := time.NewTicker(DefaultNodeTickerInterval)
	defer ticker.Stop()

	for {
		healthy, err := node.IsHealthy(ctx)
		switch {
		case errors.Is(err, ErrUnrecoverableNodeHealthCheck):
			return fmt.Errorf("%w for node %q", err, node.NodeID)
		case err != nil:
			// Error is recoverable. Leave the switch (rather than `continue`)
			// so the retry is paced by the ticker below and ctx.Done() is
			// still observed; a `continue` here would retry in a tight loop.
			// TODO(marun) Log the error to aid in troubleshooting once a logger is available
		case healthy:
			return nil
		}

		// Wait for the next tick, or give up once the context is done.
		select {
		case <-ctx.Done():
			return fmt.Errorf("failed to wait for health of node %q before timeout: %w", node.NodeID, ctx.Err())
		case <-ticker.C:
		}
	}
}

// NodeURI associates a node ID with its API URI.
type NodeURI struct {
NodeID ids.NodeID
Expand Down