Skip to content

Commit 11ce72b

Browse files
🌱 Improve Machine remediation logs (#11692)
* Improve Machine remediation logs
* Address comments
1 parent 6979dbe commit 11ce72b

File tree

2 files changed

+8
-15
lines changed

2 files changed

+8
-15
lines changed

internal/controllers/machinehealthcheck/machinehealthcheck_controller.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -428,10 +428,11 @@ func (r *Reconciler) patchUnhealthyTargets(ctx context.Context, logger logr.Logg
428428
// mark for remediation
429429
errList := []error{}
430430
for _, t := range unhealthy {
431+
logger := logger.WithValues("Machine", klog.KObj(t.Machine), "Node", klog.KObj(t.Node))
431432
condition := conditions.Get(t.Machine, clusterv1.MachineHealthCheckSucceededCondition)
432433

433434
if annotations.IsPaused(cluster, t.Machine) {
434-
logger.Info("Machine has failed health check, but machine is paused so skipping remediation", "target", t.string(), "reason", condition.Reason, "message", condition.Message)
435+
logger.Info("Machine has failed health check, but machine is paused so skipping remediation", "reason", condition.Reason, "message", condition.Message)
435436
} else {
436437
if m.Spec.RemediationTemplate != nil {
437438
// If external remediation request already exists,
@@ -482,7 +483,7 @@ func (r *Reconciler) patchUnhealthyTargets(ctx context.Context, logger logr.Logg
482483
// the same Machine, users are in charge of setting health checks and remediation properly.
483484
to.SetName(t.Machine.Name)
484485

485-
logger.Info("Target has failed health check, creating an external remediation request", "remediation request name", to.GetName(), "target", t.string(), "reason", condition.Reason, "message", condition.Message)
486+
logger.Info("Machine has failed health check, creating an external remediation request", "remediation request name", to.GetName(), "reason", condition.Reason, "message", condition.Message)
486487
// Create the external clone.
487488
if err := r.Client.Create(ctx, to); err != nil {
488489
conditions.MarkFalse(m, clusterv1.ExternalRemediationRequestAvailableCondition, clusterv1.ExternalRemediationRequestCreationFailedReason, clusterv1.ConditionSeverityError, err.Error())
@@ -503,7 +504,7 @@ func (r *Reconciler) patchUnhealthyTargets(ctx context.Context, logger logr.Logg
503504
Reason: clusterv1.MachineExternallyRemediatedWaitingForRemediationV1Beta2Reason,
504505
})
505506
} else if t.Machine.DeletionTimestamp.IsZero() { // Only setting the OwnerRemediated conditions when machine is not already in deletion.
506-
logger.Info("Target has failed health check, marking for remediation", "target", t.string(), "reason", condition.Reason, "message", condition.Message)
507+
logger.Info("Machine has failed health check, marking for remediation", "reason", condition.Reason, "message", condition.Message)
507508
// NOTE: MHC is responsible for creating MachineOwnerRemediatedCondition if missing or for triggering another remediation if the previous one is completed;
508509
// instead, if a remediation is already in progress, the remediation owner is responsible for completing the process and MHC should not overwrite the condition.
509510
if !conditions.Has(t.Machine, clusterv1.MachineOwnerRemediatedCondition) || conditions.IsTrue(t.Machine, clusterv1.MachineOwnerRemediatedCondition) {
@@ -540,8 +541,9 @@ func (r *Reconciler) patchUnhealthyTargets(ctx context.Context, logger logr.Logg
540541
t.Machine,
541542
corev1.EventTypeNormal,
542543
EventMachineMarkedUnhealthy,
543-
"Machine %v has been marked as unhealthy",
544-
t.string(),
544+
"Machine %s has been marked as unhealthy by %s",
545+
klog.KObj(t.Machine),
546+
klog.KObj(t.MHC),
545547
)
546548
}
547549
return errList

internal/controllers/machinehealthcheck/machinehealthcheck_targets.go

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -64,15 +64,6 @@ type healthCheckTarget struct {
6464
nodeMissing bool
6565
}
6666

67-
func (t *healthCheckTarget) string() string {
68-
return fmt.Sprintf("%s/%s/%s/%s",
69-
t.MHC.GetNamespace(),
70-
t.MHC.GetName(),
71-
t.Machine.GetName(),
72-
t.nodeName(),
73-
)
74-
}
75-
7667
// Get the node name if the target has a node.
7768
func (t *healthCheckTarget) nodeName() string {
7869
if t.Node != nil {
@@ -331,7 +322,7 @@ func (r *Reconciler) healthCheckTargets(targets []healthCheckTarget, logger logr
331322
var healthy []healthCheckTarget
332323

333324
for _, t := range targets {
334-
logger := logger.WithValues("target", t.string())
325+
logger := logger.WithValues("Machine", klog.KObj(t.Machine), "Node", klog.KObj(t.Node))
335326
logger.V(3).Info("Health checking target")
336327
needsRemediation, nextCheck := t.needsRemediation(logger, timeoutForMachineToHaveNode)
337328

0 commit comments

Comments
 (0)