diff --git a/pkg/ebpf/tracee.go b/pkg/ebpf/tracee.go index 0d0f45b1c6a0..177620921cfe 100644 --- a/pkg/ebpf/tracee.go +++ b/pkg/ebpf/tracee.go @@ -130,6 +130,8 @@ type Tracee struct { // This does not mean they are required for tracee to function. // TODO: remove this in favor of dependency manager nodes requiredKsyms []string + // All possible tailcall map names from all Core events (computed once at startup) + allTailCallMapNames map[string]struct{} } func (t *Tracee) Stats() *metrics.Stats { @@ -235,24 +237,28 @@ func New(cfg config.Config) (*Tracee, error) { // Create Tracee t := &Tracee{ - config: cfg, - done: make(chan struct{}), - stats: metrics.NewStats(), - writtenFiles: make(map[string]string), - readFiles: make(map[string]string), - capturedFiles: make(map[string]int64), - streamsManager: streams.NewStreamsManager(), - policyManager: pm, - eventsDependencies: depsManager, - requiredKsyms: []string{}, - extraProbes: make(map[string]*probes.ProbeGroup), - dataTypeDecoder: bufferdecoder.NewTypeDecoder(), + config: cfg, + done: make(chan struct{}), + stats: metrics.NewStats(), + writtenFiles: make(map[string]string), + readFiles: make(map[string]string), + capturedFiles: make(map[string]int64), + streamsManager: streams.NewStreamsManager(), + policyManager: pm, + eventsDependencies: depsManager, + requiredKsyms: []string{}, + extraProbes: make(map[string]*probes.ProbeGroup), + dataTypeDecoder: bufferdecoder.NewTypeDecoder(), + allTailCallMapNames: make(map[string]struct{}), } // clear initial policies to avoid wrong references initialPolicies = nil t.config.InitialPolicies = nil + // Initialize list of all possible tailcall map names from Core events + t.initAllTailCallMapNames() + // Add/Drop capabilities to/from the Base ring (always effective) capsToAdd, err := capabilities.ReqByString(t.config.Capabilities.AddCaps...) 
@@ -593,6 +599,114 @@ func (t *Tracee) initTailCall(tailCall events.TailCall) error { return nil } +// initAllTailCallMapNames populates the set (t.allTailCallMapNames) of all possible tailcall map names +// by iterating through all Core event definitions and collecting the map names of their primary and fallback tailcall dependencies. +// This is called once during Tracee initialization to avoid repeated computation. +func (t *Tracee) initAllTailCallMapNames() { + for _, eventDefinition := range events.Core.GetDefinitions() { + deps := eventDefinition.GetDependencies() + primaryDeps := deps.GetPrimaryDependencies() + tailCalls := primaryDeps.GetTailCalls() + for _, tailCall := range tailCalls { + t.allTailCallMapNames[tailCall.GetMapName()] = struct{}{} + } + + // Also check fallback dependencies for their tailcalls + fallbackDeps := deps.GetFallbackDependencies() + for _, fallback := range fallbackDeps { + tailCalls := fallback.GetTailCalls() + for _, tailCall := range tailCalls { + t.allTailCallMapNames[tailCall.GetMapName()] = struct{}{} + } + } + } +} + +// rebuildAllTailCalls rebuilds all tailcall mappings based on the current dependency state. +// This method clears existing tailcall maps and then repopulates them with current dependencies; it returns the first error from either step. +// In the visible change it is invoked only from the dependency manager's state-change watcher registered in initBPF (fallbacks, event additions/removals); the initial population in populateBPFMaps calls buildAllTailCallMaps directly. +func (t *Tracee) rebuildAllTailCalls() error { + err := t.clearAllTailCallMaps() + if err != nil { + return errfmt.Errorf("failed to clear tailcall maps: %v", err) + } + + err = t.buildAllTailCallMaps() + if err != nil { + return errfmt.Errorf("failed to build tailcall maps: %v", err) + } + + return nil +} + +// clearAllTailCallMaps clears all BPF maps that are used for tailcalls. +// It iterates through all BPF maps using the module iterator and clears any whose name is in t.allTailCallMapNames, returning on the first clearing error. 
+func (t *Tracee) clearAllTailCallMaps() error { + // Iterate through all BPF maps and clear the ones used for tailcalls + iterator := t.bpfModule.Iterator() + for bpfMap := iterator.NextMap(); bpfMap != nil; bpfMap = iterator.NextMap() { + mapName := bpfMap.Name() + + // Check if this map is used for tailcalls (using the pre-computed name set built from all Core events) + if _, isTailCallMap := t.allTailCallMapNames[mapName]; isTailCallMap { + err := t.clearTailCallMap(bpfMap) + if err != nil { + return errfmt.Errorf("failed to clear tailcall map %s: %v", mapName, err) + } + } + } + + return nil +} + +// buildAllTailCallMaps builds all tailcall mappings based on current event dependencies: for every event returned by the dependency manager it calls initTailCall on each of that event's current tailcall dependencies, returning on the first error. +func (t *Tracee) buildAllTailCallMaps() error { + for _, eventID := range t.eventsDependencies.GetEvents() { + depsNode, err := t.eventsDependencies.GetEvent(eventID) + if err != nil { + return errfmt.Errorf("failed to get event dependencies for %v: %v", eventID, err) + } + deps := depsNode.GetDependencies() + tailCalls := deps.GetTailCalls() + for _, tailCall := range tailCalls { + err := t.initTailCall(tailCall) + if err != nil { + return errfmt.Errorf("failed to initialize tail call: %v", err) + } + } + } + + return nil +} + +// clearTailCallMap clears all entries in the specified tailcall map. +// It uses map key iteration to only clear existing entries, which is more efficient +// than clearing all possible indexes. Failed deletions are only logged at debug level, so it always returns nil. 
+func (t *Tracee) clearTailCallMap(bpfMap *bpf.BPFMap) error { + iterator := bpfMap.Iterator() + + // Collect all keys first to avoid iterator invalidation during deletion + var keysToDelete []uint32 + for iterator.Next() { + keyBytes := iterator.Key() + if len(keyBytes) >= 4 { // uint32 is 4 bytes; shorter keys are skipped and the key is read in host byte order + key := *(*uint32)(unsafe.Pointer(&keyBytes[0])) + keysToDelete = append(keysToDelete, key) + } + } + + // Now delete all the collected keys + for _, key := range keysToDelete { + err := bpfMap.DeleteKey(unsafe.Pointer(&key)) + if err != nil { + // Log but don't fail on individual delete errors (best-effort: the remaining keys are still attempted) + logger.Debugw("Failed to delete tailcall map entry", "map", bpfMap.Name(), "key", key, "error", err) + } + } + + return nil +} + // initDerivationTable initializes tracee's events.DerivationTable. For each // event, represented through its ID, we declare to which other events it can be // derived and the corresponding function to derive into that Event. @@ -1267,22 +1381,9 @@ func (t *Tracee) populateBPFMaps() error { } // Initialize tail call dependencies - // TODO: Tail calls are not updated upon events changes in the dependency manager. - // Hence, upon events addition, fallbacks or removal, tail calls will not be updated. - // This should be fixed dynamically in the future. 
- for _, eventID := range t.eventsDependencies.GetEvents() { - depsNode, err := t.eventsDependencies.GetEvent(eventID) - if err != nil { - return errfmt.Errorf("failed to get event dependencies: %v", err) - } - deps := depsNode.GetDependencies() - tailCalls := deps.GetTailCalls() - for _, tailCall := range tailCalls { - err := t.initTailCall(tailCall) - if err != nil { - return errfmt.Errorf("failed to initialize tail call: %v", err) - } - } + err = t.buildAllTailCallMaps() + if err != nil { + return errfmt.WrapError(err) } return nil @@ -1453,6 +1554,17 @@ func (t *Tracee) initBPF() error { // collector to free the BPF object t.config.BPFObjBytes = nil + // Register state change watcher for tailcall rebuilding + // This ensures tailcalls are updated whenever the dependency tree changes + t.eventsDependencies.SubscribeStateChange(func() { + // Note: This is called within the dependency manager's mutex, + // so we should avoid any operations that might deadlock. NOTE(review): rebuildAllTailCalls reaches t.eventsDependencies.GetEvents/GetEvent through buildAllTailCallMaps, and Manager.GetEvent acquires m.mu.RLock(), which cannot be granted while the triggering Manager method still holds m.mu.Lock(); confirm and restructure (e.g. fire watchers after unlocking) before relying on this watcher. + err := t.rebuildAllTailCalls() + if err != nil { + logger.Errorw("Failed to rebuild tailcalls after dependency change", "error", err) + } + }) + // Populate eBPF maps with initial data err = t.populateBPFMaps() diff --git a/pkg/events/dependencies/manager.go b/pkg/events/dependencies/manager.go index e0626fd78b22..313581767def 100644 --- a/pkg/events/dependencies/manager.go +++ b/pkg/events/dependencies/manager.go @@ -32,6 +32,7 @@ type Manager struct { probes map[probes.Handle]*ProbeNode onAdd map[NodeType][]func(node interface{}) []Action onRemove map[NodeType][]func(node interface{}) []Action + onStateChanged []func() // Watchers called (in subscription order) when manager state changes dependenciesGetter func(events.ID) events.DependencyStrategy // Track failed probes and events to prevent issues such as incorrect fallback handling, // duplicate processing, or inconsistent state when dependencies are shared between events. 
@@ -48,6 +49,7 @@ func NewDependenciesManager(dependenciesGetter func(events.ID) events.Dependency probes: make(map[probes.Handle]*ProbeNode), onAdd: make(map[NodeType][]func(node interface{}) []Action), onRemove: make(map[NodeType][]func(node interface{}) []Action), + onStateChanged: make([]func(), 0), dependenciesGetter: dependenciesGetter, failedProbes: make(map[probes.Handle]struct{}), failedEvents: make(map[events.ID]struct{}), @@ -73,6 +75,17 @@ func (m *Manager) SubscribeRemove(subscribeType NodeType, onRemove func(node int m.onRemove[subscribeType] = append([]func(node interface{}) []Action{onRemove}, m.onRemove[subscribeType]...) } +// SubscribeStateChange adds a watcher function called when the manager's dependency tree changes. +// State change watchers are called in the order of their subscription. +// They are invoked only when nodes are actually added/removed from the tree or when fallbacks are applied. +// Changes to the "explicitly selected" status of existing nodes do not trigger these watchers. Watchers run synchronously while the manager's write lock is held (see SelectEvent), so they must not call Manager methods that acquire m.mu, such as GetEvent. +func (m *Manager) SubscribeStateChange(onStateChanged func()) { + m.mu.Lock() + defer m.mu.Unlock() + + m.onStateChanged = append(m.onStateChanged, onStateChanged) +} + // GetEvent returns the dependencies of the given event. func (m *Manager) GetEvent(id events.ID) (*EventNode, error) { m.mu.RLock() @@ -105,7 +118,19 @@ func (m *Manager) SelectEvent(id events.ID) (*EventNode, error) { m.mu.Lock() defer m.mu.Unlock() - return m.buildEvent(id, nil) + existingNode := m.getEventNode(id) + + node, err := m.buildEvent(id, nil) + if err != nil { + return node, err + } + + // Only trigger state change if this was a new node added to the tree (watchers run here with m.mu still write-locked) + if existingNode == nil { + m.triggerStateChanged() + } + + return node, err } // UnselectEvent marks the event as not explicitly selected. 
@@ -120,8 +145,15 @@ func (m *Manager) UnselectEvent(id events.ID) bool { if node == nil { return false } + node.unmarkAsExplicitlySelected() removed := m.cleanUnreferencedEventNode(node) + + // Only trigger state change if the node was actually removed from the tree + if removed { + m.triggerStateChanged() + } + return removed } @@ -136,7 +168,12 @@ func (m *Manager) RemoveEvent(id events.ID) error { m.mu.Lock() defer m.mu.Unlock() - return m.removeEvent(id) + err := m.removeEvent(id) + if err == nil { + m.triggerStateChanged() + } + + return err } // removeEvent removes the given event from the tree. @@ -398,6 +435,14 @@ func (m *Manager) triggerOnRemove(node interface{}) { } } +// triggerStateChanged triggers all state change watchers, synchronously and in subscription order. +// This is called when the manager's state has actually changed. It does no locking itself; callers such as RemoveEvent and FailEvent invoke it with m.mu write-locked, so watchers execute under that lock. +func (m *Manager) triggerStateChanged() { + for _, onStateChanged := range m.onStateChanged { + onStateChanged() + } +} + func getNodeType(node interface{}) (NodeType, error) { switch node.(type) { case *EventNode: @@ -500,7 +545,15 @@ func (m *Manager) FailEvent(id events.ID) (bool, error) { m.mu.Lock() defer m.mu.Unlock() - return m.failEvent(id) + removed, err := m.failEvent(id) + // Always trigger state change on successful FailEvent since either: + // 1. Event was removed (state changed), or + // 2. Fallback was applied (dependencies changed, state changed) + if err == nil { + m.triggerStateChanged() + } + + return removed, err } // failEvent attempts to switch the given event dependencies to its next available fallback ones. @@ -589,6 +642,7 @@ func (m *Manager) FailProbe(handle probes.Handle) error { m.failedProbes[handle] = struct{}{} m.removeProbe(handle) + m.triggerStateChanged() return nil }