Skip to content

Commit f26d043

Browse files
committed
audit: improve audit queue handling when "audit=1" on cmdline
When an admin enables audit at early boot via the "audit=1" kernel command line the audit queue behavior is slightly different; the audit subsystem goes to greater lengths to avoid dropping records, which unfortunately can result in problems when the audit daemon is forcibly stopped for an extended period of time. This patch makes a number of changes designed to improve the audit queuing behavior so that leaving the audit daemon in a stopped state for an extended period does not cause a significant impact to the system. - kauditd_send_queue() is now limited to looping through the passed queue only once per call. This not only prevents the function from looping indefinitely when records are returned to the current queue, it also allows any recovery handling in kauditd_thread() to take place when kauditd_send_queue() returns. - Transient netlink send errors seen as -EAGAIN now cause the record to be returned to the retry queue instead of going to the hold queue. The intention of the hold queue is to store, perhaps for an extended period of time, the events which led up to the audit daemon going offline. The retry queue remains a temporary queue intended to protect against transient issues between the kernel and the audit daemon. - The retry queue is now limited by the audit_backlog_limit setting, the same as the other queues. This allows admins to bound the size of all of the audit queues on the system. - kauditd_rehold_skb() now returns records to the end of the hold queue to ensure ordering is preserved in the face of recent changes to kauditd_send_queue(). Cc: stable@vger.kernel.org Fixes: 5b52330 ("audit: fix auditd/kernel connection state tracking") Fixes: f4b3ee3 ("audit: improve robustness of the audit queue handling") Reported-by: Gaosheng Cui <cuigaosheng1@huawei.com> Tested-by: Gaosheng Cui <cuigaosheng1@huawei.com> Reviewed-by: Richard Guy Briggs <rgb@redhat.com> Signed-off-by: Paul Moore <paul@paul-moore.com>
1 parent e783362 commit f26d043

File tree

1 file changed

+43
-19
lines changed

1 file changed

+43
-19
lines changed

kernel/audit.c

Lines changed: 43 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -541,20 +541,22 @@ static void kauditd_printk_skb(struct sk_buff *skb)
541541
/**
542542
* kauditd_rehold_skb - Handle a audit record send failure in the hold queue
543543
* @skb: audit record
544+
* @error: error code (unused)
544545
*
545546
* Description:
546547
* This should only be used by the kauditd_thread when it fails to flush the
547548
* hold queue.
548549
*/
549-
static void kauditd_rehold_skb(struct sk_buff *skb)
550+
static void kauditd_rehold_skb(struct sk_buff *skb, __always_unused int error)
550551
{
551-
/* put the record back in the queue at the same place */
552-
skb_queue_head(&audit_hold_queue, skb);
552+
/* put the record back in the queue */
553+
skb_queue_tail(&audit_hold_queue, skb);
553554
}
554555

555556
/**
556557
* kauditd_hold_skb - Queue an audit record, waiting for auditd
557558
* @skb: audit record
559+
* @error: error code
558560
*
559561
* Description:
560562
* Queue the audit record, waiting for an instance of auditd. When this
@@ -564,19 +566,31 @@ static void kauditd_rehold_skb(struct sk_buff *skb)
564566
* and queue it, if we have room. If we want to hold on to the record, but we
565567
* don't have room, record a record lost message.
566568
*/
567-
static void kauditd_hold_skb(struct sk_buff *skb)
569+
static void kauditd_hold_skb(struct sk_buff *skb, int error)
568570
{
569571
/* at this point it is uncertain if we will ever send this to auditd so
570572
* try to send the message via printk before we go any further */
571573
kauditd_printk_skb(skb);
572574

573575
/* can we just silently drop the message? */
574-
if (!audit_default) {
575-
kfree_skb(skb);
576-
return;
576+
if (!audit_default)
577+
goto drop;
578+
579+
/* the hold queue is only for when the daemon goes away completely,
580+
* not -EAGAIN failures; if we are in a -EAGAIN state requeue the
581+
* record on the retry queue unless it's full, in which case drop it
582+
*/
583+
if (error == -EAGAIN) {
584+
if (!audit_backlog_limit ||
585+
skb_queue_len(&audit_retry_queue) < audit_backlog_limit) {
586+
skb_queue_tail(&audit_retry_queue, skb);
587+
return;
588+
}
589+
audit_log_lost("kauditd retry queue overflow");
590+
goto drop;
577591
}
578592

579-
/* if we have room, queue the message */
593+
/* if we have room in the hold queue, queue the message */
580594
if (!audit_backlog_limit ||
581595
skb_queue_len(&audit_hold_queue) < audit_backlog_limit) {
582596
skb_queue_tail(&audit_hold_queue, skb);
@@ -585,24 +599,32 @@ static void kauditd_hold_skb(struct sk_buff *skb)
585599

586600
/* we have no other options - drop the message */
587601
audit_log_lost("kauditd hold queue overflow");
602+
drop:
588603
kfree_skb(skb);
589604
}
590605

591606
/**
592607
* kauditd_retry_skb - Queue an audit record, attempt to send again to auditd
593608
* @skb: audit record
609+
* @error: error code (unused)
594610
*
595611
* Description:
596612
* Not as serious as kauditd_hold_skb() as we still have a connected auditd,
597613
* but for some reason we are having problems sending it audit records so
598614
* queue the given record and attempt to resend.
599615
*/
600-
static void kauditd_retry_skb(struct sk_buff *skb)
616+
static void kauditd_retry_skb(struct sk_buff *skb, __always_unused int error)
601617
{
602-
/* NOTE: because records should only live in the retry queue for a
603-
* short period of time, before either being sent or moved to the hold
604-
* queue, we don't currently enforce a limit on this queue */
605-
skb_queue_tail(&audit_retry_queue, skb);
618+
if (!audit_backlog_limit ||
619+
skb_queue_len(&audit_retry_queue) < audit_backlog_limit) {
620+
skb_queue_tail(&audit_retry_queue, skb);
621+
return;
622+
}
623+
624+
/* we have to drop the record, send it via printk as a last effort */
625+
kauditd_printk_skb(skb);
626+
audit_log_lost("kauditd retry queue overflow");
627+
kfree_skb(skb);
606628
}
607629

608630
/**
@@ -640,7 +662,7 @@ static void auditd_reset(const struct auditd_connection *ac)
640662
/* flush the retry queue to the hold queue, but don't touch the main
641663
* queue since we need to process that normally for multicast */
642664
while ((skb = skb_dequeue(&audit_retry_queue)))
643-
kauditd_hold_skb(skb);
665+
kauditd_hold_skb(skb, -ECONNREFUSED);
644666
}
645667

646668
/**
@@ -714,24 +736,26 @@ static int kauditd_send_queue(struct sock *sk, u32 portid,
714736
struct sk_buff_head *queue,
715737
unsigned int retry_limit,
716738
void (*skb_hook)(struct sk_buff *skb),
717-
void (*err_hook)(struct sk_buff *skb))
739+
void (*err_hook)(struct sk_buff *skb, int error))
718740
{
719741
int rc = 0;
720-
struct sk_buff *skb;
742+
struct sk_buff *skb = NULL;
743+
struct sk_buff *skb_tail;
721744
unsigned int failed = 0;
722745

723746
/* NOTE: kauditd_thread takes care of all our locking, we just use
724747
* the netlink info passed to us (e.g. sk and portid) */
725748

726-
while ((skb = skb_dequeue(queue))) {
749+
skb_tail = skb_peek_tail(queue);
750+
while ((skb != skb_tail) && (skb = skb_dequeue(queue))) {
727751
/* call the skb_hook for each skb we touch */
728752
if (skb_hook)
729753
(*skb_hook)(skb);
730754

731755
/* can we send to anyone via unicast? */
732756
if (!sk) {
733757
if (err_hook)
734-
(*err_hook)(skb);
758+
(*err_hook)(skb, -ECONNREFUSED);
735759
continue;
736760
}
737761

@@ -745,7 +769,7 @@ static int kauditd_send_queue(struct sock *sk, u32 portid,
745769
rc == -ECONNREFUSED || rc == -EPERM) {
746770
sk = NULL;
747771
if (err_hook)
748-
(*err_hook)(skb);
772+
(*err_hook)(skb, rc);
749773
if (rc == -EAGAIN)
750774
rc = 0;
751775
/* continue to drain the queue */

0 commit comments

Comments
 (0)