Skip to content

Update the support export tool to gather Patroni logs #117

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/content/reference/pgo_support_export.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,9 @@ Collecting networkpolicies...
Collecting limitranges...
Collecting events...
Collecting Postgres logs...
Collecting pgBackRest logs...
Collecting Patroni logs...
Collecting pgBackRest Repo Host logs...
Collecting PostgresCluster pod logs...
Collecting monitoring pod logs...
Collecting operator pod logs...
Expand Down
11 changes: 11 additions & 0 deletions internal/cmd/exec.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,17 @@ func (exec Executor) listBackrestLogFiles() (string, string, error) {
return stdout.String(), stderr.String(), err
}

// listPatroniLogFiles returns the full path of each Patroni log file
// found on the Postgres instance, newest first, one per line. These are
// the file-based Patroni logs stored under pgdata/patroni/log.
func (exec Executor) listPatroniLogFiles() (string, string, error) {
	var outBuf, errBuf bytes.Buffer

	err := exec(nil, &outBuf, &errBuf,
		"bash", "-ceu", "--", "ls -1dt pgdata/patroni/log/*")

	return outBuf.String(), errBuf.String(), err
}

// listBackrestRepoHostLogFiles returns the full path of pgBackRest log files.
// These are the pgBackRest logs stored on the repo host
func (exec Executor) listBackrestRepoHostLogFiles() (string, string, error) {
Expand Down
19 changes: 19 additions & 0 deletions internal/cmd/exec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,25 @@ func TestListPGLogFiles(t *testing.T) {

}

// TestListPatroniLogFiles verifies that listPatroniLogFiles issues the
// expected shell command, wires up both output streams, and passes any
// execution error through to the caller unmodified.
func TestListPatroniLogFiles(t *testing.T) {

	t.Run("default", func(t *testing.T) {
		passThrough := errors.New("pass-through")

		runner := func(
			stdin io.Reader, stdout, stderr io.Writer, command ...string,
		) error {
			assert.DeepEqual(t, command, []string{"bash", "-ceu", "--", "ls -1dt pgdata/patroni/log/*"})
			assert.Assert(t, stdout != nil, "should capture stdout")
			assert.Assert(t, stderr != nil, "should capture stderr")
			return passThrough
		}

		_, _, err := Executor(runner).listPatroniLogFiles()
		assert.ErrorContains(t, err, "pass-through")
	})

}

func TestCatFile(t *testing.T) {

t.Run("default", func(t *testing.T) {
Expand Down
116 changes: 116 additions & 0 deletions internal/cmd/export.go
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,9 @@ Collecting networkpolicies...
Collecting limitranges...
Collecting events...
Collecting Postgres logs...
Collecting pgBackRest logs...
Collecting Patroni logs...
Collecting pgBackRest Repo Host logs...
Collecting PostgresCluster pod logs...
Collecting monitoring pod logs...
Collecting operator pod logs...
Expand Down Expand Up @@ -471,6 +474,12 @@ Collecting PGO CLI logs...
writeInfo(cmd, fmt.Sprintf("Error gathering pgBackRest DB Hosts Logs: %s", err))
}

// Patroni Logs that are stored on the Postgres Instances
err = gatherPatroniLogs(ctx, clientset, restConfig, namespace, clusterName, tw, cmd)
if err != nil {
writeInfo(cmd, fmt.Sprintf("Error gathering Patroni Logs from Instance Pods: %s", err))
}

// All pgBackRest Logs on the Repo Host
err = gatherRepoHostLogs(ctx, clientset, restConfig, namespace, clusterName, tw, cmd)
if err != nil {
Expand Down Expand Up @@ -1308,6 +1317,113 @@ func gatherDbBackrestLogs(ctx context.Context,
return nil
}

// gatherPatroniLogs gathers all the file-based Patroni logs on the DB instance,
// if configured. By default, these logs will be sent to stdout and captured as
// Pod logs instead.
func gatherPatroniLogs(ctx context.Context,
clientset *kubernetes.Clientset,
config *rest.Config,
namespace string,
clusterName string,
tw *tar.Writer,
cmd *cobra.Command,
) error {
writeInfo(cmd, "Collecting Patroni logs...")

dbPods, err := clientset.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{
LabelSelector: util.DBInstanceLabels(clusterName),
})

if err != nil {
if apierrors.IsForbidden(err) {
writeInfo(cmd, err.Error())
return nil
}
return err
}

if len(dbPods.Items) == 0 {
writeInfo(cmd, "No database instance pod found for gathering logs")
return nil
}

writeDebug(cmd, fmt.Sprintf("Found %d Pods\n", len(dbPods.Items)))

podExec, err := util.NewPodExecutor(config)
if err != nil {
return err
}

for _, pod := range dbPods.Items {
writeDebug(cmd, fmt.Sprintf("Pod Name is %s\n", pod.Name))

exec := func(stdin io.Reader, stdout, stderr io.Writer, command ...string,
) error {
return podExec(namespace, pod.Name, util.ContainerDatabase,
stdin, stdout, stderr, command...)
}

// Get Patroni Log Files
stdout, stderr, err := Executor(exec).listPatroniLogFiles()

// Depending upon the list* function above:
// An error may happen when err is non-nil or stderr is non-empty.
// In both cases, we want to print helpful information and continue to the
// next iteration.
if err != nil || stderr != "" {

if apierrors.IsForbidden(err) {
writeInfo(cmd, err.Error())
return nil
}

writeDebug(cmd, "Error getting Patroni logs\n")

if err != nil {
writeDebug(cmd, fmt.Sprintf("%s\n", err.Error()))
}
if stderr != "" {
writeDebug(cmd, stderr)
}

if strings.Contains(stderr, "No such file or directory") {
writeDebug(cmd, "Cannot find any Patroni log files. This is acceptable in some configurations.\n")
}
Comment on lines +1385 to +1391
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the stderr is No such file or directory, we'll print two lines for this one err, right? Is that what we want?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think so. It follows our existing pattern and only shows up in the DEBUG logs. For reference, here's what you would see in cli.log:

2024-12-18 15:15:31.620 -0500 EST - DEBUG - Error getting Patroni logs
2024-12-18 15:15:31.620 -0500 EST - DEBUG - command terminated with exit code 2
2024-12-18 15:15:31.620 -0500 EST - DEBUG - ls: cannot access 'pgdata/patroni/log/*': No such file or directory
2024-12-18 15:15:31.620 -0500 EST - DEBUG - Cannot find any Patroni log files. This is acceptable in some configurations.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmmm, if that's the pattern we've got, I'm fine with it (with the plan to come back and review our logging patterns).

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the other cases, the check at strings.Contains(stderr, "No such file or directory") was necessary because the lack of logs was not necessarily an erroneous event. That extra information confirms there are no files, not that we encountered an error getting them.

(If I remember correctly)

continue
}

logFiles := strings.Split(strings.TrimSpace(stdout), "\n")
for _, logFile := range logFiles {
writeDebug(cmd, fmt.Sprintf("LOG FILE: %s\n", logFile))
var buf bytes.Buffer

stdout, stderr, err := Executor(exec).catFile(logFile)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm fine with this call with catFile, but I am concerned that as the Patroni logs grow we will encounter issues similar to the Postgres logs: the entire file cannot be held in memory. We stream the PG logs to local disk to get around that issue. We've got good error handling here, so I don't think we should refactor for that; but it's something to keep in mind.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the Patroni logs, there is a defined storage limit, but I agree this could still be a problem for larger file sizes. I'm wondering if the right answer is to do some refactoring and combine the various functions that pull logs from the instance Pods so they all benefit from the streaming implementation?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this isn't a blocker, I'm going to merge this PR now but I'll (hopefully) have a refactor PR up soon for consideration.

if err != nil {
if apierrors.IsForbidden(err) {
writeInfo(cmd, err.Error())
// Continue and output errors for each log file
// Allow the user to see and address all issues at once
continue
}
return err
}

buf.Write([]byte(stdout))
if stderr != "" {
str := fmt.Sprintf("\nError returned: %s\n", stderr)
buf.Write([]byte(str))
}

path := clusterName + fmt.Sprintf("/pods/%s/", pod.Name) + logFile
if err := writeTar(tw, buf.Bytes(), path, cmd); err != nil {
return err
}
}

}
return nil
}

// gatherRepoHostLogs gathers all the file-based pgBackRest logs on the repo host.
// There may not be any logs depending upon pgBackRest's log-level-file.
func gatherRepoHostLogs(ctx context.Context,
Expand Down
Loading