Skip to content

Commit 468ce9b

Browse files
committed
improve analysis performance
1 parent da0246f commit 468ce9b

File tree

3 files changed

+165
-38
lines changed

3 files changed

+165
-38
lines changed

analyser/statistics.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,15 +85,16 @@ func PrintFilteredResults(entries []LogEntry) {
8585

8686
// Create a tabwriter for clean column alignment
8787
writer := tabwriter.NewWriter(os.Stdout, 2, 4, 2, ' ', 0)
88-
fmt.Fprintf(writer, "%s\t%s\t%s\n", "LEVEL", "CLASS", "MESSAGE")
89-
fmt.Fprintf(writer, "%s\t%s\t%s\n", strings.Repeat("-", 10), strings.Repeat("-", 50), strings.Repeat("-", 100))
88+
fmt.Fprintf(writer, "%s\t%s\t%s\t%s\n", "TIMESTAMP", "LEVEL", "CLASS", "MESSAGE")
89+
fmt.Fprintf(writer, "%s\t%s\t%s\t%s\n", strings.Repeat("-", 10), strings.Repeat("-", 50), strings.Repeat("-", 100), strings.Repeat("-", 100))
9090

9191
for _, entry := range entries {
9292
levelColor := colorForLevel(entry.Level)
9393
level := fmt.Sprintf("%s%-8s%s", levelColor, entry.Level, colorReset)
9494
class := truncate(entry.Class, 50)
9595
message := truncate(entry.Message, 100)
96-
fmt.Fprintf(writer, "%s\t%s\t%s\n", level, class, message)
96+
timestamp := truncate(entry.Timestamp, 30)
97+
fmt.Fprintf(writer, "%s\t%s\t%s\t%s\n", timestamp, level, class, message)
9798
}
9899

99100
writer.Flush()

cmd/analyse.go

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -103,10 +103,11 @@ func analyzeCatalinaLogs(cmd *cobra.Command, logFile string, statsFlag bool, lev
103103
var converted []analyser.LogEntry
104104
for _, e := range entries {
105105
converted = append(converted, analyser.LogEntry{
106-
Level: e.Level,
107-
Thread: e.Thread,
108-
Class: e.Class,
109-
Message: e.Message,
106+
Timestamp: e.Timestamp,
107+
Level: e.Level,
108+
Thread: e.Thread,
109+
Class: e.Class,
110+
Message: e.Message,
110111
})
111112
}
112113
analyser.PrintFilteredResults(converted)
@@ -117,10 +118,11 @@ func analyzeCatalinaLogs(cmd *cobra.Command, logFile string, statsFlag bool, lev
117118
var converted []analyser.LogEntry
118119
for _, e := range entries {
119120
converted = append(converted, analyser.LogEntry{
120-
Level: e.Level,
121-
Thread: e.Thread,
122-
Class: e.Class,
123-
Message: e.Message,
121+
Timestamp: e.Timestamp,
122+
Level: e.Level,
123+
Thread: e.Thread,
124+
Class: e.Class,
125+
Message: e.Message,
124126
})
125127
}
126128
analyser.PrintFilteredResults(converted)
@@ -131,10 +133,11 @@ func analyzeCatalinaLogs(cmd *cobra.Command, logFile string, statsFlag bool, lev
131133
var converted []analyser.LogEntry
132134
for _, e := range entries {
133135
converted = append(converted, analyser.LogEntry{
134-
Level: e.Level,
135-
Thread: e.Thread,
136-
Class: e.Class,
137-
Message: e.Message,
136+
Timestamp: e.Timestamp,
137+
Level: e.Level,
138+
Thread: e.Thread,
139+
Class: e.Class,
140+
Message: e.Message,
138141
})
139142
}
140143
analyser.PrintFilteredResults(converted)
@@ -153,10 +156,11 @@ func analyzeCatalinaLogs(cmd *cobra.Command, logFile string, statsFlag bool, lev
153156
var converted []analyser.LogEntry
154157
for _, e := range entries {
155158
converted = append(converted, analyser.LogEntry{
156-
Level: e.Level,
157-
Thread: e.Thread,
158-
Class: e.Class,
159-
Message: e.Message,
159+
Timestamp: e.Timestamp,
160+
Level: e.Level,
161+
Thread: e.Thread,
162+
Class: e.Class,
163+
Message: e.Message,
160164
})
161165
}
162166

parser/tomcat.go

Lines changed: 141 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,12 @@ package parser
33
import (
44
"bufio"
55
"fmt"
6+
"io"
67
"os"
78
"regexp"
9+
"runtime"
810
"strings"
11+
"sync"
912

1013
"github.com/rs/zerolog/log"
1114
)
@@ -21,9 +24,9 @@ type TomcatLogEntry struct {
2124

2225
// --- REGEX PATTERNS ---
2326

24-
// 1️⃣ OpenMRS (Log4j-style Catalina)
27+
// 1️⃣ Extended OpenMRS (Log4j-style Catalina with optional log4j.Level)
2528
var log4jPattern = regexp.MustCompile(
26-
`^(?P<Level>[A-Z]+)\s*-\s*(?P<Class>[A-Za-z0-9_.<>$]+(?:\([0-9]+\))?)\s*\|(?P<Timestamp>[0-9T:,\-]+)\|\s*(?P<Message>.*)$`)
29+
`^(?P<Level>[A-Z]+)(?:#org\.apache\.log4j\.Level)?\s*-\s*(?P<Class>[A-Za-z0-9_.<>$]+(?:\([0-9]+\))?)\s*\|(?P<Timestamp>[0-9T:,\-]+)\|\s*(?P<Message>.*)$`)
2730

2831
// 2️⃣ Native Tomcat (Catalina.out style)
2932
var tomcatPattern = regexp.MustCompile(
@@ -42,9 +45,10 @@ func ParseTomcatLogLine(line string) (*TomcatLogEntry, error) {
4245
matches := log4jPattern.FindStringSubmatch(line)
4346
return &TomcatLogEntry{
4447
Level: matches[1],
45-
Class: matches[2],
46-
Timestamp: matches[3],
47-
Message: matches[4],
48+
Thread: matches[2],
49+
Class: matches[3],
50+
Timestamp: matches[4],
51+
Message: matches[5],
4852
}, nil
4953
}
5054

@@ -71,24 +75,51 @@ func ParseTomcatLogFile(path string) ([]*TomcatLogEntry, error) {
7175
}
7276
defer file.Close()
7377

74-
scanner := bufio.NewScanner(file)
75-
var entries []*TomcatLogEntry
78+
numWorkers := runtime.NumCPU() * 2 // tune for your machine
79+
lines := make(chan string, numWorkers*10)
80+
results := make(chan *TomcatLogEntry, numWorkers*10)
81+
82+
var wg sync.WaitGroup
83+
84+
// Worker pool
85+
for i := 0; i < numWorkers; i++ {
86+
wg.Add(1)
87+
go func() {
88+
defer wg.Done()
89+
for line := range lines {
90+
entry, err := ParseTomcatLogLine(line)
91+
if err != nil {
92+
log.Debug().Msgf("Skipping non-log line: %s", line)
93+
continue
94+
}
95+
if entry != nil {
96+
results <- entry
97+
}
98+
}
99+
}()
100+
}
76101

77-
for scanner.Scan() {
78-
line := scanner.Text()
79-
entry, err := ParseTomcatLogLine(line)
80-
if err != nil {
81-
// Skip JVM/system messages and noise
82-
log.Debug().Msgf("Skipping non-log line: %s", line)
83-
continue
102+
// Reader goroutine
103+
go func() {
104+
scanner := bufio.NewScanner(file)
105+
for scanner.Scan() {
106+
lines <- scanner.Text()
84107
}
85-
if entry != nil {
86-
entries = append(entries, entry)
108+
close(lines)
109+
if err := scanner.Err(); err != nil {
110+
log.Error().Err(err).Msg("Error reading log file")
87111
}
88-
}
112+
}()
113+
114+
// Closer goroutine
115+
go func() {
116+
wg.Wait()
117+
close(results)
118+
}()
89119

90-
if err := scanner.Err(); err != nil {
91-
return nil, fmt.Errorf("error reading log file: %w", err)
120+
var entries []*TomcatLogEntry
121+
for entry := range results {
122+
entries = append(entries, entry)
92123
}
93124

94125
if len(entries) == 0 {
@@ -97,3 +128,94 @@ func ParseTomcatLogFile(path string) ([]*TomcatLogEntry, error) {
97128

98129
return entries, nil
99130
}
131+
132+
func ParseHugeTomcatLogFile(path string) ([]*TomcatLogEntry, error) {
133+
fileInfo, err := os.Stat(path)
134+
if err != nil {
135+
return nil, err
136+
}
137+
fileSize := fileInfo.Size()
138+
139+
numWorkers := runtime.NumCPU()
140+
chunkSize := fileSize / int64(numWorkers)
141+
results := make(chan []*TomcatLogEntry, numWorkers)
142+
errs := make(chan error, numWorkers)
143+
var wg sync.WaitGroup
144+
145+
for i := 0; i < numWorkers; i++ {
146+
start := int64(i) * chunkSize
147+
end := start + chunkSize
148+
if i == numWorkers-1 {
149+
end = fileSize
150+
}
151+
152+
wg.Add(1)
153+
go func(start, end int64) {
154+
defer wg.Done()
155+
entries, err := parseChunk(path, start, end)
156+
if err != nil {
157+
errs <- err
158+
return
159+
}
160+
results <- entries
161+
}(start, end)
162+
}
163+
164+
go func() {
165+
wg.Wait()
166+
close(results)
167+
close(errs)
168+
}()
169+
170+
var all []*TomcatLogEntry
171+
for res := range results {
172+
all = append(all, res...)
173+
}
174+
if len(errs) > 0 {
175+
return all, <-errs
176+
}
177+
return all, nil
178+
}
179+
180+
func parseChunk(path string, start, end int64) ([]*TomcatLogEntry, error) {
181+
f, err := os.Open(path)
182+
if err != nil {
183+
return nil, err
184+
}
185+
defer f.Close()
186+
187+
if _, err := f.Seek(start, 0); err != nil {
188+
return nil, err
189+
}
190+
191+
reader := bufio.NewReader(f)
192+
if start != 0 {
193+
// Move to the next newline to start at a full log line
194+
_, _ = reader.ReadBytes('\n')
195+
}
196+
197+
var entries []*TomcatLogEntry
198+
var bytesRead int64
199+
200+
for {
201+
if start+bytesRead > end {
202+
break
203+
}
204+
205+
line, err := reader.ReadString('\n')
206+
if err != nil {
207+
if err == io.EOF {
208+
break
209+
}
210+
return nil, err
211+
}
212+
bytesRead += int64(len(line))
213+
214+
entry, err := ParseTomcatLogLine(line)
215+
if err == nil && entry != nil {
216+
entries = append(entries, entry)
217+
}
218+
}
219+
220+
return entries, nil
221+
}

0 commit comments

Comments
 (0)