@@ -3,9 +3,12 @@ package parser
33import (
44 "bufio"
55 "fmt"
6+ "io"
67 "os"
78 "regexp"
9+ "runtime"
810 "strings"
11+ "sync"
912
1013 "github.com/rs/zerolog/log"
1114)
@@ -21,9 +24,9 @@ type TomcatLogEntry struct {
2124
2225// --- REGEX PATTERNS ---
2326
24- // 1️⃣ OpenMRS (Log4j-style Catalina)
27+ // 1️⃣ Extended OpenMRS (Log4j-style Catalina; the level may carry an optional "#org.apache.log4j.Level" suffix)
2528var log4jPattern = regexp .MustCompile (
26- `^(?P<Level>[A-Z]+)\s*-\s*(?P<Class>[A-Za-z0-9_.<>$]+(?:\([0-9]+\))?)\s*\|(?P<Timestamp>[0-9T:,\-]+)\|\s*(?P<Message>.*)$` )
29+ `^(?P<Level>[A-Z]+)(?:#org\.apache\.log4j\.Level)? \s*-\s*(?P<Class>[A-Za-z0-9_.<>$]+(?:\([0-9]+\))?)\s*\|(?P<Timestamp>[0-9T:,\-]+)\|\s*(?P<Message>.*)$` )
2730
2831// 2️⃣ Native Tomcat (Catalina.out style)
2932var tomcatPattern = regexp .MustCompile (
@@ -42,9 +45,10 @@ func ParseTomcatLogLine(line string) (*TomcatLogEntry, error) {
4245 matches := log4jPattern .FindStringSubmatch (line )
4346 return & TomcatLogEntry {
4447 Level : matches [1 ],
45- Class : matches [2 ],
46- Timestamp : matches [3 ],
47- Message : matches [4 ],
48+ Thread : matches [2 ],
49+ Class : matches [3 ],
50+ Timestamp : matches [4 ],
51+ Message : matches [5 ],
4852 }, nil
4953 }
5054
@@ -71,24 +75,51 @@ func ParseTomcatLogFile(path string) ([]*TomcatLogEntry, error) {
7175 }
7276 defer file .Close ()
7377
74- scanner := bufio .NewScanner (file )
75- var entries []* TomcatLogEntry
78+ numWorkers := runtime .NumCPU () * 2 // tune for your machine
79+ lines := make (chan string , numWorkers * 10 )
80+ results := make (chan * TomcatLogEntry , numWorkers * 10 )
81+
82+ var wg sync.WaitGroup
83+
84+ // Worker pool
85+ for i := 0 ; i < numWorkers ; i ++ {
86+ wg .Add (1 )
87+ go func () {
88+ defer wg .Done ()
89+ for line := range lines {
90+ entry , err := ParseTomcatLogLine (line )
91+ if err != nil {
92+ log .Debug ().Msgf ("Skipping non-log line: %s" , line )
93+ continue
94+ }
95+ if entry != nil {
96+ results <- entry
97+ }
98+ }
99+ }()
100+ }
76101
77- for scanner .Scan () {
78- line := scanner .Text ()
79- entry , err := ParseTomcatLogLine (line )
80- if err != nil {
81- // Skip JVM/system messages and noise
82- log .Debug ().Msgf ("Skipping non-log line: %s" , line )
83- continue
102+ // Reader goroutine
103+ go func () {
104+ scanner := bufio .NewScanner (file )
105+ for scanner .Scan () {
106+ lines <- scanner .Text ()
84107 }
85- if entry != nil {
86- entries = append (entries , entry )
108+ close (lines )
109+ if err := scanner .Err (); err != nil {
110+ log .Error ().Err (err ).Msg ("Error reading log file" )
87111 }
88- }
112+ }()
113+
114+ // Closer goroutine
115+ go func () {
116+ wg .Wait ()
117+ close (results )
118+ }()
89119
90- if err := scanner .Err (); err != nil {
91- return nil , fmt .Errorf ("error reading log file: %w" , err )
120+ var entries []* TomcatLogEntry
121+ for entry := range results {
122+ entries = append (entries , entry )
92123 }
93124
94125 if len (entries ) == 0 {
@@ -97,3 +128,94 @@ func ParseTomcatLogFile(path string) ([]*TomcatLogEntry, error) {
97128
98129 return entries , nil
99130}
131+
132+ func ParseHugeTomcatLogFile (path string ) ([]* TomcatLogEntry , error ) {
133+ fileInfo , err := os .Stat (path )
134+ if err != nil {
135+ return nil , err
136+ }
137+ fileSize := fileInfo .Size ()
138+
139+ numWorkers := runtime .NumCPU ()
140+ chunkSize := fileSize / int64 (numWorkers )
141+ results := make (chan []* TomcatLogEntry , numWorkers )
142+ errs := make (chan error , numWorkers )
143+ var wg sync.WaitGroup
144+
145+ for i := 0 ; i < numWorkers ; i ++ {
146+ start := int64 (i ) * chunkSize
147+ end := start + chunkSize
148+ if i == numWorkers - 1 {
149+ end = fileSize
150+ }
151+
152+ wg .Add (1 )
153+ go func (start , end int64 ) {
154+ defer wg .Done ()
155+ entries , err := parseChunk (path , start , end )
156+ if err != nil {
157+ errs <- err
158+ return
159+ }
160+ results <- entries
161+ }(start , end )
162+ }
163+
164+ go func () {
165+ wg .Wait ()
166+ close (results )
167+ close (errs )
168+ }()
169+
170+ var all []* TomcatLogEntry
171+ for res := range results {
172+ all = append (all , res ... )
173+ }
174+ if len (errs ) > 0 {
175+ return all , <- errs
176+ }
177+ return all , nil
178+ }
179+
180+ func parseChunk (path string , start , end int64 ) ([]* TomcatLogEntry , error ) {
181+ f , err := os .Open (path )
182+ if err != nil {
183+ return nil , err
184+ }
185+ defer f .Close ()
186+
187+ if _ , err := f .Seek (start , 0 ); err != nil {
188+ return nil , err
189+ }
190+
191+ reader := bufio .NewReader (f )
192+ if start != 0 {
193+ // Move to the next newline to start at a full log line
194+ _ , _ = reader .ReadBytes ('\n' )
195+ }
196+
197+ var entries []* TomcatLogEntry
198+ var bytesRead int64
199+
200+ for {
201+ if start + bytesRead > end {
202+ break
203+ }
204+
205+ line , err := reader .ReadString ('\n' )
206+ if err != nil {
207+ if err == io .EOF {
208+ break
209+ }
210+ return nil , err
211+ }
212+ bytesRead += int64 (len (line ))
213+
214+ entry , err := ParseTomcatLogLine (line )
215+ if err == nil && entry != nil {
216+ entries = append (entries , entry )
217+ }
218+ }
219+
220+ return entries , nil
221+ }
0 commit comments