Skip to content

Commit e26fa78

Browse files
authored
feat: add InitSelfGuard() (#108)
1 parent eb7a505 commit e26fa78

File tree

2 files changed

+143
-0
lines changed

2 files changed

+143
-0
lines changed

main.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ import (
3131
)
3232

3333
func main() {
34+
util.InitSelfGuard()
3435
object.InitFlag()
3536
object.InitAdapter()
3637
object.CreateTables()

util/supervisor.go

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
// Copyright 2025 The casbin Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package util
16+
17+
import (
18+
"fmt"
19+
"os"
20+
"os/exec"
21+
"os/signal"
22+
"syscall"
23+
"time"
24+
)
25+
26+
// Tunables for the self-guard supervisor.
const (
	// RestartDelay is how long the supervisor pauses after a crash before
	// launching the process again.
	RestartDelay = 2 * time.Second

	// RestartWindow is the period over which restarts are counted.
	RestartWindow = 5 * time.Minute

	// MaxRestarts is the number of restarts tolerated within one RestartWindow.
	MaxRestarts = 5

	// EnvSupervisorKey names the environment variable that marks a process as
	// already running under the supervisor.
	EnvSupervisorKey = "CASWAF_SUPERVISED"
)
36+
37+
// InitSelfGuard initializes the self-recovery mechanism
38+
// If not already supervised, it starts a supervisor process and exits
39+
// If already supervised, it does nothing and returns
40+
func InitSelfGuard() {
41+
// Check if we're already supervised
42+
if os.Getenv(EnvSupervisorKey) == "1" {
43+
// Already supervised, just return and continue normal execution
44+
return
45+
}
46+
47+
// Start as supervisor
48+
err := runSupervisor()
49+
if err != nil {
50+
fmt.Printf("Supervisor error: %v\n", err)
51+
os.Exit(1)
52+
}
53+
// If we get here, supervisor exited cleanly
54+
os.Exit(0)
55+
}
56+
57+
// runSupervisor starts the supervisor that monitors and restarts the main process
58+
func runSupervisor() error {
59+
fmt.Println("Starting CasWAF with auto-recovery mechanism...")
60+
61+
restartTimes := []time.Time{}
62+
63+
for {
64+
// Clean up old restart times outside the window
65+
now := time.Now()
66+
validRestarts := []time.Time{}
67+
for _, t := range restartTimes {
68+
if now.Sub(t) < RestartWindow {
69+
validRestarts = append(validRestarts, t)
70+
}
71+
}
72+
restartTimes = validRestarts
73+
74+
// Check if we've exceeded max restarts
75+
if len(restartTimes) >= MaxRestarts {
76+
return fmt.Errorf("exceeded maximum restart limit (%d restarts in %v), stopping supervisor", MaxRestarts, RestartWindow)
77+
}
78+
79+
// Start the child process
80+
cmd := exec.Command(os.Args[0], os.Args[1:]...)
81+
cmd.Stdout = os.Stdout
82+
cmd.Stderr = os.Stderr
83+
cmd.Stdin = os.Stdin
84+
85+
// Set environment variable to indicate supervised process
86+
cmd.Env = append(os.Environ(), fmt.Sprintf("%s=1", EnvSupervisorKey))
87+
88+
// Start the process
89+
if err := cmd.Start(); err != nil {
90+
fmt.Printf("Failed to start process: %v\n", err)
91+
return err
92+
}
93+
94+
processStartTime := time.Now()
95+
fmt.Printf("Started supervised process with PID: %d\n", cmd.Process.Pid)
96+
97+
// Setup signal handling to forward signals to child
98+
sigChan := make(chan os.Signal, 1)
99+
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
100+
101+
// Wait for process completion or signal
102+
doneChan := make(chan error, 1)
103+
go func() {
104+
doneChan <- cmd.Wait()
105+
}()
106+
107+
select {
108+
case err := <-doneChan:
109+
// Process exited
110+
exitTime := time.Now()
111+
uptime := exitTime.Sub(processStartTime)
112+
113+
if err != nil {
114+
fmt.Printf("Process crashed after %v: %v\n", uptime, err)
115+
116+
// Record this restart
117+
restartTimes = append(restartTimes, time.Now())
118+
119+
fmt.Printf("Waiting %v before restarting... (restart %d/%d)\n",
120+
RestartDelay, len(restartTimes), MaxRestarts)
121+
time.Sleep(RestartDelay)
122+
123+
// Continue to restart
124+
continue
125+
} else {
126+
// Clean exit
127+
fmt.Println("Process exited cleanly")
128+
return nil
129+
}
130+
131+
case sig := <-sigChan:
132+
// Received shutdown signal, forward to child
133+
fmt.Printf("Received signal %v, forwarding to child process...\n", sig)
134+
if cmd.Process != nil {
135+
cmd.Process.Signal(sig)
136+
}
137+
// Wait for child to exit
138+
<-doneChan
139+
return nil
140+
}
141+
}
142+
}

0 commit comments

Comments
 (0)