1
1
package plugin
2
2
3
3
import (
4
+ "context"
4
5
"encoding/json"
5
6
"fmt"
6
7
"github.com/aylei/kubectl-debug/pkg/util"
@@ -9,14 +10,19 @@ import (
9
10
"io"
10
11
corev1 "k8s.io/api/core/v1"
11
12
"k8s.io/apimachinery/pkg/apis/meta/v1"
13
+ "k8s.io/apimachinery/pkg/util/uuid"
12
14
"k8s.io/cli-runtime/pkg/genericclioptions"
13
15
"k8s.io/client-go/kubernetes"
16
+ "k8s.io/client-go/tools/watch"
14
17
coreclient "k8s.io/client-go/kubernetes/typed/core/v1"
15
18
restclient "k8s.io/client-go/rest"
16
19
"k8s.io/client-go/tools/remotecommand"
20
+ "k8s.io/kubernetes/pkg/client/conditions"
21
+ "k8s.io/kubernetes/pkg/util/interrupt"
17
22
"log"
18
23
"net/url"
19
24
"os/user"
25
+ "time"
20
26
)
21
27
22
28
const (
@@ -54,17 +60,17 @@ type DebugOptions struct {
54
60
PodName string
55
61
56
62
// Debug options
57
- RetainContainer bool
58
- Image string
59
- ContainerName string
60
- Command [] string
61
- AgentPort int
62
- ConfigLocation string
63
-
64
- Flags * genericclioptions.ConfigFlags
65
- PodClient coreclient.PodsGetter
66
- Args []string
67
- Config * restclient.Config
63
+ Image string
64
+ ContainerName string
65
+ Command [] string
66
+ AgentPort int
67
+ ConfigLocation string
68
+ Fork bool
69
+
70
+ Flags * genericclioptions.ConfigFlags
71
+ CoreClient coreclient.CoreV1Interface
72
+ Args []string
73
+ Config * restclient.Config
68
74
69
75
genericclioptions.IOStreams
70
76
}
@@ -106,6 +112,8 @@ func NewDebugCmd(streams genericclioptions.IOStreams) *cobra.Command {
106
112
fmt .Sprintf ("Agent port for debug cli to connect, default to %d" , defaultAgentPort ))
107
113
cmd .Flags ().StringVar (& opts .ConfigLocation , "debug-config" , "" ,
108
114
fmt .Sprintf ("Debug config file, default to ~%s" , defaultConfigLocation ))
115
+ cmd .Flags ().BoolVar (& opts .Fork , "fork" , false ,
116
+ "Fork a new pod for debugging (useful if the pod status is CrashLoopBackoff)" )
109
117
opts .Flags .AddFlags (cmd .Flags ())
110
118
111
119
return cmd
@@ -173,7 +181,7 @@ func (o *DebugOptions) Complete(cmd *cobra.Command, args []string, argsLenAtDash
173
181
if err != nil {
174
182
return err
175
183
}
176
- o .PodClient = clientset .CoreV1 ()
184
+ o .CoreClient = clientset .CoreV1 ()
177
185
178
186
return nil
179
187
}
@@ -190,14 +198,10 @@ func (o *DebugOptions) Validate() error {
190
198
191
199
func (o * DebugOptions ) Run () error {
192
200
193
- pod , err := o .PodClient .Pods (o .Namespace ).Get (o .PodName , v1.GetOptions {})
201
+ pod , err := o .CoreClient .Pods (o .Namespace ).Get (o .PodName , v1.GetOptions {})
194
202
if err != nil {
195
203
return err
196
204
}
197
- if pod .Status .Phase == corev1 .PodSucceeded || pod .Status .Phase == corev1 .PodFailed {
198
- return fmt .Errorf ("cannot debug in a completed pod; current phase is %s" , pod .Status .Phase )
199
- }
200
- hostIP := pod .Status .HostIP
201
205
202
206
containerName := o .ContainerName
203
207
if len (containerName ) == 0 {
@@ -208,6 +212,32 @@ func (o *DebugOptions) Run() error {
208
212
containerName = pod .Spec .Containers [0 ].Name
209
213
}
210
214
215
+ // in fork mode, we launch a new pod as a copy of the target pod
216
+ // and hack the entry point of the target container with sleep command
217
+ // which keeps the container running.
218
+ if o .Fork {
219
+ pod = copyAndStripPod (pod , containerName )
220
+ pod , err = o .CoreClient .Pods (pod .Namespace ).Create (pod )
221
+ if err != nil {
222
+ return err
223
+ }
224
+ watcher , err := o .CoreClient .Pods (pod .Namespace ).Watch (v1 .SingleObject (pod .ObjectMeta ))
225
+ // FIXME: hard code -> config
226
+ ctx , cancel := context .WithTimeout (context .Background (), 5 * time .Minute )
227
+ defer cancel ()
228
+ log .Println ("waiting for forked container running..." )
229
+ event , err := watch .UntilWithoutRetry (ctx , watcher , conditions .PodRunning )
230
+ if err != nil {
231
+ return err
232
+ }
233
+ pod = event .Object .(* corev1.Pod )
234
+ }
235
+
236
+ if pod .Status .Phase == corev1 .PodSucceeded || pod .Status .Phase == corev1 .PodFailed {
237
+ return fmt .Errorf ("cannot debug in a completed pod; current phase is %s" , pod .Status .Phase )
238
+ }
239
+ hostIP := pod .Status .HostIP
240
+
211
241
containerId , err := o .getContainerIdByName (pod , containerName )
212
242
if err != nil {
213
243
return err
@@ -244,7 +274,20 @@ func (o *DebugOptions) Run() error {
244
274
return o .remoteExecute ("POST" , uri , o .Config , o .In , o .Out , o .ErrOut , t .Raw , sizeQueue )
245
275
}
246
276
247
- if err := t .Safe (fn ); err != nil {
277
+ // ensure forked pod is deleted on cancelation
278
+ withCleanUp := func () error {
279
+ return interrupt .Chain (nil , func () {
280
+ if o .Fork {
281
+ err := o .CoreClient .Pods (pod .Namespace ).Delete (pod .Name , v1 .NewDeleteOptions (0 ))
282
+ if err != nil {
283
+ // we may leak the pod here, but there is nothing we can do except notify the user
284
+ log .Printf ("failed to delete pod %s, consider manual deletion." , pod .Name )
285
+ }
286
+ }
287
+ }).Run (fn );
288
+ }
289
+
290
+ if err := t .Safe (withCleanUp ); err != nil {
248
291
fmt .Printf ("error execute remote, %v\n " , err )
249
292
return err
250
293
}
@@ -308,3 +351,32 @@ func (o *DebugOptions) setupTTY() term.TTY {
308
351
}
309
352
return t
310
353
}
354
+
355
+ // copyAndStripPod copy the given pod template, strip the probes and labels,
356
+ // and replace the entry point
357
+ func copyAndStripPod (pod * corev1.Pod , targetContainer string ) * corev1.Pod {
358
+ copied := & corev1.Pod {
359
+ ObjectMeta : * pod .ObjectMeta .DeepCopy (),
360
+ Spec : * pod .Spec .DeepCopy (),
361
+ }
362
+ copied .Name = fmt .Sprintf ("%s-%s-debug" , pod .Name , uuid .NewUUID ())
363
+ copied .Labels = nil
364
+ copied .Spec .RestartPolicy = corev1 .RestartPolicyNever
365
+ for i , c := range copied .Spec .Containers {
366
+ copied .Spec .Containers [i ].LivenessProbe = nil
367
+ copied .Spec .Containers [i ].ReadinessProbe = nil
368
+ if c .Name == targetContainer {
369
+ // Hack, infinite sleep command to keep the container running
370
+ copied .Spec .Containers [i ].Command = []string {"sh" , "-c" , "--" }
371
+ copied .Spec .Containers [i ].Args = []string {"while true; do sleep 30; done;" }
372
+ }
373
+ }
374
+ copied .ResourceVersion = ""
375
+ copied .UID = ""
376
+ copied .SelfLink = ""
377
+ copied .CreationTimestamp = v1.Time {}
378
+ copied .OwnerReferences = []v1.OwnerReference {}
379
+
380
+ return copied
381
+ }
382
+
0 commit comments