Skip to content

Commit 6b368f2

Browse files
authored
feat: [sc-103754] Be able to detect search domain misconfiguration #1391 (#1534)
* new collector dns * implement DNS collector * add dns service and endpoints check * add nil check on retrieve endpoints
1 parent cb5db17 commit 6b368f2

11 files changed

+414
-0
lines changed

config/crds/troubleshoot.sh_collectors.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,15 @@ spec:
292292
required:
293293
- data
294294
type: object
295+
dns:
296+
properties:
297+
collectorName:
298+
type: string
299+
exclude:
300+
type: BoolString
301+
timeout:
302+
type: string
303+
type: object
295304
exec:
296305
properties:
297306
args:

config/crds/troubleshoot.sh_preflights.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1972,6 +1972,15 @@ spec:
19721972
required:
19731973
- data
19741974
type: object
1975+
dns:
1976+
properties:
1977+
collectorName:
1978+
type: string
1979+
exclude:
1980+
type: BoolString
1981+
timeout:
1982+
type: string
1983+
type: object
19751984
exec:
19761985
properties:
19771986
args:

config/crds/troubleshoot.sh_supportbundles.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2003,6 +2003,15 @@ spec:
20032003
required:
20042004
- data
20052005
type: object
2006+
dns:
2007+
properties:
2008+
collectorName:
2009+
type: string
2010+
exclude:
2011+
type: BoolString
2012+
timeout:
2013+
type: string
2014+
type: object
20062015
exec:
20072016
properties:
20082017
args:

pkg/apis/troubleshoot/v1beta2/collector_shared.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,11 @@ type Sonobuoy struct {
293293
Namespace string `json:"namespace,omitempty" yaml:"namespace,omitempty"`
294294
}
295295

296+
// DNS is the spec for the "dns" collector. It is referenced from Collect
// through the `dns` key.
type DNS struct {
297+
// CollectorMeta is embedded inline; the DNS collector reads its
// CollectorName and Exclude fields.
CollectorMeta `json:",inline" yaml:",inline"`
298+
// Timeout is a string-valued timeout setting serialized as `timeout`.
// NOTE(review): the expected format is not established here — presumably a
// Go duration string such as "30s"; confirm against the collector.
Timeout string `json:"timeout,omitempty" yaml:"timeout,omitempty"`
299+
}
300+
296301
type Collect struct {
297302
ClusterInfo *ClusterInfo `json:"clusterInfo,omitempty" yaml:"clusterInfo,omitempty"`
298303
ClusterResources *ClusterResources `json:"clusterResources,omitempty" yaml:"clusterResources,omitempty"`
@@ -322,6 +327,7 @@ type Collect struct {
322327
Goldpinger *Goldpinger `json:"goldpinger,omitempty" yaml:"goldpinger,omitempty"`
323328
Sonobuoy *Sonobuoy `json:"sonobuoy,omitempty" yaml:"sonobuoy,omitempty"`
324329
NodeMetrics *NodeMetrics `json:"nodeMetrics,omitempty" yaml:"nodeMetrics,omitempty"`
330+
DNS *DNS `json:"dns,omitempty" yaml:"dns,omitempty"`
325331
}
326332

327333
func (c *Collect) AccessReviewSpecs(overrideNS string) []authorizationv1.SelfSubjectAccessReviewSpec {

pkg/apis/troubleshoot/v1beta2/zz_generated.deepcopy.go

Lines changed: 21 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/collect/collector.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,8 @@ func GetCollector(collector *troubleshootv1beta2.Collect, bundlePath string, nam
124124
return &CollectSonobuoyResults{collector.Sonobuoy, bundlePath, namespace, clientConfig, client, ctx, RBACErrors}, true
125125
case collector.NodeMetrics != nil:
126126
return &CollectNodeMetrics{collector.NodeMetrics, bundlePath, clientConfig, client, ctx, RBACErrors}, true
127+
case collector.DNS != nil:
128+
return &CollectDNS{collector.DNS, bundlePath, namespace, clientConfig, client, ctx, RBACErrors}, true
127129
default:
128130
return nil, false
129131
}
@@ -215,6 +217,8 @@ func getCollectorName(c interface{}) string {
215217
collector = "sonobuoy"
216218
case *CollectNodeMetrics:
217219
collector = "node-metrics"
220+
case *CollectDNS:
221+
collector = "dns"
218222
default:
219223
collector = "<none>"
220224
}

pkg/collect/dns.go

Lines changed: 273 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,273 @@
1+
package collect
2+
3+
import (
4+
"bytes"
5+
"context"
6+
"encoding/json"
7+
"fmt"
8+
"io"
9+
"path/filepath"
10+
"strings"
11+
"time"
12+
13+
"github.com/pkg/errors"
14+
troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2"
15+
16+
corev1 "k8s.io/api/core/v1"
17+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
18+
"k8s.io/client-go/kubernetes"
19+
"k8s.io/client-go/rest"
20+
"k8s.io/klog/v2"
21+
)
22+
23+
const (
24+
dnsUtilsImage = "registry.k8s.io/e2e-test-images/jessie-dnsutils:1.3"
25+
)
26+
27+
// CollectDNS is the collector that gathers cluster DNS diagnostics.
//
// NOTE: GetCollector builds this type with a positional composite literal, so
// the order of the fields below must not change.
type CollectDNS struct {
28+
// Collector is the DNS collector spec (exclude flag, collector name, timeout).
Collector *troubleshootv1beta2.DNS
29+
// BundlePath is handed to SaveResult when the report is stored.
BundlePath string
30+
// Namespace is supplied by GetCollector; the methods in this file do not read it.
Namespace string
31+
// ClientConfig is supplied by GetCollector; the methods in this file do not read it.
ClientConfig *rest.Config
32+
// Client is the Kubernetes client used for every API call made by the collector.
Client kubernetes.Interface
33+
// Context is the parent context from which Collect derives its timeout context.
Context context.Context
34+
// RBACErrors is embedded; it is declared elsewhere in the package.
RBACErrors
35+
}
36+
37+
func (c *CollectDNS) Title() string {
38+
return getCollectorName(c)
39+
}
40+
41+
func (c *CollectDNS) IsExcluded() (bool, error) {
42+
return isExcluded(c.Collector.Exclude)
43+
}
44+
45+
func (c *CollectDNS) Collect(progressChan chan<- interface{}) (CollectorResult, error) {
46+
47+
ctx, cancel := context.WithTimeout(c.Context, time.Duration(60*time.Second))
48+
defer cancel()
49+
50+
sb := strings.Builder{}
51+
52+
// get kubernetes Cluster IP
53+
clusterIP, err := getKubernetesClusterIP(c.Client, ctx)
54+
if err == nil {
55+
sb.WriteString(fmt.Sprintf("=== Kubernetes Cluster IP from API Server: %s\n", clusterIP))
56+
} else {
57+
sb.WriteString(fmt.Sprintf("=== Failed to detect Kubernetes Cluster IP: %v\n", err))
58+
}
59+
60+
// run a pod and perform DNS lookup
61+
podLog, err := troubleshootDNSFromPod(c.Client, ctx)
62+
if err == nil {
63+
sb.WriteString(fmt.Sprintf("=== Test DNS resolution in pod %s: \n", dnsUtilsImage))
64+
sb.WriteString(podLog)
65+
} else {
66+
sb.WriteString(fmt.Sprintf("=== Failed to run commands from pod: %v\n", err))
67+
}
68+
69+
// is DNS pods running?
70+
sb.WriteString(fmt.Sprintf("=== Running kube-dns pods: %s\n", getRunningKubeDNSPodNames(c.Client, ctx)))
71+
72+
// is DNS service up?
73+
sb.WriteString(fmt.Sprintf("=== Running kube-dns service: %s\n", getKubeDNSServiceClusterIP(c.Client, ctx)))
74+
75+
// are DNS endpoints exposed?
76+
sb.WriteString(fmt.Sprintf("=== kube-dns endpoints: %s\n", getKubeDNSEndpoints(c.Client, ctx)))
77+
78+
// get DNS server config
79+
coreDNSConfig, err := getCoreDNSConfig(c.Client, ctx)
80+
if err == nil {
81+
sb.WriteString("=== CoreDNS config: \n")
82+
sb.WriteString(coreDNSConfig)
83+
}
84+
kubeDNSConfig, err := getKubeDNSConfig(c.Client, ctx)
85+
if err == nil {
86+
sb.WriteString("=== KubeDNS config: \n")
87+
sb.WriteString(kubeDNSConfig)
88+
}
89+
90+
data := sb.String()
91+
output := NewResult()
92+
output.SaveResult(c.BundlePath, filepath.Join("dns", c.Collector.CollectorName), bytes.NewBuffer([]byte(data)))
93+
94+
return output, nil
95+
}
96+
97+
func getKubernetesClusterIP(client kubernetes.Interface, ctx context.Context) (string, error) {
98+
service, err := client.CoreV1().Services("default").Get(ctx, "kubernetes", metav1.GetOptions{})
99+
if err != nil {
100+
klog.V(2).Infof("Failed to detect Kubernetes Cluster IP: %v", err)
101+
return "", err
102+
}
103+
104+
return service.Spec.ClusterIP, nil
105+
}
106+
107+
func troubleshootDNSFromPod(client kubernetes.Interface, ctx context.Context) (string, error) {
108+
namespace := "default"
109+
command := []string{"/bin/sh", "-c", `
110+
set -x
111+
cat /etc/resolv.conf
112+
nslookup -debug kubernetes
113+
exit 0
114+
`}
115+
116+
// TODO: image pull secret?
117+
podLabels := map[string]string{
118+
"troubleshoot-role": "dns-collector",
119+
}
120+
pod := &corev1.Pod{
121+
ObjectMeta: metav1.ObjectMeta{
122+
GenerateName: "troubleshoot-dns-",
123+
Namespace: namespace,
124+
Labels: podLabels,
125+
},
126+
Spec: corev1.PodSpec{
127+
Containers: []corev1.Container{
128+
{
129+
Name: "troubleshoot-dns",
130+
Image: dnsUtilsImage,
131+
Command: command,
132+
},
133+
},
134+
RestartPolicy: corev1.RestartPolicyNever,
135+
},
136+
}
137+
138+
created, err := client.CoreV1().Pods(namespace).Create(ctx, pod, metav1.CreateOptions{})
139+
if err != nil {
140+
return "", errors.Wrap(err, "failed to run troubleshoot DNS pod")
141+
}
142+
klog.V(2).Infof("Pod with prefix %s has been created", created.GenerateName)
143+
144+
defer func() {
145+
if created == nil {
146+
return
147+
}
148+
err := client.CoreV1().Pods(namespace).Delete(ctx, created.Name, metav1.DeleteOptions{})
149+
if err != nil {
150+
klog.Errorf("Failed to delete troubleshoot DNS pod %s: %v", created.Name, err)
151+
}
152+
klog.V(2).Infof("Deleted pod %s", created.Name)
153+
}()
154+
155+
// wait for pod to be completed
156+
watcher, err := client.CoreV1().Pods(namespace).Watch(ctx, metav1.ListOptions{
157+
LabelSelector: "troubleshoot-role=dns-collector",
158+
})
159+
if err != nil {
160+
return "", errors.Wrap(err, "failed to watch pod")
161+
}
162+
defer func() {
163+
if watcher != nil {
164+
watcher.Stop()
165+
}
166+
}()
167+
168+
for event := range watcher.ResultChan() {
169+
pod, ok := event.Object.(*corev1.Pod)
170+
if !ok {
171+
continue
172+
}
173+
if pod.Status.Phase == corev1.PodSucceeded {
174+
break
175+
}
176+
if pod.Status.Phase == corev1.PodFailed {
177+
return "", errors.New("troubleshoot DNS pod failed")
178+
}
179+
}
180+
181+
// get pod logs
182+
podLogOpts := corev1.PodLogOptions{}
183+
req := client.CoreV1().Pods(namespace).GetLogs(created.Name, &podLogOpts)
184+
podLogs, err := req.Stream(ctx)
185+
if err != nil {
186+
return "", errors.Wrap(err, "failed to get pod logs")
187+
}
188+
defer podLogs.Close()
189+
190+
bytes, err := io.ReadAll(podLogs)
191+
if err != nil {
192+
return "", errors.Wrap(err, "failed to read troubleshoot DNS pod logs")
193+
}
194+
195+
return string(bytes), nil
196+
}
197+
198+
func getCoreDNSConfig(client kubernetes.Interface, ctx context.Context) (string, error) {
199+
configMap, err := client.CoreV1().ConfigMaps("kube-system").Get(ctx, "coredns", metav1.GetOptions{})
200+
if err != nil {
201+
klog.V(2).Infof("Failed to detect CoreDNS config: %v", err)
202+
return "", err
203+
}
204+
205+
return configMap.Data["Corefile"], nil
206+
}
207+
208+
func getKubeDNSConfig(client kubernetes.Interface, ctx context.Context) (string, error) {
209+
configMap, err := client.CoreV1().ConfigMaps("kube-system").Get(ctx, "kube-dns", metav1.GetOptions{})
210+
if err != nil {
211+
klog.V(2).Infof("Failed to detect KubeDNS config: %v", err)
212+
return "", err
213+
}
214+
215+
if configMap.Data == nil {
216+
return "", nil
217+
}
218+
219+
dataBytes, err := json.Marshal(configMap.Data)
220+
if err != nil {
221+
return "", err
222+
}
223+
224+
return string(dataBytes), nil
225+
}
226+
227+
func getRunningKubeDNSPodNames(client kubernetes.Interface, ctx context.Context) string {
228+
pods, err := client.CoreV1().Pods("kube-system").List(ctx, metav1.ListOptions{
229+
LabelSelector: "k8s-app=kube-dns",
230+
})
231+
if err != nil {
232+
klog.V(2).Infof("failed to list kube-dns pods: %v", err)
233+
return ""
234+
}
235+
236+
var podNames []string
237+
for _, pod := range pods.Items {
238+
if pod.Status.Phase == corev1.PodRunning {
239+
podNames = append(podNames, pod.Name)
240+
}
241+
}
242+
243+
return strings.Join(podNames, ", ")
244+
}
245+
246+
func getKubeDNSServiceClusterIP(client kubernetes.Interface, ctx context.Context) string {
247+
service, err := client.CoreV1().Services("kube-system").Get(ctx, "kube-dns", metav1.GetOptions{})
248+
if err != nil {
249+
klog.V(2).Infof("failed to get kube-dns service: %v", err)
250+
return ""
251+
}
252+
253+
return service.Spec.ClusterIP
254+
}
255+
256+
func getKubeDNSEndpoints(client kubernetes.Interface, ctx context.Context) string {
257+
endpoints, err := client.CoreV1().Endpoints("kube-system").Get(ctx, "kube-dns", metav1.GetOptions{})
258+
if err != nil {
259+
klog.V(2).Infof("failed to get kube-dns endpoints: %v", err)
260+
return ""
261+
}
262+
263+
var endpointStrings []string
264+
for _, subset := range endpoints.Subsets {
265+
for _, address := range subset.Addresses {
266+
if len(subset.Ports) > 0 {
267+
endpointStrings = append(endpointStrings, fmt.Sprintf("%s:%d", address.IP, subset.Ports[0].Port))
268+
}
269+
}
270+
}
271+
272+
return strings.Join(endpointStrings, ", ")
273+
}

0 commit comments

Comments
 (0)