Skip to content

Support .NET auto instrumentation and Windows platforms #193

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Jul 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,12 @@ ARG VERSION_DATE
ARG AGENT_VERSION
ARG AUTO_INSTRUMENTATION_JAVA_VERSION
ARG AUTO_INSTRUMENTATION_PYTHON_VERSION
ARG AUTO_INSTRUMENTATION_DOTNET_VERSION
ARG DCMG_EXPORTER_VERSION
ARG NEURON_MONITOR_VERSION

# Build
RUN CGO_ENABLED=0 GOOS=linux GO111MODULE=on go build -ldflags="-X ${VERSION_PKG}.version=${VERSION} -X ${VERSION_PKG}.buildDate=${VERSION_DATE} -X ${VERSION_PKG}.agent=${AGENT_VERSION} -X ${VERSION_PKG}.autoInstrumentationJava=${AUTO_INSTRUMENTATION_JAVA_VERSION} -X ${VERSION_PKG}.autoInstrumentationPython=${AUTO_INSTRUMENTATION_PYTHON_VERSION} -X ${VERSION_PKG}.dcgmExporter=${DCMG_EXPORTER_VERSION} -X ${VERSION_PKG}.neuronMonitor=${NEURON_MONITOR_VERSION}" -a -o manager main.go
RUN CGO_ENABLED=0 GOOS=linux GO111MODULE=on go build -ldflags="-X ${VERSION_PKG}.version=${VERSION} -X ${VERSION_PKG}.buildDate=${VERSION_DATE} -X ${VERSION_PKG}.agent=${AGENT_VERSION} -X ${VERSION_PKG}.autoInstrumentationJava=${AUTO_INSTRUMENTATION_JAVA_VERSION} -X ${VERSION_PKG}.autoInstrumentationPython=${AUTO_INSTRUMENTATION_PYTHON_VERSION} -X ${VERSION_PKG}.autoInstrumentationDotNet=${AUTO_INSTRUMENTATION_DOTNET_VERSION} -X ${VERSION_PKG}.dcgmExporter=${DCMG_EXPORTER_VERSION} -X ${VERSION_PKG}.neuronMonitor=${NEURON_MONITOR_VERSION}" -a -o manager main.go

# Use distroless as minimal base image to package the manager binary
# Refer to https://github.com/GoogleContainerTools/distroless for more details
Expand Down
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ VERSION_PKG ?= "github.com/aws/amazon-cloudwatch-agent-operator/internal/version
AGENT_VERSION ?= "$(shell grep -v '\#' versions.txt | grep cloudwatch-agent | awk -F= '{print $$2}')"
AUTO_INSTRUMENTATION_JAVA_VERSION ?= "$(shell grep -v '\#' versions.txt | grep aws-otel-java-instrumentation | awk -F= '{print $$2}')"
AUTO_INSTRUMENTATION_PYTHON_VERSION ?= "$(shell grep -v '\#' versions.txt | grep aws-otel-python-instrumentation | awk -F= '{print $$2}')"
AUTO_INSTRUMENTATION_DOTNET_VERSION ?= "$(shell grep -v '\#' versions.txt | grep aws-otel-dotnet-instrumentation | awk -F= '{print $$2}')"
DCGM_EXPORTER_VERSION ?= "$(shell grep -v '\#' versions.txt | grep dcgm-exporter | awk -F= '{print $$2}')"
NEURON_MONITOR_VERSION ?= "$(shell grep -v '\#' versions.txt | grep neuron-monitor | awk -F= '{print $$2}')"

Expand Down Expand Up @@ -153,7 +154,7 @@ generate: controller-gen api-docs
# buildx is used to ensure same results for arm based systems (m1/2 chips)
.PHONY: container
container:
docker buildx build --load --platform linux/${ARCH} -t ${IMG} --build-arg VERSION_PKG=${VERSION_PKG} --build-arg VERSION=${VERSION} --build-arg VERSION_DATE=${VERSION_DATE} --build-arg AGENT_VERSION=${AGENT_VERSION} --build-arg AUTO_INSTRUMENTATION_JAVA_VERSION=${AUTO_INSTRUMENTATION_JAVA_VERSION} --build-arg AUTO_INSTRUMENTATION_PYTHON_VERSION=${AUTO_INSTRUMENTATION_PYTHON_VERSION} --build-arg DCGM_EXPORTER_VERSION=${DCGM_EXPORTER_VERSION} --build-arg NEURON_MONITOR_VERSION=${NEURON_MONITOR_VERSION} .
docker buildx build --load --platform linux/${ARCH} -t ${IMG} --build-arg VERSION_PKG=${VERSION_PKG} --build-arg VERSION=${VERSION} --build-arg VERSION_DATE=${VERSION_DATE} --build-arg AGENT_VERSION=${AGENT_VERSION} --build-arg AUTO_INSTRUMENTATION_JAVA_VERSION=${AUTO_INSTRUMENTATION_JAVA_VERSION} --build-arg AUTO_INSTRUMENTATION_PYTHON_VERSION=${AUTO_INSTRUMENTATION_PYTHON_VERSION} --build-arg AUTO_INSTRUMENTATION_DOTNET_VERSION=${AUTO_INSTRUMENTATION_DOTNET_VERSION} --build-arg DCGM_EXPORTER_VERSION=${DCGM_EXPORTER_VERSION} --build-arg NEURON_MONITOR_VERSION=${NEURON_MONITOR_VERSION} .

# Push the container image, used only for local dev purposes
.PHONY: container-push
Expand Down
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
# Amazon CloudWatch Agent Operator
The Amazon CloudWatch Agent Operator is software developed to manage the [CloudWatch Agent](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/Install-CloudWatch-Agent.html) on Kubernetes.

Supported Languages:
- Java
- Python
- .NET

This repo is based on the [OpenTelemetry Operator](https://github.com/open-telemetry/opentelemetry-operator).

## Build and Deployment
Expand Down
9 changes: 8 additions & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ const (
cloudwatchAgentImageRepository = "public.ecr.aws/cloudwatch-agent/cloudwatch-agent"
autoInstrumentationJavaImageRepository = "public.ecr.aws/aws-observability/adot-autoinstrumentation-java"
autoInstrumentationPythonImageRepository = "public.ecr.aws/aws-observability/adot-autoinstrumentation-python"
autoInstrumentationDotNetImageRepository = "public.ecr.aws/aws-observability/adot-autoinstrumentation-dotnet"
dcgmExporterImageRepository = "nvcr.io/nvidia/k8s/dcgm-exporter"
neuronMonitorImageRepository = "public.ecr.aws/neuron"
)
Expand Down Expand Up @@ -98,6 +99,7 @@ func main() {
agentImage string
autoInstrumentationJava string
autoInstrumentationPython string
autoInstrumentationDotNet string
autoAnnotationConfigStr string
webhookPort int
tlsOpt tlsConfig
Expand All @@ -111,14 +113,16 @@ func main() {
stringFlagOrEnv(&agentImage, "agent-image", "RELATED_IMAGE_COLLECTOR", fmt.Sprintf("%s:%s", cloudwatchAgentImageRepository, v.AmazonCloudWatchAgent), "The default CloudWatch Agent image. This image is used when no image is specified in the CustomResource.")
stringFlagOrEnv(&autoInstrumentationJava, "auto-instrumentation-java-image", "RELATED_IMAGE_AUTO_INSTRUMENTATION_JAVA", fmt.Sprintf("%s:%s", autoInstrumentationJavaImageRepository, v.AutoInstrumentationJava), "The default OpenTelemetry Java instrumentation image. This image is used when no image is specified in the CustomResource.")
stringFlagOrEnv(&autoInstrumentationPython, "auto-instrumentation-python-image", "RELATED_IMAGE_AUTO_INSTRUMENTATION_PYTHON", fmt.Sprintf("%s:%s", autoInstrumentationPythonImageRepository, v.AutoInstrumentationPython), "The default OpenTelemetry Python instrumentation image. This image is used when no image is specified in the CustomResource.")
stringFlagOrEnv(&autoInstrumentationDotNet, "auto-instrumentation-dotnet-image", "RELATED_IMAGE_AUTO_INSTRUMENTATION_DOTNET", fmt.Sprintf("%s:%s", autoInstrumentationDotNetImageRepository, v.AutoInstrumentationDotNet), "The default OpenTelemetry Dotnet instrumentation image. This image is used when no image is specified in the CustomResource.")
stringFlagOrEnv(&autoAnnotationConfigStr, "auto-annotation-config", "AUTO_ANNOTATION_CONFIG", "", "The configuration for auto-annotation.")
stringFlagOrEnv(&dcgmExporterImage, "dcgm-exporter-image", "RELATED_IMAGE_DCGM_EXPORTER", fmt.Sprintf("%s:%s", dcgmExporterImageRepository, v.DcgmExporter), "The default DCGM Exporter image. This image is used when no image is specified in the CustomResource.")
stringFlagOrEnv(&neuronMonitorImage, "neuron-monitor-image", "RELATED_IMAGE_NEURON_MONITOR", fmt.Sprintf("%s:%s", neuronMonitorImageRepository, v.NeuronMonitor), "The default Neuron monitor image. This image is used when no image is specified in the CustomResource.")
pflag.Parse()

// set java instrumentation java image in environment variable to be used for default instrumentation
// set supported language instrumentation images in environment variable to be used for default instrumentation
os.Setenv("AUTO_INSTRUMENTATION_JAVA", autoInstrumentationJava)
os.Setenv("AUTO_INSTRUMENTATION_PYTHON", autoInstrumentationPython)
os.Setenv("AUTO_INSTRUMENTATION_DOTNET", autoInstrumentationDotNet)

logger := zap.New(zap.UseFlagOptions(&opts))
ctrl.SetLogger(logger)
Expand All @@ -128,6 +132,7 @@ func main() {
"cloudwatch-agent", agentImage,
"auto-instrumentation-java", autoInstrumentationJava,
"auto-instrumentation-python", autoInstrumentationPython,
"auto-instrumentation-dotnet", autoInstrumentationDotNet,
"dcgm-exporter", dcgmExporterImage,
"neuron-monitor", neuronMonitorImage,
"build-date", v.BuildDate,
Expand All @@ -142,6 +147,7 @@ func main() {
config.WithCollectorImage(agentImage),
config.WithAutoInstrumentationJavaImage(autoInstrumentationJava),
config.WithAutoInstrumentationPythonImage(autoInstrumentationPython),
config.WithAutoInstrumentationDotNetImage(autoInstrumentationDotNet),
config.WithDcgmExporterImage(dcgmExporterImage),
config.WithNeuronMonitorImage(neuronMonitorImage),
)
Expand Down Expand Up @@ -238,6 +244,7 @@ func main() {
instrumentation.NewTypeSet(
instrumentation.TypeJava,
instrumentation.TypePython,
instrumentation.TypeDotNet,
),
)
mgr.GetWebhookServer().Register("/mutate-v1-workload", &webhook.Admission{
Expand Down
3 changes: 3 additions & 0 deletions pkg/instrumentation/auto/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import "github.com/aws/amazon-cloudwatch-agent-operator/pkg/instrumentation"
// AnnotationConfig groups the auto-annotation resources by supported
// instrumentation language. Each field holds the AnnotationResources for one
// language and carries a lowercase JSON tag naming that language.
// getResources returns the field matching a given instrumentation.Type.
type AnnotationConfig struct {
// Java is returned by getResources for instrumentation.TypeJava.
Java AnnotationResources `json:"java"`
// Python is returned by getResources for instrumentation.TypePython.
Python AnnotationResources `json:"python"`
// DotNet is returned by getResources for instrumentation.TypeDotNet.
DotNet AnnotationResources `json:"dotnet"`
}

func (c AnnotationConfig) getResources(instType instrumentation.Type) AnnotationResources {
Expand All @@ -18,6 +19,8 @@ func (c AnnotationConfig) getResources(instType instrumentation.Type) Annotation
return c.Java
case instrumentation.TypePython:
return c.Python
case instrumentation.TypeDotNet:
return c.DotNet
default:
return AnnotationResources{}
}
Expand Down
9 changes: 9 additions & 0 deletions pkg/instrumentation/auto/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,21 @@ func TestConfig(t *testing.T) {
DaemonSets: []string{"ds2"},
StatefulSets: []string{"ss2"},
},
DotNet: AnnotationResources{
Namespaces: []string{"n3"},
Deployments: []string{"d3"},
DaemonSets: []string{"ds3"},
StatefulSets: []string{"ss3"},
},
}
assert.Equal(t, cfg.Java, cfg.getResources(instrumentation.TypeJava))
assert.Equal(t, []string{"n1"}, getNamespaces(cfg.Java))
assert.Equal(t, []string{"d1"}, getDeployments(cfg.Java))
assert.Equal(t, cfg.Python, cfg.getResources(instrumentation.TypePython))
assert.Equal(t, []string{"ds2"}, getDaemonSets(cfg.Python))
assert.Equal(t, []string{"ss2"}, getStatefulSets(cfg.Python))
assert.Equal(t, cfg.DotNet, cfg.getResources(instrumentation.TypeDotNet))
assert.Equal(t, []string{"ds3"}, getDaemonSets(cfg.DotNet))
assert.Equal(t, []string{"ss3"}, getStatefulSets(cfg.DotNet))
assert.Equal(t, AnnotationResources{}, cfg.getResources("invalidType"))
}
55 changes: 42 additions & 13 deletions pkg/instrumentation/defaultinstrumentation.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package instrumentation

import (
"errors"
"fmt"
"os"

corev1 "k8s.io/api/core/v1"
Expand All @@ -20,11 +21,11 @@ const (
defaultNamespace = "default"
defaultKind = "Instrumentation"

httpPrefix = "http://"
httpsPrefix = "https://"
http = "http"
https = "https"
)

func getDefaultInstrumentation(agentConfig *adapters.CwaConfig) (*v1alpha1.Instrumentation, error) {
func getDefaultInstrumentation(agentConfig *adapters.CwaConfig, isWindowsPod bool) (*v1alpha1.Instrumentation, error) {
javaInstrumentationImage, ok := os.LookupEnv("AUTO_INSTRUMENTATION_JAVA")
if !ok {
return nil, errors.New("unable to determine java instrumentation image")
Expand All @@ -33,13 +34,24 @@ func getDefaultInstrumentation(agentConfig *adapters.CwaConfig) (*v1alpha1.Instr
if !ok {
return nil, errors.New("unable to determine python instrumentation image")
}
dotNetInstrumentationImage, ok := os.LookupEnv("AUTO_INSTRUMENTATION_DOTNET")
if !ok {
return nil, errors.New("unable to determine dotnet instrumentation image")
}

cloudwatchAgentServiceEndpoint := "cloudwatch-agent.amazon-cloudwatch"
if isWindowsPod {
// Windows pods use the headless service endpoint due to limitations with the agent on host network mode
// https://kubernetes.io/docs/concepts/services-networking/windows-networking/#limitations
cloudwatchAgentServiceEndpoint = "cloudwatch-agent-windows-headless.amazon-cloudwatch.svc.cluster.local"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to use the headless endpoint for Windows nodes.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nitpick: it's worth adding a comment explaining why the service endpoint differs for Windows.

}

// set protocol by checking cloudwatch agent config for tls setting
exporterPrefix := httpPrefix
exporterPrefix := http
if agentConfig != nil {
appSignalsConfig := agentConfig.GetApplicationSignalsConfig()
if appSignalsConfig != nil && appSignalsConfig.TLS != nil {
exporterPrefix = httpsPrefix
exporterPrefix = https
}
}

Expand All @@ -65,12 +77,12 @@ func getDefaultInstrumentation(agentConfig *adapters.CwaConfig) (*v1alpha1.Instr
Env: []corev1.EnvVar{
{Name: "OTEL_AWS_APP_SIGNALS_ENABLED", Value: "true"}, //TODO: remove in favor of new name once safe
{Name: "OTEL_AWS_APPLICATION_SIGNALS_ENABLED", Value: "true"},
{Name: "OTEL_TRACES_SAMPLER_ARG", Value: "endpoint=http://cloudwatch-agent.amazon-cloudwatch:2000"},
{Name: "OTEL_TRACES_SAMPLER_ARG", Value: fmt.Sprintf("endpoint=%s://%s:2000", http, cloudwatchAgentServiceEndpoint)},
{Name: "OTEL_TRACES_SAMPLER", Value: "xray"},
{Name: "OTEL_EXPORTER_OTLP_PROTOCOL", Value: "http/protobuf"},
{Name: "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", Value: exporterPrefix + "cloudwatch-agent.amazon-cloudwatch:4316/v1/traces"},
{Name: "OTEL_AWS_APP_SIGNALS_EXPORTER_ENDPOINT", Value: exporterPrefix + "cloudwatch-agent.amazon-cloudwatch:4316/v1/metrics"}, //TODO: remove in favor of new name once safe
{Name: "OTEL_AWS_APPLICATION_SIGNALS_EXPORTER_ENDPOINT", Value: exporterPrefix + "cloudwatch-agent.amazon-cloudwatch:4316/v1/metrics"},
{Name: "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", Value: fmt.Sprintf("%s://%s:4316/v1/traces", exporterPrefix, cloudwatchAgentServiceEndpoint)},
{Name: "OTEL_AWS_APP_SIGNALS_EXPORTER_ENDPOINT", Value: fmt.Sprintf("%s://%s:4316/v1/metrics", exporterPrefix, cloudwatchAgentServiceEndpoint)}, //TODO: remove in favor of new name once safe
{Name: "OTEL_AWS_APPLICATION_SIGNALS_EXPORTER_ENDPOINT", Value: fmt.Sprintf("%s://%s:4316/v1/metrics", exporterPrefix, cloudwatchAgentServiceEndpoint)},
{Name: "OTEL_METRICS_EXPORTER", Value: "none"},
{Name: "OTEL_LOGS_EXPORTER", Value: "none"},
},
Expand All @@ -80,18 +92,35 @@ func getDefaultInstrumentation(agentConfig *adapters.CwaConfig) (*v1alpha1.Instr
Env: []corev1.EnvVar{
{Name: "OTEL_AWS_APP_SIGNALS_ENABLED", Value: "true"}, //TODO: remove in favor of new name once safe
{Name: "OTEL_AWS_APPLICATION_SIGNALS_ENABLED", Value: "true"},
{Name: "OTEL_TRACES_SAMPLER_ARG", Value: "endpoint=http://cloudwatch-agent.amazon-cloudwatch:2000"},
{Name: "OTEL_TRACES_SAMPLER_ARG", Value: fmt.Sprintf("endpoint=%s://%s:2000", http, cloudwatchAgentServiceEndpoint)},
{Name: "OTEL_TRACES_SAMPLER", Value: "xray"},
{Name: "OTEL_EXPORTER_OTLP_PROTOCOL", Value: "http/protobuf"},
{Name: "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", Value: exporterPrefix + "cloudwatch-agent.amazon-cloudwatch:4316/v1/traces"},
{Name: "OTEL_AWS_APP_SIGNALS_EXPORTER_ENDPOINT", Value: exporterPrefix + "cloudwatch-agent.amazon-cloudwatch:4316/v1/metrics"}, //TODO: remove in favor of new name once safe
{Name: "OTEL_AWS_APPLICATION_SIGNALS_EXPORTER_ENDPOINT", Value: exporterPrefix + "cloudwatch-agent.amazon-cloudwatch:4316/v1/metrics"},
{Name: "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", Value: fmt.Sprintf("%s://%s:4316/v1/traces", exporterPrefix, cloudwatchAgentServiceEndpoint)},
{Name: "OTEL_AWS_APP_SIGNALS_EXPORTER_ENDPOINT", Value: fmt.Sprintf("%s://%s:4316/v1/metrics", exporterPrefix, cloudwatchAgentServiceEndpoint)}, //TODO: remove in favor of new name once safe
{Name: "OTEL_AWS_APPLICATION_SIGNALS_EXPORTER_ENDPOINT", Value: fmt.Sprintf("%s://%s:4316/v1/metrics", exporterPrefix, cloudwatchAgentServiceEndpoint)},
{Name: "OTEL_METRICS_EXPORTER", Value: "none"},
{Name: "OTEL_PYTHON_DISTRO", Value: "aws_distro"},
{Name: "OTEL_PYTHON_CONFIGURATOR", Value: "aws_configurator"},
{Name: "OTEL_LOGS_EXPORTER", Value: "none"},
},
},
DotNet: v1alpha1.DotNet{
Image: dotNetInstrumentationImage,
Env: []corev1.EnvVar{
{Name: "OTEL_AWS_APPLICATION_SIGNALS_ENABLED", Value: "true"},
{Name: "OTEL_TRACES_SAMPLER_ARG", Value: fmt.Sprintf("endpoint=%s://%s:2000", http, cloudwatchAgentServiceEndpoint)},
{Name: "OTEL_TRACES_SAMPLER", Value: "xray"},
{Name: "OTEL_EXPORTER_OTLP_PROTOCOL", Value: "http/protobuf"},
{Name: "OTEL_EXPORTER_OTLP_ENDPOINT", Value: fmt.Sprintf("%s://%s:4316", exporterPrefix, cloudwatchAgentServiceEndpoint)},
{Name: "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", Value: fmt.Sprintf("%s://%s:4316/v1/traces", exporterPrefix, cloudwatchAgentServiceEndpoint)},
{Name: "OTEL_AWS_APPLICATION_SIGNALS_EXPORTER_ENDPOINT", Value: fmt.Sprintf("%s://%s:4316/v1/metrics", exporterPrefix, cloudwatchAgentServiceEndpoint)},
{Name: "OTEL_METRICS_EXPORTER", Value: "none"},
{Name: "OTEL_DOTNET_DISTRO", Value: "aws_distro"},
{Name: "OTEL_DOTNET_CONFIGURATOR", Value: "aws_configurator"},
{Name: "OTEL_LOGS_EXPORTER", Value: "none"},
{Name: "OTEL_DOTNET_AUTO_PLUGINS", Value: "AWS.Distro.OpenTelemetry.AutoInstrumentation.Plugin, AWS.Distro.OpenTelemetry.AutoInstrumentation"},
},
},
},
}, nil
}
Loading
Loading