Skip to content

Allow configurable resource requests and limits received by helm chart. #196

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Jul 25, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 29 additions & 12 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ func stringFlagOrEnv(p *string, name string, envName string, defaultValue string
pflag.StringVar(p, name, defaultValue, usage)
}

// setEnvLang exports the CPU and memory limit configured for one language as
// process environment variables.
//
// lang is appended verbatim to the variable names, producing
// AUTO_INSTRUMENTATION_LIMIT_CPU_<lang> and
// AUTO_INSTRUMENTATION_LIMIT_MEMORY_<lang>. The values are read from the
// "cpu" and "memory" keys of cfg; a key that is absent (or a nil cfg) results
// in the variable being set to the empty string.
func setEnvLang(lang string, cfg map[string]string) {
	// Each row pairs an environment variable prefix with the cfg key that
	// supplies its value. CPU is written before memory.
	for _, entry := range []struct {
		envPrefix string
		cfgKey    string
	}{
		{"AUTO_INSTRUMENTATION_LIMIT_CPU_", "cpu"},
		{"AUTO_INSTRUMENTATION_LIMIT_MEMORY_", "memory"},
	} {
		// The error from os.Setenv is intentionally discarded; this helper
		// has no way to report it to its caller.
		_ = os.Setenv(entry.envPrefix+lang, cfg[entry.cfgKey])
	}
}

func main() {
// registers any flags that underlying libraries might use
opts := zap.Options{}
Expand All @@ -93,23 +98,25 @@ func main() {

// add flags related to this operator
var (
metricsAddr string
probeAddr string
pprofAddr string
agentImage string
autoInstrumentationJava string
autoInstrumentationPython string
autoInstrumentationDotNet string
autoAnnotationConfigStr string
webhookPort int
tlsOpt tlsConfig
dcgmExporterImage string
neuronMonitorImage string
metricsAddr string
probeAddr string
pprofAddr string
autoInstrumentationConfigStr string
agentImage string
autoInstrumentationJava string
autoInstrumentationPython string
autoInstrumentationDotNet string
autoAnnotationConfigStr string
webhookPort int
tlsOpt tlsConfig
dcgmExporterImage string
neuronMonitorImage string
)

pflag.StringVar(&metricsAddr, "metrics-addr", ":8080", "The address the metric endpoint binds to.")
pflag.StringVar(&probeAddr, "health-probe-addr", ":8081", "The address the probe endpoint binds to.")
pflag.StringVar(&pprofAddr, "pprof-addr", "", "The address to expose the pprof server. Default is empty string which disables the pprof server.")
pflag.StringVar(&autoInstrumentationConfigStr, "auto-instrumentation-config", "", "The configuration for auto-instrumentation.")
stringFlagOrEnv(&agentImage, "agent-image", "RELATED_IMAGE_COLLECTOR", fmt.Sprintf("%s:%s", cloudwatchAgentImageRepository, v.AmazonCloudWatchAgent), "The default CloudWatch Agent image. This image is used when no image is specified in the CustomResource.")
stringFlagOrEnv(&autoInstrumentationJava, "auto-instrumentation-java-image", "RELATED_IMAGE_AUTO_INSTRUMENTATION_JAVA", fmt.Sprintf("%s:%s", autoInstrumentationJavaImageRepository, v.AutoInstrumentationJava), "The default OpenTelemetry Java instrumentation image. This image is used when no image is specified in the CustomResource.")
stringFlagOrEnv(&autoInstrumentationPython, "auto-instrumentation-python-image", "RELATED_IMAGE_AUTO_INSTRUMENTATION_PYTHON", fmt.Sprintf("%s:%s", autoInstrumentationPythonImageRepository, v.AutoInstrumentationPython), "The default OpenTelemetry Python instrumentation image. This image is used when no image is specified in the CustomResource.")
Expand All @@ -119,6 +126,16 @@ func main() {
stringFlagOrEnv(&neuronMonitorImage, "neuron-monitor-image", "RELATED_IMAGE_NEURON_MONITOR", fmt.Sprintf("%s:%s", neuronMonitorImageRepository, v.NeuronMonitor), "The default Neuron monitor image. This image is used when no image is specified in the CustomResource.")
pflag.Parse()

// set instrumentation cpu and memory limits in environment variables to be used for default instrumentation
autoInstrumentationConfig := map[string]map[string]string{"java": {"cpu": "500m", "memory": "64Mi"}, "python": {"cpu": "500m", "memory": "32Mi"}, "dotnet": {"cpu": "500m", "memory": "128Mi"}}
err := json.Unmarshal([]byte(autoInstrumentationConfigStr), &autoInstrumentationConfig)
if err != nil {
setupLog.Error(err, "Unable to unmarshal auto-instrumentation config, assuming default values")
}
setEnvLang("JAVA", autoInstrumentationConfig["java"])
setEnvLang("PYTHON", autoInstrumentationConfig["python"])
setEnvLang("DOTNET", autoInstrumentationConfig["dotnet"])

// set supported language instrumentation images in environment variable to be used for default instrumentation
os.Setenv("AUTO_INSTRUMENTATION_JAVA", autoInstrumentationJava)
os.Setenv("AUTO_INSTRUMENTATION_PYTHON", autoInstrumentationPython)
Expand Down
24 changes: 24 additions & 0 deletions pkg/instrumentation/defaultinstrumentation.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package instrumentation
import (
"errors"
"fmt"
"k8s.io/apimachinery/pkg/api/resource"
"os"

corev1 "k8s.io/api/core/v1"
Expand All @@ -25,6 +26,20 @@ const (
https = "https"
)

// getInstrumentationConfigLimits builds a resource list of CPU and memory
// limits for one language from the process environment.
//
// lang is appended verbatim to the variable names, so the function reads
// AUTO_INSTRUMENTATION_LIMIT_CPU_<lang> and
// AUTO_INSTRUMENTATION_LIMIT_MEMORY_<lang>.
//
// The returned list is never nil. A resource is included only when its
// variable is non-empty and parses as a quantity; an unset, empty, or
// malformed value leaves that resource out of the list.
func getInstrumentationConfigLimits(lang string) corev1.ResourceList {
	limits := corev1.ResourceList{}

	for _, entry := range []struct {
		name   corev1.ResourceName
		envKey string
	}{
		{corev1.ResourceCPU, "AUTO_INSTRUMENTATION_LIMIT_CPU_" + lang},
		{corev1.ResourceMemory, "AUTO_INSTRUMENTATION_LIMIT_MEMORY_" + lang},
	} {
		// Unset and empty are treated the same: no limit for this resource.
		raw := os.Getenv(entry.envKey)
		if raw == "" {
			continue
		}

		// These values originate from user-supplied configuration, so parse
		// with ParseQuantity rather than MustParse: a typo in a configured
		// value must not panic the process.
		// NOTE(review): the invalid value is dropped without a log line
		// because no logger is visible from this function; consider reporting
		// it where one is available.
		quantity, err := resource.ParseQuantity(raw)
		if err != nil {
			continue
		}
		limits[entry.name] = quantity
	}

	return limits
}

func getDefaultInstrumentation(agentConfig *adapters.CwaConfig, isWindowsPod bool) (*v1alpha1.Instrumentation, error) {
javaInstrumentationImage, ok := os.LookupEnv("AUTO_INSTRUMENTATION_JAVA")
if !ok {
Expand Down Expand Up @@ -86,6 +101,9 @@ func getDefaultInstrumentation(agentConfig *adapters.CwaConfig, isWindowsPod boo
{Name: "OTEL_METRICS_EXPORTER", Value: "none"},
{Name: "OTEL_LOGS_EXPORTER", Value: "none"},
},
Resources: corev1.ResourceRequirements{
Limits: getInstrumentationConfigLimits("JAVA"),
},
},
Python: v1alpha1.Python{
Image: pythonInstrumentationImage,
Expand All @@ -103,6 +121,9 @@ func getDefaultInstrumentation(agentConfig *adapters.CwaConfig, isWindowsPod boo
{Name: "OTEL_PYTHON_CONFIGURATOR", Value: "aws_configurator"},
{Name: "OTEL_LOGS_EXPORTER", Value: "none"},
},
Resources: corev1.ResourceRequirements{
Limits: getInstrumentationConfigLimits("PYTHON"),
},
},
DotNet: v1alpha1.DotNet{
Image: dotNetInstrumentationImage,
Expand All @@ -120,6 +141,9 @@ func getDefaultInstrumentation(agentConfig *adapters.CwaConfig, isWindowsPod boo
{Name: "OTEL_LOGS_EXPORTER", Value: "none"},
{Name: "OTEL_DOTNET_AUTO_PLUGINS", Value: "AWS.Distro.OpenTelemetry.AutoInstrumentation.Plugin, AWS.Distro.OpenTelemetry.AutoInstrumentation"},
},
Resources: corev1.ResourceRequirements{
Limits: getInstrumentationConfigLimits("DOTNET"),
},
},
},
}, nil
Expand Down
85 changes: 85 additions & 0 deletions pkg/instrumentation/defaultinstrumentation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
package instrumentation

import (
"k8s.io/apimachinery/pkg/api/resource"
"os"
"reflect"
"testing"
Expand All @@ -19,6 +20,12 @@ func Test_getDefaultInstrumentationLinux(t *testing.T) {
os.Setenv("AUTO_INSTRUMENTATION_JAVA", defaultJavaInstrumentationImage)
os.Setenv("AUTO_INSTRUMENTATION_PYTHON", defaultPythonInstrumentationImage)
os.Setenv("AUTO_INSTRUMENTATION_DOTNET", defaultDotNetInstrumentationImage)
os.Setenv("AUTO_INSTRUMENTATION_LIMIT_CPU_JAVA", "500m")
os.Setenv("AUTO_INSTRUMENTATION_LIMIT_MEMORY_JAVA", "64Mi")
os.Setenv("AUTO_INSTRUMENTATION_LIMIT_CPU_PYTHON", "500m")
os.Setenv("AUTO_INSTRUMENTATION_LIMIT_MEMORY_PYTHON", "32Mi")
os.Setenv("AUTO_INSTRUMENTATION_LIMIT_CPU_DOTNET", "500m")
os.Setenv("AUTO_INSTRUMENTATION_LIMIT_MEMORY_DOTNET", "128Mi")

httpInst := &v1alpha1.Instrumentation{
Status: v1alpha1.InstrumentationStatus{},
Expand Down Expand Up @@ -51,6 +58,12 @@ func Test_getDefaultInstrumentationLinux(t *testing.T) {
{Name: "OTEL_METRICS_EXPORTER", Value: "none"},
{Name: "OTEL_LOGS_EXPORTER", Value: "none"},
},
Resources: corev1.ResourceRequirements{
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("500m"),
corev1.ResourceMemory: resource.MustParse("64Mi"),
},
},
},
Python: v1alpha1.Python{
Image: defaultPythonInstrumentationImage,
Expand All @@ -68,6 +81,12 @@ func Test_getDefaultInstrumentationLinux(t *testing.T) {
{Name: "OTEL_PYTHON_CONFIGURATOR", Value: "aws_configurator"},
{Name: "OTEL_LOGS_EXPORTER", Value: "none"},
},
Resources: corev1.ResourceRequirements{
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("500m"),
corev1.ResourceMemory: resource.MustParse("32Mi"),
},
},
},
DotNet: v1alpha1.DotNet{
Image: defaultDotNetInstrumentationImage,
Expand All @@ -85,6 +104,12 @@ func Test_getDefaultInstrumentationLinux(t *testing.T) {
{Name: "OTEL_LOGS_EXPORTER", Value: "none"},
{Name: "OTEL_DOTNET_AUTO_PLUGINS", Value: "AWS.Distro.OpenTelemetry.AutoInstrumentation.Plugin, AWS.Distro.OpenTelemetry.AutoInstrumentation"},
},
Resources: corev1.ResourceRequirements{
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("500m"),
corev1.ResourceMemory: resource.MustParse("128Mi"),
},
},
},
},
}
Expand Down Expand Up @@ -119,6 +144,12 @@ func Test_getDefaultInstrumentationLinux(t *testing.T) {
{Name: "OTEL_METRICS_EXPORTER", Value: "none"},
{Name: "OTEL_LOGS_EXPORTER", Value: "none"},
},
Resources: corev1.ResourceRequirements{
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("500m"),
corev1.ResourceMemory: resource.MustParse("64Mi"),
},
},
},
Python: v1alpha1.Python{
Image: defaultPythonInstrumentationImage,
Expand All @@ -136,6 +167,12 @@ func Test_getDefaultInstrumentationLinux(t *testing.T) {
{Name: "OTEL_PYTHON_CONFIGURATOR", Value: "aws_configurator"},
{Name: "OTEL_LOGS_EXPORTER", Value: "none"},
},
Resources: corev1.ResourceRequirements{
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("500m"),
corev1.ResourceMemory: resource.MustParse("32Mi"),
},
},
},
DotNet: v1alpha1.DotNet{
Image: defaultDotNetInstrumentationImage,
Expand All @@ -153,6 +190,12 @@ func Test_getDefaultInstrumentationLinux(t *testing.T) {
{Name: "OTEL_LOGS_EXPORTER", Value: "none"},
{Name: "OTEL_DOTNET_AUTO_PLUGINS", Value: "AWS.Distro.OpenTelemetry.AutoInstrumentation.Plugin, AWS.Distro.OpenTelemetry.AutoInstrumentation"},
},
Resources: corev1.ResourceRequirements{
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("500m"),
corev1.ResourceMemory: resource.MustParse("128Mi"),
},
},
},
},
}
Expand Down Expand Up @@ -218,6 +261,12 @@ func Test_getDefaultInstrumentationWindows(t *testing.T) {
os.Setenv("AUTO_INSTRUMENTATION_JAVA", defaultJavaInstrumentationImage)
os.Setenv("AUTO_INSTRUMENTATION_PYTHON", defaultPythonInstrumentationImage)
os.Setenv("AUTO_INSTRUMENTATION_DOTNET", defaultDotNetInstrumentationImage)
os.Setenv("AUTO_INSTRUMENTATION_LIMIT_CPU_JAVA", "500m")
os.Setenv("AUTO_INSTRUMENTATION_LIMIT_MEMORY_JAVA", "64Mi")
os.Setenv("AUTO_INSTRUMENTATION_LIMIT_CPU_PYTHON", "500m")
os.Setenv("AUTO_INSTRUMENTATION_LIMIT_MEMORY_PYTHON", "32Mi")
os.Setenv("AUTO_INSTRUMENTATION_LIMIT_CPU_DOTNET", "500m")
os.Setenv("AUTO_INSTRUMENTATION_LIMIT_MEMORY_DOTNET", "128Mi")

httpInst := &v1alpha1.Instrumentation{
Status: v1alpha1.InstrumentationStatus{},
Expand Down Expand Up @@ -250,6 +299,12 @@ func Test_getDefaultInstrumentationWindows(t *testing.T) {
{Name: "OTEL_METRICS_EXPORTER", Value: "none"},
{Name: "OTEL_LOGS_EXPORTER", Value: "none"},
},
Resources: corev1.ResourceRequirements{
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("500m"),
corev1.ResourceMemory: resource.MustParse("64Mi"),
},
},
},
Python: v1alpha1.Python{
Image: defaultPythonInstrumentationImage,
Expand All @@ -267,6 +322,12 @@ func Test_getDefaultInstrumentationWindows(t *testing.T) {
{Name: "OTEL_PYTHON_CONFIGURATOR", Value: "aws_configurator"},
{Name: "OTEL_LOGS_EXPORTER", Value: "none"},
},
Resources: corev1.ResourceRequirements{
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("500m"),
corev1.ResourceMemory: resource.MustParse("32Mi"),
},
},
},
DotNet: v1alpha1.DotNet{
Image: defaultDotNetInstrumentationImage,
Expand All @@ -284,6 +345,12 @@ func Test_getDefaultInstrumentationWindows(t *testing.T) {
{Name: "OTEL_LOGS_EXPORTER", Value: "none"},
{Name: "OTEL_DOTNET_AUTO_PLUGINS", Value: "AWS.Distro.OpenTelemetry.AutoInstrumentation.Plugin, AWS.Distro.OpenTelemetry.AutoInstrumentation"},
},
Resources: corev1.ResourceRequirements{
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("500m"),
corev1.ResourceMemory: resource.MustParse("128Mi"),
},
},
},
},
}
Expand Down Expand Up @@ -318,6 +385,12 @@ func Test_getDefaultInstrumentationWindows(t *testing.T) {
{Name: "OTEL_METRICS_EXPORTER", Value: "none"},
{Name: "OTEL_LOGS_EXPORTER", Value: "none"},
},
Resources: corev1.ResourceRequirements{
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("500m"),
corev1.ResourceMemory: resource.MustParse("64Mi"),
},
},
},
Python: v1alpha1.Python{
Image: defaultPythonInstrumentationImage,
Expand All @@ -335,6 +408,12 @@ func Test_getDefaultInstrumentationWindows(t *testing.T) {
{Name: "OTEL_PYTHON_CONFIGURATOR", Value: "aws_configurator"},
{Name: "OTEL_LOGS_EXPORTER", Value: "none"},
},
Resources: corev1.ResourceRequirements{
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("500m"),
corev1.ResourceMemory: resource.MustParse("32Mi"),
},
},
},
DotNet: v1alpha1.DotNet{
Image: defaultDotNetInstrumentationImage,
Expand All @@ -352,6 +431,12 @@ func Test_getDefaultInstrumentationWindows(t *testing.T) {
{Name: "OTEL_LOGS_EXPORTER", Value: "none"},
{Name: "OTEL_DOTNET_AUTO_PLUGINS", Value: "AWS.Distro.OpenTelemetry.AutoInstrumentation.Plugin, AWS.Distro.OpenTelemetry.AutoInstrumentation"},
},
Resources: corev1.ResourceRequirements{
Limits: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("500m"),
corev1.ResourceMemory: resource.MustParse("128Mi"),
},
},
},
},
}
Expand Down
Loading