kubescape · matthyx · Jul 1, 2026 · Jun 29, 2026 · Jun 29, 2026 · Jun 29, 2026
diff --git a/.github/workflows/pr-created.yaml b/.github/workflows/pr-created.yaml
@@ -26,8 +26,33 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
+        with:
+          path: operator
       - uses: actions/setup-go@v5
         with:
           go-version: "1.25"
-      - name: Build with integration tag
-        run: go test -tags=integration -run=^$ ./...
+      # The node-agent autoscaler integration tests render the actual DaemonSet
+      # template that lives in the helm-charts repo. Check it out and extract the
+      # template so the tests run for real instead of skipping (see
+      # nodeagentautoscaler/integration_test.go).
+      - name: Checkout helm-charts
+        uses: actions/checkout@v4
+        with:
+          repository: kubescape/helm-charts
+          ref: main
+          path: helm-charts
-      - name: Checkout helm-charts
-        uses: actions/checkout@v4
-        with:
-          repository: kubescape/helm-charts
-          ref: main
-          path: helm-charts
+      - name: Checkout helm-charts
+        uses: actions/checkout@v4
+        with:
+          repository: kubescape/helm-charts
+          ref: main
+          path: helm-charts
+          persist-credentials: false
-      - name: Checkout helm-charts
-        uses: actions/checkout@v4
-        with:
-          repository: kubescape/helm-charts
-          ref: main
-          path: helm-charts
+      - name: Checkout helm-charts
+        uses: actions/checkout@v4
+        with:
+          repository: kubescape/helm-charts
+          ref: main
+          path: helm-charts
+          persist-credentials: false
+      - uses: azure/setup-helm@v4
+      - name: Extract node-agent daemonset template
+        run: |
+          # The chart has local (file://) subchart dependencies that must be
+          # vendored into charts/ before templating a fresh checkout.
+          helm dependency build helm-charts/charts/kubescape-operator
+          helm template test helm-charts/charts/kubescape-operator \
+            --kube-version 1.29.0 \
+            --set nodeAgent.autoscaler.enabled=true --set clusterName=test \
+          | grep -A 400 "daemonset-template.yaml:" | tail -n +2 | sed 's/^    //' \
+          | awk '/^---/{exit} {print}' > /tmp/test-daemonset-template.yaml
+          test -s /tmp/test-daemonset-template.yaml || { echo "template extraction produced an empty file"; exit 1; }
+      - name: Build and run integration tests
+        working-directory: operator
+        run: go test -tags=integration ./...
diff --git a/config/config.go b/config/config.go
@@ -66,8 +66,13 @@ type NodeAgentAutoscalerResourceBounds struct {
 
 // NodeAgentAutoscalerConfig defines the configuration for node agent autoscaling
 type NodeAgentAutoscalerConfig struct {
-	Enabled                bool                                   `json:"enabled" mapstructure:"enabled"`
-	NodeGroupLabel         string                                 `json:"nodeGroupLabel" mapstructure:"nodeGroupLabel"`
+	Enabled        bool   `json:"enabled" mapstructure:"enabled"`
+	NodeGroupLabel string `json:"nodeGroupLabel" mapstructure:"nodeGroupLabel"`
+	// DefaultNodeGroup is the group value assigned to nodes that do not carry the
+	// NodeGroupLabel. This ensures a node-agent DaemonSet is still deployed on such
+	// nodes (e.g. on-prem or custom clusters that do not populate the label).
+	// If set to an empty string, nodes without the label are skipped.
+	DefaultNodeGroup       string                                 `json:"defaultNodeGroup" mapstructure:"defaultNodeGroup"`
 	ResourcePercentages    NodeAgentAutoscalerResourcePercentages `json:"resourcePercentages" mapstructure:"resourcePercentages"`
 	MinResources           NodeAgentAutoscalerResourceBounds      `json:"minResources" mapstructure:"minResources"`
 	MaxResources           NodeAgentAutoscalerResourceBounds      `json:"maxResources" mapstructure:"maxResources"`
@@ -306,6 +311,7 @@ func LoadConfig(path string) (Config, error) {
 	// Node agent autoscaler defaults
 	viper.SetDefault("nodeAgentAutoscaler.enabled", false)
 	viper.SetDefault("nodeAgentAutoscaler.nodeGroupLabel", "node.kubernetes.io/instance-type")
+	viper.SetDefault("nodeAgentAutoscaler.defaultNodeGroup", "default")
 	viper.SetDefault("nodeAgentAutoscaler.resourcePercentages.requestCPU", 2)
 	viper.SetDefault("nodeAgentAutoscaler.resourcePercentages.requestMemory", 2)
 	viper.SetDefault("nodeAgentAutoscaler.resourcePercentages.limitCPU", 5)

diff --git a/config/config_test.go b/config/config_test.go
@@ -103,8 +103,9 @@ func TestLoadConfig(t *testing.T) {
 					Namespace: "default",
 				},
 				NodeAgentAutoscaler: NodeAgentAutoscalerConfig{
-					Enabled:        false,
-					NodeGroupLabel: "node.kubernetes.io/instance-type",
+					Enabled:          false,
+					NodeGroupLabel:   "node.kubernetes.io/instance-type",
+					DefaultNodeGroup: "default",
 					ResourcePercentages: NodeAgentAutoscalerResourcePercentages{
 						RequestCPU:    2,
 						RequestMemory: 2,

diff --git a/docs/node-agent-autoscaler.md b/docs/node-agent-autoscaler.md
@@ -79,6 +79,53 @@ annotations:
   argocd.argoproj.io/sync-options: Prune=false
 ```
 
+**Node Targeting:**
+
+Each DaemonSet selects the nodes it runs on based on the grouping label:
+
+- **Normal groups** use a `nodeSelector` matching the grouping label value, e.g.
+  `node.kubernetes.io/instance-type: m5.large`.
+- **The default group** (`IsDefaultGroup`) targets nodes that are *missing* the
+  grouping label. A `nodeSelector` cannot match an absent label, so its template
+  branch instead uses a `DoesNotExist` node affinity:
+
+  ```yaml
+  affinity:
+    nodeAffinity:
+      requiredDuringSchedulingIgnoredDuringExecution:
+        nodeSelectorTerms:
+        - matchExpressions:
+          - key: <NodeGroupLabelKey>   # the configured grouping label key
+            operator: DoesNotExist
+  ```
+
+  This ensures a `node-agent` is deployed on label-less nodes (on-prem / custom
+  clusters) without requiring the cluster operator to label nodes manually. The
+  branch is driven by `IsDefaultGroup` in the template data; the DaemonSet
+  template must therefore include both branches. Both the `nodeSelector` and this
+  affinity key off `NodeGroupLabelKey` (the operator's configured grouping label),
+  so they always match the label the operator actually groups by — even when it is
+  overridden from the default `node.kubernetes.io/instance-type`.
+
+The default group affinity carries **only** the `DoesNotExist` term; its OS
+requirement is already enforced by the `nodeSelector`. Non-default groups are not
+given an operator-managed affinity at all, so any user-provided `nodeAgent.affinity`
+(zone / GPU / topology constraints) is preserved for them.
+
+The label-less population is tracked under an internal sentinel key, so a node
+that legitimately carries `<groupingLabel>=<defaultNodeGroup>` forms its own
+normal (nodeSelector-targeted) group and is never merged into the affinity-based
+default group.
+
+> **Edge case — shared `kubescape.io/node-group` value.** In the unlikely event a
+> node is *labelled* `<groupingLabel>=<defaultNodeGroup>` while other nodes lack
+> the label, the real group and the synthetic default group both render their
+> `kubescape.io/node-group` selector value as `<defaultNodeGroup>`, so the two
+> DaemonSets share a pod selector. This is safe in practice — their node targeting
+> is mutually exclusive (label-present vs. absent) and `ownerReferences` prevent
+> pod adoption across them — but `kubectl get pods -l kubescape.io/node-group=…`
+> will list both groups' pods together.
+
 ### NodeGrouper (`nodegrouper.go`)
 
 Handles node discovery and resource calculation.
@@ -109,6 +156,15 @@ func calculatePercentage(q resource.Quantity, percent int) resource.Quantity {
 }
 ```
 
+**Representative allocatable per group:**
+
+Instance-type groups are homogeneous by definition, so a group's allocatable is
+taken from the first node seen. The **default group** is the exception: label-less
+nodes can be heterogeneous (mixed on-prem/bare-metal hardware), so it is sized off
+the **minimum** allocatable CPU/memory across its nodes. This guarantees the
+node-agent's requests fit every node in the group and makes the result independent
+of the (unstable) node list order.
+
 **Naming Collision Detection:**
 
 Different label values can sanitize to the same name (e.g., `m5.large` and `m5_large` both become `m5-large`). The autoscaler detects this and adds a short hash suffix:
@@ -137,9 +193,11 @@ Manages DaemonSet template loading and rendering.
 
 ```go
 type TemplateData struct {
-    Name           string            // e.g., "node-agent-m5-large"
-    NodeGroupLabel string            // e.g., "m5.large"
-    Resources      TemplateResources // Requests and limits
+    Name              string            // e.g., "node-agent-m5-large"
+    NodeGroupLabel    string            // the group's label value, e.g., "m5.large"
+    NodeGroupLabelKey string            // the configured grouping label key, e.g., "node.kubernetes.io/instance-type"
+    IsDefaultGroup    bool              // true for the fallback group of label-less nodes
+    Resources         TemplateResources // Requests and limits
 }
 
 type TemplateResources struct {
@@ -193,6 +251,7 @@ The autoscaler is configured via the operator's ConfigMap:
 type NodeAgentAutoscalerConfig struct {
     Enabled             bool          // Enable/disable autoscaler
     NodeGroupLabel      string        // Label to group nodes by
+    DefaultNodeGroup    string        // Group value for nodes missing NodeGroupLabel (default: "default"; empty = skip)
     ResourcePercentages struct {
         RequestCPU    int  // % of allocatable CPU for requests
         RequestMemory int  // % of allocatable memory for requests
@@ -295,7 +354,8 @@ kubectl get events -n kubescape --field-selector reason=Created,reason=Deleted
 
 | Scenario | Behavior |
 |----------|----------|
-| Node missing group label | Log error, skip node (no DaemonSet for it) |
+| Node missing group label (`DefaultNodeGroup` set) | Assign node to the default group (targeted via `DoesNotExist` affinity) so a node-agent is still deployed |
+| Node missing group label (`DefaultNodeGroup` empty) | Log error, skip node (no DaemonSet for it) |
 | Template parse error | Fatal error on startup |
 | Template render error | Log error, skip node group |
 | API error (create/update/delete) | Log error, continue reconciliation |

diff --git a/nodeagentautoscaler/autoscaler.go b/nodeagentautoscaler/autoscaler.go
@@ -41,20 +41,20 @@ const (
 
 // Autoscaler manages node-agent DaemonSets based on node groups
 type Autoscaler struct {
-	client               kubernetes.Interface
-	config               config.NodeAgentAutoscalerConfig
-	namespace            string
-	nodeGrouper          *NodeGrouper
-	templateRenderer     *TemplateRenderer
-	stopCh               chan struct{}
-	operatorDeployment   string // Name of the operator deployment (for owner references)
-	ownerRef             *metav1.OwnerReference
-	eventRecorder        record.EventRecorder
+	client             kubernetes.Interface
+	config             config.NodeAgentAutoscalerConfig
+	namespace          string
+	nodeGrouper        *NodeGrouper
+	templateRenderer   *TemplateRenderer
+	stopCh             chan struct{}
+	operatorDeployment string // Name of the operator deployment (for owner references)
+	ownerRef           *metav1.OwnerReference
+	eventRecorder      record.EventRecorder
 }
 
 // NewAutoscaler creates a new Autoscaler instance
 func NewAutoscaler(client kubernetes.Interface, cfg config.NodeAgentAutoscalerConfig, namespace string, operatorDeploymentName string) (*Autoscaler, error) {
-	templateRenderer, err := NewTemplateRenderer(cfg.TemplatePath, cfg.GoMemLimitPercentage)
+	templateRenderer, err := NewTemplateRenderer(cfg.TemplatePath, cfg.GoMemLimitPercentage, cfg.NodeGroupLabel)
 	if err != nil {
 		return nil, err
 	}
@@ -177,21 +177,23 @@ func (a *Autoscaler) Reconcile(ctx context.Context) error {
 		return err
 	}
 
-	// Build a map of existing DaemonSets by node group
-	existingByNodeGroup := make(map[string]*appsv1.DaemonSet)
+	// Build a map of existing DaemonSets by name. The DaemonSet name (derived from
+	// the group's sanitized, collision-resolved name) is the unique per-group
+	// identity. The kubescape.io/node-group label value is NOT unique: the default
+	// group and a real group can legitimately share a label value (e.g. "default").
+	existingByName := make(map[string]*appsv1.DaemonSet, len(existingDaemonSets))
 	for i := range existingDaemonSets {
 		ds := &existingDaemonSets[i]
-		if nodeGroup, ok := ds.Labels[NodeGroupLabelKey]; ok {
-			existingByNodeGroup[nodeGroup] = ds
-		}
+		existingByName[ds.Name] = ds
 	}
 
-	// Track which node groups we've processed
-	processedNodeGroups := make(map[string]bool)
+	// Track which DaemonSets we've processed (by name)
+	processedNames := make(map[string]bool)
 
 	// Create or update DaemonSets for each node group
 	for _, group := range nodeGroups {
-		processedNodeGroups[group.LabelValue] = true
+		dsName := GenerateDaemonSetName(group)
+		processedNames[dsName] = true
 
 		// Calculate resources for this group
 		resources, err := a.nodeGrouper.CalculateResources(group)
@@ -231,7 +233,7 @@ func (a *Autoscaler) Reconcile(ctx context.Context) error {
 		}
 
 		// Check if DaemonSet exists
-		if existingDS, exists := existingByNodeGroup[group.LabelValue]; exists {
+		if existingDS, exists := existingByName[dsName]; exists {
 			// Update if needed
 			if err := a.updateDaemonSetIfNeeded(ctx, existingDS, desiredDS); err != nil {
 				logger.L().Error("failed to update DaemonSet",
@@ -249,8 +251,8 @@ func (a *Autoscaler) Reconcile(ctx context.Context) error {
 	}
 
 	// Delete orphaned DaemonSets (node groups that no longer exist)
-	for nodeGroup, ds := range existingByNodeGroup {
-		if !processedNodeGroups[nodeGroup] {
+	for name, ds := range existingByName {
+		if !processedNames[name] {
 			if err := a.deleteDaemonSet(ctx, ds); err != nil {
 				logger.L().Error("failed to delete orphaned DaemonSet",
 					helpers.String("name", ds.Name),
@@ -352,7 +354,6 @@ func (a *Autoscaler) updateDaemonSetIfNeeded(ctx context.Context, existing, desi
 	return nil
 }
 
-
 // deleteDaemonSet deletes a DaemonSet
 func (a *Autoscaler) deleteDaemonSet(ctx context.Context, ds *appsv1.DaemonSet) error {
 	logger.L().Info("deleting orphaned DaemonSet",
@@ -381,4 +382,3 @@ func (a *Autoscaler) recordEvent(obj runtime.Object, eventType, reason, message
 		a.eventRecorder.Event(obj, eventType, reason, message)
 	}
 }
-