@@ -464,7 +464,7 @@ func (p *DynamicPolicy) GetResourcesAllocation(_ context.Context,
464
464
if p .applySidecarAllocationInfoFromMainContainer (allocationInfo , mainContainerAllocationInfo ) {
465
465
general .Infof ("pod: %s/%s, container: %s sync allocation info from main container" ,
466
466
allocationInfo .PodNamespace , allocationInfo .PodName , containerName )
467
- p .state .SetAllocationInfo (podUID , containerName , allocationInfo )
467
+ p .state .SetAllocationInfo (podUID , containerName , allocationInfo , true )
468
468
needUpdateMachineState = true
469
469
}
470
470
}
@@ -488,11 +488,11 @@ func (p *DynamicPolicy) GetResourcesAllocation(_ context.Context,
488
488
}
489
489
490
490
allocationInfo .InitTimestamp = time .Now ().Format (util .QRMTimeFormat )
491
- p .state .SetAllocationInfo (podUID , containerName , allocationInfo )
491
+ p .state .SetAllocationInfo (podUID , containerName , allocationInfo , true )
492
492
} else if allocationInfo .RampUp && time .Now ().After (initTs .Add (p .transitionPeriod )) {
493
493
general .Infof ("pod: %s/%s, container: %s ramp up finished" , allocationInfo .PodNamespace , allocationInfo .PodName , allocationInfo .ContainerName )
494
494
allocationInfo .RampUp = false
495
- p .state .SetAllocationInfo (podUID , containerName , allocationInfo )
495
+ p .state .SetAllocationInfo (podUID , containerName , allocationInfo , true )
496
496
497
497
if allocationInfo .CheckShared () {
498
498
allocationInfosJustFinishRampUp = append (allocationInfosJustFinishRampUp , allocationInfo )
@@ -503,7 +503,7 @@ func (p *DynamicPolicy) GetResourcesAllocation(_ context.Context,
503
503
}
504
504
505
505
if len (allocationInfosJustFinishRampUp ) > 0 {
506
- if err = p .putAllocationsAndAdjustAllocationEntries (allocationInfosJustFinishRampUp , true ); err != nil {
506
+ if err = p .putAllocationsAndAdjustAllocationEntries (allocationInfosJustFinishRampUp , true , true ); err != nil {
507
507
// not influencing return response to kubelet when putAllocationsAndAdjustAllocationEntries failed
508
508
general .Errorf ("putAllocationsAndAdjustAllocationEntries failed with error: %v" , err )
509
509
}
@@ -517,7 +517,7 @@ func (p *DynamicPolicy) GetResourcesAllocation(_ context.Context,
517
517
general .Errorf ("GetResourcesAllocation GenerateMachineStateFromPodEntries failed with error: %v" , err )
518
518
return nil , fmt .Errorf ("GenerateMachineStateFromPodEntries failed with error: %v" , err )
519
519
}
520
- p .state .SetMachineState (updatedMachineState )
520
+ p .state .SetMachineState (updatedMachineState , true )
521
521
}
522
522
523
523
podEntries = p .state .GetPodEntries ()
@@ -843,14 +843,17 @@ func (p *DynamicPolicy) Allocate(ctx context.Context,
843
843
if err != nil {
844
844
resp = nil
845
845
respErr = fmt .Errorf ("add container to qos aware server failed with error: %v" , err )
846
- _ = p .removeContainer (req .PodUid , req .ContainerName )
846
+ _ = p .removeContainer (req .PodUid , req .ContainerName , false )
847
847
}
848
848
} else if respErr != nil {
849
- _ = p .removeContainer (req .PodUid , req .ContainerName )
849
+ _ = p .removeContainer (req .PodUid , req .ContainerName , false )
850
850
_ = p .emitter .StoreInt64 (util .MetricNameAllocateFailed , 1 , metrics .MetricTypeNameRaw ,
851
851
metrics.MetricTag {Key : "error_message" , Val : metric .MetricTagValueFormat (respErr )},
852
852
metrics.MetricTag {Key : util .MetricTagNameInplaceUpdateResizing , Val : strconv .FormatBool (util .PodInplaceUpdateResizing (req ))})
853
853
}
854
+ if err := p .state .StoreState (); err != nil {
855
+ general .ErrorS (err , "store state failed" , "podName" , req .PodName , "containerName" , req .ContainerName )
856
+ }
854
857
855
858
p .Unlock ()
856
859
if respErr != nil {
@@ -908,7 +911,7 @@ func (p *DynamicPolicy) Allocate(ctx context.Context,
908
911
if p .allocationHandlers [qosLevel ] == nil {
909
912
return nil , fmt .Errorf ("katalyst QoS level: %s is not supported yet" , qosLevel )
910
913
}
911
- return p .allocationHandlers [qosLevel ](ctx , req )
914
+ return p .allocationHandlers [qosLevel ](ctx , req , false )
912
915
}
913
916
914
917
// AllocateForPod is called during pod admit so that the resource
@@ -961,34 +964,40 @@ func (p *DynamicPolicy) RemovePod(ctx context.Context,
961
964
}
962
965
}
963
966
964
- err = p .removePod (req .PodUid , podEntries )
967
+ err = p .removePod (req .PodUid , podEntries , false )
965
968
if err != nil {
966
969
general .ErrorS (err , "remove pod failed with error" , "podUID" , req .PodUid )
967
970
return nil , err
968
971
}
969
972
970
- aErr := p .adjustAllocationEntries ()
973
+ aErr := p .adjustAllocationEntries (false )
971
974
if aErr != nil {
972
975
general .ErrorS (aErr , "adjustAllocationEntries failed" , "podUID" , req .PodUid )
973
976
}
977
+ if err := p .state .StoreState (); err != nil {
978
+ general .ErrorS (err , "store state failed" , "podUID" , req .PodUid )
979
+ }
974
980
975
981
return & pluginapi.RemovePodResponse {}, nil
976
982
}
977
983
// removePod drops the entry for podUID from podEntries, regenerates the
// machine state from the remaining entries, and writes both into p.state.
// When persistCheckpoint is true it finishes by calling p.state.StoreState()
// and returns that call's error; otherwise it returns nil after the in-memory
// update. It returns a wrapped error, without touching p.state, if the machine
// state cannot be regenerated.
978
- func (p * DynamicPolicy ) removePod (podUID string , podEntries state.PodEntries ) error {
984
+ func (p * DynamicPolicy ) removePod (podUID string , podEntries state.PodEntries , persistCheckpoint bool ) error {
979
985
// delete operates on the map handed in by the caller, so the caller's
// podEntries loses the podUID key as well.
delete (podEntries , podUID )
980
986
981
987
updatedMachineState , err := generateMachineStateFromPodEntries (p .machineInfo .CPUTopology , podEntries )
982
988
if err != nil {
983
989
return fmt .Errorf ("GenerateMachineStateFromPodEntries failed with error: %v" , err )
984
990
}
985
991
986
- p .state .SetPodEntries (podEntries )
987
- p .state .SetMachineState (updatedMachineState )
992
// NOTE(review): the trailing false presumably tells the state setters not to
// persist on each call, so the StoreState call below writes once — confirm
// against the state implementation.
+ p .state .SetPodEntries (podEntries , false )
993
+ p .state .SetMachineState (updatedMachineState , false )
994
// Callers that pass persistCheckpoint=false (e.g. RemovePod in this change)
// call p.state.StoreState() themselves after further adjustments.
+ if persistCheckpoint {
995
+ return p .state .StoreState ()
996
+ }
988
997
return nil
989
998
}
990
999
991
- func (p * DynamicPolicy ) removeContainer (podUID , containerName string ) error {
1000
+ func (p * DynamicPolicy ) removeContainer (podUID , containerName string , persistCheckpoint bool ) error {
992
1001
podEntries := p .state .GetPodEntries ()
993
1002
994
1003
found := false
@@ -1007,8 +1016,11 @@ func (p *DynamicPolicy) removeContainer(podUID, containerName string) error {
1007
1016
return fmt .Errorf ("GenerateMachineStateFromPodEntries failed with error: %v" , err )
1008
1017
}
1009
1018
1010
- p .state .SetPodEntries (podEntries )
1011
- p .state .SetMachineState (updatedMachineState )
1019
+ p .state .SetPodEntries (podEntries , false )
1020
+ p .state .SetMachineState (updatedMachineState , false )
1021
+ if persistCheckpoint {
1022
+ return p .state .StoreState ()
1023
+ }
1012
1024
return nil
1013
1025
}
1014
1026
@@ -1074,8 +1086,11 @@ func (p *DynamicPolicy) cleanPools() error {
1074
1086
return fmt .Errorf ("calculate machineState by podEntries failed with error: %v" , err )
1075
1087
}
1076
1088
1077
- p .state .SetPodEntries (podEntries )
1078
- p .state .SetMachineState (machineState )
1089
+ p .state .SetPodEntries (podEntries , false )
1090
+ p .state .SetMachineState (machineState , false )
1091
+ if err := p .state .StoreState (); err != nil {
1092
+ general .ErrorS (err , "store state failed" )
1093
+ }
1079
1094
} else {
1080
1095
general .Infof ("there is no pool to delete" )
1081
1096
}
@@ -1105,7 +1120,7 @@ func (p *DynamicPolicy) initReservePool() error {
1105
1120
TopologyAwareAssignments : topologyAwareAssignments ,
1106
1121
OriginalTopologyAwareAssignments : machine .DeepcopyCPUAssignment (topologyAwareAssignments ),
1107
1122
}
1108
- p .state .SetAllocationInfo (commonstate .PoolNameReserve , commonstate .FakedContainerName , curReserveAllocationInfo )
1123
+ p .state .SetAllocationInfo (commonstate .PoolNameReserve , commonstate .FakedContainerName , curReserveAllocationInfo , true )
1109
1124
1110
1125
return nil
1111
1126
}
@@ -1173,7 +1188,7 @@ func (p *DynamicPolicy) initReclaimPool() error {
1173
1188
TopologyAwareAssignments : topologyAwareAssignments ,
1174
1189
OriginalTopologyAwareAssignments : machine .DeepcopyCPUAssignment (topologyAwareAssignments ),
1175
1190
}
1176
- p .state .SetAllocationInfo (commonstate .PoolNameReclaim , commonstate .FakedContainerName , curPoolAllocationInfo )
1191
+ p .state .SetAllocationInfo (commonstate .PoolNameReclaim , commonstate .FakedContainerName , curPoolAllocationInfo , true )
1177
1192
} else {
1178
1193
general .Infof ("exist initial %s: %s" , commonstate .PoolNameReclaim , reclaimedAllocationInfo .AllocationResult .String ())
1179
1194
}
0 commit comments