Terminating状态的pod是怎么被删除的

一个Terminating状态(数据并没有直接从etcd删除,而是设置了DeletionTimestamp)的Pod是怎么删除的呢?我们知道删除一个资源对象是要调用APIServer接口从etcd中将数据删除。
假如要调用接口那请求发起方又是哪个组件呢?

创建一个Terminating状态的Pod

先执行 `k apply -f b2.yaml` 创建Pod,然后执行 `k delete pod b2`,你会得到一个处于Terminating状态的Pod。

# b2.yaml
apiVersion: v1
kind: Pod
metadata:
  name: b2
  finalizers:
  - kubernetes
spec:
  containers:
  - name: app
    image: hysyeah/my-curl:v1

删除Terminating状态的pod

执行 `k edit pod b2` 将finalizers移除,然后你会发现pod b2被立马删除了。
你可以通过命令 `watch -n 1 k get pod` 和 `watch -n 1 etcdctl get /registry/pods/default/b2 -w json` 来监听对应的资源是否被删除。

关于etcdctl的使用可以查看etcd官方文档。

验证

为了验证猜测(删除请求可能是由kube-controller-manager发起的),首先我把k8s集群中的kube-controller-manager给移除了。然后对Terminating状态的pod执行移除finalizers的操作,发现pod仍然被删除,可见这跟kube-controller-manager无关。
然后猜测可能是kubelet发送的请求,首先在kubelet中并没有发现删除pod的代码,然后经过验证发现这与kubelet也没有关系。

最后通过 `k edit pod b2 --v=9` 观察这条操作调用了什么接口。

源码分析
先看删除资源时调用的Delete方法:
// 当我们删除一个带有finalizer的pod时并不会马上将etcd中的数据删除
// staging/src/k8s.io/apiserver/pkg/registry/generic/registry/store.go
func (e *Store) Delete(ctx context.Context, name string, deleteValidation rest.ValidateObjectFunc, options *metav1.DeleteOptions) (runtime.Object, bool, error) {
...
// 这里会设置DeletionTimestamp的值
graceful, pendingGraceful, err := rest.BeforeDelete(e.DeleteStrategy, ctx, obj, options)
if err != nil {
return nil, false, err
}
// this means finalizers cannot be updated via DeleteOptions if a deletion is already pending
if pendingGraceful {
out, err := e.finalizeDelete(ctx, obj, false, options)
return out, false, err
}
// check if obj has pending finalizers
accessor, err := meta.Accessor(obj)
if err != nil {
return nil, false, apierrors.NewInternalError(err)
}
pendingFinalizers := len(accessor.GetFinalizers()) != 0
var ignoreNotFound
// 用于判断资源是否能被立即删除
var deleteImmediately bool = true
var lastExisting, out runtime.Object

// Handle combinations of graceful deletion and finalization by issuing
// the correct updates.
shouldUpdateFinalizers, _ := deletionFinalizersForGarbageCollection(ctx, e, accessor, options)
// TODO: remove the check, because we support no-op updates now.
if graceful || pendingFinalizers || shouldUpdateFinalizers {
// / updateForGracefulDeletionAndFinalizers 函数用于为对象进行优雅删除和最终化的更新,
// 它设置删除时间戳和优雅删除的宽限期秒数(graceful deletion),
// 并更新最终器(finalizers)列表

// updateForGracefulDeletionAndFinalizers 通过设置DeletionTimestamp和grace perios seconds和更新finalizers列表
// 以实现优雅删除和终结
err, ignoreNotFound, deleteImmediately, out, lastExisting = e.updateForGracefulDeletionAndFinalizers(ctx, name, key, options, preconditions, deleteValidation, obj)
// Update the preconditions.ResourceVersion if set since we updated the object.
if err == nil && deleteImmediately && preconditions.ResourceVersion != nil {
accessor, err = meta.Accessor(out)
if err != nil {
return out, false, apierrors.NewInternalError(err)
}
resourceVersion := accessor.GetResourceVersion()
preconditions.ResourceVersion = &resourceVersion
}
}

// !deleteImmediately covers all cases where err != nil. We keep both to be future-proof.
// 如果不能立即删除,直接返回。并不会删除etcd中的数据,pod状态变为Terminating
if !deleteImmediately || err != nil {
return out, false, err
}

// Going further in this function is not useful when we are
// performing a dry-run request. Worse, it will actually
// override "out" with the version of the object in database
// that doesn't have the finalizer and deletiontimestamp set
// (because the update above was dry-run too). If we already
// have that version available, let's just return it now,
// otherwise, we can call dry-run delete that will get us the
// latest version of the object.
if dryrun.IsDryRun(options.DryRun) && out != nil {
return out, true, nil
}

// delete immediately, or no graceful deletion supported
klog.V(6).InfoS("Going to delete object from registry", "object", klog.KRef(genericapirequest.NamespaceValue(ctx), name))
out = e.NewFunc()
if err := e.Storage.Delete(ctx, key, out, &preconditions, storage.ValidateObjectFunc(deleteValidation), dryrun.IsDryRun(options.DryRun), nil); err != nil {
// Please refer to the place where we set ignoreNotFound for the reason
// why we ignore the NotFound error .
if storage.IsNotFound(err) && ignoreNotFound && lastExisting != nil {
// The lastExisting object may not be the last state of the object
// before its deletion, but it's the best approximation.
out, err := e.finalizeDelete(ctx, lastExisting, true, options)
return out, true, err
}
return nil, false, storeerr.InterpretDeleteError(err, qualifiedResource, name)
}
out, err = e.finalizeDelete(ctx, out, true, options)
return out, true, err
}
再看更新资源(移除finalizers)时调用的Update方法:
// With a breakpoint set in the function below, running the edit operation and
// watching the etcd data shows that the data has not been deleted yet,
// so the deletion logic must live here.
// staging/src/k8s.io/apiserver/pkg/registry/generic/registry/store.go
//
// Update applies objInfo to the object called name through
// e.Storage.GuaranteedUpdate. This listing is an excerpt: the body of the
// update callback is omitted, so the closure is not closed in the text below.
//
// If the update ends with errEmptiedFinalizers, the object is removed through
// deleteWithoutFinalizers instead of being stored. Any other error is
// interpreted as a create or an update error depending on creating. On success
// the AfterCreate or AfterUpdate hook and the Decorator run when they are set,
// and out is returned together with creating.
// NOTE(review): errEmptiedFinalizers is presumably returned by the omitted
// callback when the update removes the last finalizer of an object that is
// already marked for deletion; confirm against the full source.
func (e *Store) Update(ctx context.Context, name string, objInfo rest.UpdatedObjectInfo, createValidation rest.ValidateObjectFunc, updateValidation rest.ValidateObjectUpdateFunc, forceAllowCreate bool, options *metav1.UpdateOptions) (runtime.Object, bool, error) {
key, err := e.KeyFunc(ctx, name)
if err != nil {
return nil, false, err
}

var (
creatingObj runtime.Object
creating = false
)

qualifiedResource := e.qualifiedResourceFromContext(ctx)
// Copy the caller's UID and ResourceVersion preconditions, if any, into the
// storage-level preconditions.
storagePreconditions := &storage.Preconditions{}
if preconditions := objInfo.Preconditions(); preconditions != nil {
storagePreconditions.UID = preconditions.UID
storagePreconditions.ResourceVersion = preconditions.ResourceVersion
}

out := e.NewFunc()
// deleteObj is only used in case a deletion is carried out
var deleteObj runtime.Object
err = e.Storage.GuaranteedUpdate(ctx, key, out, true, storagePreconditions, func(existing runtime.Object, res storage.ResponseMeta) (runtime.Object, *uint64, error) {
existingResourceVersion, err := e.Storage.Versioner().ObjectResourceVersion(existing)

...// part of the code is omitted


if err != nil {
// delete the object
// Execution eventually ends up in this branch.
if err == errEmptiedFinalizers {
// deleteWithoutFinalizers removes the data from etcd.
return e.deleteWithoutFinalizers(ctx, name, key, deleteObj, storagePreconditions, newDeleteOptionsFromUpdateOptions(options))
}
if creating {
err = storeerr.InterpretCreateError(err, qualifiedResource, name)
err = rest.CheckGeneratedNameError(ctx, e.CreateStrategy, err, creatingObj)
} else {
err = storeerr.InterpretUpdateError(err, qualifiedResource, name)
}
return nil, false, err
}

// Run the hook that matches the operation that was actually performed.
if creating {
if e.AfterCreate != nil {
e.AfterCreate(out, newCreateOptionsFromUpdateOptions(options))
}
} else {
if e.AfterUpdate != nil {
e.AfterUpdate(out, options)
}
}
if e.Decorator != nil {
e.Decorator(out)
}
return out, creating, nil
}

// deleteWithoutFinalizers deletes the object stored under key as the result of
// an UPDATE request and then runs finalizeDelete.
//
// On success, and also when storage reports the object as not found, it
// returns obj (the object carried by the request) with a false flag and the
// error from finalizeDelete. Any other storage error is returned through
// storeerr.InterpretDeleteError with a nil object.
func (e *Store) deleteWithoutFinalizers(ctx context.Context, name, key string, obj runtime.Object, preconditions *storage.Preconditions, options *metav1.DeleteOptions) (runtime.Object, bool, error) {
	deleted := e.NewFunc()
	klog.V(6).InfoS("Going to delete object from registry, triggered by update", "object", klog.KRef(genericapirequest.NamespaceValue(ctx), name))

	// rest.ValidateAllObjectFunc is used because this is an UPDATE request that
	// has already passed admission for the UPDATE verb.
	isDryRun := dryrun.IsDryRun(options.DryRun)
	deleteErr := e.Storage.Delete(ctx, key, deleted, preconditions, rest.ValidateAllObjectFunc, isDryRun, nil)

	switch {
	case deleteErr == nil:
		// A client whose PUT succeeded expects an updated object, while
		// finalizeDelete yields a metav1.Status, so the request object is
		// handed back instead.
		_, finalizeErr := e.finalizeDelete(ctx, deleted, true, options)
		return obj, false, finalizeErr
	case storage.IsNotFound(deleteErr):
		// Deletion is racy: several update requests may remove all finalizers
		// from the same object, so NotFound is ignored. The request object is
		// returned for the same reason as in the success case.
		_, finalizeErr := e.finalizeDelete(ctx, obj, true, options)
		return obj, false, finalizeErr
	default:
		return nil, false, storeerr.InterpretDeleteError(deleteErr, e.qualifiedResourceFromContext(ctx), name)
	}
}
小结

Terminating状态的Pod既不是由kubelet,也不是由kube-controller-manager发起请求删除的,而是当更新资源(移除finalizers)时,kube-apiserver在处理Update请求的过程中进入deleteWithoutFinalizers,从而将数据从etcd中删除。


REF:
1.staging/src/k8s.io/apiserver/pkg/registry/generic/registry/store.go