先删除运行时间长的pod
# 脚本
#!/bin/bash
# 20 */1 * * * PATH=/usr/local/bin:/usr/bin:/bin /data/cron_scripts/set_k8s_annotate.sh >> /data/cron_scripts/set_k8s_annotate.log
# Namespace passed to every kubectl call in this script (-n).
NAMESPACE="birenchong-prod"
# Annotation key that update_pod_annotations writes on each selected pod.
ANNOTATE_KEY="controller.kubernetes.io/pod-deletion-cost"
# Define a function to handle the repeated code
#######################################
# Annotate long-running pods of one app so that older pods carry a lower
# (more negative) deletion cost.
# Globals:   NAMESPACE (read), ANNOTATE_KEY (read)
# Arguments: $1 - value of the "app" label used to select pods
#            $2 - age threshold in whole hours; only pods that have been
#                 running for more than this many hours are annotated
# Outputs:   one "<pod> <hours>" line on stdout per annotated pod;
#            diagnostics on stderr
# Returns:   1 if the pod list cannot be fetched
#######################################
update_pod_annotations() {
  local app_label=$1
  local threshold_hours=$2
  local pods now_ts pod start_time start_ts run_hours

  # List Running pods as "<name> <startTime>" lines, sorted by creation time.
  if ! pods=$(kubectl get pods -n "$NAMESPACE" -l "app=${app_label}" \
    --sort-by=.metadata.creationTimestamp \
    -o jsonpath='{range .items[?(@.status.phase=="Running")]}{.metadata.name} {.status.startTime}{"\n"}{end}'); then
    printf 'Failed to list pods for app=%s\n' "$app_label" >&2
    return 1
  fi

  # Current time as a Unix timestamp (seconds).
  now_ts=$(date -u +%s)

  while read -r pod start_time; do
    # The here-string always yields at least one (possibly blank) line. An
    # empty start time would be parsed by date as "today 00:00", so skip
    # incomplete rows instead of annotating a pod with an empty name.
    [[ -n "$pod" && -n "$start_time" ]] || continue

    # Convert the start time to a Unix timestamp (seconds).
    if ! start_ts=$(date -u -d "$start_time" +%s); then
      printf 'Skipping pod %s: cannot parse start time %s\n' \
        "$pod" "$start_time" >&2
      continue
    fi

    # Running time in whole hours.
    run_hours=$(( (now_ts - start_ts) / 3600 ))

    if (( run_hours > threshold_hours )); then
      # The annotation value is the negated running time in hours.
      if kubectl annotate pod "$pod" -n "$NAMESPACE" \
        "${ANNOTATE_KEY}=-${run_hours}" --overwrite; then
        echo "$pod $run_hours"
      else
        printf 'Failed to annotate pod %s\n' "$pod" >&2
      fi
    fi
  done <<< "$pods"
}
# Print the run time, then process every app label with its threshold.
date
for target in \
  "birenchong-prod-java-api-8080 5" \
  "birenchong-prod-java-api 2" \
  "birenchong-prod-athena-api 2"; do
  # Each entry is "<app label> <threshold>"; split on the single space.
  update_pod_annotations "${target% *}" "${target##* }"
done
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# */5 * * * * PATH=/usr/local/bin:/usr/bin:/bin /srv/scripts/k8s/venv/bin/python3 /srv/scripts/k8s/set_k8s_annotate_based_on_node_and_runtime.py >> /srv/scripts/k8s/set_k8s_annotate_based_on_node_and_runtime.log 2>&1
from datetime import datetime, timezone
from pprint import pprint
from kubernetes import client, config
import subprocess
from math import floor
def calculate_time_passed(start_time_iso):
    """Return the whole hours elapsed since an ISO-8601 timestamp.

    Args:
        start_time_iso: ISO-8601 string, or None when the start time is
            unknown. A value without a UTC offset is interpreted as UTC; a
            value with an offset is honoured as written.

    Returns:
        The elapsed time in hours, rounded down, or None when the input is
        missing or cannot be parsed.
    """
    if start_time_iso is None:
        # Callers pass None for pods that report no start time.
        return None
    try:
        text = start_time_iso
        # datetime.fromisoformat() only accepts a trailing "Z" from
        # Python 3.11 on; normalise it so older interpreters work too.
        if text.endswith(("Z", "z")):
            text = text[:-1] + "+00:00"
        start_time = datetime.fromisoformat(text)
    except (AttributeError, TypeError, ValueError) as e:
        print(f"Error parsing start time: {e}")
        return None

    if start_time.tzinfo is None:
        # Naive timestamps are taken to be UTC. Aware ones keep their own
        # offset so the subtraction below stays correct.
        start_time = start_time.replace(tzinfo=timezone.utc)

    time_passed = datetime.now(timezone.utc) - start_time
    return floor(time_passed.total_seconds() / 3600)
def list_running_pods(api_instance, namespace, label_selector, threshold_time):
    """Collect one info dict for every Running pod matching a label selector.

    Pods that carry a deletion timestamp and pods whose phase is not
    "Running" are skipped.

    Args:
        api_instance: object exposing ``list_namespaced_pod(namespace,
            label_selector=...)``.
        namespace: namespace to list pods in.
        label_selector: label selector string forwarded to the API call.
        threshold_time: value stored unchanged under "threshold_time" in
            every returned dict.

    Returns:
        A list of dicts with the keys "name", "annotations_num" (always 0
        here), "current_annotations_num" (the existing pod-deletion-cost
        annotation value, or 0 when absent), "start_time" (ISO string or
        None), "node_name" and "threshold_time". The list is empty when the
        API call fails.
    """
    running_pods_info = []
    try:
        pods = api_instance.list_namespaced_pod(namespace,
                                                label_selector=label_selector)
    except client.exceptions.ApiException as e:
        print(f"Error fetching pods: {e}")
        return running_pods_info

    for pod in pods.items:
        # Skip pods that are already being deleted.
        if pod.metadata.deletion_timestamp:
            continue
        if pod.status.phase != "Running":
            continue

        # metadata.annotations is None (not an empty dict) for a pod that
        # has no annotations at all.
        annotations = pod.metadata.annotations or {}
        current_annotations_num = annotations.get(
            "controller.kubernetes.io/pod-deletion-cost", 0)

        start_time = (pod.status.start_time.isoformat()
                      if pod.status.start_time else None)

        running_pods_info.append({
            "name": pod.metadata.name,
            "annotations_num": 0,
            "current_annotations_num": current_annotations_num,
            "start_time": start_time,
            "node_name": pod.spec.node_name,
            "threshold_time": threshold_time
        })
    return running_pods_info
def update_pod_annotations(pod, namespace_name, annotate_key):
    """Set one annotation on a pod via ``kubectl annotate --overwrite``.

    Args:
        pod: dict with at least "name" (pod name) and "annotations_num"
            (the value to write).
        namespace_name: namespace the pod lives in.
        annotate_key: annotation key to set.

    Failures are printed and not raised, so one failing pod does not stop
    the caller from processing the remaining ones.
    """
    # An argument list (no shell=True) hands each value to kubectl verbatim,
    # so nothing in a name or value is ever re-interpreted by a shell.
    command = [
        "kubectl", "annotate", "pod", str(pod['name']),
        "-n", str(namespace_name),
        f"{annotate_key}={pod['annotations_num']}",
        "--overwrite",
    ]
    try:
        subprocess.run(command, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        # stderr is captured, so include it; otherwise the reason is lost.
        detail = (e.stderr or "").strip()
        suffix = f" ({detail})" if detail else ""
        print(f"Error updating annotations for pod {pod['name']}: {e}{suffix}")
    except OSError as e:
        # Raised when the kubectl executable itself cannot be started.
        print(f"Error updating annotations for pod {pod['name']}: {e}")
    else:
        print(f"Set pod {pod['name']} annotations to {pod['annotations_num']}")
# Script entry point: collect the Running pods for each configured app label,
# compute a pod-deletion-cost value per pod, and call kubectl only for pods
# whose stored annotation differs from the computed value.
# NOTE(review): leading indentation is missing from this copy of the script,
# so the nesting of the statements below cannot be read from the text;
# restore it from the original script before running.
if __name__ == '__main__':
# Log a separator and the current UTC time for this run.
pprint("==========")
print(datetime.now(timezone.utc))
pprint("----------")
# Load the kubeconfig for the Kubernetes client, then create the core API
# object used to list pods.
config.load_kube_config()
api_instance = client.CoreV1Api()
annotate_key = "controller.kubernetes.io/pod-deletion-cost"
running_pods_info = []
namespace_name = "birenchong-prod"
# One entry per app label; threshold_time is compared against the pod's
# running time in hours further down.
labels = [{
"app": "birenchong-prod-java-api-8080",
"threshold_time": 5
}, {
"app": "birenchong-prod-java-api",
"threshold_time": 2
}, {
"app": "birenchong-prod-athena-api",
"threshold_time": 2
}, {
"app": "birenchong-prod-birenchong-game",
"threshold_time": 5
}]
# Gather the pod info dicts of all labels into a single list.
for label in labels:
label_selector = f"app={label['app']}"
threshold_time = label['threshold_time']
pods_info = list_running_pods(api_instance, namespace_name,
label_selector, threshold_time)
running_pods_info.extend(pods_info)
# Count the collected pods per node name. Only pods gathered above are
# counted, not every pod scheduled on the node.
node_pod_num = {}
for pod in running_pods_info:
node_name = pod['node_name']
node_pod_num[node_name] = node_pod_num.get(node_name, 0) + 1
# pprint(node_pod_num)
# pprint("----------")
# Compute the new annotation value for every pod, starting from the 0 set by
# list_running_pods: subtract the running hours once they exceed the pod's
# threshold, and subtract a further 10 when the pod is the only collected pod
# on its node. The value is stored back as a string.
# NOTE(review): it is unclear from this copy whether the "== 1" check is
# nested under the running-time check or applies to every pod — confirm.
for pod in running_pods_info:
time_passed_hours = calculate_time_passed(pod['start_time'])
if time_passed_hours is not None and time_passed_hours > pod[
'threshold_time']:
pod['annotations_num'] = int(
pod['annotations_num']) - time_passed_hours
if node_pod_num[pod['node_name']] == 1:
pod['annotations_num'] -= 10
pod['annotations_num'] = str(pod['annotations_num'])
# pprint(running_pods_info)
# pprint("----------")
# Write the annotation only when the computed value differs from the stored
# one. NOTE(review): int() raises ValueError if the existing annotation value
# is not an integer string — confirm that cannot occur in this cluster.
for pod in running_pods_info:
if int(pod['annotations_num']) != int(pod['current_annotations_num']):
update_pod_annotations(pod, namespace_name, annotate_key)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
Last Updated: 2024/03/12, 17:16:32