Published on

Kubernetes 环境配置

Authors

1. 集群安装

1. 修改主机名称

sudo hostnamectl set-hostname {hostname}

2. 关闭swap分区

# 永久关闭swap分区
sudo sed -ri 's/.*swap.*/#&/' /etc/fstab

3. 安装containerd

sudo apt-get remove docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin -y

# Add Docker's official GPG key:
sudo apt-get update
sudo apt-get install ca-certificates curl
install -m 0755 -d /etc/apt/keyrings
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg
sudo chmod a+r /etc/apt/keyrings/docker.gpg
echo \
  "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://mirrors.tuna.tsinghua.edu.cn/docker-ce/linux/ubuntu \
  "$(. /etc/os-release && echo "$VERSION_CODENAME")" stable" | \
  tee /etc/apt/sources.list.d/docker.list > /dev/null
sudo apt-get update

sudo apt-get install containerd.io  -y

将本目录下的config.toml文件替换,位置在:/etc/containerd,完成后重启containerd

disabled_plugins = []
imports = []
oom_score = 0
plugin_dir = ""
required_plugins = []
root = "/var/lib/containerd"
state = "/run/containerd"
temp = ""
version = 2

[cgroup]
path = ""

[debug]
address = ""
format = ""
gid = 0
level = ""
uid = 0

[grpc]
address = "/run/containerd/containerd.sock"
gid = 0
max_recv_message_size = 16777216
max_send_message_size = 16777216
tcp_address = ""
tcp_tls_ca = ""
tcp_tls_cert = ""
tcp_tls_key = ""
uid = 0

[metrics]
address = ""
grpc_histogram = false

[plugins]

[plugins."io.containerd.gc.v1.scheduler"]
deletion_threshold = 0
mutation_threshold = 100
pause_threshold = 0.02
schedule_delay = "0s"
startup_delay = "100ms"

[plugins."io.containerd.grpc.v1.cri"]
device_ownership_from_security_context = false
disable_apparmor = false
disable_cgroup = false
disable_hugetlb_controller = true
disable_proc_mount = false
disable_tcp_service = true
enable_selinux = false
enable_tls_streaming = false
enable_unprivileged_icmp = false
enable_unprivileged_ports = false
ignore_image_defined_volumes = false
max_concurrent_downloads = 3
max_container_log_line_size = 16384
netns_mounts_under_state_dir = false
restrict_oom_score_adj = false
sandbox_image = "registry.aliyuncs.com/google_containers/pause:3.9"
selinux_category_range = 1024
stats_collect_period = 10
stream_idle_timeout = "4h0m0s"
stream_server_address = "127.0.0.1"
stream_server_port = "0"
systemd_cgroup = false
tolerate_missing_hugetlb_controller = true
unset_seccomp_profile = ""

[plugins."io.containerd.grpc.v1.cri".cni]
bin_dir = "/opt/cni/bin"
conf_dir = "/etc/cni/net.d"
conf_template = ""
ip_pref = ""
max_conf_num = 1

[plugins."io.containerd.grpc.v1.cri".containerd]
default_runtime_name = "runc"
disable_snapshot_annotations = true
discard_unpacked_layers = false
ignore_rdt_not_enabled_errors = false
no_pivot = false
snapshotter = "overlayfs"

[plugins."io.containerd.grpc.v1.cri".containerd.default_runtime]
base_runtime_spec = ""
cni_conf_dir = ""
cni_max_conf_num = 0
container_annotations = []
pod_annotations = []
privileged_without_host_devices = false
runtime_engine = ""
runtime_path = ""
runtime_root = ""
runtime_type = ""

[plugins."io.containerd.grpc.v1.cri".containerd.default_runtime.options]

[plugins."io.containerd.grpc.v1.cri".containerd.runtimes]

[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
base_runtime_spec = ""
cni_conf_dir = ""
cni_max_conf_num = 0
container_annotations = []
pod_annotations = []
privileged_without_host_devices = false
runtime_engine = ""
runtime_path = ""
runtime_root = ""
runtime_type = "io.containerd.runc.v2"

[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
BinaryName = ""
CriuImagePath = ""
CriuPath = ""
CriuWorkPath = ""
IoGid = 0
IoUid = 0
NoNewKeyring = false
NoPivotRoot = false
Root = ""
ShimCgroup = ""
SystemdCgroup = true

[plugins."io.containerd.grpc.v1.cri".containerd.untrusted_workload_runtime]
base_runtime_spec = ""
cni_conf_dir = ""
cni_max_conf_num = 0
container_annotations = []
pod_annotations = []
privileged_without_host_devices = false
runtime_engine = ""
runtime_path = ""
runtime_root = ""
runtime_type = ""

[plugins."io.containerd.grpc.v1.cri".containerd.untrusted_workload_runtime.options]

[plugins."io.containerd.grpc.v1.cri".image_decryption]
key_model = "node"

[plugins."io.containerd.grpc.v1.cri".registry]
config_path = ""

[plugins."io.containerd.grpc.v1.cri".registry.auths]

[plugins."io.containerd.grpc.v1.cri".registry.configs]

[plugins."io.containerd.grpc.v1.cri".registry.headers]

[plugins."io.containerd.grpc.v1.cri".registry.mirrors]

[plugins."io.containerd.grpc.v1.cri".registry.mirrors."docker.io"]
endpoint = ["https://docker.m.daocloud.io"]

[plugins."io.containerd.grpc.v1.cri".registry.mirrors."registry.k8s.io"]
endpoint = ["https://k8s.m.daocloud.io"]

[plugins."io.containerd.grpc.v1.cri".registry.mirrors."k8s.gcr.io"]
endpoint = ["https://k8s-gcr.m.daocloud.io"]

[plugins."io.containerd.grpc.v1.cri".registry.mirrors."quay.io"]
endpoint = ["https://quay.m.daocloud.io"]

[plugins."io.containerd.grpc.v1.cri".registry.mirrors."gcr.io"]
endpoint = ["https://gcr.m.daocloud.io"]

[plugins."io.containerd.grpc.v1.cri".x509_key_pair_streaming]
tls_cert_file = ""
tls_key_file = ""

[plugins."io.containerd.internal.v1.opt"]
path = "/opt/containerd"

[plugins."io.containerd.internal.v1.restart"]
interval = "10s"

[plugins."io.containerd.internal.v1.tracing"]
sampling_ratio = 1.0
service_name = "containerd"

[plugins."io.containerd.metadata.v1.bolt"]
content_sharing_policy = "shared"

[plugins."io.containerd.monitor.v1.cgroups"]
no_prometheus = false

[plugins."io.containerd.runtime.v1.linux"]
no_shim = false
runtime = "runc"
runtime_root = ""
shim = "containerd-shim"
shim_debug = false

[plugins."io.containerd.runtime.v2.task"]
platforms = ["linux/amd64"]
sched_core = false

[plugins."io.containerd.service.v1.diff-service"]
default = ["walking"]

[plugins."io.containerd.service.v1.tasks-service"]
rdt_config_file = ""

[plugins."io.containerd.snapshotter.v1.aufs"]
root_path = ""

[plugins."io.containerd.snapshotter.v1.btrfs"]
root_path = ""

[plugins."io.containerd.snapshotter.v1.devmapper"]
async_remove = false
base_image_size = ""
discard_blocks = false
fs_options = ""
fs_type = ""
pool_name = ""
root_path = ""

[plugins."io.containerd.snapshotter.v1.native"]
root_path = ""

[plugins."io.containerd.snapshotter.v1.overlayfs"]
root_path = ""
upperdir_label = false

[plugins."io.containerd.snapshotter.v1.zfs"]
root_path = ""

[plugins."io.containerd.tracing.processor.v1.otlp"]
endpoint = ""
insecure = false
protocol = ""

[proxy_plugins]

[stream_processors]

[stream_processors."io.containerd.ocicrypt.decoder.v1.tar"]
accepts = ["application/vnd.oci.image.layer.v1.tar+encrypted"]
args = ["--decryption-keys-path", "/etc/containerd/ocicrypt/keys"]
env = [
  "OCICRYPT_KEYPROVIDER_CONFIG=/etc/containerd/ocicrypt/ocicrypt_keyprovider.conf",
]
path = "ctd-decoder"
returns = "application/vnd.oci.image.layer.v1.tar"

[stream_processors."io.containerd.ocicrypt.decoder.v1.tar.gzip"]
accepts = ["application/vnd.oci.image.layer.v1.tar+gzip+encrypted"]
args = ["--decryption-keys-path", "/etc/containerd/ocicrypt/keys"]
env = [
  "OCICRYPT_KEYPROVIDER_CONFIG=/etc/containerd/ocicrypt/ocicrypt_keyprovider.conf",
]
path = "ctd-decoder"
returns = "application/vnd.oci.image.layer.v1.tar+gzip"

[timeouts]
"io.containerd.timeout.bolt.open" = "0s"
"io.containerd.timeout.shim.cleanup" = "5s"
"io.containerd.timeout.shim.load" = "5s"
"io.containerd.timeout.shim.shutdown" = "3s"
"io.containerd.timeout.task.state" = "2s"

[ttrpc]
address = ""
gid = 0
uid = 0
sudo systemctl enable containerd --now
sudo systemctl daemon-reload && sudo systemctl restart containerd

4. 安装CNI

sudo mkdir -p /opt/cni/bin
CNI_VERSION="v1.5.1" # 注意此处的版本号
ARCH="amd64"

sudo curl -L "https://ghp.ci/https://github.com/containernetworking/plugins/releases/download/${CNI_VERSION}/cni-plugins-linux-${ARCH}-${CNI_VERSION}.tgz" | sudo tar -C /opt/cni/bin -xz

5. 安装kubeadm、kubelet、kubectl

sudo apt-get update
sudo apt-get install -y apt-transport-https ca-certificates curl gpg
K8S_VERSION="v1.28"

curl -fsSL https://pkgs.k8s.io/core:/stable:/${K8S_VERSION}/deb/Release.key | gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg

新建/etc/apt/sources.list.d/kubernetes.list,填写以下内容,注意版本号要与密钥的版本号一致:

deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://mirrors.tuna.tsinghua.edu.cn/kubernetes/core:/stable:/v1.28/deb/ /
# deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://mirrors.tuna.tsinghua.edu.cn/kubernetes/addons:/cri-o:/stable:/v1.28/deb/ /

安装并锁定kubeadm、kubelet、kubectl

sudo apt-get update
sudo apt-get install -y kubelet kubeadm kubectl
sudo apt-mark hold kubelet kubeadm kubectl

6. 主节点初始化

集群的配置已经编写在 init-default.yaml文件中,需要替换几处配置:

apiVersion: kubeadm.k8s.io/v1beta4
bootstrapTokens:
  - groups:
      - system:bootstrappers:kubeadm:default-node-token
    token: abcdef.0123456789abcdef
    ttl: 24h0m0s
    usages:
      - signing
      - authentication
kind: InitConfiguration
localAPIEndpoint:
  advertiseAddress: 192.168.1.2
  bindPort: 6443
nodeRegistration:
  criSocket: unix:///var/run/containerd/containerd.sock
  imagePullPolicy: IfNotPresent
  imagePullSerial: true
  name: k8s-master
  taints:
    - effect: NoSchedule
      key: node-role.kubernetes.io/control-plane
timeouts:
  controlPlaneComponentHealthCheck: 4m0s
  discovery: 5m0s
  etcdAPICall: 2m0s
  kubeletHealthCheck: 4m0s
  kubernetesAPICall: 1m0s
  tlsBootstrap: 5m0s
  upgradeManifests: 5m0s
---
apiServer: {}
apiVersion: kubeadm.k8s.io/v1beta4
caCertificateValidityPeriod: 87600h0m0s
certificateValidityPeriod: 8760h0m0s
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controllerManager: {}
dns: {}
encryptionAlgorithm: RSA-2048
etcd:
  local:
    dataDir: /var/lib/etcd
imageRepository: registry.aliyuncs.com/google_containers
kind: ClusterConfiguration
kubernetesVersion: v1.32.3
networking:
  dnsDomain: cluster.local
  serviceSubnet: 10.96.0.0/12
proxy: {}
scheduler: {}

替换完成后执行以下命令进行初始化:

sudo kubeadm init --config init.yaml

mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config

7. 从节点加入集群

主节点重新生成token,并复制到从节点上执行:

kubeadm token create --print-join-command

2. 集群升级

集群的版本升级需要逐步进行,先升级主节点,再升级工作节点,不可跨版本升级!

1. 确定升级版本:

apt update
apt-cache madison kubeadm

注:从1.28版本开始,每个版本都需要修改apt的配置,否则无法查看到新的版本,具体可以查看:https://mirrors.tuna.tsinghua.edu.cn/help/kubernetes/

输出类似如下:

   kubeadm |  1.28.2-00 | https://mirrors.aliyun.com/kubernetes/apt kubernetes-xenial/main amd64 Packages
   kubeadm |  1.28.1-00 | https://mirrors.aliyun.com/kubernetes/apt kubernetes-xenial/main amd64 Packages
   kubeadm |  1.28.0-00 | https://mirrors.aliyun.com/kubernetes/apt kubernetes-xenial/main amd64 Packages
   kubeadm |  1.27.6-00 | https://mirrors.aliyun.com/kubernetes/apt kubernetes-xenial/main amd64 Packages
   kubeadm |  1.27.5-00 | https://mirrors.aliyun.com/kubernetes/apt kubernetes-xenial/main amd64 Packages
   kubeadm |  1.27.4-00 | https://mirrors.aliyun.com/kubernetes/apt kubernetes-xenial/main amd64 Packages
   kubeadm |  1.27.3-00 | https://mirrors.aliyun.com/kubernetes/apt kubernetes-xenial/main amd64 Packages
   kubeadm |  1.27.2-00 | https://mirrors.aliyun.com/kubernetes/apt kubernetes-xenial/main amd64 Packages
   kubeadm |  1.27.1-00 | https://mirrors.aliyun.com/kubernetes/apt kubernetes-xenial/main amd64 Packages
   kubeadm |  1.27.0-00 | https://mirrors.aliyun.com/kubernetes/apt kubernetes-xenial/main amd64 Packages
   ......

2. 升级kubeadm

apt-mark unhold kubeadm && \
apt-get update && apt-get install -y kubeadm='1.28.x-*' && \ # <-- 这里替换为升级的版本号
apt-mark hold kubeadm

验证版本:

kubeadm version

检查集群是否可升级:

kubeadm upgrade plan

执行命令升级:

# 首个主节点:
sudo kubeadm upgrade apply v1.28.x # <-- 这里替换为升级的版本号

# 其余主节点:
sudo kubeadm upgrade node

清空节点,让节点进入维护状态:

# 将 <node-to-drain> 替换为你要腾空的控制面节点名称
kubectl drain <node-to-drain> --ignore-daemonsets

3. 升级kubelet 和 kubectl

apt-mark unhold kubelet kubectl && \
apt-get update && apt-get install -y kubelet='1.28.x-*' kubectl='1.28.x-*' && \ # <-- 这里的版本好要跟上面保持一致
apt-mark hold kubelet kubectl

重启kubelet

sudo systemctl daemon-reload
sudo systemctl restart kubelet

退出维护状态并上线:

# 将 <node-to-uncordon> 替换为你的节点名称
kubectl uncordon <node-to-uncordon>

3. 其他问题

1. 集群证书升级

集群的证书默认有效期只有一年,需要定期更新集群证书,查看证书剩余有效期:

aosan@k8s-test-master:~$ sudo kubeadm certs check-expiration
[check-expiration] Reading configuration from the cluster...
[check-expiration] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -o yaml'

CERTIFICATE                EXPIRES                  RESIDUAL TIME   CERTIFICATE AUTHORITY   EXTERNALLY MANAGED
admin.conf                 Sep 14, 2025 07:18 UTC   268d            ca                      no
apiserver                  Sep 14, 2025 07:18 UTC   268d            ca                      no
apiserver-etcd-client      Sep 14, 2025 07:18 UTC   268d            etcd-ca                 no
apiserver-kubelet-client   Sep 14, 2025 07:18 UTC   268d            ca                      no
controller-manager.conf    Sep 14, 2025 07:19 UTC   268d            ca                      no
etcd-healthcheck-client    Sep 14, 2025 07:18 UTC   268d            etcd-ca                 no
etcd-peer                  Sep 14, 2025 07:18 UTC   268d            etcd-ca                 no
etcd-server                Sep 14, 2025 07:18 UTC   268d            etcd-ca                 no
front-proxy-client         Sep 14, 2025 07:18 UTC   268d            front-proxy-ca          no
scheduler.conf             Sep 14, 2025 07:19 UTC   268d            ca                      no

CERTIFICATE AUTHORITY   EXPIRES                  RESIDUAL TIME   EXTERNALLY MANAGED
ca                      Sep 12, 2034 07:18 UTC   9y              no
etcd-ca                 Sep 12, 2034 07:18 UTC   9y              no
front-proxy-ca          Sep 12, 2034 07:18 UTC   9y              no

备份文件:

sudo cp -r /etc/kubernetes /etc/kubernetes.old

更新证书:

sudo kubeadm certs renew all

更新 ~/.kube/config 文件

sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config