边缘节点docker和k3s工作路径修改方案


检查

  1. 检查docker信息
docker info
# 参考输出
# ...
# Server:
#  Containers: 36
#   Running: 34
#   Paused: 0
#   Stopped: 2
#  Images: 7
#  ...
#  Docker Root Dir: /var/lib/docker
#  ... 
  1. 检查镜像
docker images
# 参考输出
#REPOSITORY   TAG                                                IMAGE ID            CREATED             SIZE
#swr.cn-global-1.cloud.nisco.cn/com-huaweicloud-lmstudio/iit-template-euler-arm-python3.9-tf2.6.5-cpu   1.0.0-dev_for_pangustudio-20250711151310.a52160b   e0be962b3d0a        4 months ago        5.02GB
#swr.cn-global-1.cloud.nisco.cn/modelarts-applications/89458d2f-a25c-469b-b116-972fd986d575             0.0.1                                              af3ecb786853        5 months ago        9.39GB
#...
  1. 检查运行的容器
docker ps
# 参考输出
# CONTAINER ID        IMAGE                        COMMAND                  CREATED             STATUS            PORTS               NAMES
# 451be91a626e        a4f35fb8f53b                 "/bin/sh -c './init_…"   6 days ago          Up 6 days k8s_lms-mod-f44ae76d-1741834094454_lms-svc-f44ae76d-1741834220191-7bf86dbb64-52tjh_hilens_39d61325-bd0e-47d3-8e28-cb95e1e4f02c_0
# 17106714224a        rancher/mirrored-pause:3.5   "/pause"                 6 days ago          Up 6 days k8s_POD_lms-svc-f44ae76d-1741834220191-7bf86dbb64-52tjh_hilens_39d61325-bd0e-47d3-8e28-cb95e1e4f02c_0
  1. 检查selinux状态
getenforce
# 输出应为 Permissive 或 Disabled,**不能是Enforcing**,不然会导致服务启动失败
# 如果输出是Enforcing,使用如下命令临时切换为Permissive(仅当前运行期间生效,重启后失效;如需永久生效,还需将 /etc/selinux/config 中的 SELINUX 改为 permissive 或 disabled)
setenforce 0
  1. 检查目标目录的可用容量
df -h /home
# 参考输出
# Filesystem                  Size  Used Avail Use% Mounted on
# /dev/mapper/openeuler-home  805G   91G  673G  12% /home
# 目标目录可用空间应 > 200G

操作

  1. 获取待操作节点信息,并以root用户登录节点(root用户的密码请线下找管理员获取)。涉及的节点如下:
  • 191.168.163.232
  • 191.168.163.233
  1. 选定目标路径,并创建该路径,这里目标路径选定为/home/edge-infer
mkdir /home/edge-infer
  1. 停止edge-agent(hdad服务)和docker服务
systemctl stop hdad

systemctl stop docker.socket
systemctl stop docker
  1. 将原有的docker、kubelet、rancher目录复制到新路径下
cp -rp /var/lib/docker /home/edge-infer/docker
cp -rp /var/lib/kubelet /home/edge-infer/kubelet
cp -rp /var/lib/rancher /home/edge-infer/rancher
  1. 修改docker配置文件
vi /etc/docker/daemon.json
# 增加一个新的配置项 data-root, 其值为 /home/edge-infer/docker

修改完成后,/etc/docker/daemon.json 的内容参考如下(除新增的 data-root 外,其余配置项保持节点原有内容不变):

{
        "default-runtime": "ascend",
        "insecure-registries": [
                "swr.cn-global-1.cloud.nisco.cn"
        ],
        "runtimes": {
                "ascend": {
                        "path": "/usr/local/Ascend/Ascend-Docker-Runtime/ascend-docker-runtime",
                        "runtimeArgs": []
                }
        },
        "data-root": "/home/edge-infer/docker"
}
  1. 修改 k3s 配置文件
vi /etc/hilens/hda.conf 
# 增加如下两个新的配置项
hilens.k3s.server.kubelet-arg=root-dir=/home/edge-infer/kubelet
hilens.k3s.server.data-dir=/home/edge-infer/rancher/k3s
  1. 启动服务
systemctl daemon-reload
systemctl start docker

systemctl start docker.socket
systemctl start hdad
  1. 检查环境是否正常

需等待一段时间,直到所有pod的状态都变为Running

kubectl get pod -A
# 参考输出
# NAMESPACE     NAME                                              READY   STATUS    RESTARTS   AGE
# hilens        lms-svc-17f232c3-1749178767155-7b6f4b7959-s78sr   1/1     Running   0          6d19h
# hilens        lms-svc-1d2ef732-1748419975652-74584f86dd-8lbv7   1/1     Running   2          166d
# hilens        lms-svc-20838390-1756353757901-c987bb7f8-bh5hw    1/1     Running   0          74d
# hilens        lms-svc-64095c68-1741834130604-767886b97b-4hj7c   1/1     Running   0          74d
  1. 清理原有文件(删除后无法恢复,请先确认上一步检查结果正常,且 docker info 输出的 Docker Root Dir 已变为 /home/edge-infer/docker,再执行以下命令)
rm -rf /var/lib/docker
rm -rf /var/lib/kubelet
rm -rf /var/lib/rancher