1、系统环境
CentOS 7.6,配备 GPU V100 2块
2、新增node节点
2.1、开启GPU
[root@master conf]# vi common.yaml
# Container Engine Acceleration
# Use nvidia gpu acceleration in containers
# KubeSphere currently support Nvidia GPU V100 P100 1060 1080 1080Ti
# The driver version is 387.26,cuda is 9.1
nvidia_accelerator_enabled: true
nvidia_gpu_nodes:
- gpu-node01
2.2、在hosts.ini中添加node节点信息
## Add other registry.
docker_registry_mirrors:
[root@master1 conf]# ls
common.yaml common.yaml_bak hosts.ini hosts.ini_bak plugin-aliyun.yaml plugin-qingcloud.yaml plugin-tencentcloud.yaml
[root@master1 conf]# cat hosts.ini
; Parameters:
; ansible_connection: connection type to the target machine
; ansible_host: the host name of the target machine
; ip: ip address of the target machine
; ansible_user: the default user name for ssh connection
; ansible_ssh_pass: the password for ssh connection
; ansible_become_pass: the privilege escalation password to grant access
; ansible_port: the ssh port number, if not 22
; If installer is ran as non-root user who has sudo privilege, refer to the following sample configuration:
; e.g
; master ansible_connection=local ip=192.168.0.5 ansible_user=ubuntu ansible_become_pass=Qcloud@123
; node1 ansible_host=192.168.0.6 ip=192.168.0.6 ansible_user=ubuntu ansible_become_pass=Qcloud@123
; node2 ansible_host=192.168.0.8 ip=192.168.0.8 ansible_user=ubuntu ansible_become_pass=Qcloud@123
; As recommended as below sample configuration, use root account by default to install
[all]
master1 ansible_connection=local ip=192.168.0.6
node1 ansible_host=192.168.0.6 ip=192.168.0.6 ansible_user=ubuntu ansible_become_pass=Qcloud@123
node2 ansible_host=192.168.0.8 ip=192.168.0.8 ansible_user=ubuntu ansible_become_pass=Qcloud@123
gpu-node01 ansible_host=10.6.209.62 ip=10.6.209.62 ansible_ssh_pass=Uc!1X9MvzA
[local-registry]
master1
[kube-master]
master1
[kube-node]
#node1
#node2
gpu-node01
[etcd]
master1
[k8s-cluster:children]
kube-node
kube-master
2.3、开始执行添加node节点脚本
[root@master scripts]# pwd
/root/kubesphere-all-offline-v2.1.1/scripts
[root@master scripts]# ./add-nodes.sh
等待添加完成后,查看GPU节点为NotReady状态,且节点上的服务都不正常;查看日志发现缺少/usr/bin/nvidia-container-runtime
3、安装GPU驱动和nvidia docker 插件
3.1、安装nvidia docker 插件(由于没网络,选择离线安装,安装包已经下载)
[root@gpu-node01 tmp]# ls *.rpm
libnvidia-container1-1.2.0-1.x86_64.rpm nvidia-container-runtime-3.3.0-1.x86_64.rpm nvidia-docker2-2.4.0-1.noarch.rpm
libnvidia-container-tools-1.2.0-1.x86_64.rpm nvidia-container-toolkit-1.2.1-2.x86_64.rpm
[root@gpu-node01 tmp]# yum install -y libnvidia-container*
[root@gpu-node01 tmp]# yum install -y nvidia-container*
[root@gpu-node01 tmp]# yum install -y nvidia-docker*
3.2、下载GPU驱动
[root@gpu-node01 tmp]# wget https://cn.download.nvidia.cn/tesla/418.116.00/NVIDIA-Linux-x86_64-418.116.00.run
3.3、 禁用nouveau
[root@gpu-node01 ~]#vi /etc/modprobe.d/nouveau-blacklist.conf
blacklist nouveau
options nouveau modeset=0
备份原始image
[root@gpu-node01 ~]# mv /boot/initramfs-$(uname -r).img /boot/initramfs-$(uname -r)-nouveau.img
生成新的内核镜像
[root@gpu-node01 ~]# dracut /boot/initramfs-$(uname -r).img $(uname -r)
重启服务器
[root@gpu-node01 ~]# reboot
3.4、安装GPU驱动
[root@gpu-node01 ~]# ./NVIDIA-Linux-x86_64-418.116.00.run --kernel-source-path=/usr/src/kernels/3.10.0-1127.19.1.el7.x86_64/ -k $(uname -r)
3.5、docker服务添加参数
[root@gpu-node01 tmp]# vi /etc/systemd/system/docker.service
[Unit]
Description=Docker Application Container Engine
Documentation=http://docs.docker.com
After=network.target containerd.service
[Service]
Type=notify
Environment=GOTRACEBACK=crash
ExecReload=/bin/kill -s HUP $MAINPID
Delegate=yes
KillMode=process
ExecStart=/usr/bin/dockerd
--add-runtime nvidia=/usr/bin/nvidia-container-runtime
--default-runtime nvidia
$DOCKER_OPTS
$DOCKER_STORAGE_OPTIONS
$DOCKER_NETWORK_OPTIONS
$INSECURE_REGISTRY
LimitNOFILE=1048576
LimitNPROC=1048576
LimitCORE=infinity
TimeoutStartSec=1min
# restart the docker process if it exits prematurely
Restart=on-failure
StartLimitBurst=3
StartLimitInterval=60s
[Install]
WantedBy=multi-user.target
3.6、kubelet服务添加GPU参数
[root@gpu-node01 tmp]# vi /etc/systemd/system/kubelet.service
[Unit]
Description=Kubernetes Kubelet Server
Documentation=https://github.com/GoogleCloudPlatform/kubernetes
After=docker.service
Wants=docker.socket
[Service]
User=root
EnvironmentFile=-/etc/kubernetes/kubelet.env
ExecStart=/usr/local/bin/kubelet
$KUBE_LOGTOSTDERR
$KUBE_LOG_LEVEL
$KUBELET_API_SERVER
$KUBELET_ADDRESS
$KUBELET_PORT
$KUBELET_HOSTNAME
$KUBELET_ARGS
$DOCKER_SOCKET
$KUBELET_NETWORK_PLUGIN
$KUBELET_VOLUME_PLUGIN
$KUBELET_CLOUDPROVIDER
$KUBELET_GPU_ARGS
Restart=always
RestartSec=10s
[Install]
WantedBy=multi-user.target
3.7、重启docker服务和kubelet服务
[root@gpu-node01 tmp]# systemctl daemon-reload
[root@gpu-node01 tmp]# systemctl restart docker
[root@gpu-node01 tmp]# systemctl restart kubelet
3.8、安装k8s调度gpu的插件
[root@master opt]# wget https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.6.0/nvidia-device-plugin.yml
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: nvidia-device-plugin-daemonset
namespace: kube-system
spec:
selector:
matchLabels:
name: nvidia-device-plugin-ds
updateStrategy:
type: RollingUpdate
template:
metadata:
# This annotation is deprecated. Kept here for backward compatibility
# See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
annotations:
scheduler.alpha.kubernetes.io/critical-pod: ""
labels:
name: nvidia-device-plugin-ds
spec:
tolerations:
# This toleration is deprecated. Kept here for backward compatibility
# See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
- key: CriticalAddonsOnly
operator: Exists
- key: nvidia.com/gpu
operator: Exists
effect: NoSchedule
# Mark this pod as a critical add-on; when enabled, the critical add-on
# scheduler reserves resources for critical add-on pods so that they can
# be rescheduled after a failure.
# See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
priorityClassName: "system-node-critical"
containers:
- image: registry.uih/library/nvidia/k8s-device-plugin:1.0.0-beta6
name: nvidia-device-plugin-ctr
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop: ["ALL"]
volumeMounts:
- name: device-plugin
mountPath: /var/lib/kubelet/device-plugins
volumes:
- name: device-plugin
hostPath:
path: /var/lib/kubelet/device-plugins
在master节点上执行 kubectl apply -f nvidia-device-plugin.yml 部署,以DaemonSet模式运行
[root@master ~]# kubectl get po --all-namespaces |grep nvi
kube-system nvidia-device-plugin-daemonset-cwlsq 1/1 Running 0 8d
kube-system nvidia-device-plugin-daemonset-p4b4d 1/1 Running 0 8d
kube-system nvidia-device-plugin-daemonset-p5h4c 1/1 Running 0 8d
kube-system nvidia-device-plugin-daemonset-xjbxj 1/1 Running 0 8d
4、配置GPU节点调度策略
4.1、只允许GPU服务运行在该节点上
给该GPU节点打一个taint污点标签,只有拥有和这个 taint 相匹配的 toleration 的 pod 才能够被分配到这个节点
[root@master tmp]# kubectl taint nodes gpu-node01 nvidia.com/gpu=true:NoSchedule
4.2、GPU服务只允许运行在gpu服务器上
查看GPU节点的node标签[nvidia.com/gpu=true]
[root@master tmp]# kubectl get node --show-labels | grep true
gpu-node01 Ready worker 2d1h v1.16.7 beta.kubernetes.io/arch=amd64,beta.kubernetes.io/os=linux,kubernetes.io/arch=amd64,kubernetes.io/hostname=gpu-node01,kubernetes.io/os=linux,node-role.kubernetes.io/worker=,nvidia.com/gpu=true
4.3、GPU服务的部署配置中加上如下两点
4.3.1、只允许调度在带有nvidia.com/gpu=true 标签的节点上
nodeSelector: {
"nvidia.com/gpu": "true"
}
nodeSelector可以通过打标签的形式让Pod被调度到指定的Node上
4.3.2、允许GPU节点接受为如下tolerations的pod服务
tolerations:
- key: "nvidia.com/gpu"
operator: "Exists"
effect: NoSchedule
NoSchedule:只有拥有和这个 taint 相匹配的 toleration 的 pod 才能够被分配到这个节点
5、ChatbotAI服务helm values 的完整配置
# Default values for ChatbotAI.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
replicaCount: 2
image:
repository: registry.uih/com.uih.uplus/chatbot_algor_service_feature
tag: v1.1
pullPolicy: IfNotPresent
service:
port: 9890
type: NodePort
nodePort: 30890
ingress:
enabled: false
annotations: {}
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: "true"
hosts:
[root@master ai-chatbot]#
[root@master ai-chatbot]# cat values.yaml
# Default values for ChatbotAI.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
replicaCount: 2
image:
repository: registry.uih/com.uih.uplus/chatbot_algor_service_feature
tag: v1.1
pullPolicy: IfNotPresent
service:
port: 9890
type: NodePort
nodePort: 30890
ingress:
enabled: false
annotations: {}
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: "true"
hosts:
- host: chart-example.local
paths: []
tls: []
# - secretName: chart-example-tls
# hosts:
# - chart-example.local
resources: {}
# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
# resources, such as Minikube. If you do want to specify resources, uncomment the following
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
# limits:
# nvidia.com/gpu: 1
nodeSelector: {
"nvidia.com/gpu": "true"
}
tolerations:
- key: "nvidia.com/gpu"
operator: "Exists"
effect: NoSchedule
affinity: {}
6、总结:
1、只允许需要GPU的服务运行在GPU节点上:通过节点 taint 与 pod 的 tolerations 来控制,保证只有带有如下 tolerations 配置的服务才能在GPU节点上运行
tolerations:
- key: "nvidia.com/gpu"
operator: "Exists"
effect: NoSchedule
2、所有的GPU服务只允许在GPU节点上运行,通过如下节点标签来控制
nodeSelector: {
"nvidia.com/gpu": "true"
}
最终达到我们所要的结果:GPU服务每次都调度到GPU节点上,GPU节点只允许GPU服务运行在该节点上