侧边栏壁纸
  • 累计撰写 105 篇文章
  • 累计创建 54 个标签
  • 累计收到 1 条评论

目录 CONTENT

文章目录

PVE LXC auto-build script for NVIDIA GPU

FlyingEagle
2026-04-15 / 0 评论 / 0 点赞 / 10 阅读 / 5,548 字

Create the two files below in the same directory, then run lxcbuild.sh.

lxcbuild.sh

#!/bin/bash
# Build an unprivileged PVE LXC container with NVIDIA GPU passthrough and a
# set of bind mounts, then push and run lxc-setup.sh inside it.
# Must run on the Proxmox VE host as root.
set -u

# 1. Basic settings
LXC_ID=104
PASSWORD="passwall"
LXC_NAME="AIstack"
LXC_IP="192.168.1.2"
TEMPLATE="/var/lib/vz/template/cache/debian-13-standard_13.1-2_amd64.tar.zst"
NVIDIA_RUN="NVIDIA-Linux-x86_64-580.142.run"

# 2. Remove the old container if it exists (failures here are expected when
#    the container is absent, hence the explicit '|| true')
pct stop "$LXC_ID" 2>/dev/null || true
pct destroy "$LXC_ID" 2>/dev/null || true

# 3. Create the LXC container
# NOTE(review): gw=192.168.2.254 lies outside the 192.168.1.0/24 network
# implied by ip="$LXC_IP"/24 — confirm the intended gateway or subnet.
pct create "$LXC_ID" "$TEMPLATE" \
  --arch amd64 --cores 6 --memory 28672 \
  --hostname "$LXC_NAME" --ostype debian \
  --rootfs local-zfs:8 --swap 512 \
  --onboot 1 --unprivileged 1 \
  --features keyctl=1,nesting=1 \
  --password "$PASSWORD" \
  --net0 name=eth0,bridge=vmbr0,firewall=1,gw=192.168.2.254,ip="$LXC_IP"/24,type=veth \
  || { echo "pct create failed" >&2; exit 1; }

# 4. Device passthrough via the official pct options (handles permissions
#    automatically). Kept disabled: devices are exposed through the raw
#    lxc.* entries appended in step 5 instead.
#pct set $LXC_ID --dev0 /dev/nvidia0
#pct set $LXC_ID --dev1 /dev/nvidiactl
#pct set $LXC_ID --dev2 /dev/nvidia-uvm
#pct set $LXC_ID --dev3 /dev/nvidia-uvm-tools
#pct set $LXC_ID --dev4 /dev/nvidia-modeset
#pct set $LXC_ID --dev5 /dev/nvidia-caps/nvidia-cap1
#pct set $LXC_ID --dev6 /dev/nvidia-caps/nvidia-cap2
# FIX: the --mp0/--mp1 assignments that used to sit here were silently
# overwritten by the definitive mount-point list in step 6, so the
# contradictory duplicates have been removed.

# 5. Keep only AppArmor and cgroup permissions (the mount.entry variants that
#    previously caused errors stay removed)
cat >> "/etc/pve/lxc/$LXC_ID.conf" << EOF
lxc.apparmor.profile: unconfined
#lxc.cgroup2.devices.allow: c 195:* rwm
#lxc.cgroup2.devices.allow: c 234:* rwm
#lxc.cgroup2.devices.allow: c 511:* rwm
# NVIDIA device access
lxc.cgroup2.devices.allow: c 195:* rwm
lxc.cgroup2.devices.allow: c 511:* rwm

# DRI device access (for GPU rendering/encoding)
lxc.cgroup2.devices.allow: c 226:0 rwm
lxc.cgroup2.devices.allow: c 226:1 rwm
lxc.cgroup2.devices.allow: c 226:128 rwm

# Input device access
lxc.cgroup2.devices.allow: c 13:* rwm

# NVIDIA device mounts
lxc.mount.entry: /dev/nvidia0 dev/nvidia0 none bind,optional,create=file
lxc.mount.entry: /dev/nvidiactl dev/nvidiactl none bind,optional,create=file
lxc.mount.entry: /dev/nvidia-modeset dev/nvidia-modeset none bind,optional,create=file
lxc.mount.entry: /dev/nvidia-uvm dev/nvidia-uvm none bind,optional,create=file
lxc.mount.entry: /dev/nvidia-uvm-tools dev/nvidia-uvm-tools none bind,optional,create=file
lxc.mount.entry: /dev/nvidia-caps/nvidia-cap1 dev/nvidia-caps/nvidia-cap1 none bind,optional,create=file
lxc.mount.entry: /dev/nvidia-caps/nvidia-cap2 dev/nvidia-caps/nvidia-cap2 none bind,optional,create=file

# DRI device mounts
lxc.mount.entry: /dev/dri dev/dri none bind,optional,create=dir

# Input device mounts
lxc.mount.entry: /dev/uinput dev/uinput none bind,optional,create=file
lxc.mount.entry: /dev/uhid dev/uhid none bind,optional,create=file

# System mounts
lxc.mount.entry: /run/udev run/udev none bind,optional,create=dir
EOF

# 6. Start the container, then attach the bind mounts
pct start "$LXC_ID"
sleep 5

# Pre-create the mount-point directories inside the container
pct exec "$LXC_ID" -- mkdir -p /mnt/{10t,10t0,8t,8t1,14t0,14t,nvme600g,nvme,docker-data,model}
# NOTE(review): mount points added while the container runs may only take
# effect after a restart on some PVE releases — verify on the target host.
pct set "$LXC_ID" --mp0 /rpool/llm-models,mp=/mnt/models
pct set "$LXC_ID" --mp1 /rpool/docker-data,mp=/mnt/docker-data
pct set "$LXC_ID" --mp2 /mnt/10t,mp=/mnt/10t
pct set "$LXC_ID" --mp3 /mnt/10t0,mp=/mnt/10t0
pct set "$LXC_ID" --mp4 /mnt/8t,mp=/mnt/8t
pct set "$LXC_ID" --mp6 /mnt/8t1,mp=/mnt/8t1
pct set "$LXC_ID" --mp8 /mnt/14t0,mp=/mnt/14t0
pct set "$LXC_ID" --mp9 /mnt/14t,mp=/mnt/14t
pct set "$LXC_ID" --mp10 /mnt/nvme,mp=/mnt/nvme
pct set "$LXC_ID" --mp11 /mnt/nvme600g,mp=/mnt/nvme600g

# 7. Push the NVIDIA driver installer (must sit next to this script);
#    fail early with a clear message instead of letting 'pct push' error out
[ -f "$NVIDIA_RUN" ] || { echo "missing $NVIDIA_RUN in current directory" >&2; exit 1; }
pct push "$LXC_ID" "$NVIDIA_RUN" "/root/$NVIDIA_RUN"

# Push the external setup script to the container
pct push "$LXC_ID" /root/lxc-setup.sh /root/lxc-setup.sh

# Execute the setup script inside the container
pct exec "$LXC_ID" -- bash /root/lxc-setup.sh

echo "LXC container $LXC_ID setup complete!"

lxc-setup.sh

#!/bin/bash
# In-container provisioning: locale, build tools, SSH root access, NVIDIA
# userspace driver, Docker, and the NVIDIA container toolkit.
# Runs as root inside the LXC container (pushed and executed by lxcbuild.sh).
set -e  # Exit on error

echo "Starting LXC setup..."

# FIRST: set a temporary locale so the initial apt operations don't warn
export LANG=C.UTF-8
export LC_ALL=C.UTF-8

# Install 'locales' before anything else so locale-gen is available
apt update && apt install -y locales

# NOW generate and persist the proper locale
sed -i 's/^# *\(en_US.UTF-8\)/\1/' /etc/locale.gen
locale-gen en_US.UTF-8
update-locale LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8

# Use the new locale for the rest of this session
export LANG=en_US.UTF-8
export LC_ALL=en_US.UTF-8

# Install remaining dependencies (locale warnings should now be gone)
apt install -y g++ freeglut3-dev build-essential libx11-dev libxmu-dev libxi-dev \
  libglu1-mesa-dev libfreeimage-dev gpg libglfw3-dev wget htop btop nvtop nano \
  glances git pciutils cmake curl zstd libcurl4-openssl-dev dkms make openssh-server

# Enable SSH root login (two seds cover the commented and uncommented defaults)
echo "Configuring SSH for root access..."
sed -i 's/^#PermitRootLogin.*/PermitRootLogin yes/' /etc/ssh/sshd_config
sed -i 's/^PermitRootLogin.*/PermitRootLogin yes/' /etc/ssh/sshd_config

# Restart and enable the SSH service
systemctl restart ssh
systemctl enable ssh

# Install the NVIDIA userspace driver only; the kernel module is loaded on
# the PVE host, hence --no-kernel-modules
chmod +x /root/NVIDIA-Linux-x86_64-580.142.run
/root/NVIDIA-Linux-x86_64-580.142.run --no-kernel-modules -a -q --ui=none

# Install Docker via the official convenience script
curl -fsSL https://get.docker.com -o get-docker.sh
sh get-docker.sh

# Add the NVIDIA container toolkit apt repository.
# FIX: dropped the 'sudo' prefixes — this script already runs as root and the
# minimal Debian template does not install sudo, so those calls would fail
# and abort the script under 'set -e'.
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
  && curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
    sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
    tee /etc/apt/sources.list.d/nvidia-container-toolkit.list

apt-get update
apt-get install -y nvidia-container-toolkit

# Wire the NVIDIA runtime into Docker
nvidia-ctk runtime configure --runtime=docker
systemctl restart docker

# Verify installations (deliberately non-fatal for nvidia-smi)
nvidia-smi || echo "Note: nvidia-smi may require container restart"
docker --version

# Report this container's IPv4 address for convenience
CONTAINER_IP=$(ip -4 addr show eth0 | grep -oP '(?<=inet\s)\d+(\.\d+){3}' 2>/dev/null || echo "IP could not be detected")

echo "LXC setup completed successfully!"
echo "Root SSH login is enabled. You can connect via: ssh root@$CONTAINER_IP"
# FIX: removed a stray trailing '0' line which ran as a command, failed with
# 'command not found', and made the script exit non-zero under 'set -e'.

评论区