CentOS 编译 PyTorch

随着深度学习技术的快速发展,PyTorch 作为一种流行的深度学习框架,被广泛应用于各种研究领域和工业应用中。在 CentOS 系统上编译 PyTorch,可以帮助用户充分利用系统资源,同时满足特定需求。本文将详细介绍在 CentOS 系统上编译 PyTorch 的步骤和注意事项。
安装依赖
在编译 PyTorch 之前,需要确保系统已经安装了以下依赖:

- GCC 编译器
- Python 解释器
- CMake
- OpenBLAS 或 MKL
- CUDA Toolkit(如果需要支持 CUDA)
以下是在 CentOS 上安装这些依赖的命令:
# Toolchain, Python headers and BLAS. CentOS names development packages
# "-devel" (not "-dev"); gcc-c++ is included because PyTorch is a C++ project.
# NOTE(review): openblas-devel may require the EPEL repository
# (sudo yum install -y epel-release) - confirm for your CentOS release.
sudo yum install -y gcc gcc-c++ python3 python3-devel python3-pip cmake openblas-devel
# NumPy is installed through pip rather than yum.
sudo pip3 install numpy
# Only needed for CUDA builds.
# NOTE(review): presumably requires NVIDIA's CUDA yum repository to be configured first.
sudo yum install -y cuda-toolkit
下载 PyTorch 源码
从 PyTorch 官方 GitHub 仓库克隆源码(包含子模块):

# Fetch the sources together with all git submodules, then enter the checkout.
# The two commands are chained so that cd only runs if the clone succeeded.
git clone --recursive https://github.com/pytorch/pytorch.git &&
  cd pytorch
配置编译选项
进入 PyTorch 源码目录后,配置编译选项,以下是一个示例配置:
# Python interpreter used for the build.
export PYTHON_VERSION=3.8
# command -v is the portable replacement for which; assignment and export are
# split so a failed lookup is not hidden behind export's own exit status.
PYTHON_EXECUTABLE=$(command -v "python${PYTHON_VERSION}")
export PYTHON_EXECUTABLE
# Ask the interpreter itself for its header directory and library path.
# python3.8-config has no --include/--library options, and its --includes
# option prints compiler flags (-I...) rather than a plain directory.
PYTHON_INCLUDE_DIR=$("python${PYTHON_VERSION}" -c \
  'import sysconfig; print(sysconfig.get_paths()["include"])')
export PYTHON_INCLUDE_DIR
# NOTE(review): LIBDIR/LDLIBRARY should point at libpython for a shared build;
# confirm the resulting path exists on your system.
PYTHON_LIBRARY=$("python${PYTHON_VERSION}" -c \
  'import os, sysconfig; print(os.path.join(sysconfig.get_config_var("LIBDIR") or "", sysconfig.get_config_var("LDLIBRARY") or ""))')
export PYTHON_LIBRARY
# CUDA Toolkit version and its install prefix.
export CUDA_VERSION=11.0
export CUDA_HOME="/usr/local/cuda-${CUDA_VERSION}"
# Build feature switches (1 = on, 0 = off). Each variable is exported exactly
# once; the earlier list repeated the same assignments many times with
# identical values, so the resulting environment is unchanged.
# NOTE(review): not every name below is a documented PyTorch build switch;
# confirm each one against PyTorch's setup.py before relying on it.

# Threading / BLAS
export USE_OPENMP=1
export USE_MKL=0

# CUDA stack
export USE_CUDA=1
export USE_CUDNN=1
export USE_CUBLAS=1
export USE_CUDA_RNN=0
export USE_NCCL=0

# ABI and distributed support
export USE_CXX11_ABI=1
export USE_DISTRIBUTED=0

# CPU kernel libraries
export USE_NNPACK=0
export USE_QNNPACK=0
export USE_FBGEMM=0

# x86 instruction-set extensions
export USE_X86_SSE2=1
export USE_X86_SSE3=1
export USE_X86_SSE41=1
export USE_X86_SSE42=1
export USE_X86_AVX=1
export USE_X86_AVX2=1
export USE_X86_FMA=1
export USE_X86_FMA4=1

# Miscellaneous
export USE_AMP=0
export USE_NUPHARMO=0
export USE_REVERSE_MODE_JIT=0
export USE_Numba=0
export USE_Numba_CUDA=0
export USE_TENSORFLOW=0
export USE_CUBL 