Enable win-arm64
This patch enables building PyTorch from source with Ninja and the
'Visual Studio 16 2019' CMake generator on Windows on Arm.
Tests:
- Build from source: 'python setup.py develop'.
- Run simple PyTorch example: passed
- python test\test_torch.py:
-- same results as on x64
-- Ran 1344 tests, failures=2
Pull Request resolved: https://github.com/pytorch/pytorch/pull/72424
diff --git a/c10/util/Bitset.h b/c10/util/Bitset.h
index bed04a4..4143ae5 100644
--- a/c10/util/Bitset.h
+++ b/c10/util/Bitset.h
@@ -76,7 +76,7 @@
// (i.e. if the very first bit is set, this function returns '1'), and a
// return of '0' means that there was no bit set.
size_t find_first_set() const {
-#if defined(_MSC_VER) && defined(_M_X64)
+#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64))
unsigned long result;
bool has_bits_set = (0 != _BitScanForward64(&result, bitset_));
if (!has_bits_set) {
diff --git a/caffe2/serialize/crc_alt.h b/caffe2/serialize/crc_alt.h
index 108d998..3299327 100644
--- a/caffe2/serialize/crc_alt.h
+++ b/caffe2/serialize/crc_alt.h
@@ -101,8 +101,11 @@
// Windows always little endian
#define __BYTE_ORDER __LITTLE_ENDIAN
+ #if !defined(_M_ARM64)
// intrinsics / prefetching
#include <xmmintrin.h>
+ #endif
+
#ifdef __MINGW32__
#define PREFETCH(location) __builtin_prefetch(location)
#else
diff --git a/tools/build_pytorch_libs.py b/tools/build_pytorch_libs.py
index d795770..60a2e3c 100644
--- a/tools/build_pytorch_libs.py
+++ b/tools/build_pytorch_libs.py
@@ -1,4 +1,5 @@
import os
+import platform
from glob import glob
import shutil
from typing import Dict, Optional
@@ -10,6 +11,22 @@
def _overlay_windows_vcvars(env: Dict[str, str]) -> Dict[str, str]:
vc_arch = 'x64' if IS_64BIT else 'x86'
+
+ if platform.machine() == 'ARM64':
+ vc_arch = 'x64_arm64'
+
+ # First Win11 Windows on Arm build version that supports x64 emulation
+ # is 10.0.22000.
+ win11_1st_version = (10, 0, 22000)
+ current_win_version = tuple(int(version_part) for version_part in
+ platform.version().split('.'))
+ if current_win_version < win11_1st_version:
+ vc_arch = 'x86_arm64'
+ print("Warning: 32-bit toolchain will be used, but 64-bit linker "
+ "is recommended to avoid out-of-memory linker error!")
+ print("Warning: Please consider upgrading to Win11, where x64 "
+ "emulation is enabled!")
+
vc_env: Dict[str, str] = distutils._msvccompiler._get_vc_env(vc_arch)
# Keys in `_get_vc_env` are always lowercase.
# We turn them into uppercase before overlaying vcvars
diff --git a/tools/setup_helpers/cmake.py b/tools/setup_helpers/cmake.py
index ff17577..a22c58e 100644
--- a/tools/setup_helpers/cmake.py
+++ b/tools/setup_helpers/cmake.py
@@ -4,6 +4,7 @@
import multiprocessing
import os
+import platform
import re
from subprocess import check_call, check_output, CalledProcessError
import sys
@@ -220,8 +221,11 @@
'in the build steps carefully.')
sys.exit(1)
if IS_64BIT:
- args.append('-Ax64')
- toolset_dict['host'] = 'x64'
+ if platform.machine() == 'ARM64':
+ args.append('-A ARM64')
+ else:
+ args.append('-Ax64')
+ toolset_dict['host'] = 'x64'
if toolset_dict:
toolset_expr = ','.join(["{}={}".format(k, v) for k, v in toolset_dict.items()])
args.append('-T' + toolset_expr)
diff --git a/torch/csrc/jit/codegen/fuser/cpu/fused_kernel.cpp b/torch/csrc/jit/codegen/fuser/cpu/fused_kernel.cpp
index 68fee99..013a8e8 100644
--- a/torch/csrc/jit/codegen/fuser/cpu/fused_kernel.cpp
+++ b/torch/csrc/jit/codegen/fuser/cpu/fused_kernel.cpp
@@ -230,7 +230,7 @@
// understand for AVX512. When we need better CPU performance this
// optimization can be re-enabled by tracking down the platforms where
// this error occurs and only selectively disabling it.
-#ifdef _MSC_VER
+#if (defined(_MSC_VER) && !defined(_M_ARM64))
// According to https://stackoverflow.com/a/29178079, we are able to
// detect which arch level is supported by the vectorizer using
// the macro __isa_available. It is added during runtime.