mirror of
https://github.com/gentoo-mirror/gentoo.git
synced 2026-01-06 02:17:34 -08:00
dev-cpp/eigen: properly guard VSX use
Fixes build on VSX-less ppc* hardware. Closes: https://bugs.gentoo.org/936107 Thanks-to: jonys <vidra.jonas@seznam.cz> Signed-off-by: Sam James <sam@gentoo.org>
This commit is contained in:
parent
2fb52613e7
commit
c7a9b7d8cd
458
dev-cpp/eigen/eigen-3.4.0-r3.ebuild
Normal file
458
dev-cpp/eigen/eigen-3.4.0-r3.ebuild
Normal file
@ -0,0 +1,458 @@
|
||||
# Copyright 1999-2024 Gentoo Authors
|
||||
# Distributed under the terms of the GNU General Public License v2
|
||||
|
||||
EAPI=8

# Read by fortran-2.eclass: names the USE flag under which Fortran is needed.
FORTRAN_NEEDED="test"
inherit cmake cuda fortran-2 llvm toolchain-funcs

DESCRIPTION="C++ template library for linear algebra"
HOMEPAGE="https://eigen.tuxfamily.org/index.php?title=Main_Page"

if [[ ${PV} == *9999* ]]; then
	# Live ebuild: fetch from git; 3.4.9999* pins the "3.4" ref.
	inherit git-r3
	EGIT_REPO_URI="https://gitlab.com/lib${PN}/${PN}.git"
	if [[ ${PV} == 3.4.9999* ]]; then
		EGIT_COMMIT="3.4"
	fi
else
	# Release ebuild: tarball plus the optional LAPACK test add-ons.
	SRC_URI="
		https://gitlab.com/lib${PN}/${PN}/-/archive/${PV}/${P}.tar.bz2
		test? ( lapack? ( https://downloads.tuxfamily.org/${PN}/lapack_addons_3.4.1.tgz -> ${PN}-lapack_addons-3.4.1.tgz ) )
	"
	KEYWORDS="~alpha ~amd64 ~arm arm64 ~hppa ~ia64 ~loong ~ppc ~ppc64 ~riscv ~s390 sparc ~x86 ~amd64-linux ~x86-linux ~arm64-macos ~x64-macos"
fi

LICENSE="MPL-2.0"
SLOT="3"
|
||||
|
||||
# The following lines are shamelessly stolen from ffmpeg-9999.ebuild with modifications
# Each entry is "<cpu flag>:<Eigen feature name>".
ARM_CPU_FEATURES=( neon:NEON )
PPC_CPU_FEATURES=( altivec:ALTIVEC vsx:VSX )
X86_CPU_FEATURES=(
	avx:AVX
	avx2:AVX2
	avx512f:AVX512
	avx512dq:AVX512DQ
	f16c:FP16C
	fma3:FMA
	popcnt:POPCNT
	sse:SSE
	sse2:SSE2
	sse3:SSE3
	ssse3:SSSE3
	sse4_1:SSE4_1
	sse4_2:SSE4_2
)
# MIPS_CPU_FEATURES=(
# 	msa:MSA
# )
# S390_CPU_FEATURES=(
# 	z13:Z13
# 	z14:Z14
# )

# Same entries, each prefixed with its cpu_flags_<arch>_ USE_EXPAND name.
CPU_FEATURES_MAP=(
	"${ARM_CPU_FEATURES[@]/#/cpu_flags_arm_}"
	"${PPC_CPU_FEATURES[@]/#/cpu_flags_ppc_}"
	"${X86_CPU_FEATURES[@]/#/cpu_flags_x86_}"
	# "${MIPS_CPU_FEATURES[@]/#/cpu_flags_mips_}"
	# "${S390_CPU_FEATURES[@]/#/cpu_flags_s390_}"
)

# USE flags that switch individual test backends on.
IUSE_TEST_BACKENDS=(
	adolc
	boost
	cholmod
	fftw
	klu
	opengl
	openmp
	pastix
	sparsehash
	spqr
	superlu
	umfpack
)

# "%:*" strips the ":<feature>" suffix so only the USE flag names remain.
IUSE="${CPU_FEATURES_MAP[*]%:*} clang cuda hip debug doc lapack mathjax test ${IUSE_TEST_BACKENDS[*]}" #zvector
|
||||
|
||||
# Tests failing again because of compiler issues
RESTRICT="!test? ( test )"

# Build-time tools: the documentation toolchain and pkg-config for tests.
BDEPEND="
	doc? (
		app-text/doxygen[dot]
		dev-texlive/texlive-bibtexextra
		dev-texlive/texlive-fontsextra
		dev-texlive/texlive-fontutils
		dev-texlive/texlive-latex
		dev-texlive/texlive-latexextra
		mathjax? ( dev-libs/mathjax )
	)
	test? ( virtual/pkgconfig )
"

# METIS
# MPREAL
# 	dev-libs/mpfr:0
# 	dev-libs/gmp:0

# Packages behind each optional test backend flag.
TEST_BACKENDS="
	boost? ( dev-libs/boost )
	adolc? ( sci-libs/adolc[sparse] )
	cholmod? ( sci-libs/cholmod:= )
	fftw? ( sci-libs/fftw )
	spqr? ( sci-libs/spqr )
	klu? ( sci-libs/klu )
	opengl? (
		media-libs/freeglut
		media-libs/glew
		media-libs/libglvnd
	)
	pastix? ( sci-libs/pastix[-mpi] )
	sparsehash? (
		amd64? ( dev-cpp/sparsehash )
		arm64? ( dev-cpp/sparsehash )
		ppc64? ( dev-cpp/sparsehash )
		x86? ( dev-cpp/sparsehash )
	)
	superlu? ( sci-libs/superlu )
	umfpack? ( sci-libs/umfpack )
"

# Everything here is only pulled in for the test suite.
DEPEND="
	test? (
		cuda? (
			!clang? (
				dev-util/nvidia-cuda-toolkit
			)
			clang? (
				sys-devel/clang[llvm_targets_NVPTX]
				openmp? ( sys-libs/libomp[llvm_targets_NVPTX,offload] )
			)
		)
		hip? ( dev-util/hip )
		lapack? ( virtual/lapacke )
		${TEST_BACKENDS}
	)
"

REQUIRED_USE="
	test? ( !lapack )
	|| ( ${IUSE_TEST_BACKENDS[*]} )
"

PATCHES=(
	"${FILESDIR}/${PN}-3.3.9-max-macro.patch"
	"${FILESDIR}/${PN}-3.4.0-doc-nocompress.patch" # bug 830064
	"${FILESDIR}/${PN}-3.4.0-buildstring.patch"
	"${FILESDIR}/${PN}-3.4.0-noansi.patch"
	"${FILESDIR}/${PN}-3.4.0-cxxstandard.patch"
	"${FILESDIR}/${PN}-3.4.0-ppc-no-vsx.patch" # bug 936107
)
|
||||
|
||||
# TODO should be in cuda.eclass
|
||||
# Pick a host compiler directory that nvcc accepts and export it as
# CUDAHOSTCXX.
#
# Installed versions of the active toolchain's package (sys-devel/gcc or
# sys-devel/clang) are tried from newest to oldest. The first one for which
# nvcc compiles a trivial program is selected. Dies if the toolchain is
# neither gcc nor clang, or if nvcc rejects every installed version.
cuda_set_CUDAHOSTCXX() {
	local compiler
	tc-is-gcc && compiler="gcc"
	tc-is-clang && compiler="clang"
	[[ -z "${compiler}" ]] && die "no compiler specified"

	local package="sys-devel/${compiler}"
	local atom="${package}"
	local version major binary candidate

	CUDAHOSTCXX=
	while version="$(best_version "${atom}")" && [[ -n "${version}" ]]; do
		# Next round only considers versions older than this one.
		atom="<${version}"

		# "sys-devel/gcc-13.2.1_p2024" -> "13": drop the package prefix,
		# then everything from the first non-digit onwards.
		major="${version#"${package}-"}"
		major="${major%%[!0-9]*}"

		# Skip versions whose versioned driver is not in PATH.
		binary="$(type -P "${compiler}-${major}")" || continue
		candidate="$(dirname "$(realpath "${binary}")")"

		# -ccbin and its value must be separate words, and the probe's exit
		# status is what decides whether this candidate is usable.
		if echo "int main(){}" | nvcc -ccbin "${candidate}" - -x cu &>/dev/null; then
			CUDAHOSTCXX="${candidate}"
			break
		fi
	done

	[[ -z "${CUDAHOSTCXX}" ]] && die "could not find supported version of ${package}"

	export CUDAHOSTCXX
}
|
||||
|
||||
# Run the setup phases of the inherited eclasses.
#
# Defining pkg_setup in the ebuild replaces the versions exported by the
# eclasses, so each one that is still needed is called explicitly.
pkg_setup() {
	# Honours FORTRAN_NEEDED, so this only acts when USE=test is enabled.
	fortran-2_pkg_setup

	# LLVM is only needed when clang compiles the CUDA tests.
	if use test && use cuda && use clang; then
		llvm_pkg_setup
	fi
}
|
||||
|
||||
# Fetch the sources: git checkout for live versions, otherwise the release
# tarball plus (for lapack tests) the LAPACK add-ons unpacked into lapack/.
src_unpack() {
	if [[ ${PV} == *9999* ]]; then
		git-r3_src_unpack
		return
	fi

	unpack "${P}.tar.bz2"

	if use test && use lapack; then
		# The add-on archive is extracted inside the lapack subdirectory.
		cd "${S}/lapack" || die
		unpack "${PN}-lapack_addons-3.4.1.tgz"
	fi
}
|
||||
|
||||
# Apply patches, then comment out the add_subdirectory() calls in
# CMakeLists.txt for directories that must not be built.
src_prepare() {
	cmake_src_prepare

	# Benchmarks and demos are always disabled.
	local skipped=( 'bench\/spbench' demos )

	# Without USE=test the test-related directories are disabled too.
	if ! use test; then
		skipped+=( test scripts failtest blas lapack )
	fi

	local subdir sed_script=()
	for subdir in "${skipped[@]}"; do
		sed_script+=( -e "/add_subdirectory(${subdir}/s/^/#DONOTCOMPILE /g" )
	done

	sed "${sed_script[@]}" -i CMakeLists.txt || die
}
|
||||
|
||||
# Assemble the CMake arguments from the enabled USE flags and configure.
#
# Outside USE=test only the base, doc and Qt options are passed. With
# USE=test the test backends, CPU feature toggles and the CUDA/HIP
# environment are configured as well.
src_configure() {
	local mycmakeargs=(
		-DBUILD_SHARED_LIBS="yes"
		-DBUILD_TESTING="$(usex test)"

		-DEIGEN_BUILD_DOC="$(usex doc)" # Enable creation of Eigen documentation
		-DEIGEN_BUILD_PKGCONFIG="yes" # Build pkg-config .pc file for Eigen
	)

	if use doc || use test; then
		# needs Qt4
		mycmakeargs+=( -DEIGEN_TEST_NOQT="yes" ) # Disable Qt support in unit tests
	fi

	if use doc; then
		mycmakeargs+=(
			-DEIGEN_DOC_USE_MATHJAX="$(usex mathjax)" # Use MathJax for rendering math in HTML docs
			-DEIGEN_INTERNAL_DOCUMENTATION=no # Build internal documentation
		)
	fi

	if use test; then
		mycmakeargs+=(
			# the OpenGL testsuite is extremely brittle, bug #712808
			-DOpenGL_GL_PREFERENCE="GLVND"
			-DEIGEN_TEST_OPENGL="$(usex opengl)" # Enable OpenGL support in unit tests
			-DEIGEN_TEST_OPENMP="$(usex openmp)" # Enable/Disable OpenMP in tests/examples

			-DCMAKE_DISABLE_FIND_PACKAGE_MPREAL=ON

			-DEIGEN_TEST_CXX11=yes

			# -DEIGEN_TEST_CUSTOM_CXX_FLAGS= # Additional compiler flags when compiling unit tests.
			# -DEIGEN_TEST_CUSTOM_LINKER_FLAGS= # Additional linker flags when linking unit tests.
			# -DEIGEN_TEST_BUILD_FLAGS= # Options passed to the build command of unit tests

			# -DEIGEN_BUILD_BTL=yes # Build benchmark suite

			-DEIGEN_TEST_BUILD_DOCUMENTATION="$(usex doc)" # Test building the doxygen documentation

			# -DEIGEN_COVERAGE_TESTING=no # Enable/disable gcov
			# -DEIGEN_CTEST_ERROR_EXCEPTION= # Regular expression for build error messages to be filtered out
			# -DEIGEN_DEBUG_ASSERTS=no # Enable advanced debugging of assertions
			# -DEIGEN_NO_ASSERTION_CHECKING=no # Disable checking of assertions using exceptions
			# -DEIGEN_TEST_NO_EXCEPTIONS=no # Disables C++ exceptions
			# -DEIGEN_TEST_NO_EXPLICIT_ALIGNMENT=no # Disable explicit alignment (hence vectorization) in tests/examples
			# -DEIGEN_TEST_NO_EXPLICIT_VECTORIZATION=no # Disable explicit vectorization in tests/examples

			# -DEIGEN_DASHBOARD_BUILD_TARGET=buildtests # Target to be built in dashboard mode, default is buildtests

			# -DEIGEN_DEFAULT_TO_ROW_MAJOR=no # Use row-major as default matrix storage order

			# -DEIGEN_TEST_MATRIX_DIR=yes # Enable testing of realword sparse matrices contained in the specified path
			# -DEIGEN_TEST_MAX_SIZE=320 # Maximal matrix/vector size, default is 320
			# -DEIGEN_SPLIT_LARGE_TESTS=no # Split large tests into smaller executables
		)

		# For every backend whose USE flag is off, stop CMake from picking up
		# the matching package. Entries are "<use flag>:<CMake package name>".
		local backend
		for backend in \
			adolc:Adolc boost:Boost cholmod:CHOLMOD fftw:FFTW \
			sparsehash:GoogleHash klu:KLU opengl:OpenGL openmp:OpenMP \
			pastix:PASTIX spqr:SPQR superlu:SuperLU umfpack:UMFPACK
		do
			if ! use "${backend%%:*}"; then
				mycmakeargs+=( -DCMAKE_DISABLE_FIND_PACKAGE_${backend#*:}="TRUE" )
			fi
		done

		if use lapack; then
			mycmakeargs+=(
				-DEIGEN_ENABLE_LAPACK_TESTS=yes
				-DEIGEN_TEST_EXTERNAL_BLAS=yes # Use external BLAS library for testsuite
				-DCMAKE_DISABLE_FIND_PACKAGE_SuperLU=ON
			)
		fi

		# Per-architecture vectorisation toggles for the tests.
		if use arm; then
			mycmakeargs+=( -DEIGEN_TEST_NEON="$(usex cpu_flags_arm_neon)" )
		fi

		if use arm64; then
			mycmakeargs+=( -DEIGEN_TEST_NEON64="$(usex cpu_flags_arm_neon)" )
		fi

		if use ppc || use ppc64; then
			mycmakeargs+=(
				-DEIGEN_TEST_ALTIVEC="$(usex cpu_flags_ppc_altivec)"
				-DEIGEN_TEST_VSX="$(usex cpu_flags_ppc_vsx)"
			)
		fi

		if use amd64 || use x86; then
			# -DEIGEN_TEST_32BIT=no # Force generating 32bit code.
			# -DEIGEN_TEST_X87=no # Force using X87 instructions. Implies no vectorization.
			# Entries are "<cpu_flags_x86 suffix>:<EIGEN_TEST_ suffix>".
			local simd
			for simd in \
				sse2:SSE2 sse3:SSE3 ssse3:SSSE3 fma3:FMA \
				sse4_1:SSE4_1 sse4_2:SSE4_2 avx:AVX f16c:F16C \
				avx2:AVX2 avx512f:AVX512 avx512dq:AVX512DQ
			do
				mycmakeargs+=( -DEIGEN_TEST_${simd#*:}="$(usex "cpu_flags_x86_${simd%%:*}")" )
			done
		fi

		if use mips; then
			mycmakeargs+=(
				# -DEIGEN_TEST_MSA=no # Enable/Disable MSA in tests/examples
			)
		fi

		if use s390; then
			mycmakeargs+=(
				# -DEIGEN_TEST_Z13=no # Enable/Disable S390X(zEC13) ZVECTOR in tests/examples
				# -DEIGEN_TEST_Z14=no # Enable/Disable S390X(zEC14) ZVECTOR in tests/examples
			)
		fi

		mycmakeargs+=(
			-DEIGEN_TEST_CUDA="$(usex cuda)" # Enable CUDA support in unit tests
			-DEIGEN_TEST_CUDA_CLANG="$(usex cuda "$(usex clang)")" # Use clang instead of nvcc to compile the CUDA tests

			-DEIGEN_TEST_HIP="$(usex hip)" # Add HIP support.

			# -DEIGEN_TEST_SYCL=no # Add Sycl support.
			# -DEIGEN_SYCL_TRISYCL=no # Use the triSYCL Sycl implementation (ComputeCPP by default).
		)

		if use cuda; then
			cuda_add_sandbox -w

			if ! use clang; then
				# nvcc build: select a host compiler and hand it to CMake.
				cuda_set_CUDAHOSTCXX
				mycmakeargs+=( -DCUDA_HOST_COMPILER="${CUDAHOSTCXX}" )
			else
				# clang build: compile with clang from the LLVM prefix.
				local llvm_prefix
				llvm_prefix="$(get_llvm_prefix -b)"
				export CC="${llvm_prefix}/bin/clang"
				export CXX="${llvm_prefix}/bin/clang++"
				export LIBRARY_PATH="${ESYSROOT}/usr/$(get_libdir)"
			fi

			if [[ "${CUDA_VERBOSE}" == true ]]; then
				mycmakeargs+=( -DCUDA_VERBOSE_BUILD=yes )
				NVCCFLAGS+=" -v"
			fi

			export CUDAFLAGS="${NVCCFLAGS}"

			# Query the host for its architectures unless the user set them.
			if [[ -z "${CUDAARCHS}" ]]; then
				einfo "trying to determine host CUDAARCHS"
				CUDAARCHS="$(__nvcc_device_query)"
			fi
			export CUDAARCHS

			mycmakeargs+=( -DEIGEN_CUDA_COMPUTE_ARCH="${CUDAARCHS}" )
		fi
	fi

	cmake_src_configure
}
|
||||
|
||||
# Build only the optional targets (docs and/or tests); with neither flag
# enabled nothing is compiled.
src_compile() {
	local targets=()

	if use doc; then
		HTML_DOCS=( "${BUILD_DIR}"/doc/html/. )
		targets+=( doc )
	fi

	if use test; then
		# tests generate random data, which
		# obviously fails for some seeds
		export EIGEN_SEED=712808

		targets+=( buildtests )
		# The blas target is only built when lapack is disabled.
		use lapack || targets+=( blas )
	fi

	# The list is non-empty exactly when doc or test is enabled.
	if [[ ${#targets[@]} -gt 0 ]]; then
		cmake_src_compile "${targets[@]}"
	fi
}
|
||||
|
||||
# Run the test suite serially, skipping tests that are known to fail.
src_test() {
	# Trailing notes are kept as recorded: a number (presumably the ctest
	# index) and the failure mode.
	CMAKE_SKIP_TESTS=(
		product_small_32 # 143 (Subprocess aborted)
		product_small_33 # 144 (Subprocess aborted)

		eigensolver_selfadjoint_13 # 452 (Subprocess aborted)

		cholmod_support_21 # 726 (Subprocess aborted)
		cholmod_support_22 # 727 (Subprocess aborted)

		NonLinearOptimization # 930 (Subprocess aborted)
		openglsupport # 990 (Failed)
		levenberg_marquardt # 1020 (Subprocess aborted)
	)

	if use cuda; then
		cuda_add_sandbox -w
		CMAKE_SKIP_TESTS+=( cxx11_tensor_cast_float16_gpu cxx11_tensor_gpu_5 )
	fi

	if use lapack; then
		CMAKE_SKIP_TESTS+=( "^LAPACK-.*$" )
	fi

	# -j1: otherwise breaks due to cmake reruns
	local myctestargs=( -j1 )

	cmake_src_test
}
|
||||
195
dev-cpp/eigen/files/eigen-3.4.0-ppc-no-vsx.patch
Normal file
195
dev-cpp/eigen/files/eigen-3.4.0-ppc-no-vsx.patch
Normal file
@ -0,0 +1,195 @@
|
||||
https://gitlab.com/libeigen/eigen/-/merge_requests/1028
|
||||
https://bugs.gentoo.org/936107
|
||||
--- a/Eigen/Core
|
||||
+++ b/Eigen/Core
|
||||
@@ -346,7 +346,7 @@
|
||||
#include "src/Core/CoreIterators.h"
|
||||
#include "src/Core/ConditionEstimator.h"
|
||||
|
||||
-#if defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX)
|
||||
+#if defined(EIGEN_VECTORIZE_VSX)
|
||||
#include "src/Core/arch/AltiVec/MatrixProduct.h"
|
||||
#elif defined EIGEN_VECTORIZE_NEON
|
||||
#include "src/Core/arch/NEON/GeneralBlockPanelKernel.h"
|
||||
--- a/Eigen/src/Core/arch/AltiVec/Complex.h
|
||||
+++ b/Eigen/src/Core/arch/AltiVec/Complex.h
|
||||
@@ -100,6 +100,7 @@
|
||||
HasAbs2 = 0,
|
||||
HasMin = 0,
|
||||
HasMax = 0,
|
||||
+ HasSqrt = 1,
|
||||
#ifdef __VSX__
|
||||
HasBlend = 1,
|
||||
#endif
|
||||
@@ -320,6 +321,7 @@
|
||||
HasAbs2 = 0,
|
||||
HasMin = 0,
|
||||
HasMax = 0,
|
||||
+ HasSqrt = 1,
|
||||
HasSetLinear = 0
|
||||
};
|
||||
};
|
||||
--- a/Eigen/src/Core/arch/AltiVec/MathFunctions.h
|
||||
+++ b/Eigen/src/Core/arch/AltiVec/MathFunctions.h
|
||||
@@ -40,16 +40,14 @@
|
||||
return pcos_float(_x);
|
||||
}
|
||||
|
||||
+#ifdef __VSX__
|
||||
#ifndef EIGEN_COMP_CLANG
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f prsqrt<Packet4f>(const Packet4f& x)
|
||||
{
|
||||
return vec_rsqrt(x);
|
||||
}
|
||||
-#endif
|
||||
|
||||
-#ifdef __VSX__
|
||||
-#ifndef EIGEN_COMP_CLANG
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet2d prsqrt<Packet2d>(const Packet2d& x)
|
||||
{
|
||||
@@ -74,6 +72,26 @@
|
||||
{
|
||||
return pexp_double(_x);
|
||||
}
|
||||
+
|
||||
+template<> EIGEN_STRONG_INLINE Packet8bf psqrt<Packet8bf> (const Packet8bf& a){
|
||||
+ BF16_TO_F32_UNARY_OP_WRAPPER(psqrt<Packet4f>, a);
|
||||
+}
|
||||
+
|
||||
+#ifndef EIGEN_COMP_CLANG
|
||||
+template<> EIGEN_STRONG_INLINE Packet8bf prsqrt<Packet8bf> (const Packet8bf& a){
|
||||
+ BF16_TO_F32_UNARY_OP_WRAPPER(prsqrt<Packet4f>, a);
|
||||
+}
|
||||
+#endif
|
||||
+#else
|
||||
+template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
+Packet4f psqrt<Packet4f>(const Packet4f& x)
|
||||
+{
|
||||
+ Packet4f a;
|
||||
+ for (Index i = 0; i < packet_traits<float>::size; i++) {
|
||||
+ a[i] = numext::sqrt(x[i]);
|
||||
+ }
|
||||
+ return a;
|
||||
+}
|
||||
#endif
|
||||
|
||||
// Hyperbolic Tangent function.
|
||||
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
|
||||
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
|
||||
@@ -175,16 +175,19 @@
|
||||
#else
|
||||
HasRsqrt = 0,
|
||||
#endif
|
||||
+ HasTanh = EIGEN_FAST_MATH,
|
||||
+ HasErf = EIGEN_FAST_MATH,
|
||||
+ HasRint = 1,
|
||||
#else
|
||||
HasSqrt = 0,
|
||||
HasRsqrt = 0,
|
||||
- HasTanh = EIGEN_FAST_MATH,
|
||||
- HasErf = EIGEN_FAST_MATH,
|
||||
+ HasTanh = 0,
|
||||
+ HasErf = 0,
|
||||
+ HasRint = 0,
|
||||
#endif
|
||||
HasRound = 1,
|
||||
HasFloor = 1,
|
||||
HasCeil = 1,
|
||||
- HasRint = 1,
|
||||
HasNegate = 1,
|
||||
HasBlend = 1
|
||||
};
|
||||
@@ -217,16 +220,17 @@
|
||||
#else
|
||||
HasRsqrt = 0,
|
||||
#endif
|
||||
+ HasRint = 1,
|
||||
#else
|
||||
HasSqrt = 0,
|
||||
HasRsqrt = 0,
|
||||
- HasTanh = EIGEN_FAST_MATH,
|
||||
- HasErf = EIGEN_FAST_MATH,
|
||||
+ HasRint = 0,
|
||||
#endif
|
||||
+ HasTanh = 0,
|
||||
+ HasErf = 0,
|
||||
HasRound = 1,
|
||||
HasFloor = 1,
|
||||
HasCeil = 1,
|
||||
- HasRint = 1,
|
||||
HasNegate = 1,
|
||||
HasBlend = 1
|
||||
};
|
||||
@@ -872,19 +876,29 @@
|
||||
return vec_nor(c,c);
|
||||
}
|
||||
|
||||
+#ifdef __VSX__
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pcmp_le(const Packet4i& a, const Packet4i& b) { return reinterpret_cast<Packet4i>(vec_cmple(a,b)); }
|
||||
+#endif
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pcmp_lt(const Packet4i& a, const Packet4i& b) { return reinterpret_cast<Packet4i>(vec_cmplt(a,b)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pcmp_eq(const Packet4i& a, const Packet4i& b) { return reinterpret_cast<Packet4i>(vec_cmpeq(a,b)); }
|
||||
+#ifdef __VSX__
|
||||
template<> EIGEN_STRONG_INLINE Packet8s pcmp_le(const Packet8s& a, const Packet8s& b) { return reinterpret_cast<Packet8s>(vec_cmple(a,b)); }
|
||||
+#endif
|
||||
template<> EIGEN_STRONG_INLINE Packet8s pcmp_lt(const Packet8s& a, const Packet8s& b) { return reinterpret_cast<Packet8s>(vec_cmplt(a,b)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet8s pcmp_eq(const Packet8s& a, const Packet8s& b) { return reinterpret_cast<Packet8s>(vec_cmpeq(a,b)); }
|
||||
+#ifdef __VSX__
|
||||
template<> EIGEN_STRONG_INLINE Packet8us pcmp_le(const Packet8us& a, const Packet8us& b) { return reinterpret_cast<Packet8us>(vec_cmple(a,b)); }
|
||||
+#endif
|
||||
template<> EIGEN_STRONG_INLINE Packet8us pcmp_lt(const Packet8us& a, const Packet8us& b) { return reinterpret_cast<Packet8us>(vec_cmplt(a,b)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet8us pcmp_eq(const Packet8us& a, const Packet8us& b) { return reinterpret_cast<Packet8us>(vec_cmpeq(a,b)); }
|
||||
+#ifdef __VSX__
|
||||
template<> EIGEN_STRONG_INLINE Packet16c pcmp_le(const Packet16c& a, const Packet16c& b) { return reinterpret_cast<Packet16c>(vec_cmple(a,b)); }
|
||||
+#endif
|
||||
template<> EIGEN_STRONG_INLINE Packet16c pcmp_lt(const Packet16c& a, const Packet16c& b) { return reinterpret_cast<Packet16c>(vec_cmplt(a,b)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet16c pcmp_eq(const Packet16c& a, const Packet16c& b) { return reinterpret_cast<Packet16c>(vec_cmpeq(a,b)); }
|
||||
+#ifdef __VSX__
|
||||
template<> EIGEN_STRONG_INLINE Packet16uc pcmp_le(const Packet16uc& a, const Packet16uc& b) { return reinterpret_cast<Packet16uc>(vec_cmple(a,b)); }
|
||||
+#endif
|
||||
template<> EIGEN_STRONG_INLINE Packet16uc pcmp_lt(const Packet16uc& a, const Packet16uc& b) { return reinterpret_cast<Packet16uc>(vec_cmplt(a,b)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet16uc pcmp_eq(const Packet16uc& a, const Packet16uc& b) { return reinterpret_cast<Packet16uc>(vec_cmpeq(a,b)); }
|
||||
|
||||
@@ -937,6 +951,7 @@
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) { return vec_ceil(a); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) { return vec_floor(a); }
|
||||
+#ifdef __VSX__
|
||||
template<> EIGEN_STRONG_INLINE Packet4f print<Packet4f>(const Packet4f& a)
|
||||
{
|
||||
Packet4f res;
|
||||
@@ -947,6 +962,7 @@
|
||||
|
||||
return res;
|
||||
}
|
||||
+#endif
|
||||
|
||||
template<typename Packet> EIGEN_STRONG_INLINE Packet ploadu_common(const __UNPACK_TYPE__(Packet)* from)
|
||||
{
|
||||
@@ -1341,12 +1357,6 @@
|
||||
BF16_TO_F32_BINARY_OP_WRAPPER(psub<Packet4f>, a, b);
|
||||
}
|
||||
|
||||
-template<> EIGEN_STRONG_INLINE Packet8bf psqrt<Packet8bf> (const Packet8bf& a){
|
||||
- BF16_TO_F32_UNARY_OP_WRAPPER(vec_sqrt, a);
|
||||
-}
|
||||
-template<> EIGEN_STRONG_INLINE Packet8bf prsqrt<Packet8bf> (const Packet8bf& a){
|
||||
- BF16_TO_F32_UNARY_OP_WRAPPER(prsqrt<Packet4f>, a);
|
||||
-}
|
||||
template<> EIGEN_STRONG_INLINE Packet8bf pexp<Packet8bf> (const Packet8bf& a){
|
||||
BF16_TO_F32_UNARY_OP_WRAPPER(pexp_float, a);
|
||||
}
|
||||
@@ -1390,9 +1400,11 @@
|
||||
template<> EIGEN_STRONG_INLINE Packet8bf pround<Packet8bf> (const Packet8bf& a){
|
||||
BF16_TO_F32_UNARY_OP_WRAPPER(pround<Packet4f>, a);
|
||||
}
|
||||
+#ifdef __VSX__
|
||||
template<> EIGEN_STRONG_INLINE Packet8bf print<Packet8bf> (const Packet8bf& a){
|
||||
BF16_TO_F32_UNARY_OP_WRAPPER(print<Packet4f>, a);
|
||||
}
|
||||
+#endif
|
||||
template<> EIGEN_STRONG_INLINE Packet8bf pmadd(const Packet8bf& a, const Packet8bf& b, const Packet8bf& c) {
|
||||
Packet4f a_even = Bf16ToF32Even(a);
|
||||
Packet4f a_odd = Bf16ToF32Odd(a);
|
||||
Loading…
x
Reference in New Issue
Block a user