diff --git a/dev-libs/cutlass/Manifest b/dev-libs/cutlass/Manifest
index 2488122271e3..e8812553bf07 100644
--- a/dev-libs/cutlass/Manifest
+++ b/dev-libs/cutlass/Manifest
@@ -1,3 +1,4 @@
 DIST cutlass-3.8.0.tar.gz 31021072 BLAKE2B 4dd85f7c0d3452c2a194902fcd0afd7de3a3f17f86f477628d5e5f416ac885a86ed1fbbf2a9959a46e60e38a93400a7ec99bad1f980b0a4be36fad0de887ec0b SHA512 a08aac281fb3bdea82c0a044dc643c40e4803d02e55bbea450021cb7a5472aed86e79c5df41cd981976af8403f18cc48d8069045c4e68339430d3a3caeb109ac
 DIST cutlass-3.9.2.tar.gz 31534258 BLAKE2B 04462b3c6983f96b2027821408c4de30bf6b2e18e986ddebaf4f9d5572df354273116603ccc0ac618c61e03b981972e6d7786f354aa4f5e08d185cf7e4ad8e1d SHA512 d45a9e4908b5886259acc1ffd4c8e4c6072801ad45909f365d599510b9989d3313438f2fa5cbee5c1e916e496a0b95bda85f79de3c38502d73e2b9206f868822
 DIST cutlass-4.1.0.tar.gz 33083022 BLAKE2B 0a30c28ab7539481a47b2a667c585eaa763ebafa15463cf50a8c57300e8dccd31d1790d00ae091e0d317fe57bb48955a3309de48cebb2529a850099ea4acc1f7 SHA512 a8c2cdf772ea3b1a35bfc948ca70240477d6e8ee004ae9e487275a7b35e40424b2820396cbc827482ddb75172fcdf56372ea0d4d96ae6f3253369bd315de3ce6
+DIST cutlass-4.2.1.tar.gz 33331894 BLAKE2B b7691615acc0743f0bab6b916aece5bfafad4af40a0f6e7b736821c1689d021945e2fed2b280032324fe5902700f8bab1b4a5251842d709cba9aeb632305abf3 SHA512 9afeda7aafdf7541f1ad678f38d3695877d6f8b1f4bd24458f93b369418cee60b4dec27d89b2135dd39326cccab40187d7365d1066468cef96d3188242feda55
diff --git a/dev-libs/cutlass/cutlass-4.2.1.ebuild b/dev-libs/cutlass/cutlass-4.2.1.ebuild
new file mode 100644
index 000000000000..617ccdfe2fbf
--- /dev/null
+++ b/dev-libs/cutlass/cutlass-4.2.1.ebuild
@@ -0,0 +1,178 @@
+# Copyright 2023-2025 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI=8
+
+PYTHON_COMPAT=( python3_{11..14} )
+
+inherit cuda cmake python-any-r1 flag-o-matic toolchain-funcs
+
+DESCRIPTION="CUDA Templates for Linear Algebra Subroutines"
+HOMEPAGE="https://github.com/NVIDIA/cutlass"
+
+# Live (9999) versions build from git; releases use the tagged tarball.
+if [[ "${PV}" == *9999* ]]; then
+	inherit git-r3
+	EGIT_REPO_URI="https://github.com/NVIDIA/${PN}"
+else
+	SRC_URI="
+		https://github.com/NVIDIA/${PN}/archive/refs/tags/v${PV}.tar.gz -> ${P}.tar.gz
+	"
+	KEYWORDS="~amd64"
+fi
+
+LICENSE="BSD"
+SLOT="0"
+
+# Each entry has the form <flag>:<suffix>; IUSE uses only the part before
+# the colon, prefixed with cpu_flags_x86_.
+X86_CPU_FEATURES=(
+	f16c:f16c
+)
+CPU_FEATURES=( "${X86_CPU_FEATURES[@]/#/cpu_flags_x86_}" )
+
+IUSE="clang-cuda cublas cudnn doc dot examples +headers-only jumbo-build performance profiler test tools ${CPU_FEATURES[*]%:*}"
+
+# headers-only cannot be combined with examples, profiler or test;
+# test additionally requires tools.
+REQUIRED_USE="
+	headers-only? (
+		!examples
+		!profiler
+		!test
+	)
+	test? (
+		tools
+	)
+"
+
+RESTRICT="!test? ( test )"
+
+RDEPEND="
+	dev-util/nvidia-cuda-toolkit:=
+"
+DEPEND="${RDEPEND}
+	test? (
+		dev-cpp/gtest
+		cudnn? (
+			dev-libs/cudnn:=
+		)
+	)
+"
+# python-any-r1 is meant for build-time-only Python dependencies, so
+# PYTHON_DEPS goes into BDEPEND rather than DEPEND.
+BDEPEND="
+	test? (
+		${PYTHON_DEPS}
+	)
+	tools? (
+		${PYTHON_DEPS}
+	)
+"
+
+# Python is only set up when the test or tools USE flag is enabled.
+pkg_setup() {
+	if use test || use tools; then
+		python-any-r1_pkg_setup
+	fi
+}
+
+# Select the host compiler, assemble the CMake options from the USE flags
+# and run the CMake configure step.
+src_configure() {
+	# we can use clang as default
+	if use clang-cuda && ! tc-is-clang; then
+		export CC="${CHOST}-clang"
+		export CXX="${CHOST}-clang++"
+	else
+		tc-export CXX CC
+	fi
+
+	cuda_add_sandbox
+	addpredict "/dev/char/"
+
+	local mycmakeargs=(
+		-DCMAKE_POLICY_DEFAULT_CMP0156="OLD" # cutlass_add_library
+
+		-DCMAKE_DISABLE_FIND_PACKAGE_Doxygen="$(usex !doc)"
+
+		-DCUTLASS_REVISION="${PVR}"
+		-DCUTLASS_ENABLE_CUBLAS="$(usex cublas)"
+		-DCUTLASS_ENABLE_CUDNN="$(usex cudnn)"
+		-DCUTLASS_ENABLE_EXAMPLES="$(usex examples)"
+		-DCUTLASS_ENABLE_F16C="$(usex cpu_flags_x86_f16c)"
+		-DCUTLASS_ENABLE_GTEST_UNIT_TESTS="$(usex test)"
+		-DCUTLASS_ENABLE_HEADERS_ONLY="$(usex headers-only)"
+		-DCUTLASS_ENABLE_LIBRARY="$(usex !headers-only)"
+		-DCUTLASS_ENABLE_PERFORMANCE="$(usex performance)"
+		-DCUTLASS_ENABLE_PROFILER="$(usex profiler)"
+		# "yes" only when both test and profiler are enabled
+		-DCUTLASS_ENABLE_PROFILER_UNIT_TESTS="$(usex test "$(usex profiler)")"
+		-DCUTLASS_ENABLE_TESTS="$(usex test)"
+		-DCUTLASS_ENABLE_TOOLS="$(usex tools)"
+		-DCUTLASS_INSTALL_TESTS="no"
+		-DCUTLASS_NVCC_ARCHS="${CUDAARCHS:-all-major}"
+		-DCUTLASS_UNITY_BUILD_ENABLED="$(usex jumbo-build)"
+		-DCUTLASS_USE_SYSTEM_GOOGLETEST="yes"
+		-DIMPLICIT_CMAKE_CXX_STANDARD="yes"
+	)
+
+	# clang-cuda needs to filter mfpmath
+	if use clang-cuda; then
+		filter-mfpmath sse
+		filter-mfpmath i386
+
+		mycmakeargs+=(
+			-DCMAKE_CUDA_HOST_COMPILER="${CHOST}-clang++"
+		)
+	else
+		mycmakeargs+=(
+			-DCMAKE_CUDA_HOST_COMPILER="$(cuda_gccdir)"
+		)
+	fi
+
+	if use cudnn; then
+		mycmakeargs+=(
+			-DCUDNN_INCLUDE_DIR="${CUDNN_PATH:-${ESYSROOT}/opt/cuda}/linux/include"
+			-DCUDNN_LIBRARY="${CUDNN_PATH:-${ESYSROOT}/opt/cuda}/$(get_libdir)/libcudnn.so"
+		)
+	fi
+
+	if use doc; then
+		mycmakeargs+=(
+			-DCUTLASS_ENABLE_DOXYGEN_DOT="$(usex dot)"
+		)
+	fi
+
+	if use test; then
+		mycmakeargs+=(
+			-DCUTLASS_TEST_LEVEL="0"
+		)
+
+		append-cxxflags -DNDEBUG
+	fi
+
+	cmake_src_configure
+}
+
+# Run cmake_src_test with -j1 and CMAKE_SKIP_TESTS set, then build test_unit.
+src_test() {
+	cuda_add_sandbox -w
+
+	local myctestargs=(
+	)
+
+	local CMAKE_SKIP_TESTS=(
+		"ctest_examples_41_fmha_backward_python$"
+	)
+
+	cmake_src_test -j1
+	cmake_build test_unit "${myctestargs[@]}" -j1
+}
+
+# Install via CMake, then remove the usr/test tree from the image.
+src_install() {
+	cmake_src_install
+
+	rm -r "${ED}/usr/test" || die
+}