From a75d5082e1a80f4c7fdaf901368b2e050850cbd3 Mon Sep 17 00:00:00 2001
From: Alfredo Tupone <tupone@gentoo.org>
Date: Thu, 11 Dec 2025 07:58:56 +0100
Subject: [PATCH] sci-ml/caffe2: add upstream patch for CCCL 3.1 compatibility

Signed-off-by: Alfredo Tupone <tupone@gentoo.org>
---
 sci-ml/caffe2/caffe2-2.9.1-r1.ebuild          |  1 +
 sci-ml/caffe2/files/caffe2-2.9.1-CCCL31.patch | 34 +++++++++++++++++++
 2 files changed, 35 insertions(+)
 create mode 100644 sci-ml/caffe2/files/caffe2-2.9.1-CCCL31.patch

diff --git a/sci-ml/caffe2/caffe2-2.9.1-r1.ebuild b/sci-ml/caffe2/caffe2-2.9.1-r1.ebuild
index 129aa0721e75..92cced9c2575 100644
--- a/sci-ml/caffe2/caffe2-2.9.1-r1.ebuild
+++ b/sci-ml/caffe2/caffe2-2.9.1-r1.ebuild
@@ -163,6 +163,7 @@ PATCHES=(
 	"${FILESDIR}"/${PN}-2.8.0-rocm-minus-flash.patch
 	"${FILESDIR}"/${PN}-2.9.0-cmake.patch
 	"${FILESDIR}"/${PN}-2.9.0-rocm-distributed-link.patch
+	"${FILESDIR}"/${P}-CCCL31.patch
 )
 
 src_prepare() {
diff --git a/sci-ml/caffe2/files/caffe2-2.9.1-CCCL31.patch b/sci-ml/caffe2/files/caffe2-2.9.1-CCCL31.patch
new file mode 100644
index 000000000000..08c1925b7a13
--- /dev/null
+++ b/sci-ml/caffe2/files/caffe2-2.9.1-CCCL31.patch
@@ -0,0 +1,34 @@
+From a20afb61007a94f5c28294e9ae20043657152ef6 Mon Sep 17 00:00:00 2001
+From: Bernhard Manfred Gruber <bernhardmgruber@gmail.com>
+Date: Wed, 15 Oct 2025 01:40:49 +0000
+Subject: [PATCH] Allow at::native::offset_t to be offset using `operator+=`
+ (#164570)
+
+This will be required by CCCL 3.1.
+Pull Request resolved: https://github.com/pytorch/pytorch/pull/164570
+Approved by: https://github.com/Skylion007, https://github.com/eqy
+---
+ aten/src/ATen/native/cuda/SortStable.cu | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/aten/src/ATen/native/cuda/SortStable.cu b/aten/src/ATen/native/cuda/SortStable.cu
+index 4d956616371de..8117eeeec558e 100644
+--- a/aten/src/ATen/native/cuda/SortStable.cu
++++ b/aten/src/ATen/native/cuda/SortStable.cu
+@@ -21,9 +21,15 @@ namespace {
+ struct offset_t {
+   int stride;
+   int begin;
+-  __device__ int operator[](int i) {
++  __device__ int operator[](int i) const {
+     return stride * (begin + i);
+   }
++#if CCCL_VERSION >= 3001000
++  __device__ offset_t& operator+=(int i) {
++    begin += i;
++    return *this;
++  }
++#endif
+ };
+ // Segmented sort by full sort algorithm:.
+ // Say we are sorting a (2, 3) tensor. We have in flattened form: