GpuBuffer.hpp
/*
  Copyright 2024 SINTEF AS

  This file is part of the Open Porous Media project (OPM).

  OPM is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  OPM is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef OPM_GPUBUFFER_HEADER_HPP
#define OPM_GPUBUFFER_HEADER_HPP
#include <algorithm>
#include <cuda_runtime.h>
#include <dune/common/fvector.hh>
#include <dune/istl/bvector.hh>
#include <exception>
#include <fmt/core.h>
#include <memory>
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/gpuistl/GpuView.hpp>
#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <vector>
33
34namespace Opm::gpuistl
35{
36
56template <typename T>
57class GpuBuffer
58{
59public:
60 using field_type = T;
61 using size_type = size_t;
62 using value_type = T;
63
    /**
     * @brief Default constructor; creates an empty buffer with no device allocation.
     */
    GpuBuffer() = default;

    /**
     * @brief Copy constructor; allocates a new device buffer and copies the
     * contents of \p other device-to-device.
     */
    GpuBuffer(const GpuBuffer<T>& other)
        : GpuBuffer(other.m_numberOfElements)
    {
        assertSameSize(other);
        if (m_numberOfElements == 0) {
            return;
        }
        OPM_GPU_SAFE_CALL(cudaMemcpy(m_dataOnDevice,
                                     other.m_dataOnDevice,
                                     m_numberOfElements * sizeof(T),
                                     cudaMemcpyDeviceToDevice));
    }

    /**
     * @brief Creates a buffer with the same size as \p data and copies the
     * host data to the device.
     */
    explicit GpuBuffer(const std::vector<T>& data)
        : GpuBuffer(data.size())
    {
        copyFromHost(data);
    }

    /**
     * @brief Allocates device memory for \p numberOfElements elements of T
     * without initializing it.
     */
    explicit GpuBuffer(const size_t numberOfElements)
        : m_numberOfElements(numberOfElements)
    {
        OPM_GPU_SAFE_CALL(cudaMalloc(&m_dataOnDevice, sizeof(T) * m_numberOfElements));
    }

    /**
     * @brief Allocates device memory for \p numberOfElements elements and
     * copies them from the host pointer \p dataOnHost.
     */
    GpuBuffer(const T* dataOnHost, const size_t numberOfElements)
        : GpuBuffer(numberOfElements)
    {
        OPM_GPU_SAFE_CALL(cudaMemcpy(
            m_dataOnDevice, dataOnHost, m_numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
    }

    /**
     * @brief Destructor; frees the device memory. CUDA errors are reported as
     * warnings rather than thrown.
     */
    virtual ~GpuBuffer()
    {
        OPM_GPU_WARN_IF_ERROR(cudaFree(m_dataOnDevice));
    }

    /**
     * @brief Returns the raw device pointer.
     */
    T* data()
    {
        return m_dataOnDevice;
    }

    /**
     * @brief Returns the raw device pointer (const overload).
     */
    const T* data() const
    {
        return m_dataOnDevice;
    }

    /**
     * @brief Copies the contents of a Dune::BlockVector from the host to this
     * device buffer. Throws if the sizes do not match.
     */
    template <int BlockDimension>
    void copyFromHost(const Dune::BlockVector<Dune::FieldVector<T, BlockDimension>>& bvector)
    {
        // TODO: [perf] bvector.size() can be replaced by bvector.N() * BlockDimension
        if (m_numberOfElements != bvector.size()) {
            OPM_THROW(std::runtime_error,
                      fmt::format("Given incompatible vector size. GpuBuffer has size {},\n"
                                  "however, the BlockVector has N() = {}, and size() = {}.",
                                  m_numberOfElements,
                                  bvector.N(),
                                  bvector.size()));
        }
        const auto dataPointer = static_cast<const T*>(&(bvector[0][0]));
        copyFromHost(dataPointer, m_numberOfElements);
    }

    /**
     * @brief Copies the contents of this device buffer into a Dune::BlockVector
     * on the host. Throws if the sizes do not match.
     */
    template <int BlockDimension>
    void copyToHost(Dune::BlockVector<Dune::FieldVector<T, BlockDimension>>& bvector) const
    {
        // TODO: [perf] bvector.size() can be replaced by bvector.N() * BlockDimension
        if (m_numberOfElements != bvector.size()) {
            OPM_THROW(std::runtime_error,
                      fmt::format("Given incompatible vector size. GpuBuffer has size {},\n"
                                  "however, the BlockVector has N() = {}, and size() = {}.",
                                  m_numberOfElements,
                                  bvector.N(),
                                  bvector.size()));
        }
        const auto dataPointer = static_cast<T*>(&(bvector[0][0]));
        copyToHost(dataPointer, m_numberOfElements);
    }

    /**
     * @brief Copies \p numberOfElements elements from the host pointer
     * \p dataPointer to the device. Throws if more elements are requested
     * than the buffer holds.
     */
    void copyFromHost(const T* dataPointer, size_t numberOfElements)
    {
        if (numberOfElements > size()) {
            OPM_THROW(std::runtime_error,
                      fmt::format("Requesting to copy too many elements. Buffer has {} elements, while {} were requested.",
                                  size(),
                                  numberOfElements));
        }
        OPM_GPU_SAFE_CALL(cudaMemcpy(data(), dataPointer, numberOfElements * sizeof(T), cudaMemcpyHostToDevice));
    }

    /**
     * @brief Copies \p numberOfElements elements from the device to the host
     * pointer \p dataPointer. The number of elements must equal size().
     */
    void copyToHost(T* dataPointer, size_t numberOfElements) const
    {
        assertSameSize(numberOfElements);
        OPM_GPU_SAFE_CALL(cudaMemcpy(dataPointer, data(), numberOfElements * sizeof(T), cudaMemcpyDeviceToHost));
    }

    /**
     * @brief Copies the contents of \p data from the host to the device.
     * The vector must have exactly size() elements.
     */
    void copyFromHost(const std::vector<T>& data)
    {
        assertSameSize(data.size());

        if (data.empty()) {
            return;
        }

        if constexpr (std::is_same_v<T, bool>) {
            // std::vector<bool> is bit-packed and has no .data(); unpack it
            // into a contiguous temporary array first.
            auto tmp = std::make_unique<bool[]>(data.size());
            for (size_t i = 0; i < data.size(); ++i) {
                tmp[i] = static_cast<bool>(data[i]);
            }
            copyFromHost(tmp.get(), data.size());
        } else {
            copyFromHost(data.data(), data.size());
        }
    }

    /**
     * @brief Copies the contents of the device buffer into \p data on the host.
     * The vector must have exactly size() elements.
     */
    void copyToHost(std::vector<T>& data) const
    {
        assertSameSize(data.size());

        if (data.empty()) {
            return;
        }

        if constexpr (std::is_same_v<T, bool>) {
            // Copy into a contiguous temporary first, then pack the values
            // back into the bit-packed std::vector<bool>.
            auto tmp = std::make_unique<bool[]>(data.size());
            copyToHost(tmp.get(), data.size());
            for (size_t i = 0; i < data.size(); ++i) {
                data[i] = static_cast<bool>(tmp[i]);
            }
        } else {
            copyToHost(data.data(), data.size());
        }
    }

    /**
     * @brief Returns the number of elements in the buffer.
     */
    size_type size() const
    {
        return m_numberOfElements;
    }

    /**
     * @brief Resizes the buffer to \p newSize elements, preserving the first
     * min(size(), newSize) elements. A size of zero is not allowed.
     */
    void resize(size_t newSize)
    {
        if (newSize < 1) {
            OPM_THROW(std::invalid_argument, "Setting a GpuBuffer size to a non-positive number is not allowed");
        }

        if (m_numberOfElements == 0) {
            // We have no data, so we can just allocate new memory
            OPM_GPU_SAFE_CALL(cudaMalloc(&m_dataOnDevice, sizeof(T) * newSize));
        } else {
            // Allocate memory for the new buffer
            T* tmpBuffer = nullptr;
            OPM_GPU_SAFE_CALL(cudaMalloc(&tmpBuffer, sizeof(T) * newSize));

            // Move the data from the old to the new buffer with truncation
            const size_t sizeOfMove = std::min(m_numberOfElements, newSize);
            OPM_GPU_SAFE_CALL(cudaMemcpy(tmpBuffer,
                                         m_dataOnDevice,
                                         sizeOfMove * sizeof(T),
                                         cudaMemcpyDeviceToDevice));

            // Free the old buffer
            OPM_GPU_SAFE_CALL(cudaFree(m_dataOnDevice));

            // Swap in the new buffer
            m_dataOnDevice = tmpBuffer;
        }

        // Update the size
        m_numberOfElements = newSize;
    }

    /**
     * @brief Copies the device buffer into a new std::vector on the host.
     */
    std::vector<T> asStdVector() const
    {
        std::vector<T> temporary(m_numberOfElements);
        copyToHost(temporary);
        return temporary;
    }

private:
    T* m_dataOnDevice = nullptr;
    size_t m_numberOfElements = 0;

    void assertSameSize(const GpuBuffer<T>& other) const
    {
        assertSameSize(other.m_numberOfElements);
    }

    void assertSameSize(size_t size) const
    {
        if (size != m_numberOfElements) {
            OPM_THROW(std::invalid_argument,
                      fmt::format("Given buffer has {} elements, while we have {}.", size, m_numberOfElements));
        }
    }

    void assertHasElements() const
    {
        if (m_numberOfElements == 0) {
            OPM_THROW(std::invalid_argument, "We have 0 elements");
        }
    }
};

/**
 * @brief Creates a non-owning GpuView over the buffer's device memory.
 */
template <class T>
GpuView<T> make_view(GpuBuffer<T>& buf) {
    return GpuView<T>(buf.data(), buf.size());
}

/**
 * @brief Creates a non-owning GpuView over the buffer's device memory (const overload).
 */
template <class T>
GpuView<const T> make_view(const GpuBuffer<T>& buf) {
    return GpuView<const T>(buf.data(), buf.size());
}

} // namespace Opm::gpuistl
#endif
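
Below is a minimal usage sketch (not part of the header above). It shows a host
std::vector being uploaded to the device, copied back, and the buffer being
resized; the function and variable names are illustrative only.

#include <cassert>
#include <vector>

void gpuBufferRoundTrip()
{
    using namespace Opm::gpuistl;

    // Upload three doubles to the device.
    const std::vector<double> hostData{1.0, 2.0, 3.0};
    GpuBuffer<double> buffer(hostData);

    // Copy the data back; the host vector must have exactly buffer.size() elements.
    std::vector<double> roundTrip(buffer.size());
    buffer.copyToHost(roundTrip);
    assert(roundTrip[1] == 2.0);

    // Grow the buffer; the first three elements are preserved, the new tail is uninitialized.
    buffer.resize(5);
    const auto all = buffer.asStdVector();
    assert(all.size() == 5);
}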