BioDynaMo  v1.05.120-25dc9790
mechanical_forces_op_opencl.cc
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------
2 //
3 // Copyright (C) 2021 CERN & University of Surrey for the benefit of the
4 // BioDynaMo collaboration. All Rights Reserved.
5 //
6 // Licensed under the Apache License, Version 2.0 (the "License");
7 // you may not use this file except in compliance with the License.
8 //
9 // See the LICENSE file distributed with this work for details.
10 // See the NOTICE file distributed with this work for additional information
11 // regarding copyright ownership.
12 //
13 // -----------------------------------------------------------------------------
14 
15 #ifdef USE_OPENCL
16 #include <vector>
17 
18 #ifdef __APPLE__
19 #define CL_HPP_ENABLE_EXCEPTIONS
20 #define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY
21 #define CL_HPP_MINIMUM_OPENCL_VERSION 120
22 #define CL_HPP_TARGET_OPENCL_VERSION 120
23 #include "cl2.hpp"
24 #else
25 #define CL_HPP_TARGET_OPENCL_VERSION 210
26 #define CL_HPP_ENABLE_EXCEPTIONS
27 #include <CL/cl2.hpp>
28 #endif
29 
30 #ifndef BDM_REALT
31 using cl_real_t = cl_double;
32 #else
33 using cl_real_t = BDM_CL_REALT;
34 #endif
35 
36 #include "core/agent/cell.h"
39 #include "core/gpu/opencl_state.h"
44 #include "core/real_t.h"
45 #include "core/shape.h"
46 #include "core/util/thread_info.h"
47 #include "core/util/type.h"
48 
49 namespace bdm {
50 
52  bool* answer) {
53  if (agent->GetShape() != Shape::kSphere) {
54  *answer = true;
55  }
56 }
57 
59  auto* sim = Simulation::GetActive();
60  auto* grid = dynamic_cast<UniformGridEnvironment*>(sim->GetEnvironment());
61  auto* param = sim->GetParam();
62  auto* rm = sim->GetResourceManager();
63  auto* ocl_state = sim->GetOpenCLState();
64 
65  if (!grid) {
66  Log::Fatal(
67  "MechanicalForcesOpOpenCL::operator()",
68  "MechanicalForcesOpOpenCL only works with UniformGridEnvironement.");
69  }
70 
71  // Check the number of NUMA domains on the system. Currently only 1 is
72  // supported for GPU execution.
73  if (ThreadInfo::GetInstance()->GetNumaNodes() > 1) {
74  Log::Fatal("MechanicalForcesOpOpenCL",
75  "\nThe GPU execution only supports systems with 1 NUMA domain.");
76  return;
77  }
78 
79  uint32_t num_objects = rm->GetNumAgents();
80 
81  auto context = ocl_state->GetOpenCLContext();
82  auto queue = ocl_state->GetOpenCLCommandQueue();
83  auto programs = ocl_state->GetOpenCLProgramList();
84 
85  // Cannot use Real3 here, because the `data()` function returns a const
86  // pointer to the underlying array, whereas the CUDA kernel will cast it to
87  // a void pointer. The conversion of `const real_t *` to `void *` is
88  // illegal.
89  std::vector<std::array<cl_real_t, 3>> cell_movements(num_objects);
90  std::vector<std::array<cl_real_t, 3>> cell_positions(num_objects);
91  std::vector<cl_real_t> cell_diameters(num_objects);
92  std::vector<cl_real_t> cell_adherence(num_objects);
93  std::vector<std::array<cl_real_t, 3>> cell_tractor_force(num_objects);
94  std::vector<cl_uint> cell_boxid(num_objects);
95  std::vector<cl_real_t> mass(num_objects);
96  std::vector<cl_uint> starts;
97  std::vector<cl_ushort> lengths;
98  std::vector<cl_uint> successors(num_objects);
99  std::array<cl_uint, 3> num_boxes_axis;
100  cl_real_t squared_radius =
101  grid->GetLargestAgentSize() * grid->GetLargestAgentSize();
102 
103  bool is_non_spherical_object = false;
104 
105  rm->ForEachAgent([&](Agent* agent, AgentHandle ah) {
106  // Check if there are any non-spherical objects in our simulation, because
107  // GPU accelerations currently supports only sphere-sphere interactions
108  IsNonSphericalObjectPresent(agent, &is_non_spherical_object);
109  if (is_non_spherical_object) {
110  Log::Fatal("MechanicalForcesOpOpenCL",
111  "\nWe detected a non-spherical object during the GPU "
112  "execution. This is currently not supported.");
113  return;
114  }
115  auto* cell = bdm_static_cast<Cell*>(agent);
116  auto idx = ah.GetElementIdx();
117  mass[idx] = cell->GetMass();
118  cell_diameters[idx] = cell->GetDiameter();
119  cell_adherence[idx] = cell->GetAdherence();
120  cell_boxid[idx] = cell->GetBoxIdx();
121  cell_tractor_force[idx][0] = cell->GetTractorForce()[0];
122  cell_tractor_force[idx][1] = cell->GetTractorForce()[1];
123  cell_tractor_force[idx][2] = cell->GetTractorForce()[2];
124  cell_positions[idx][0] = cell->GetPosition()[0];
125  cell_positions[idx][1] = cell->GetPosition()[1];
126  cell_positions[idx][2] = cell->GetPosition()[2];
127  });
128 
129  uint16_t numa_node = 0; // GPU code only supports 1 NUMA domain currently
130  for (size_t i = 0; i < grid->successors_.size(numa_node); i++) {
131  auto sh = AgentHandle(numa_node, i);
132  successors[i] = grid->successors_[sh].GetElementIdx();
133  }
134 
135  starts.resize(grid->boxes_.size());
136  lengths.resize(grid->boxes_.size());
137  size_t i = 0;
138  for (auto& box : grid->boxes_) {
139  starts[i] = box.start_.GetElementIdx();
140  lengths[i] = box.length_;
141  i++;
142  }
143  grid->GetNumBoxesAxis(num_boxes_axis.data());
144 
145  // Allocate GPU buffers
146  cl::Buffer positions_arg(*context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
147  num_objects * 3 * sizeof(cl_real_t),
148  cell_positions.data()->data());
149  cl::Buffer diameters_arg(*context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
150  num_objects * sizeof(cl_real_t),
151  cell_diameters.data());
152  cl::Buffer tractor_force_arg(*context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
153  num_objects * 3 * sizeof(cl_real_t),
154  cell_tractor_force.data()->data());
155  cl::Buffer adherence_arg(*context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
156  num_objects * sizeof(cl_real_t),
157  cell_adherence.data());
158  cl::Buffer box_id_arg(*context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
159  num_objects * sizeof(cl_uint), cell_boxid.data());
160  cl::Buffer mass_arg(*context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
161  num_objects * sizeof(cl_real_t), mass.data());
162  cl::Buffer cell_movements_arg(
163  *context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
164  num_objects * 3 * sizeof(cl_real_t), cell_movements.data()->data());
165  cl::Buffer starts_arg(*context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
166  starts.size() * sizeof(cl_uint), starts.data());
167  cl::Buffer lengths_arg(*context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
168  lengths.size() * sizeof(cl_short), lengths.data());
169  cl::Buffer successors_arg(*context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
170  successors.size() * sizeof(cl_uint),
171  successors.data());
172  cl::Buffer nba_arg(*context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
173  3 * sizeof(cl_uint), num_boxes_axis.data());
174 
175  // Create the kernel object from our program
176  // TODO(ahmad): generalize the program selection, in case we have more than
177  // one. We can maintain an unordered map of programs maybe
178  cl::Kernel collide((*programs)[0], "collide");
179 
180  // Set kernel parameters
181  collide.setArg(0, positions_arg);
182  collide.setArg(1, diameters_arg);
183  collide.setArg(2, tractor_force_arg);
184  collide.setArg(3, adherence_arg);
185  collide.setArg(4, box_id_arg);
186  collide.setArg(5, mass_arg);
187  collide.setArg(6, param->simulation_time_step);
188  collide.setArg(7, param->simulation_max_displacement);
189  collide.setArg(8, squared_radius);
190 
191  collide.setArg(9, static_cast<cl_int>(num_objects));
192  collide.setArg(10, starts_arg);
193  collide.setArg(11, lengths_arg);
194  collide.setArg(12, successors_arg);
195  collide.setArg(13, nba_arg);
196  collide.setArg(14, cell_movements_arg);
197 
198  // The amount of threads for each work group (similar to CUDA thread block)
199  int block_size = 256;
200 
201  try {
202  // The global size determines the total number of threads that will be
203  // spawned on the GPU, in groups of local_size
204  cl::NDRange global_size =
205  cl::NDRange(num_objects + (block_size - (num_objects % block_size)));
206  cl::NDRange local_size = cl::NDRange(block_size);
207  queue->enqueueNDRangeKernel(collide, cl::NullRange, global_size,
208  local_size);
209  } catch (const cl::Error& err) {
210  Log::Error("MechanicalForcesOpOpenCL", err.what(), "(", err.err(),
211  ") = ", ocl_state->GetErrorString(err.err()));
212  throw;
213  }
214 
215  try {
216  queue->enqueueReadBuffer(cell_movements_arg, CL_TRUE, 0,
217  num_objects * 3 * sizeof(cl_real_t),
218  cell_movements.data()->data());
219  } catch (const cl::Error& err) {
220  Log::Error("MechanicalForcesOpOpenCL", err.what(), "(", err.err(),
221  ") = ", ocl_state->GetErrorString(err.err()));
222  throw;
223  }
224 
225  // set new positions after all updates have been calculated
226  // otherwise some cells would see neighbors with already updated positions
227  // which would lead to inconsistencies
228  rm->ForEachAgent([&](Agent* agent, AgentHandle ah) {
229  auto* cell = dynamic_cast<Cell*>(agent);
230  auto idx = ah.GetElementIdx();
231  Real3 new_pos;
232  new_pos[0] = cell_movements[idx][0];
233  new_pos[1] = cell_movements[idx][1];
234  new_pos[2] = cell_movements[idx][2];
235  cell->UpdatePosition(new_pos);
236  if (param->bound_space) {
237  ApplyBoundingBox(agent, param->bound_space, param->min_bound,
238  param->max_bound);
239  }
240  });
241 }
242 
243 } // namespace bdm
244 
245 #endif // USE_OPENCL
shape.h
opencl_state.h
bdm::MechanicalForcesOpOpenCL::IsNonSphericalObjectPresent
void IsNonSphericalObjectPresent(const Agent *agent, bool *answer)
bdm::ThreadInfo::GetInstance
static ThreadInfo * GetInstance()
Definition: thread_info.cc:21
bdm
Definition: agent.cc:39
operation.h
thread_info.h
type.h
operation_registry.h
bdm::ApplyBoundingBox
void ApplyBoundingBox(Agent *agent, Param::BoundSpaceMode mode, real_t lb, real_t rb)
Definition: bound_space_op.h:26
uniform_grid_environment.h
bdm::MechanicalForcesOpOpenCL::operator()
void operator()() override
bdm::kSphere
@ kSphere
Definition: shape.h:20
mechanical_forces_op_opencl.h
bdm::Log::Fatal
static void Fatal(const std::string &location, const Args &... parts)
Prints fatal error message.
Definition: log.h:115
bound_space_op.h
bdm::Log::Error
static void Error(const std::string &location, const Args &... parts)
Prints error message.
Definition: log.h:79
bdm::Real3
MathArray< real_t, 3 > Real3
Aliases for a size 3 MathArray.
Definition: math_array.h:434
real_t.h
environment.h
bdm::Simulation::GetActive
static Simulation * GetActive()
This function returns the currently active Simulation simulation.
Definition: simulation.cc:68
cell.h