19 #define CL_HPP_ENABLE_EXCEPTIONS
20 #define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY
21 #define CL_HPP_MINIMUM_OPENCL_VERSION 120
22 #define CL_HPP_TARGET_OPENCL_VERSION 120
25 #define CL_HPP_TARGET_OPENCL_VERSION 210
26 #define CL_HPP_ENABLE_EXCEPTIONS
31 using cl_real_t = cl_double;
33 using cl_real_t = BDM_CL_REALT;
60 auto* grid =
dynamic_cast<UniformGridEnvironment*
>(sim->GetEnvironment());
61 auto* param = sim->GetParam();
62 auto* rm = sim->GetResourceManager();
63 auto* ocl_state = sim->GetOpenCLState();
67 "MechanicalForcesOpOpenCL::operator()",
68 "MechanicalForcesOpOpenCL only works with UniformGridEnvironement.");
75 "\nThe GPU execution only supports systems with 1 NUMA domain.");
79 uint32_t num_objects = rm->GetNumAgents();
81 auto context = ocl_state->GetOpenCLContext();
82 auto queue = ocl_state->GetOpenCLCommandQueue();
83 auto programs = ocl_state->GetOpenCLProgramList();
89 std::vector<std::array<cl_real_t, 3>> cell_movements(num_objects);
90 std::vector<std::array<cl_real_t, 3>> cell_positions(num_objects);
91 std::vector<cl_real_t> cell_diameters(num_objects);
92 std::vector<cl_real_t> cell_adherence(num_objects);
93 std::vector<std::array<cl_real_t, 3>> cell_tractor_force(num_objects);
94 std::vector<cl_uint> cell_boxid(num_objects);
95 std::vector<cl_real_t> mass(num_objects);
96 std::vector<cl_uint> starts;
97 std::vector<cl_ushort> lengths;
98 std::vector<cl_uint> successors(num_objects);
99 std::array<cl_uint, 3> num_boxes_axis;
100 cl_real_t squared_radius =
101 grid->GetLargestAgentSize() * grid->GetLargestAgentSize();
103 bool is_non_spherical_object =
false;
105 rm->ForEachAgent([&](Agent* agent, AgentHandle ah) {
109 if (is_non_spherical_object) {
111 "\nWe detected a non-spherical object during the GPU "
112 "execution. This is currently not supported.");
115 auto* cell = bdm_static_cast<Cell*>(agent);
116 auto idx = ah.GetElementIdx();
117 mass[idx] = cell->GetMass();
118 cell_diameters[idx] = cell->GetDiameter();
119 cell_adherence[idx] = cell->GetAdherence();
120 cell_boxid[idx] = cell->GetBoxIdx();
121 cell_tractor_force[idx][0] = cell->GetTractorForce()[0];
122 cell_tractor_force[idx][1] = cell->GetTractorForce()[1];
123 cell_tractor_force[idx][2] = cell->GetTractorForce()[2];
124 cell_positions[idx][0] = cell->GetPosition()[0];
125 cell_positions[idx][1] = cell->GetPosition()[1];
126 cell_positions[idx][2] = cell->GetPosition()[2];
129 uint16_t numa_node = 0;
130 for (
size_t i = 0; i < grid->successors_.size(numa_node); i++) {
131 auto sh = AgentHandle(numa_node, i);
132 successors[i] = grid->successors_[sh].GetElementIdx();
135 starts.resize(grid->boxes_.size());
136 lengths.resize(grid->boxes_.size());
138 for (
auto& box : grid->boxes_) {
139 starts[i] = box.start_.GetElementIdx();
140 lengths[i] = box.length_;
143 grid->GetNumBoxesAxis(num_boxes_axis.data());
146 cl::Buffer positions_arg(*context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
147 num_objects * 3 *
sizeof(cl_real_t),
148 cell_positions.data()->data());
149 cl::Buffer diameters_arg(*context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
150 num_objects *
sizeof(cl_real_t),
151 cell_diameters.data());
152 cl::Buffer tractor_force_arg(*context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
153 num_objects * 3 *
sizeof(cl_real_t),
154 cell_tractor_force.data()->data());
155 cl::Buffer adherence_arg(*context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
156 num_objects *
sizeof(cl_real_t),
157 cell_adherence.data());
158 cl::Buffer box_id_arg(*context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
159 num_objects *
sizeof(cl_uint), cell_boxid.data());
160 cl::Buffer mass_arg(*context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
161 num_objects *
sizeof(cl_real_t), mass.data());
162 cl::Buffer cell_movements_arg(
163 *context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
164 num_objects * 3 *
sizeof(cl_real_t), cell_movements.data()->data());
165 cl::Buffer starts_arg(*context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
166 starts.size() *
sizeof(cl_uint), starts.data());
167 cl::Buffer lengths_arg(*context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
168 lengths.size() *
sizeof(cl_short), lengths.data());
169 cl::Buffer successors_arg(*context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
170 successors.size() *
sizeof(cl_uint),
172 cl::Buffer nba_arg(*context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
173 3 *
sizeof(cl_uint), num_boxes_axis.data());
178 cl::Kernel collide((*programs)[0],
"collide");
181 collide.setArg(0, positions_arg);
182 collide.setArg(1, diameters_arg);
183 collide.setArg(2, tractor_force_arg);
184 collide.setArg(3, adherence_arg);
185 collide.setArg(4, box_id_arg);
186 collide.setArg(5, mass_arg);
187 collide.setArg(6, param->simulation_time_step);
188 collide.setArg(7, param->simulation_max_displacement);
189 collide.setArg(8, squared_radius);
191 collide.setArg(9,
static_cast<cl_int
>(num_objects));
192 collide.setArg(10, starts_arg);
193 collide.setArg(11, lengths_arg);
194 collide.setArg(12, successors_arg);
195 collide.setArg(13, nba_arg);
196 collide.setArg(14, cell_movements_arg);
199 int block_size = 256;
204 cl::NDRange global_size =
205 cl::NDRange(num_objects + (block_size - (num_objects % block_size)));
206 cl::NDRange local_size = cl::NDRange(block_size);
207 queue->enqueueNDRangeKernel(collide, cl::NullRange, global_size,
209 }
catch (
const cl::Error& err) {
210 Log::Error(
"MechanicalForcesOpOpenCL", err.what(),
"(", err.err(),
211 ") = ", ocl_state->GetErrorString(err.err()));
216 queue->enqueueReadBuffer(cell_movements_arg, CL_TRUE, 0,
217 num_objects * 3 *
sizeof(cl_real_t),
218 cell_movements.data()->data());
219 }
catch (
const cl::Error& err) {
220 Log::Error(
"MechanicalForcesOpOpenCL", err.what(),
"(", err.err(),
221 ") = ", ocl_state->GetErrorString(err.err()));
228 rm->ForEachAgent([&](Agent* agent, AgentHandle ah) {
229 auto* cell =
dynamic_cast<Cell*
>(agent);
230 auto idx = ah.GetElementIdx();
232 new_pos[0] = cell_movements[idx][0];
233 new_pos[1] = cell_movements[idx][1];
234 new_pos[2] = cell_movements[idx][2];
235 cell->UpdatePosition(new_pos);
236 if (param->bound_space) {