1 #include <../../nrnconf.h>
10 #define loc(x, y, z) ((z) + (y) *grid->size_z + (x) *grid->size_z * grid->size_y)
77 g->clear_multicompartment_reaction();
93 unsigned char* subregion,
94 uint64_t* mc3d_start_indices,
110 if (
i == species_ids[0]) {
111 if (mc3d_region_size > 0) {
115 (num_species + num_params));
118 sizeof(uint64_t) * (num_species + num_params));
119 r->
mc3d_mults = (
double**) malloc(
sizeof(
double*) * (num_species + num_params));
121 for (
int row = 0;
row < num_species + num_params;
row++) {
122 r->
mc3d_mults[
row] = (
double*) malloc(
sizeof(
double) * mc3d_region_size);
123 for (
int c = 0;
c < mc3d_region_size;
c++, mult_idx++) {
129 }
else if (subregion ==
NULL) {
147 for (
i = 0;
i < num_species + num_params;
i++) {
151 if (
j == species_ids[
i])
169 uint64_t* mc3d_start_indices,
170 int mc3d_region_size,
212 unsigned char* my_subregion,
215 list_idx, num_species, num_params, species_id, f, my_subregion,
NULL, 0,
NULL);
234 if (react_count == 0)
238 const int tasks_per_thread = (react_count) /
n;
240 const int extra = react_count %
n;
250 if (load >= tasks_per_thread + (extra >
k)) {
279 unsigned int i,
j,
k,
n, start_idx, stop_idx, offset_idx;
280 double temp, ge_value, val_to_set;
284 double* states_cache =
NULL;
285 double* params_cache =
NULL;
286 double* states_cache_dx =
NULL;
287 double* results_array =
NULL;
288 double* results_array_dx =
NULL;
289 double* mc_mults_array =
NULL;
290 double dx = FLT_EPSILON;
329 for (
i = start_idx;
i <= stop_idx;
i++) {
346 react->
reaction(states_cache, params_cache, results_array, mc_mults_array);
349 states_cache_dx[
j] += dx;
359 pd = (results_array_dx[
k] - results_array[
k]) / dx;
362 states_cache_dx[
j] -= dx;
467 for (
i = start_idx;
i <= stop_idx;
i++) {
480 react->
reaction(states_cache, params_cache, results_array,
NULL);
483 states_cache_dx[
j] += dx;
486 react->
reaction(states_cache_dx, params_cache, results_array_dx,
NULL);
490 pd = (results_array_dx[
k] - results_array[
k]) / dx;
493 states_cache_dx[
j] -= dx;
592 double dt = (*dt_ptr);
606 g->do_multicompartment_reactions(
NULL);
655 for (
i = 0;
i < grid_size;
i++) {
671 grid_states = grid->
states;
674 for (
i = 0;
i < grid_size;
i++) {
675 y[
i] = grid_states[
i];
680 g->initialize_multicompartment_reaction();
694 const unsigned char calculate_rhs = ydot ==
NULL ? 0 : 1;
699 grid_states = grid->
states;
703 for (
i = 0;
i < grid_size;
i++) {
710 if (!calculate_rhs) {
721 grid_states = grid->
states;
723 for (
i = 0;
i < grid_size;
i++) {
737 g->do_multicompartment_reactions(ydot);
759 int num_states_x =
g->size_x, num_states_y =
g->size_y, num_states_z =
g->size_z;
760 double dx =
g->dx, dy =
g->dy, dz =
g->dz;
761 int i,
j,
k, stop_i, stop_j, stop_k;
762 double dc_x =
g->dc_x, dc_y =
g->dc_y, dc_z =
g->dc_z;
764 double rate_x = dc_x / (dx * dx);
765 double rate_y = dc_y / (dy * dy);
766 double rate_z = dc_z / (dz * dz);
769 int index, prev_i, prev_j, prev_k, next_i, next_j, next_k;
770 double div_x, div_y, div_z;
773 stop_i = num_states_x - 1;
774 stop_j = num_states_y - 1;
775 stop_k = num_states_z - 1;
779 prev_i = num_states_y * num_states_z,
780 next_i = num_states_y * num_states_z;
784 div_x = (
i == 0 ||
i == stop_i) ? 2. : 1.;
786 for (
j = 0, prev_j =
index + num_states_z, next_j =
index + num_states_z;
789 div_y = (
j == 0 ||
j == stop_j) ? 2. : 1.;
791 for (
k = 0, prev_k =
index + 1, next_k =
index + 1;
k < num_states_z;
792 k++,
index++, prev_i++, next_i++, prev_j++, next_j++) {
793 div_z = (
k == 0 ||
k == stop_k) ? 2. : 1.;
796 ydot[
index] += rate_x *
801 ydot[
index] += rate_y *
806 ydot[
index] += rate_z *
813 prev_j =
index - num_states_z;
814 next_j = (
j == stop_j - 1) ? prev_j :
index + num_states_z;
816 prev_i =
index - num_states_y * num_states_z;
817 next_i = (
i == stop_i - 1) ? prev_i :
index + num_states_y * num_states_z;
820 for (
i = 0,
index = 0, prev_i = 0, next_i = num_states_y * num_states_z;
i < num_states_x;
822 for (
j = 0, prev_j =
index - num_states_z, next_j =
index + num_states_z;
825 for (
k = 0, prev_k =
index - 1, next_k =
index + 1;
k < num_states_z;
826 k++,
index++, prev_i++, next_i++, prev_j++, next_j++, next_k++, prev_k++) {
827 if (
i == 0 ||
i == stop_i ||
j == 0 ||
j == stop_j ||
k == 0 ||
k == stop_k) {
831 ydot[
index] += rate_x *
834 ydot[
index] += rate_y *
837 ydot[
index] += rate_z *
841 prev_j =
index - num_states_z;
842 next_j =
index + num_states_z;
844 prev_i =
index - num_states_y * num_states_z;
845 next_i =
index + num_states_y * num_states_z;
921 const unsigned char calculate_rhs =
RHS ==
NULL ? 0 : 1;
928 grid_states = grid->
states;
932 for (
i = 0;
i < grid_size;
i++) {
939 if (!calculate_rhs) {
985 const double lbc_diag,
986 const double lbc_u_diag,
987 const double ubc_l_diag,
988 const double ubc_diag,
994 c[0] = lbc_u_diag / lbc_diag;
995 b[0] = b[0] / lbc_diag;
997 for (
i = 1;
i < N - 1;
i++) {
998 c[
i] = u_diag / (
diag - l_diag *
c[
i - 1]);
999 b[
i] = (b[
i] - l_diag * b[
i - 1]) / (
diag - l_diag *
c[
i - 1]);
1001 b[N - 1] = (b[N - 1] - ubc_l_diag * b[N - 2]) / (ubc_diag - ubc_l_diag *
c[N - 2]);
1004 for (
i = N - 2;
i >= 0;
i--) {
1005 b[
i] = b[
i] -
c[
i] * b[
i + 1];
1068 double const*
const state,
1073 double r =
g->dc_x *
dt /
SQ(
g->dx);
1074 double div_y, div_z;
1077 (y == 0 || z == 0 || y ==
g->size_y - 1 || z ==
g->size_z - 1)) {
1078 for (x = 0; x <
g->size_x; x++)
1079 RHS[x] =
g->bc->value;
1083 if (
g->size_y > 1) {
1084 yp = (y ==
g->size_y - 1) ? y - 1 : y + 1;
1085 ym = (y == 0) ? y + 1 : y - 1;
1086 div_y = (y == 0 || y ==
g->size_y - 1) ? 2. : 1.;
1092 if (
g->size_z > 1) {
1093 zp = (z ==
g->size_z - 1) ? z - 1 : z + 1;
1094 zm = (z == 0) ? z + 1 : z - 1;
1095 div_z = (z == 0 || z ==
g->size_z - 1) ? 2. : 1.;
1104 RHS[0] = state[
IDX(0, y, z)] +
g->states_cur[
IDX(0, y, z)] +
1106 ((
g->dc_y /
SQ(
g->dy)) *
1107 (state[
IDX(0, yp, z)] - 2. * state[
IDX(0, y, z)] + state[
IDX(0, ym, z)]) /
1109 (
g->dc_z /
SQ(
g->dz)) *
1110 (state[
IDX(0, y, zp)] - 2. * state[
IDX(0, y, z)] + state[
IDX(0, y, zm)]) /
1112 if (
g->size_x > 1) {
1113 RHS[0] +=
dt * (
g->dc_x /
SQ(
g->dx)) * (state[
IDX(1, y, z)] - state[
IDX(0, y, z)]);
1116 state[
IDX(x, y, z)] +
g->states_cur[
IDX(x, y, z)] +
1117 dt * ((
g->dc_x /
SQ(
g->dx)) * (state[
IDX(x - 1, y, z)] - state[
IDX(x, y, z)]) +
1118 (
g->dc_y /
SQ(
g->dy)) *
1119 (state[
IDX(x, yp, z)] - 2. * state[
IDX(x, y, z)] + state[
IDX(x, ym, z)]) /
1121 (
g->dc_z /
SQ(
g->dz)) *
1122 (state[
IDX(x, y, zp)] - 2. * state[
IDX(x, y, z)] + state[
IDX(x, y, zm)]) /
1126 RHS[0] =
g->bc->value;
1127 RHS[
g->size_x - 1] =
g->bc->value;
1129 for (x = 1; x <
g->size_x - 1; x++) {
1131 __builtin_prefetch(&(state[
IDX(x +
PREFETCH, y, z)]), 0, 1);
1132 __builtin_prefetch(&(state[
IDX(x +
PREFETCH, yp, z)]), 0, 0);
1133 __builtin_prefetch(&(state[
IDX(x +
PREFETCH, ym, z)]), 0, 0);
1135 RHS[x] = state[
IDX(x, y, z)] +
1137 ((
g->dc_x /
SQ(
g->dx)) *
1138 (state[
IDX(x + 1, y, z)] - 2. * state[
IDX(x, y, z)] +
1139 state[
IDX(x - 1, y, z)]) /
1141 (
g->dc_y /
SQ(
g->dy)) *
1142 (state[
IDX(x, yp, z)] - 2. * state[
IDX(x, y, z)] + state[
IDX(x, ym, z)]) /
1144 (
g->dc_z /
SQ(
g->dz)) *
1145 (state[
IDX(x, y, zp)] - 2. * state[
IDX(x, y, z)] + state[
IDX(x, y, zm)]) /
1147 g->states_cur[
IDX(x, y, z)];
1149 if (
g->size_x > 1) {
1163 g->size_x, -r / 2.0, 1.0 + r, -r / 2.0, 1.0, 0, 0, 1.0,
RHS,
scratch);
1181 double const*
const state,
1185 double r = (
g->dc_y *
dt /
SQ(
g->dy));
1188 (x == 0 || z == 0 || x ==
g->size_x - 1 || z ==
g->size_z - 1)) {
1189 for (y = 0; y <
g->size_y; y++)
1190 RHS[y] =
g->bc->value;
1193 if (
g->size_y == 1) {
1195 RHS[0] = state[x + z *
g->size_x];
1197 RHS[0] =
g->bc->value;
1202 RHS[0] = state[x + z *
g->size_x] -
1203 (
g->dc_y *
dt /
SQ(
g->dy)) *
1204 (
g->states[
IDX(x, 1, z)] - 2.0 *
g->states[
IDX(x, 0, z)] +
1205 g->states[
IDX(x, 1, z)]) /
1208 RHS[y] = state[x + (z + y *
g->size_z) *
g->size_x] -
1209 (
g->dc_y *
dt /
SQ(
g->dy)) *
1210 (
g->states[
IDX(x, y - 1, z)] - 2. *
g->states[
IDX(x, y, z)] +
1211 g->states[
IDX(x, y - 1, z)]) /
1214 RHS[0] =
g->bc->value;
1215 RHS[
g->size_y - 1] =
g->bc->value;
1217 for (y = 1; y <
g->size_y - 1; y++) {
1219 __builtin_prefetch(&state[x + (z + (y +
PREFETCH) *
g->size_z) *
g->size_x], 0, 0);
1220 __builtin_prefetch(&(
g->states[
IDX(x, y +
PREFETCH, z)]), 0, 1);
1222 RHS[y] = state[x + (z + y *
g->size_z) *
g->size_x] -
1223 (
g->dc_y *
dt /
SQ(
g->dy)) *
1224 (
g->states[
IDX(x, y + 1, z)] - 2. *
g->states[
IDX(x, y, z)] +
1225 g->states[
IDX(x, y - 1, z)]) /
1257 double const*
const state,
1261 double r =
g->dc_z *
dt /
SQ(
g->dz);
1264 (x == 0 || y == 0 || x ==
g->size_x - 1 || y ==
g->size_y - 1)) {
1265 for (z = 0; z <
g->size_z; z++)
1266 RHS[z] =
g->bc->value;
1270 if (
g->size_z == 1) {
1272 RHS[0] = state[y +
g->size_y * x];
1274 RHS[0] =
g->bc->value;
1280 RHS[0] = state[y +
g->size_y * (x *
g->size_z)] -
1281 (
g->dc_z *
dt /
SQ(
g->dz)) *
1282 (
g->states[
IDX(x, y, 1)] - 2.0 *
g->states[
IDX(x, y, 0)] +
1283 g->states[
IDX(x, y, 1)]) /
1286 RHS[z] = state[y +
g->size_y * (x *
g->size_z + z)] -
1287 (
g->dc_z *
dt /
SQ(
g->dz)) *
1288 (
g->states[
IDX(x, y, z - 1)] - 2.0 *
g->states[
IDX(x, y, z)] +
1289 g->states[
IDX(x, y, z - 1)]) /
1293 RHS[0] =
g->bc->value;
1294 RHS[
g->size_z - 1] =
g->bc->value;
1296 for (z = 1; z <
g->size_z - 1; z++) {
1297 RHS[z] = state[y +
g->size_y * (x *
g->size_z + z)] -
1298 (
g->dc_z *
dt /
SQ(
g->dz)) *
1299 (
g->states[
IDX(x, y, z + 1)] - 2. *
g->states[
IDX(x, y, z)] +
1300 g->states[
IDX(x, y, z - 1)]) /
1326 int sizej =
data->sizej;
1328 double* state_in = ecs_adi_dir->
states_in;
1331 double* scratchpad =
data->scratchpad;
1332 void (*ecs_dg_adi_dir)(
1333 ECS_Grid_node*, double, int, int,
double const*
const,
double*
const,
double*
const) =
1338 ecs_dg_adi_dir(
g,
dt,
i,
j, state_in, &state_out[
k * offset], scratchpad);
1352 const int tasks_per_thread = (
g->size_x *
g->size_y *
g->size_z /
n) /
NUM_THREADS;
1353 const int extra = (
g->size_x *
g->size_y *
g->size_z /
n) %
NUM_THREADS;
1355 g->ecs_tasks[0].start = 0;
1356 g->ecs_tasks[0].stop = tasks_per_thread + (extra > 0);
1357 g->ecs_tasks[0].sizej =
j;
1358 g->ecs_tasks[0].ecs_adi_dir = ecs_adi_dir;
1360 g->ecs_tasks[
k].start =
g->ecs_tasks[
k - 1].stop;
1361 g->ecs_tasks[
k].stop =
g->ecs_tasks[
k].start + tasks_per_thread + (extra >
k);
1362 g->ecs_tasks[
k].sizej =
j;
1363 g->ecs_tasks[
k].ecs_adi_dir = ecs_adi_dir;
virtual void apply_node_flux3D(double dt, double *states)=0
virtual void hybrid_connections()=0
virtual void variable_step_diffusion(const double *states, double *ydot)=0
virtual void set_num_threads(const int n)=0
virtual void do_grid_currents(double *, double, int)=0
virtual void variable_step_ode_solve(double *RHS, double dt)=0
virtual void scatter_grid_concentrations()=0
static double jacobian(void *v)
Grid_node * Parallel_grids[100]
void(* ECSReactionRate)(double *, double *, double *, double *)
static double v_get(void *v)
#define m_set_val(A, i, j, val)
#define v_set_val(x, i, val)
int const size_t const size_t n
static philox4x32_key_t k
static char scratch[MAXLINE+1]
void TaskQueue_add_task(TaskQueue *q, void *(*task)(void *), void *args, void *result)
void TaskQueue_sync(TaskQueue *q)
#define m_get_val(A, i, j)
void(* ecs_dg_adi_dir)(ECS_Grid_node *, const double, const int, const int, double const *const, double *const, double *const)
uint64_t * mc3d_indices_offsets
unsigned char * subregion
unsigned int num_params_involved
unsigned int num_species_involved