1 #include <../../nrnconf.h>
45 for (
i = 0;
i < len;
i++ )
50 from += len; to += len;
51 for (
i = 0;
i < len;
i++ )
64 for (
i = 0;
i < len;
i++ )
87 static char rcsid[] =
"$Id: extras.c,v 1.4 1995/06/08 15:13:15 des Exp $";
91 #define REGISTER_RICH 1
103 for (
i = 0;
i < len;
i++ )
115 for (
i = 0;
i < len;
i++ )
130 for (
i = 0;
i < len;
i++ )
140 register int i, len4;
150 for (
i = 0;
i < len4;
i++ )
157 x += 4*len4; y += 4*len4;
159 for (
i = 0;
i < len;
i++ )
168 register int i, len4;
171 #ifndef REGISTER_RICH
176 register Real sum0, sum1, sum2, sum3;
178 sum0 = sum1 = sum2 = sum3 = 0.0;
183 for (
i = 0;
i < len4;
i++ )
185 sum0 += x[4*
i ]*y[4*
i ];
186 sum1 += x[4*
i+1]*y[4*
i+1];
187 sum2 += x[4*
i+2]*y[4*
i+2];
188 sum3 += x[4*
i+3]*y[4*
i+3];
190 sum = sum0 + sum1 + sum2 + sum3;
191 x += 4*len4; y += 4*len4;
194 for (
i = 0;
i < len;
i++ )
201 #define ABS(x) ((x) >= 0 ? (x) : -(x))
210 register Real tmp, max_val;
213 for (
i = 0;
i < len;
i++ )
232 for (
i = 0;
i < len;
i++ )
244 register Real norm, invnorm, sum, tmp;
251 for (
i = 0;
i < len;
i++ )
268 register int i,
j, m4, n4;
269 register Real sum0, sum1, sum2, sum3, tmp0, tmp1, tmp2, tmp3;
270 register Real *dp0, *dp1, *dp2, *dp3;
285 for (
i = 0;
i < m4;
i++ )
287 sum0 = sum1 = sum2 = sum3 = 0.0;
288 dp0 = &(
A[4*
i ][j0]);
289 dp1 = &(
A[4*
i+1][j0]);
290 dp2 = &(
A[4*
i+2][j0]);
291 dp3 = &(
A[4*
i+3][j0]);
293 for (
j = 0;
j < n4;
j++ )
299 sum0 = sum0 + dp0[
j]*tmp0 + dp0[
j+1]*tmp1 +
300 dp0[
j+2]*tmp2 + dp0[
j+3]*tmp3;
301 sum1 = sum1 + dp1[
j]*tmp0 + dp1[
j+1]*tmp1 +
302 dp1[
j+2]*tmp2 + dp1[
j+3]*tmp3;
303 sum2 = sum2 + dp2[
j]*tmp0 + dp2[
j+1]*tmp1 +
304 dp2[
j+2]*tmp2 + dp2[
j+3]*tmp3;
305 sum3 = sum3 + dp3[
j]*tmp0 + dp3[
j+1]*tmp2 +
306 dp3[
j+2]*tmp2 + dp3[
j+3]*tmp3;
308 for (
j = 0;
j <
n;
j++ )
310 sum0 += dp0[4*n4+
j]*x[4*n4+
j];
311 sum1 += dp1[4*n4+
j]*x[4*n4+
j];
312 sum2 += dp2[4*n4+
j]*x[4*n4+
j];
313 sum3 += dp3[4*n4+
j]*x[4*n4+
j];
315 y[4*
i ] = beta*y[4*
i ] +
alpha*sum0;
316 y[4*
i+1] = beta*y[4*
i+1] +
alpha*sum1;
317 y[4*
i+2] = beta*y[4*
i+2] +
alpha*sum2;
318 y[4*
i+3] = beta*y[4*
i+3] +
alpha*sum3;
322 for (
i = 0;
i < m;
i++ )
332 register int i,
j, m4, n2;
337 register Real *Aref0, *Aref1;
338 register Real tmp0, tmp1;
339 register Real yval0, yval1, yval2, yval3;
356 for (
j = 0;
j < n2;
j++ )
360 Aref0 = &(
A[2*
j ][j0]);
361 Aref1 = &(
A[2*
j+1][j0]);
362 for (
i = 0;
i < m4;
i++ )
364 yval0 = y[4*
i ] + tmp0*Aref0[4*
i ];
365 yval1 = y[4*
i+1] + tmp0*Aref0[4*
i+1];
366 yval2 = y[4*
i+2] + tmp0*Aref0[4*
i+2];
367 yval3 = y[4*
i+3] + tmp0*Aref0[4*
i+3];
368 y[4*
i ] = yval0 + tmp1*Aref1[4*
i ];
369 y[4*
i+1] = yval1 + tmp1*Aref1[4*
i+1];
370 y[4*
i+2] = yval2 + tmp1*Aref1[4*
i+2];
371 y[4*
i+3] = yval3 + tmp1*Aref1[4*
i+3];
373 y += 4*m4; Aref0 += 4*m4; Aref1 += 4*m4;
374 for (
i = 0;
i < m;
i++ )
375 y[
i] += tmp0*Aref0[
i] + tmp1*Aref1[
i];
379 for (
j = 0;
j <
n;
j++ )
382 Aref = &(
A[2*n2+
j][j0]);
383 for (
i = 0;
i < m4;
i++ )
385 y[4*
i ] += tmp*Aref[4*
i ];
386 y[4*
i+1] += tmp*Aref[4*
i+1];
387 y[4*
i+2] += tmp*Aref[4*
i+2];
388 y[4*
i+3] += tmp*Aref[4*
i+3];
390 y += 4*m4; Aref += 4*m4;
391 for (
i = 0;
i < m;
i++ )
402 register int i,
j, n4;
413 for (
i = 0;
i < m;
i++ )
417 for (
j = 0;
j < n4;
j++ )
419 Aref[4*
j ] += tmp*y[4*
j ];
420 Aref[4*
j+1] += tmp*y[4*
j+1];
421 Aref[4*
j+2] += tmp*y[4*
j+2];
422 Aref[4*
j+3] += tmp*y[4*
j+3];
424 Aref += 4*n4; y += 4*n4;
425 for (
j = 0;
j <
n;
j++ )
439 register int i,
j,
k;
447 for (
i = 0;
i < m;
i++ )
458 register int i,
j,
k;
465 for (
k = 0;
k <
p;
k++ )
476 register int i,
j,
k;
483 for (
i = 0;
i < m;
i++ )
494 register int i,
j,
k;
496 for (
i = 0;
i < m;
i++ )
497 for (
j = 0;
j <
n;
j++ )
498 for (
k = 0;
k <
p;
k++ )
499 C[
i][Cj0+
j] +=
A[
i][Aj0+
k]*
B[
k][Bj0+
j];
int const size_t const size_t n
static philox4x32_key_t k