1 #include <../../nrnconf.h> 45 for ( i = 0; i < len; i++ )
50 from += len; to += len;
51 for ( i = 0; i < len; i++ )
64 for ( i = 0; i < len; i++ )
87 static char rcsid[] =
"$Id: extras.c,v 1.4 1995/06/08 15:13:15 des Exp $";
91 #define REGISTER_RICH 1 103 for ( i = 0; i < len; i++ )
115 for ( i = 0; i < len; i++ )
130 for ( i = 0; i < len; i++ )
140 register int i, len4;
150 for ( i = 0; i < len4; i++ )
153 y[4*i+1] +=
alpha*x[4*i+1];
154 y[4*i+2] +=
alpha*x[4*i+2];
155 y[4*i+3] +=
alpha*x[4*i+3];
157 x += 4*len4; y += 4*len4;
159 for ( i = 0; i < len; i++ )
168 register int i, len4;
171 #ifndef REGISTER_RICH 176 register Real sum0, sum1, sum2, sum3;
178 sum0 = sum1 = sum2 = sum3 = 0.0;
183 for ( i = 0; i < len4; i++ )
185 sum0 += x[4*
i ]*y[4*
i ];
186 sum1 += x[4*i+1]*y[4*i+1];
187 sum2 += x[4*i+2]*y[4*i+2];
188 sum3 += x[4*i+3]*y[4*i+3];
190 sum = sum0 + sum1 + sum2 + sum3;
191 x += 4*len4; y += 4*len4;
194 for ( i = 0; i < len; i++ )
201 #define ABS(x) ((x) >= 0 ? (x) : -(x)) 210 register Real tmp, max_val;
213 for ( i = 0; i < len; i++ )
232 for ( i = 0; i < len; i++ )
244 register Real norm, invnorm, sum, tmp;
251 for ( i = 0; i < len; i++ )
268 register int i,
j, m4, n4;
269 register Real sum0, sum1, sum2, sum3, tmp0, tmp1, tmp2, tmp3;
270 register Real *dp0, *dp1, *dp2, *dp3;
285 for ( i = 0; i < m4; i++ )
287 sum0 = sum1 = sum2 = sum3 = 0.0;
288 dp0 = &(
A[4*
i ][j0]);
289 dp1 = &(
A[4*i+1][j0]);
290 dp2 = &(
A[4*i+2][j0]);
291 dp3 = &(
A[4*i+3][j0]);
293 for ( j = 0; j < n4; j++ )
299 sum0 = sum0 + dp0[
j]*tmp0 + dp0[j+1]*tmp1 +
300 dp0[j+2]*tmp2 + dp0[j+3]*tmp3;
301 sum1 = sum1 + dp1[
j]*tmp0 + dp1[j+1]*tmp1 +
302 dp1[j+2]*tmp2 + dp1[j+3]*tmp3;
303 sum2 = sum2 + dp2[
j]*tmp0 + dp2[j+1]*tmp1 +
304 dp2[j+2]*tmp2 + dp2[j+3]*tmp3;
305 sum3 = sum3 + dp3[
j]*tmp0 + dp3[j+1]*tmp2 +
306 dp3[j+2]*tmp2 + dp3[j+3]*tmp3;
308 for ( j = 0; j <
n; j++ )
310 sum0 += dp0[4*n4+
j]*x[4*n4+
j];
311 sum1 += dp1[4*n4+
j]*x[4*n4+
j];
312 sum2 += dp2[4*n4+
j]*x[4*n4+
j];
313 sum3 += dp3[4*n4+
j]*x[4*n4+
j];
315 y[4*
i ] = beta*y[4*
i ] +
alpha*sum0;
316 y[4*i+1] = beta*y[4*i+1] +
alpha*sum1;
317 y[4*i+2] = beta*y[4*i+2] +
alpha*sum2;
318 y[4*i+3] = beta*y[4*i+3] +
alpha*sum3;
322 for ( i = 0; i < m; i++ )
323 y[4*m4+i] = beta*y[i] +
alpha*
Mdot(4*n4+
n,&(
A[4*m4+i][j0]),x);
332 register int i,
j, m4, n2;
337 register Real *Aref0, *Aref1;
338 register Real tmp0, tmp1;
339 register Real yval0, yval1, yval2, yval3;
356 for ( j = 0; j < n2; j++ )
359 tmp1 =
alpha*x[2*j+1];
360 Aref0 = &(
A[2*
j ][j0]);
361 Aref1 = &(
A[2*j+1][j0]);
362 for ( i = 0; i < m4; i++ )
364 yval0 = y[4*
i ] + tmp0*Aref0[4*
i ];
365 yval1 = y[4*i+1] + tmp0*Aref0[4*i+1];
366 yval2 = y[4*i+2] + tmp0*Aref0[4*i+2];
367 yval3 = y[4*i+3] + tmp0*Aref0[4*i+3];
368 y[4*
i ] = yval0 + tmp1*Aref1[4*
i ];
369 y[4*i+1] = yval1 + tmp1*Aref1[4*i+1];
370 y[4*i+2] = yval2 + tmp1*Aref1[4*i+2];
371 y[4*i+3] = yval3 + tmp1*Aref1[4*i+3];
373 y += 4*m4; Aref0 += 4*m4; Aref1 += 4*m4;
374 for ( i = 0; i < m; i++ )
375 y[i] += tmp0*Aref0[i] + tmp1*Aref1[i];
379 for ( j = 0; j <
n; j++ )
382 Aref = &(
A[2*n2+
j][j0]);
383 for ( i = 0; i < m4; i++ )
385 y[4*
i ] += tmp*Aref[4*
i ];
386 y[4*i+1] += tmp*Aref[4*i+1];
387 y[4*i+2] += tmp*Aref[4*i+2];
388 y[4*i+3] += tmp*Aref[4*i+3];
390 y += 4*m4; Aref += 4*m4;
391 for ( i = 0; i < m; i++ )
402 register int i,
j, n4;
413 for ( i = 0; i < m; i++ )
417 for ( j = 0; j < n4; j++ )
419 Aref[4*
j ] += tmp*y[4*
j ];
420 Aref[4*j+1] += tmp*y[4*j+1];
421 Aref[4*j+2] += tmp*y[4*j+2];
422 Aref[4*j+3] += tmp*y[4*j+3];
424 Aref += 4*n4; y += 4*n4;
425 for ( j = 0; j <
n; j++ )
439 register int i,
j,
k;
447 for ( i = 0; i < m; i++ )
458 register int i,
j,
k;
465 for ( k = 0; k <
p; k++ )
476 register int i,
j,
k;
483 for ( i = 0; i < m; i++ )
494 register int i,
j,
k;
496 for ( i = 0; i < m; i++ )
497 for ( j = 0; j <
n; j++ )
498 for ( k = 0; k <
p; k++ )
499 C[i][Cj0+j] +=
A[i][Aj0+k]*
B[k][Bj0+j];
static philox4x32_key_t k
int const size_t const size_t n