NEURON
extras.c
Go to the documentation of this file.
1 #include <../../nrnconf.h>
2 
3 /**************************************************************************
4 **
5 ** Copyright (C) 1993 David E. Steward & Zbigniew Leyk, all rights reserved.
6 **
7 ** Meschach Library
8 **
9 ** This Meschach Library is provided "as is" without any express
10 ** or implied warranty of any kind with respect to this software.
11 ** In particular the authors shall not be liable for any direct,
12 ** indirect, special, incidental or consequential damages arising
13 ** in any way from use of the software.
14 **
15 ** Everyone is granted permission to copy, modify and redistribute this
16 ** Meschach Library, provided:
17 ** 1. All copies contain this copyright notice.
18 ** 2. All modified copies shall carry a notice stating who
19 ** made the last modification and the date of such modification.
20 ** 3. No charge is made for this software or works derived from it.
21 ** This clause shall not be construed as constraining other software
22 ** distributed on the same medium as this software, nor is a
23 ** distribution fee considered a charge.
24 **
25 ***************************************************************************/
26 
27 
28 /*
29  Memory port routines: MEM_COPY and MEM_ZERO
30 */
31 
32 /* For BSD 4.[23] environments: using bcopy() and bzero() */
33 
34 #include "machine.h"
35 
36 #ifndef MEM_COPY
37 void MEM_COPY(from,to,len)
38 char *from, *to;
39 int len;
40 {
41  int i;
42 
43  if ( from < to )
44  {
45  for ( i = 0; i < len; i++ )
46  *to++ = *from++;
47  }
48  else
49  {
50  from += len; to += len;
51  for ( i = 0; i < len; i++ )
52  *(--to) = *(--from);
53  }
54 }
55 #endif
56 
57 #ifndef MEM_ZERO
58 void MEM_ZERO(ptr,len)
59 char *ptr;
60 int len;
61 {
62  int i;
63 
64  for ( i = 0; i < len; i++ )
65  *(ptr++) = '\0';
66 }
67 #endif
68 
69 /*
70  This file contains versions of something approximating the well-known
71  BLAS routines in C, suitable for Meschach (hence the `m').
72  These are "vanilla" implementations, at least with some consideration
73  of the effects of caching and paging, and maybe some loop unrolling
74  for register-rich machines
75 */
76 
77 /*
78  Organisation of matrices: it is assumed that matrices are represented
79  by Real **'s. To keep flexibility, there is also an "initial
80  column" parameter j0, so that the actual elements used are
81  A[0][j0], A[0][j0+1], ..., A[0][j0+n-1]
82  A[1][j0], A[1][j0+1], ..., A[1][j0+n-1]
83  .. .. ... ..
84  A[m-1][j0], A[m-1][j0+1], ..., A[m-1][j0+n-1]
85 */
86 
87 static char rcsid[] = "$Id: extras.c,v 1.4 1995/06/08 15:13:15 des Exp $";
88 
89 #include <math.h>
90 
91 #define REGISTER_RICH 1
92 
93 /* mblar-1 routines */
94 
95 /* Mscale -- sets x <- alpha.x */
96 void Mscale(len,alpha,x)
97 int len;
98 double alpha;
99 Real *x;
100 {
101  register int i;
102 
103  for ( i = 0; i < len; i++ )
104  x[i] *= alpha;
105 }
106 
107 /* Mswap -- swaps x and y */
108 void Mswap(len,x,y)
109 int len;
110 Real *x, *y;
111 {
112  register int i;
113  register Real tmp;
114 
115  for ( i = 0; i < len; i++ )
116  {
117  tmp = x[i];
118  x[i] = y[i];
119  y[i] = tmp;
120  }
121 }
122 
123 /* Mcopy -- copies x to y */
124 void Mcopy(len,x,y)
125 int len;
126 Real *x, *y;
127 {
128  register int i;
129 
130  for ( i = 0; i < len; i++ )
131  y[i] = x[i];
132 }
133 
134 /* Maxpy -- y <- y + alpha.x */
135 void Maxpy(len,alpha,x,y)
136 int len;
137 double alpha;
138 Real *x, *y;
139 {
140  register int i, len4;
141 
142  /****************************************
143  for ( i = 0; i < len; i++ )
144  y[i] += alpha*x[i];
145  ****************************************/
146 
147 #ifdef REGISTER_RICH
148  len4 = len / 4;
149  len = len % 4;
150  for ( i = 0; i < len4; i++ )
151  {
152  y[4*i] += alpha*x[4*i];
153  y[4*i+1] += alpha*x[4*i+1];
154  y[4*i+2] += alpha*x[4*i+2];
155  y[4*i+3] += alpha*x[4*i+3];
156  }
157  x += 4*len4; y += 4*len4;
158 #endif
159  for ( i = 0; i < len; i++ )
160  y[i] += alpha*x[i];
161 }
162 
163 /* Mdot -- returns x'.y */
164 double Mdot(len,x,y)
165 int len;
166 Real *x, *y;
167 {
168  register int i, len4;
169  register Real sum;
170 
171 #ifndef REGISTER_RICH
172  sum = 0.0;
173 #endif
174 
175 #ifdef REGISTER_RICH
176  register Real sum0, sum1, sum2, sum3;
177 
178  sum0 = sum1 = sum2 = sum3 = 0.0;
179 
180  len4 = len / 4;
181  len = len % 4;
182 
183  for ( i = 0; i < len4; i++ )
184  {
185  sum0 += x[4*i ]*y[4*i ];
186  sum1 += x[4*i+1]*y[4*i+1];
187  sum2 += x[4*i+2]*y[4*i+2];
188  sum3 += x[4*i+3]*y[4*i+3];
189  }
190  sum = sum0 + sum1 + sum2 + sum3;
191  x += 4*len4; y += 4*len4;
192 #endif
193 
194  for ( i = 0; i < len; i++ )
195  sum += x[i]*y[i];
196 
197  return sum;
198 }
199 
200 #ifndef ABS
201 #define ABS(x) ((x) >= 0 ? (x) : -(x))
202 #endif
203 
204 /* Mnorminf -- returns ||x||_inf */
205 double Mnorminf(len,x)
206 int len;
207 Real *x;
208 {
209  register int i;
210  register Real tmp, max_val;
211 
212  max_val = 0.0;
213  for ( i = 0; i < len; i++ )
214  {
215  tmp = ABS(x[i]);
216  if ( max_val < tmp )
217  max_val = tmp;
218  }
219 
220  return max_val;
221 }
222 
223 /* Mnorm1 -- returns ||x||_1 */
224 double Mnorm1(len,x)
225 int len;
226 Real *x;
227 {
228  register int i;
229  register Real sum;
230 
231  sum = 0.0;
232  for ( i = 0; i < len; i++ )
233  sum += ABS(x[i]);
234 
235  return sum;
236 }
237 
238 /* Mnorm2 -- returns ||x||_2 */
239 double Mnorm2(len,x)
240 int len;
241 Real *x;
242 {
243  register int i;
244  register Real norm, invnorm, sum, tmp;
245 
246  norm = Mnorminf(len,x);
247  if ( norm == 0.0 )
248  return 0.0;
249  invnorm = 1.0/norm;
250  sum = 0.0;
251  for ( i = 0; i < len; i++ )
252  {
253  tmp = x[i]*invnorm;
254  sum += tmp*tmp;
255  }
256 
257  return sum/invnorm;
258 }
259 
260 /* mblar-2 routines */
261 
262 /* Mmv -- y <- alpha.A.x + beta.y */
263 void Mmv(m,n,alpha,A,j0,x,beta,y)
264 int m, n, j0;
265 double alpha, beta;
266 Real **A, *x, *y;
267 {
268  register int i, j, m4, n4;
269  register Real sum0, sum1, sum2, sum3, tmp0, tmp1, tmp2, tmp3;
270  register Real *dp0, *dp1, *dp2, *dp3;
271 
272  /****************************************
273  for ( i = 0; i < m; i++ )
274  y[i] += alpha*Mdot(n,&(A[i][j0]),x);
275  ****************************************/
276 
277  m4 = n4 = 0;
278 
279 #ifdef REGISTER_RICH
280  m4 = m / 4;
281  m = m % 4;
282  n4 = n / 4;
283  n = n % 4;
284 
285  for ( i = 0; i < m4; i++ )
286  {
287  sum0 = sum1 = sum2 = sum3 = 0.0;
288  dp0 = &(A[4*i ][j0]);
289  dp1 = &(A[4*i+1][j0]);
290  dp2 = &(A[4*i+2][j0]);
291  dp3 = &(A[4*i+3][j0]);
292 
293  for ( j = 0; j < n4; j++ )
294  {
295  tmp0 = x[4*j ];
296  tmp1 = x[4*j+1];
297  tmp2 = x[4*j+2];
298  tmp3 = x[4*j+3];
299  sum0 = sum0 + dp0[j]*tmp0 + dp0[j+1]*tmp1 +
300  dp0[j+2]*tmp2 + dp0[j+3]*tmp3;
301  sum1 = sum1 + dp1[j]*tmp0 + dp1[j+1]*tmp1 +
302  dp1[j+2]*tmp2 + dp1[j+3]*tmp3;
303  sum2 = sum2 + dp2[j]*tmp0 + dp2[j+1]*tmp1 +
304  dp2[j+2]*tmp2 + dp2[j+3]*tmp3;
305  sum3 = sum3 + dp3[j]*tmp0 + dp3[j+1]*tmp2 +
306  dp3[j+2]*tmp2 + dp3[j+3]*tmp3;
307  }
308  for ( j = 0; j < n; j++ )
309  {
310  sum0 += dp0[4*n4+j]*x[4*n4+j];
311  sum1 += dp1[4*n4+j]*x[4*n4+j];
312  sum2 += dp2[4*n4+j]*x[4*n4+j];
313  sum3 += dp3[4*n4+j]*x[4*n4+j];
314  }
315  y[4*i ] = beta*y[4*i ] + alpha*sum0;
316  y[4*i+1] = beta*y[4*i+1] + alpha*sum1;
317  y[4*i+2] = beta*y[4*i+2] + alpha*sum2;
318  y[4*i+3] = beta*y[4*i+3] + alpha*sum3;
319  }
320 #endif
321 
322  for ( i = 0; i < m; i++ )
323  y[4*m4+i] = beta*y[i] + alpha*Mdot(4*n4+n,&(A[4*m4+i][j0]),x);
324 }
325 
326 /* Mvm -- y <- alpha.A^T.x + beta.y */
327 void Mvm(m,n,alpha,A,j0,x,beta,y)
328 int m, n, j0;
329 double alpha, beta;
330 Real **A, *x, *y;
331 {
332  register int i, j, m4, n2;
333  register Real *Aref;
334  register Real tmp;
335 
336 #ifdef REGISTER_RICH
337  register Real *Aref0, *Aref1;
338  register Real tmp0, tmp1;
339  register Real yval0, yval1, yval2, yval3;
340 #endif
341 
342  if ( beta != 1.0 )
343  Mscale(m,beta,y);
344  /****************************************
345  for ( j = 0; j < n; j++ )
346  Maxpy(m,alpha*x[j],&(A[j][j0]),y);
347  ****************************************/
348  m4 = n2 = 0;
349 
350  m4 = m / 4;
351  m = m % 4;
352 #ifdef REGISTER_RICH
353  n2 = n / 2;
354  n = n % 2;
355 
356  for ( j = 0; j < n2; j++ )
357  {
358  tmp0 = alpha*x[2*j];
359  tmp1 = alpha*x[2*j+1];
360  Aref0 = &(A[2*j ][j0]);
361  Aref1 = &(A[2*j+1][j0]);
362  for ( i = 0; i < m4; i++ )
363  {
364  yval0 = y[4*i ] + tmp0*Aref0[4*i ];
365  yval1 = y[4*i+1] + tmp0*Aref0[4*i+1];
366  yval2 = y[4*i+2] + tmp0*Aref0[4*i+2];
367  yval3 = y[4*i+3] + tmp0*Aref0[4*i+3];
368  y[4*i ] = yval0 + tmp1*Aref1[4*i ];
369  y[4*i+1] = yval1 + tmp1*Aref1[4*i+1];
370  y[4*i+2] = yval2 + tmp1*Aref1[4*i+2];
371  y[4*i+3] = yval3 + tmp1*Aref1[4*i+3];
372  }
373  y += 4*m4; Aref0 += 4*m4; Aref1 += 4*m4;
374  for ( i = 0; i < m; i++ )
375  y[i] += tmp0*Aref0[i] + tmp1*Aref1[i];
376  }
377 #endif
378 
379  for ( j = 0; j < n; j++ )
380  {
381  tmp = alpha*x[2*n2+j];
382  Aref = &(A[2*n2+j][j0]);
383  for ( i = 0; i < m4; i++ )
384  {
385  y[4*i ] += tmp*Aref[4*i ];
386  y[4*i+1] += tmp*Aref[4*i+1];
387  y[4*i+2] += tmp*Aref[4*i+2];
388  y[4*i+3] += tmp*Aref[4*i+3];
389  }
390  y += 4*m4; Aref += 4*m4;
391  for ( i = 0; i < m; i++ )
392  y[i] += tmp*Aref[i];
393  }
394 }
395 
396 /* Mupdate -- A <- A + alpha.x.y^T */
397 void Mupdate(m,n,alpha,x,y,A,j0)
398 int m, n, j0;
399 double alpha;
400 Real **A, *x, *y;
401 {
402  register int i, j, n4;
403  register Real *Aref;
404  register Real tmp;
405 
406  /****************************************
407  for ( i = 0; i < m; i++ )
408  Maxpy(n,alpha*x[i],y,&(A[i][j0]));
409  ****************************************/
410 
411  n4 = n / 4;
412  n = n % 4;
413  for ( i = 0; i < m; i++ )
414  {
415  tmp = alpha*x[i];
416  Aref = &(A[i][j0]);
417  for ( j = 0; j < n4; j++ )
418  {
419  Aref[4*j ] += tmp*y[4*j ];
420  Aref[4*j+1] += tmp*y[4*j+1];
421  Aref[4*j+2] += tmp*y[4*j+2];
422  Aref[4*j+3] += tmp*y[4*j+3];
423  }
424  Aref += 4*n4; y += 4*n4;
425  for ( j = 0; j < n; j++ )
426  Aref[j] += tmp*y[j];
427  }
428 }
429 
430 /* mblar-3 routines */
431 
432 /* Mmm -- C <- C + alpha.A.B */
433 void Mmm(m,n,p,alpha,A,Aj0,B,Bj0,C,Cj0)
434 int m, n, p; /* C is m x n */
435 double alpha;
436 Real **A, **B, **C;
437 int Aj0, Bj0, Cj0;
438 {
439  register int i, j, k;
440  /* register Real tmp, sum; */
441 
442  /****************************************
443  for ( i = 0; i < m; i++ )
444  for ( k = 0; k < p; k++ )
445  Maxpy(n,alpha*A[i][Aj0+k],&(B[k][Bj0]),&(C[i][Cj0]));
446  ****************************************/
447  for ( i = 0; i < m; i++ )
448  Mvm(p,n,alpha,B,Bj0,&(A[i][Aj0]),1.0,&(C[i][Cj0]));
449 }
450 
451 /* Mmtrm -- C <- C + alpha.A^T.B */
452 void Mmtrm(m,n,p,alpha,A,Aj0,B,Bj0,C,Cj0)
453 int m, n, p; /* C is m x n */
454 double alpha;
455 Real **A, **B, **C;
456 int Aj0, Bj0, Cj0;
457 {
458  register int i, j, k;
459 
460  /****************************************
461  for ( i = 0; i < m; i++ )
462  for ( k = 0; k < p; k++ )
463  Maxpy(n,alpha*A[k][Aj0+i],&(B[k][Bj0]),&(C[i][Cj0]));
464  ****************************************/
465  for ( k = 0; k < p; k++ )
466  Mupdate(m,n,alpha,&(A[k][Aj0]),&(B[k][Bj0]),C,Cj0);
467 }
468 
469 /* Mmmtr -- C <- C + alpha.A.B^T */
470 void Mmmtr(m,n,p,alpha,A,Aj0,B,Bj0,C,Cj0)
471 int m, n, p; /* C is m x n */
472 double alpha;
473 Real **A, **B, **C;
474 int Aj0, Bj0, Cj0;
475 {
476  register int i, j, k;
477 
478  /****************************************
479  for ( i = 0; i < m; i++ )
480  for ( j = 0; j < n; j++ )
481  C[i][Cj0+j] += alpha*Mdot(p,&(A[i][Aj0]),&(B[j][Bj0]));
482  ****************************************/
483  for ( i = 0; i < m; i++ )
484  Mmv(n,p,alpha,B,Bj0,&(A[i][Aj0]),1.0,&(C[i][Cj0]));
485 }
486 
487 /* Mmtrmtr -- C <- C + alpha.A^T.B^T */
488 void Mmtrmtr(m,n,p,alpha,A,Aj0,B,Bj0,C,Cj0)
489 int m, n, p; /* C is m x n */
490 double alpha;
491 Real **A, **B, **C;
492 int Aj0, Bj0, Cj0;
493 {
494  register int i, j, k;
495 
496  for ( i = 0; i < m; i++ )
497  for ( j = 0; j < n; j++ )
498  for ( k = 0; k < p; k++ )
499  C[i][Cj0+j] += A[i][Aj0+k]*B[k][Bj0+j];
500 }
501 
#define alpha
Definition: bkpfacto.c:43
void Mscale(int len, double alpha, Real *x)
Definition: extras.c:96
void Mmtrm(int m, int n, int p, double alpha, Real **A, int Aj0, Real **B, int Bj0, Real **C, int Cj0)
Definition: extras.c:452
void Mmmtr(int m, int n, int p, double alpha, Real **A, int Aj0, Real **B, int Bj0, Real **C, int Cj0)
Definition: extras.c:470
void Mcopy(int len, Real *x, Real *y)
Definition: extras.c:124
void Mmv(int m, int n, double alpha, Real **A, int j0, Real *x, double beta, Real *y)
Definition: extras.c:263
double Mnorm2(int len, Real *x)
Definition: extras.c:239
double Mnorm1(int len, Real *x)
Definition: extras.c:224
void MEM_COPY(char *from, char *to, int len)
Definition: extras.c:37
void Mmm(int m, int n, int p, double alpha, Real **A, int Aj0, Real **B, int Bj0, Real **C, int Cj0)
Definition: extras.c:433
void Mmtrmtr(int m, int n, int p, double alpha, Real **A, int Aj0, Real **B, int Bj0, Real **C, int Cj0)
Definition: extras.c:488
double Mdot(int len, Real *x, Real *y)
Definition: extras.c:164
#define ABS(x)
Definition: extras.c:201
void MEM_ZERO(char *ptr, int len)
Definition: extras.c:58
void Maxpy(int len, double alpha, Real *x, Real *y)
Definition: extras.c:135
void Mupdate(int m, int n, double alpha, Real *x, Real *y, Real **A, int j0)
Definition: extras.c:397
void Mswap(int len, Real *x, Real *y)
Definition: extras.c:108
double Mnorminf(int len, Real *x)
Definition: extras.c:205
void Mvm(int m, int n, double alpha, Real **A, int j0, Real *x, double beta, Real *y)
Definition: extras.c:327
static char rcsid[]
Definition: extras.c:87
#define Real
Definition: machine.h:189
#define i
Definition: md1redef.h:12
#define A(i)
Definition: multisplit.cpp:63
#define B(i)
Definition: multisplit.cpp:64
int const size_t const size_t n
Definition: nrngsl.h:11
size_t p
size_t j
static philox4x32_key_t k
Definition: nrnran123.cpp:11