View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  /*
19   * This is not the original file distributed by the Apache Software Foundation
20   * It has been modified by the Hipparchus project
21   */
22  package org.hipparchus.stat.regression;
23  
24  import java.util.Arrays;
25  
26  import org.hipparchus.exception.LocalizedCoreFormats;
27  import org.hipparchus.exception.MathIllegalArgumentException;
28  import org.hipparchus.stat.LocalizedStatFormats;
29  import org.hipparchus.util.FastMath;
30  import org.hipparchus.util.MathUtils;
31  import org.hipparchus.util.Precision;
32  
33  /**
34   * This class is a concrete implementation of the {@link UpdatingMultipleLinearRegression} interface.
35   *
36   * <p>The algorithm is described in:</p>
37   * <pre>
38   * Algorithm AS 274: Least Squares Routines to Supplement Those of Gentleman
39   * Author(s): Alan J. Miller
40   * Source: Journal of the Royal Statistical Society.
41   * Series C (Applied Statistics), Vol. 41, No. 2
42   * (1992), pp. 458-478
43   * Published by: Blackwell Publishing for the Royal Statistical Society
44   * Stable URL: http://www.jstor.org/stable/2347583 </pre>
45   *
46   * <p>This method for multiple regression forms the solution to the OLS problem
47   * by updating the QR decomposition as described by Gentleman.</p>
48   *
49   */
50  public class MillerUpdatingRegression implements UpdatingMultipleLinearRegression {
51  
52      /** number of variables in regression */
53      private final int nvars;
54      /** diagonals of cross products matrix */
55      private final double[] d;
56      /** the elements of the R`Y */
57      private final double[] rhs;
58      /** the off diagonal portion of the R matrix */
59      private final double[] r;
60      /** the tolerance for each of the variables */
61      private final double[] tol;
62      /** residual sum of squares for all nested regressions */
63      private final double[] rss;
64      /** order of the regressors */
65      private final int[] vorder;
66      /** scratch space for tolerance calc */
67      private final double[] work_tolset;
68      /** number of observations entered */
69      private long nobs;
70      /** sum of squared errors of largest regression */
71      private double sserr;
72      /** has rss been called? */
73      private boolean rss_set;
74      /** has the tolerance setting method been called */
75      private boolean tol_set;
76      /** flags for variables with linear dependency problems */
77      private final boolean[] lindep;
78      /** singular x values */
79      private final double[] x_sing;
80      /** workspace for singularity method */
81      private final double[] work_sing;
82      /** summation of Y variable */
83      private double sumy;
84      /** summation of squared Y values */
85      private double sumsqy;
86      /** boolean flag whether a regression constant is added */
87      private final boolean hasIntercept;
88      /** zero tolerance */
89      private final double epsilon;
90      /**
91       *  Set the default constructor to private access
92       *  to prevent inadvertent instantiation
93       */
94      @SuppressWarnings("unused")
95      private MillerUpdatingRegression() {
96          this(-1, false, Double.NaN);
97      }
98  
99      /**
100      * This is the augmented constructor for the MillerUpdatingRegression class.
101      *
102      * @param numberOfVariables number of regressors to expect, not including constant
103      * @param includeConstant include a constant automatically
104      * @param errorTolerance  zero tolerance, how machine zero is determined
105      * @throws MathIllegalArgumentException if {@code numberOfVariables is less than 1}
106      */
107     public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant, double errorTolerance)
108             throws MathIllegalArgumentException {
109         if (numberOfVariables < 1) {
110             throw new MathIllegalArgumentException(LocalizedStatFormats.NO_REGRESSORS);
111         }
112         if (includeConstant) {
113             this.nvars = numberOfVariables + 1;
114         } else {
115             this.nvars = numberOfVariables;
116         }
117         this.hasIntercept = includeConstant;
118         this.nobs = 0;
119         this.d = new double[this.nvars];
120         this.rhs = new double[this.nvars];
121         this.r = new double[this.nvars * (this.nvars - 1) / 2];
122         this.tol = new double[this.nvars];
123         this.rss = new double[this.nvars];
124         this.vorder = new int[this.nvars];
125         this.x_sing = new double[this.nvars];
126         this.work_sing = new double[this.nvars];
127         this.work_tolset = new double[this.nvars];
128         this.lindep = new boolean[this.nvars];
129         for (int i = 0; i < this.nvars; i++) {
130             vorder[i] = i;
131         }
132         if (errorTolerance > 0) {
133             this.epsilon = errorTolerance;
134         } else {
135             this.epsilon = -errorTolerance;
136         }
137     }
138 
139     /**
140      * Primary constructor for the MillerUpdatingRegression.
141      *
142      * @param numberOfVariables maximum number of potential regressors
143      * @param includeConstant include a constant automatically
144      * @throws MathIllegalArgumentException if {@code numberOfVariables is less than 1}
145      */
146     public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant)
147             throws MathIllegalArgumentException {
148         this(numberOfVariables, includeConstant, Precision.EPSILON);
149     }
150 
151     /**
152      * A getter method which determines whether a constant is included.
153      * @return true regression has an intercept, false no intercept
154      */
155     @Override
156     public boolean hasIntercept() {
157         return this.hasIntercept;
158     }
159 
160     /**
161      * Gets the number of observations added to the regression model.
162      * @return number of observations
163      */
164     @Override
165     public long getN() {
166         return this.nobs;
167     }
168 
169     /**
170      * Adds an observation to the regression model.
171      * @param x the array with regressor values
172      * @param y  the value of dependent variable given these regressors
173      * @exception MathIllegalArgumentException if the length of {@code x} does not equal
174      * the number of independent variables in the model
175      */
176     @Override
177     public void addObservation(final double[] x, final double y)
178             throws MathIllegalArgumentException {
179 
180         if ((!this.hasIntercept && x.length != nvars) ||
181                (this.hasIntercept && x.length + 1 != nvars)) {
182             throw new MathIllegalArgumentException(LocalizedStatFormats.INVALID_REGRESSION_OBSERVATION,
183                     x.length, nvars);
184         }
185         if (!this.hasIntercept) {
186             include(x.clone(), 1.0, y);
187         } else {
188             final double[] tmp = new double[x.length + 1];
189             System.arraycopy(x, 0, tmp, 1, x.length);
190             tmp[0] = 1.0;
191             include(tmp, 1.0, y);
192         }
193         ++nobs;
194 
195     }
196 
197     /**
198      * Adds multiple observations to the model.
199      * @param x observations on the regressors
200      * @param y observations on the regressand
201      * @throws MathIllegalArgumentException if {@code x} is not rectangular, does not match
202      * the length of {@code y} or does not contain sufficient data to estimate the model
203      */
204     @Override
205     public void addObservations(double[][] x, double[] y) throws MathIllegalArgumentException {
206         MathUtils.checkNotNull(x, LocalizedCoreFormats.INPUT_ARRAY);
207         MathUtils.checkNotNull(y, LocalizedCoreFormats.INPUT_ARRAY);
208         MathUtils.checkDimension(x.length, y.length);
209         if (x.length == 0) {  // Must be no y data either
210             throw new MathIllegalArgumentException(LocalizedCoreFormats.NO_DATA);
211         }
212         if (x[0].length + 1 > x.length) {
213             throw new MathIllegalArgumentException(
214                   LocalizedStatFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
215                   x.length, x[0].length);
216         }
217         for (int i = 0; i < x.length; i++) {
218             addObservation(x[i], y[i]);
219         }
220     }
221 
222     /**
223      * The include method is where the QR decomposition occurs. This statement forms all
224      * intermediate data which will be used for all derivative measures.
225      * According to the miller paper, note that in the original implementation the x vector
226      * is overwritten. In this implementation, the include method is passed a copy of the
227      * original data vector so that there is no contamination of the data. Additionally,
228      * this method differs slightly from Gentleman's method, in that the assumption is
229      * of dense design matrices, there is some advantage in using the original gentleman algorithm
230      * on sparse matrices.
231      *
232      * @param x observations on the regressors
233      * @param wi weight of the this observation (-1,1)
234      * @param yi observation on the regressand
235      */
236     private void include(final double[] x, final double wi, final double yi) {
237         int nextr = 0;
238         double w = wi;
239         double y = yi;
240         double xi;
241         double di;
242         double wxi;
243         double dpi;
244         double xk;
245         double _w;
246         this.rss_set = false;
247         sumy = smartAdd(yi, sumy);
248         sumsqy = smartAdd(sumsqy, yi * yi);
249         for (int i = 0; i < x.length; i++) {
250             if (w == 0.0) {
251                 return;
252             }
253             xi = x[i];
254 
255             if (xi == 0.0) {
256                 nextr += nvars - i - 1;
257                 continue;
258             }
259             di = d[i];
260             wxi = w * xi;
261             _w = w;
262             if (di != 0.0) {
263                 dpi = smartAdd(di, wxi * xi);
264                 final double tmp = wxi * xi / di;
265                 if (FastMath.abs(tmp) > Precision.EPSILON) {
266                     w = (di * w) / dpi;
267                 }
268             } else {
269                 dpi = wxi * xi;
270                 w = 0.0;
271             }
272             d[i] = dpi;
273             for (int k = i + 1; k < nvars; k++) {
274                 xk = x[k];
275                 x[k] = smartAdd(xk, -xi * r[nextr]);
276                 if (di != 0.0) {
277                     r[nextr] = smartAdd(di * r[nextr], (_w * xi) * xk) / dpi;
278                 } else {
279                     r[nextr] = xk / xi;
280                 }
281                 ++nextr;
282             }
283             xk = y;
284             y = smartAdd(xk, -xi * rhs[i]);
285             if (di != 0.0) {
286                 rhs[i] = smartAdd(di * rhs[i], wxi * xk) / dpi;
287             } else {
288                 rhs[i] = xk / xi;
289             }
290         }
291         sserr = smartAdd(sserr, w * y * y);
292     }
293 
294     /**
295      * Adds to number a and b such that the contamination due to
296      * numerical smallness of one addend does not corrupt the sum.
297      * @param a - an addend
298      * @param b - an addend
299      * @return the sum of the a and b
300      */
301     private double smartAdd(double a, double b) {
302         final double _a = FastMath.abs(a);
303         final double _b = FastMath.abs(b);
304         if (_a > _b) {
305             final double eps = _a * Precision.EPSILON;
306             if (_b > eps) {
307                 return a + b;
308             }
309             return a;
310         } else {
311             final double eps = _b * Precision.EPSILON;
312             if (_a > eps) {
313                 return a + b;
314             }
315             return b;
316         }
317     }
318 
319     /**
320      * As the name suggests,  clear wipes the internals and reorders everything in the
321      * canonical order.
322      */
323     @Override
324     public void clear() {
325         Arrays.fill(this.d, 0.0);
326         Arrays.fill(this.rhs, 0.0);
327         Arrays.fill(this.r, 0.0);
328         Arrays.fill(this.tol, 0.0);
329         Arrays.fill(this.rss, 0.0);
330         Arrays.fill(this.work_tolset, 0.0);
331         Arrays.fill(this.work_sing, 0.0);
332         Arrays.fill(this.x_sing, 0.0);
333         Arrays.fill(this.lindep, false);
334         for (int i = 0; i < nvars; i++) {
335             this.vorder[i] = i;
336         }
337         this.nobs = 0;
338         this.sserr = 0.0;
339         this.sumy = 0.0;
340         this.sumsqy = 0.0;
341         this.rss_set = false;
342         this.tol_set = false;
343     }
344 
345     /**
346      * This sets up tolerances for singularity testing.
347      */
348     private void tolset() {
349         int pos;
350         double total;
351         final double eps = this.epsilon;
352         for (int i = 0; i < nvars; i++) {
353             this.work_tolset[i] = FastMath.sqrt(d[i]);
354         }
355         tol[0] = eps * this.work_tolset[0];
356         for (int col = 1; col < nvars; col++) {
357             pos = col - 1;
358             total = work_tolset[col];
359             for (int row = 0; row < col; row++) {
360                 total += FastMath.abs(r[pos]) * work_tolset[row];
361                 pos += nvars - row - 2;
362             }
363             tol[col] = eps * total;
364         }
365         tol_set = true;
366     }
367 
368     /**
369      * The regcf method conducts the linear regression and extracts the
370      * parameter vector. Notice that the algorithm can do subset regression
371      * with no alteration.
372      *
373      * @param nreq how many of the regressors to include (either in canonical
374      * order, or in the current reordered state)
375      * @return an array with the estimated slope coefficients
376      * @throws MathIllegalArgumentException if {@code nreq} is less than 1
377      * or greater than the number of independent variables
378      */
379     private double[] regcf(int nreq) throws MathIllegalArgumentException {
380         int nextr;
381         if (nreq < 1) {
382             throw new MathIllegalArgumentException(LocalizedStatFormats.NO_REGRESSORS);
383         }
384         if (nreq > this.nvars) {
385             throw new MathIllegalArgumentException(
386                     LocalizedStatFormats.TOO_MANY_REGRESSORS, nreq, this.nvars);
387         }
388         if (!this.tol_set) {
389             tolset();
390         }
391         final double[] ret = new double[nreq];
392         boolean rankProblem = false;
393         for (int i = nreq - 1; i > -1; i--) {
394             if (FastMath.sqrt(d[i]) < tol[i]) {
395                 ret[i] = 0.0;
396                 d[i] = 0.0;
397                 rankProblem = true;
398             } else {
399                 ret[i] = rhs[i];
400                 nextr = i * (nvars + nvars - i - 1) / 2;
401                 for (int j = i + 1; j < nreq; j++) {
402                     ret[i] = smartAdd(ret[i], -r[nextr] * ret[j]);
403                     ++nextr;
404                 }
405             }
406         }
407         if (rankProblem) {
408             for (int i = 0; i < nreq; i++) {
409                 if (this.lindep[i]) {
410                     ret[i] = Double.NaN;
411                 }
412             }
413         }
414         return ret;
415     }
416 
417     /**
418      * The method which checks for singularities and then eliminates the offending
419      * columns.
420      */
421     private void singcheck() {
422         int pos;
423         for (int i = 0; i < nvars; i++) {
424             work_sing[i] = FastMath.sqrt(d[i]);
425         }
426         for (int col = 0; col < nvars; col++) {
427             // Set elements within R to zero if they are less than tol(col) in
428             // absolute value after being scaled by the square root of their row
429             // multiplier
430             final double temp = tol[col];
431             pos = col - 1;
432             for (int row = 0; row < col - 1; row++) {
433                 if (FastMath.abs(r[pos]) * work_sing[row] < temp) {
434                     r[pos] = 0.0;
435                 }
436                 pos += nvars - row - 2;
437             }
438             // If diagonal element is near zero, set it to zero, set appropriate
439             // element of LINDEP, and use INCLUD to augment the projections in
440             // the lower rows of the orthogonalization.
441             lindep[col] = false;
442             if (work_sing[col] < temp) {
443                 lindep[col] = true;
444                 if (col < nvars - 1) {
445                     Arrays.fill(x_sing, 0.0);
446                     int _pi = col * (nvars + nvars - col - 1) / 2;
447                     for (int _xi = col + 1; _xi < nvars; _xi++, _pi++) {
448                         x_sing[_xi] = r[_pi];
449                         r[_pi] = 0.0;
450                     }
451                     final double y = rhs[col];
452                     final double weight = d[col];
453                     d[col] = 0.0;
454                     rhs[col] = 0.0;
455                     this.include(x_sing, weight, y);
456                 } else {
457                     sserr += d[col] * rhs[col] * rhs[col];
458                 }
459             }
460         }
461     }
462 
463     /**
464      * Calculates the sum of squared errors for the full regression
465      * and all subsets in the following manner: <pre>
466      * rss[] ={
467      * ResidualSumOfSquares_allNvars,
468      * ResidualSumOfSquares_FirstNvars-1,
469      * ResidualSumOfSquares_FirstNvars-2,
470      * ..., ResidualSumOfSquares_FirstVariable} </pre>
471      */
472     private void ss() {
473         double total = sserr;
474         rss[nvars - 1] = sserr;
475         for (int i = nvars - 1; i > 0; i--) {
476             total += d[i] * rhs[i] * rhs[i];
477             rss[i - 1] = total;
478         }
479         rss_set = true;
480     }
481 
482     /**
483      * Calculates the cov matrix assuming only the first nreq variables are
484      * included in the calculation. The returned array contains a symmetric
485      * matrix stored in lower triangular form. The matrix will have
486      * ( nreq + 1 ) * nreq / 2 elements. For illustration <pre>
487      * cov =
488      * {
489      *  cov_00,
490      *  cov_10, cov_11,
491      *  cov_20, cov_21, cov22,
492      *  ...
493      * } </pre>
494      *
495      * @param nreq how many of the regressors to include (either in canonical
496      * order, or in the current reordered state)
497      * @return an array with the variance covariance of the included
498      * regressors in lower triangular form
499      */
500     private double[] cov(int nreq) {
501         if (this.nobs <= nreq) {
502             return null; // NOPMD
503         }
504         double rnk = 0.0;
505         for (int i = 0; i < nreq; i++) {
506             if (!this.lindep[i]) {
507                 rnk += 1.0;
508             }
509         }
510         final double var = rss[nreq - 1] / (nobs - rnk);
511         final double[] rinv = new double[nreq * (nreq - 1) / 2];
512         inverse(rinv, nreq);
513         final double[] covmat = new double[nreq * (nreq + 1) / 2];
514         Arrays.fill(covmat, Double.NaN);
515         int pos2;
516         int pos1;
517         int start = 0;
518         for (int row = 0; row < nreq; row++) {
519             pos2 = start;
520             if (!this.lindep[row]) {
521                 for (int col = row; col < nreq; col++) {
522                     if (!this.lindep[col]) {
523                         pos1 = start + col - row;
524                         double total;
525                         if (row == col) {
526                             total = 1.0 / d[col];
527                         } else {
528                             total = rinv[pos1 - 1] / d[col];
529                         }
530                         for (int k = col + 1; k < nreq; k++) {
531                             if (!this.lindep[k]) {
532                                 total += rinv[pos1] * rinv[pos2] / d[k];
533                             }
534                             ++pos1;
535                             ++pos2;
536                         }
537                         covmat[ (col + 1) * col / 2 + row] = total * var;
538                     } else {
539                         pos2 += nreq - col - 1;
540                     }
541                 }
542             }
543             start += nreq - row - 1;
544         }
545         return covmat;
546     }
547 
548     /**
549      * This internal method calculates the inverse of the upper-triangular portion
550      * of the R matrix.
551      * @param rinv  the storage for the inverse of r
552      * @param nreq how many of the regressors to include (either in canonical
553      * order, or in the current reordered state)
554      */
555     private void inverse(double[] rinv, int nreq) {
556         int pos = nreq * (nreq - 1) / 2 - 1;
557         Arrays.fill(rinv, Double.NaN);
558         for (int row = nreq - 1; row > 0; --row) {
559             if (!this.lindep[row]) {
560                 final int start = (row - 1) * (nvars + nvars - row) / 2;
561                 for (int col = nreq; col > row; --col) {
562                     int pos1 = start;
563                     int pos2 = pos;
564                     double total = 0.0;
565                     for (int k = row; k < col - 1; k++) {
566                         pos2 += nreq - k - 1;
567                         if (!this.lindep[k]) {
568                             total += -r[pos1] * rinv[pos2];
569                         }
570                         ++pos1;
571                     }
572                     rinv[pos] = total - r[pos1];
573                     --pos;
574                 }
575             } else {
576                 pos -= nreq - row;
577             }
578         }
579     }
580 
581     /**
582      * In the original algorithm only the partial correlations of the regressors
583      * is returned to the user. In this implementation, we have <pre>
584      * corr =
585      * {
586      *   corrxx - lower triangular
587      *   corrxy - bottom row of the matrix
588      * }
589      * Replaces subroutines PCORR and COR of:
590      * ALGORITHM AS274  APPL. STATIST. (1992) VOL.41, NO. 2 </pre>
591      *
592      * <p>Calculate partial correlations after the variables in rows
593      * 1, 2, ..., IN have been forced into the regression.
594      * If IN = 1, and the first row of R represents a constant in the
595      * model, then the usual simple correlations are returned.</p>
596      *
597      * <p>If IN = 0, the value returned in array CORMAT for the correlation
598      * of variables Xi &amp; Xj is:</p><pre>
599      * sum ( Xi.Xj ) / Sqrt ( sum (Xi^2) . sum (Xj^2) )</pre>
600      *
601      * <p>On return, array CORMAT contains the upper triangle of the matrix of
602      * partial correlations stored by rows, excluding the 1's on the diagonal.
603      * e.g. if IN = 2, the consecutive elements returned are:
604      * (3,4) (3,5) ... (3,ncol), (4,5) (4,6) ... (4,ncol), etc.
605      * Array YCORR stores the partial correlations with the Y-variable
606      * starting with YCORR(IN+1) = partial correlation with the variable in
607      * position (IN+1). </p>
608      *
609      * @param in how many of the regressors to include (either in canonical
610      * order, or in the current reordered state)
611      * @return an array with the partial correlations of the remainder of
612      * regressors with each other and the regressand, in lower triangular form
613      */
614     public double[] getPartialCorrelations(int in) {
615         final double[] output = new double[(nvars - in + 1) * (nvars - in) / 2];
616         int pos;
617         int pos1;
618         int pos2;
619         final int rms_off = -in;
620         final int wrk_off = -(in + 1);
621         final double[] rms = new double[nvars - in];
622         final double[] work = new double[nvars - in - 1];
623         double sumxx;
624         double sumxy;
625         double sumyy;
626         final int offXX = (nvars - in) * (nvars - in - 1) / 2;
627         if (in < -1 || in >= nvars) {
628             return null; // NOPMD
629         }
630         final int nvm = nvars - 1;
631         final int base_pos = r.length - (nvm - in) * (nvm - in + 1) / 2;
632         if (d[in] > 0.0) {
633             rms[in + rms_off] = 1.0 / FastMath.sqrt(d[in]);
634         }
635         for (int col = in + 1; col < nvars; col++) {
636             pos = base_pos + col - 1 - in;
637             sumxx = d[col];
638             for (int row = in; row < col; row++) {
639                 sumxx += d[row] * r[pos] * r[pos];
640                 pos += nvars - row - 2;
641             }
642             if (sumxx > 0.0) {
643                 rms[col + rms_off] = 1.0 / FastMath.sqrt(sumxx);
644             } else {
645                 rms[col + rms_off] = 0.0;
646             }
647         }
648         sumyy = sserr;
649         for (int row = in; row < nvars; row++) {
650             sumyy += d[row] * rhs[row] * rhs[row];
651         }
652         if (sumyy > 0.0) {
653             sumyy = 1.0 / FastMath.sqrt(sumyy);
654         }
655         pos = 0;
656         for (int col1 = in; col1 < nvars; col1++) {
657             sumxy = 0.0;
658             Arrays.fill(work, 0.0);
659             pos1 = base_pos + col1 - in - 1;
660             for (int row = in; row < col1; row++) {
661                 pos2 = pos1 + 1;
662                 for (int col2 = col1 + 1; col2 < nvars; col2++) {
663                     work[col2 + wrk_off] += d[row] * r[pos1] * r[pos2];
664                     pos2++;
665                 }
666                 sumxy += d[row] * r[pos1] * rhs[row];
667                 pos1 += nvars - row - 2;
668             }
669             pos2 = pos1 + 1;
670             for (int col2 = col1 + 1; col2 < nvars; col2++) {
671                 work[col2 + wrk_off] += d[col1] * r[pos2];
672                 ++pos2;
673                 output[ (col2 - 1 - in) * (col2 - in) / 2 + col1 - in] =
674                         work[col2 + wrk_off] * rms[col1 + rms_off] * rms[col2 + rms_off];
675                 ++pos;
676             }
677             sumxy += d[col1] * rhs[col1];
678             output[col1 + rms_off + offXX] = sumxy * rms[col1 + rms_off] * sumyy;
679         }
680 
681         return output;
682     }
683 
684     /**
685      * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2.
686      * Move variable from position FROM to position TO in an
687      * orthogonal reduction produced by AS75.1.
688      *
689      * @param from initial position
690      * @param to destination
691      */
692     private void vmove(int from, int to) {
693         double d1;
694         double d2;
695         double X;
696         double d1new;
697         double d2new;
698         double cbar;
699         double sbar;
700         double Y;
701         int first;
702         int inc;
703         int m1;
704         int m2;
705         int mp1;
706         int pos;
707         boolean bSkipTo40 = false;
708         if (from == to) {
709             return;
710         }
711         if (!this.rss_set) {
712             ss();
713         }
714         final int count;
715         if (from < to) {
716             first = from;
717             inc = 1;
718             count = to - from;
719         } else {
720             first = from - 1;
721             inc = -1;
722             count = from - to;
723         }
724 
725         int m = first;
726         int idx = 0;
727         while (idx < count) {
728             m1 = m * (nvars + nvars - m - 1) / 2;
729             m2 = m1 + nvars - m - 1;
730             mp1 = m + 1;
731 
732             d1 = d[m];
733             d2 = d[mp1];
734             // Special cases.
735             if (d1 > this.epsilon || d2 > this.epsilon) {
736                 X = r[m1];
737                 if (FastMath.abs(X) * FastMath.sqrt(d1) < tol[mp1]) {
738                     X = 0.0;
739                 }
740                 if (d1 < this.epsilon || FastMath.abs(X) < this.epsilon) {
741                     d[m] = d2;
742                     d[mp1] = d1;
743                     r[m1] = 0.0;
744                     for (int col = m + 2; col < nvars; col++) {
745                         ++m1;
746                         X = r[m1];
747                         r[m1] = r[m2];
748                         r[m2] = X;
749                         ++m2;
750                     }
751                     X = rhs[m];
752                     rhs[m] = rhs[mp1];
753                     rhs[mp1] = X;
754                     bSkipTo40 = true;
755                     //break;
756                 } else if (d2 < this.epsilon) {
757                     d[m] = d1 * X * X;
758                     r[m1] = 1.0 / X;
759                     for (int _i = m1 + 1; _i < m1 + nvars - m - 1; _i++) {
760                         r[_i] /= X;
761                     }
762                     rhs[m] /= X;
763                     bSkipTo40 = true;
764                     //break;
765                 }
766                 if (!bSkipTo40) {
767                     d1new = d2 + d1 * X * X;
768                     cbar = d2 / d1new;
769                     sbar = X * d1 / d1new;
770                     d2new = d1 * cbar;
771                     d[m] = d1new;
772                     d[mp1] = d2new;
773                     r[m1] = sbar;
774                     for (int col = m + 2; col < nvars; col++) {
775                         ++m1;
776                         Y = r[m1];
777                         r[m1] = cbar * r[m2] + sbar * Y;
778                         r[m2] = Y - X * r[m2];
779                         ++m2;
780                     }
781                     Y = rhs[m];
782                     rhs[m] = cbar * rhs[mp1] + sbar * Y;
783                     rhs[mp1] = Y - X * rhs[mp1];
784                 }
785             }
786             if (m > 0) {
787                 pos = m;
788                 for (int row = 0; row < m; row++) {
789                     X = r[pos];
790                     r[pos] = r[pos - 1];
791                     r[pos - 1] = X;
792                     pos += nvars - row - 2;
793                 }
794             }
795             // Adjust variable order (VORDER), the tolerances (TOL) and
796             // the vector of residual sums of squares (RSS).
797             m1 = vorder[m];
798             vorder[m] = vorder[mp1];
799             vorder[mp1] = m1;
800             X = tol[m];
801             tol[m] = tol[mp1];
802             tol[mp1] = X;
803             rss[m] = rss[mp1] + d[mp1] * rhs[mp1] * rhs[mp1];
804 
805             m += inc;
806             ++idx;
807         }
808     }
809 
810     /**
811      * ALGORITHM AS274  APPL. STATIST. (1992) VOL.41, NO. 2
812      *
813      * <p> Re-order the variables in an orthogonal reduction produced by
814      * AS75.1 so that the N variables in LIST start at position POS1,
815      * though will not necessarily be in the same order as in LIST.
816      * Any variables in VORDER before position POS1 are not moved.
817      * Auxiliary routine called: VMOVE. </p>
818      *
819      * <p>This internal method reorders the regressors.</p>
820      *
821      * @param list the regressors to move
822      * @param pos1 where the list will be placed
823      * @return -1 error, 0 everything ok
824      */
825     private int reorderRegressors(int[] list, int pos1) {
826         int next;
827         int i;
828         int l;
829         if (list.length < 1 || list.length > nvars + 1 - pos1) {
830             return -1;
831         }
832         next = pos1;
833         i = pos1;
834         while (i < nvars) {
835             l = vorder[i];
836             for (int j = 0; j < list.length; j++) {
837                 if (l == list[j] && i > next) {
838                     this.vmove(i, next);
839                     ++next;
840                     if (next >= list.length + pos1) {
841                         return 0;
842                     } else {
843                         break;
844                     }
845                 }
846             }
847             ++i;
848         }
849         return 0;
850     }
851 
852     /**
853      * Gets the diagonal of the Hat matrix also known as the leverage matrix.
854      *
855      * @param  row_data returns the diagonal of the hat matrix for this observation
856      * @return the diagonal element of the hatmatrix
857      */
858     public double getDiagonalOfHatMatrix(double[] row_data) {
859         double[] wk = new double[this.nvars];
860         int pos;
861         double total;
862 
863         if (row_data.length > nvars) {
864             return Double.NaN;
865         }
866         double[] xrow;
867         if (this.hasIntercept) {
868             xrow = new double[row_data.length + 1];
869             xrow[0] = 1.0;
870             System.arraycopy(row_data, 0, xrow, 1, row_data.length);
871         } else {
872             xrow = row_data;
873         }
874         double hii = 0.0;
875         for (int col = 0; col < xrow.length; col++) {
876             if (FastMath.sqrt(d[col]) < tol[col]) {
877                 wk[col] = 0.0;
878             } else {
879                 pos = col - 1;
880                 total = xrow[col];
881                 for (int row = 0; row < col; row++) {
882                     total = smartAdd(total, -wk[row] * r[pos]);
883                     pos += nvars - row - 2;
884                 }
885                 wk[col] = total;
886                 hii = smartAdd(hii, (total * total) / d[col]);
887             }
888         }
889         return hii;
890     }
891 
892     /**
893      * Gets the order of the regressors, useful if some type of reordering
894      * has been called. Calling regress with int[]{} args will trigger
895      * a reordering.
896      *
897      * @return int[] with the current order of the regressors
898      */
899     public int[] getOrderOfRegressors(){
900         return vorder.clone();
901     }
902 
903     /**
904      * Conducts a regression on the data in the model, using all regressors.
905      *
906      * @return RegressionResults the structure holding all regression results
907      * @exception  MathIllegalArgumentException - thrown if number of observations is
908      * less than the number of variables
909      */
910     @Override
911     public RegressionResults regress() throws MathIllegalArgumentException {
912         return regress(this.nvars);
913     }
914 
915     /**
916      * Conducts a regression on the data in the model, using a subset of regressors.
917      *
918      * @param numberOfRegressors many of the regressors to include (either in canonical
919      * order, or in the current reordered state)
920      * @return RegressionResults the structure holding all regression results
921      * @exception  MathIllegalArgumentException - thrown if number of observations is
922      * less than the number of variables or number of regressors requested
923      * is greater than the regressors in the model
924      */
925     public RegressionResults regress(int numberOfRegressors) throws MathIllegalArgumentException {
926         if (this.nobs <= numberOfRegressors) {
927            throw new MathIllegalArgumentException(
928                    LocalizedStatFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
929                    this.nobs, numberOfRegressors);
930         }
931         if( numberOfRegressors > this.nvars ){
932             throw new MathIllegalArgumentException(
933                     LocalizedStatFormats.TOO_MANY_REGRESSORS, numberOfRegressors, this.nvars);
934         }
935 
936         tolset();
937         singcheck();
938 
939         double[] beta = this.regcf(numberOfRegressors);
940 
941         ss();
942 
943         double[] cov = this.cov(numberOfRegressors);
944 
945         int rnk = 0;
946         for (int i = 0; i < this.lindep.length; i++) {
947             if (!this.lindep[i]) {
948                 ++rnk;
949             }
950         }
951 
952         boolean needsReorder = false;
953         for (int i = 0; i < numberOfRegressors; i++) {
954             if (this.vorder[i] != i) {
955                 needsReorder = true;
956                 break;
957             }
958         }
959         if (!needsReorder) {
960             return new RegressionResults(
961                     beta, new double[][]{cov}, true, this.nobs, rnk,
962                     this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
963         } else {
964             double[] betaNew = new double[beta.length];
965             double[] covNew = new double[cov.length];
966 
967             int[] newIndices = new int[beta.length];
968             for (int i = 0; i < nvars; i++) {
969                 for (int j = 0; j < numberOfRegressors; j++) {
970                     if (this.vorder[j] == i) {
971                         betaNew[i] = beta[ j];
972                         newIndices[i] = j;
973                     }
974                 }
975             }
976 
977             int idx1 = 0;
978             int idx2;
979             int _i;
980             int _j;
981             for (int i = 0; i < beta.length; i++) {
982                 _i = newIndices[i];
983                 for (int j = 0; j <= i; j++, idx1++) {
984                     _j = newIndices[j];
985                     if (_i > _j) {
986                         idx2 = _i * (_i + 1) / 2 + _j;
987                     } else {
988                         idx2 = _j * (_j + 1) / 2 + _i;
989                     }
990                     covNew[idx1] = cov[idx2];
991                 }
992             }
993             return new RegressionResults(
994                     betaNew, new double[][]{covNew}, true, this.nobs, rnk,
995                     this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
996         }
997     }
998 
999     /**
1000      * Conducts a regression on the data in the model, using regressors in array
1001      * Calling this method will change the internal order of the regressors
1002      * and care is required in interpreting the hatmatrix.
1003      *
1004      * @param  variablesToInclude array of variables to include in regression
1005      * @return RegressionResults the structure holding all regression results
1006      * @exception  MathIllegalArgumentException - thrown if number of observations is
1007      * less than the number of variables, the number of regressors requested
1008      * is greater than the regressors in the model or a regressor index in
1009      * regressor array does not exist
1010      */
1011     @Override
1012     public RegressionResults regress(int[] variablesToInclude) throws MathIllegalArgumentException {
1013         if (variablesToInclude.length > this.nvars) {
1014             throw new MathIllegalArgumentException(
1015                     LocalizedStatFormats.TOO_MANY_REGRESSORS, variablesToInclude.length, this.nvars);
1016         }
1017         if (this.nobs <= this.nvars) {
1018             throw new MathIllegalArgumentException(
1019                     LocalizedStatFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
1020                     this.nobs, this.nvars);
1021         }
1022         Arrays.sort(variablesToInclude);
1023         int iExclude = 0;
1024         for (int i = 0; i < variablesToInclude.length; i++) {
1025             if (i >= this.nvars) {
1026                 throw new MathIllegalArgumentException(
1027                         LocalizedCoreFormats.INDEX_LARGER_THAN_MAX, i, this.nvars);
1028             }
1029             if (i > 0 && variablesToInclude[i] == variablesToInclude[i - 1]) {
1030                 variablesToInclude[i] = -1;
1031                 ++iExclude;
1032             }
1033         }
1034         int[] series;
1035         if (iExclude > 0) {
1036             int j = 0;
1037             series = new int[variablesToInclude.length - iExclude];
1038             for (int i = 0; i < variablesToInclude.length; i++) {
1039                 if (variablesToInclude[i] > -1) {
1040                     series[j] = variablesToInclude[i];
1041                     ++j;
1042                 }
1043             }
1044         } else {
1045             series = variablesToInclude;
1046         }
1047 
1048         reorderRegressors(series, 0);
1049         tolset();
1050         singcheck();
1051 
1052         double[] beta = this.regcf(series.length);
1053 
1054         ss();
1055 
1056         double[] cov = this.cov(series.length);
1057 
1058         int rnk = 0;
1059         for (int i = 0; i < this.lindep.length; i++) {
1060             if (!this.lindep[i]) {
1061                 ++rnk;
1062             }
1063         }
1064 
1065         boolean needsReorder = false;
1066         for (int i = 0; i < this.nvars; i++) {
1067             if (this.vorder[i] != series[i]) {
1068                 needsReorder = true;
1069                 break;
1070             }
1071         }
1072         if (!needsReorder) {
1073             return new RegressionResults(
1074                     beta, new double[][]{cov}, true, this.nobs, rnk,
1075                     this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
1076         } else {
1077             double[] betaNew = new double[beta.length];
1078             int[] newIndices = new int[beta.length];
1079             for (int i = 0; i < series.length; i++) {
1080                 for (int j = 0; j < this.vorder.length; j++) {
1081                     if (this.vorder[j] == series[i]) {
1082                         betaNew[i] = beta[ j];
1083                         newIndices[i] = j;
1084                     }
1085                 }
1086             }
1087             double[] covNew = new double[cov.length];
1088             int idx1 = 0;
1089             int idx2;
1090             int _i;
1091             int _j;
1092             for (int i = 0; i < beta.length; i++) {
1093                 _i = newIndices[i];
1094                 for (int j = 0; j <= i; j++, idx1++) {
1095                     _j = newIndices[j];
1096                     if (_i > _j) {
1097                         idx2 = _i * (_i + 1) / 2 + _j;
1098                     } else {
1099                         idx2 = _j * (_j + 1) / 2 + _i;
1100                     }
1101                     covNew[idx1] = cov[idx2];
1102                 }
1103             }
1104             return new RegressionResults(
1105                     betaNew, new double[][]{covNew}, true, this.nobs, rnk,
1106                     this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
1107         }
1108     }
1109 }