/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * This is not the original file distributed by the Apache Software Foundation
 * It has been modified by the Hipparchus project
 */
package org.hipparchus.stat.regression;

import java.util.Arrays;

import org.hipparchus.exception.LocalizedCoreFormats;
import org.hipparchus.exception.MathIllegalArgumentException;
import org.hipparchus.stat.LocalizedStatFormats;
import org.hipparchus.util.FastMath;
import org.hipparchus.util.MathUtils;
import org.hipparchus.util.Precision;
/**
 * This class is a concrete implementation of the {@link UpdatingMultipleLinearRegression} interface.
 *
 * <p>The algorithm is described in:</p>
 * <pre>
 * Algorithm AS 274: Least Squares Routines to Supplement Those of Gentleman
 * Author(s): Alan J. Miller
 * Source: Journal of the Royal Statistical Society.
 * Series C (Applied Statistics), Vol. 41, No. 2
 * (1992), pp. 458-478
 * Published by: Blackwell Publishing for the Royal Statistical Society
 * Stable URL: http://www.jstor.org/stable/2347583 </pre>
 *
 * <p>This method for multiple regression forms the solution to the OLS problem
 * by updating the QR decomposition as described by Gentleman.</p>
 *
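 * <p>A minimal usage sketch (the data values below are purely illustrative, and the
 * sketch assumes the {@link RegressionResults} accessor {@code getParameterEstimates()}):</p>
 * <pre>{@code
 * // three regressors plus an automatically added intercept
 * MillerUpdatingRegression regression = new MillerUpdatingRegression(3, true);
 *
 * // observations are streamed in one at a time; no per-row data is retained
 * regression.addObservation(new double[] {1.0, 2.5, 0.3}, 4.1);
 * regression.addObservation(new double[] {2.0, 1.5, 1.3}, 5.4);
 * // ... add at least as many observations as there are coefficients ...
 *
 * RegressionResults results = regression.regress();
 * double[] beta = results.getParameterEstimates(); // beta[0] is the intercept here
 * }</pre>
 *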
 */
public class MillerUpdatingRegression implements UpdatingMultipleLinearRegression {

    /** number of variables in regression */
    private final int nvars;
    /** diagonals of cross products matrix */
    private final double[] d;
    /** the elements of the R'Y vector (right-hand side of the triangular system) */
    private final double[] rhs;
    /** the off diagonal portion of the R matrix */
    private final double[] r;
    /** the tolerance for each of the variables */
    private final double[] tol;
    /** residual sum of squares for all nested regressions */
    private final double[] rss;
    /** order of the regressors */
    private final int[] vorder;
    /** scratch space for tolerance calc */
    private final double[] work_tolset;
    /** number of observations entered */
    private long nobs;
    /** sum of squared errors of largest regression */
    private double sserr;
    /** has rss been called? */
    private boolean rss_set;
    /** has the tolerance setting method been called */
    private boolean tol_set;
    /** flags for variables with linear dependency problems */
    private final boolean[] lindep;
    /** singular x values */
    private final double[] x_sing;
    /** workspace for singularity method */
    private final double[] work_sing;
    /** summation of Y variable */
    private double sumy;
    /** summation of squared Y values */
    private double sumsqy;
    /** boolean flag whether a regression constant is added */
    private final boolean hasIntercept;
    /** zero tolerance */
    private final double epsilon;
    /**
     * The default constructor is private to prevent inadvertent instantiation.
     */
    @SuppressWarnings("unused")
    private MillerUpdatingRegression() {
        this(-1, false, Double.NaN);
    }

    /**
     * This is the augmented constructor for the MillerUpdatingRegression class.
     *
     * @param numberOfVariables number of regressors to expect, not including constant
     * @param includeConstant include a constant automatically
     * @param errorTolerance zero tolerance, how machine zero is determined
     * @throws MathIllegalArgumentException if {@code numberOfVariables} is less than 1
     */
    public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant, double errorTolerance)
        throws MathIllegalArgumentException {
        if (numberOfVariables < 1) {
            throw new MathIllegalArgumentException(LocalizedStatFormats.NO_REGRESSORS);
        }
        if (includeConstant) {
            this.nvars = numberOfVariables + 1;
        } else {
            this.nvars = numberOfVariables;
        }
        this.hasIntercept = includeConstant;
        this.nobs = 0;
        this.d = new double[this.nvars];
        this.rhs = new double[this.nvars];
        this.r = new double[this.nvars * (this.nvars - 1) / 2];
        this.tol = new double[this.nvars];
        this.rss = new double[this.nvars];
        this.vorder = new int[this.nvars];
        this.x_sing = new double[this.nvars];
        this.work_sing = new double[this.nvars];
        this.work_tolset = new double[this.nvars];
        this.lindep = new boolean[this.nvars];
        for (int i = 0; i < this.nvars; i++) {
            vorder[i] = i;
        }
        if (errorTolerance > 0) {
            this.epsilon = errorTolerance;
        } else {
            this.epsilon = -errorTolerance;
        }
    }

    /**
     * Primary constructor for the MillerUpdatingRegression.
     *
     * @param numberOfVariables maximum number of potential regressors
     * @param includeConstant include a constant automatically
     * @throws MathIllegalArgumentException if {@code numberOfVariables} is less than 1
     */
    public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant)
        throws MathIllegalArgumentException {
        this(numberOfVariables, includeConstant, Precision.EPSILON);
    }

    /**
     * A getter method which determines whether a constant is included.
     * @return true if the regression includes an intercept, false otherwise
     */
    @Override
    public boolean hasIntercept() {
        return this.hasIntercept;
    }

    /**
     * Gets the number of observations added to the regression model.
     * @return number of observations
     */
    @Override
    public long getN() {
        return this.nobs;
    }

    /**
     * Adds an observation to the regression model.
     * @param x the array with regressor values
     * @param y the value of dependent variable given these regressors
     * @exception MathIllegalArgumentException if the length of {@code x} does not equal
     * the number of independent variables in the model
     */
    @Override
    public void addObservation(final double[] x, final double y)
        throws MathIllegalArgumentException {

        if ((!this.hasIntercept && x.length != nvars) ||
            (this.hasIntercept && x.length + 1 != nvars)) {
            throw new MathIllegalArgumentException(LocalizedStatFormats.INVALID_REGRESSION_OBSERVATION,
                                                   x.length, nvars);
        }
        if (!this.hasIntercept) {
            include(x.clone(), 1.0, y);
        } else {
            final double[] tmp = new double[x.length + 1];
            System.arraycopy(x, 0, tmp, 1, x.length);
            tmp[0] = 1.0;
            include(tmp, 1.0, y);
        }
        ++nobs;

    }

    /**
     * Adds multiple observations to the model.
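     *
     * <p>A short illustrative sketch (the numbers are made up, and a {@code regression}
     * instance built for two regressors is assumed): each row of {@code x} is one
     * observation, and there must be at least one more row than there are columns.</p>
     * <pre>{@code
     * double[][] x = {
     *     {1.0, 2.0},
     *     {2.0, 3.0},
     *     {3.0, 5.0},
     *     {4.0, 4.0}
     * };
     * double[] y = {3.1, 5.2, 8.1, 7.9};
     * regression.addObservations(x, y); // equivalent to four addObservation calls
     * }</pre>
     *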
     * @param x observations on the regressors
     * @param y observations on the regressand
     * @throws MathIllegalArgumentException if {@code x} is not rectangular, does not match
     * the length of {@code y} or does not contain sufficient data to estimate the model
     */
    @Override
    public void addObservations(double[][] x, double[] y) throws MathIllegalArgumentException {
        MathUtils.checkNotNull(x, LocalizedCoreFormats.INPUT_ARRAY);
        MathUtils.checkNotNull(y, LocalizedCoreFormats.INPUT_ARRAY);
        MathUtils.checkDimension(x.length, y.length);
        if (x.length == 0) {  // Must be no y data either
            throw new MathIllegalArgumentException(LocalizedCoreFormats.NO_DATA);
        }
        if (x[0].length + 1 > x.length) {
            throw new MathIllegalArgumentException(
                  LocalizedStatFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
                  x.length, x[0].length);
        }
        for (int i = 0; i < x.length; i++) {
            addObservation(x[i], y[i]);
        }
    }

    /**
     * The include method is where the QR decomposition occurs. This method forms all
     * intermediate data which will be used for all derivative measures.
     * As noted in the Miller paper, the original implementation overwrites the x vector.
     * In this implementation the include method is passed a copy of the original data
     * vector, so that there is no contamination of the data.
     * Additionally, this method differs slightly from Gentleman's method in that it
     * assumes dense design matrices; the original Gentleman algorithm retains some
     * advantage on sparse matrices.
     *
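     * <p>Throughout this class the off-diagonal part of R is held row by row in the
     * packed array {@code r}. As a sketch of the indexing convention (the helper below
     * is purely illustrative and is not a member of this class), the element R(i, j)
     * with {@code j > i} lives at:</p>
     * <pre>{@code
     * // index of R(i, j) in the packed array r, for 0 <= i < j < nvars
     * int packedIndex(int i, int j, int nvars) {
     *     return i * (nvars + nvars - i - 1) / 2 + (j - i - 1);
     * }
     * }</pre>
     *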
     * @param x observations on the regressors
     * @param wi weight of this observation (-1,1)
     * @param yi observation on the regressand
     */
    private void include(final double[] x, final double wi, final double yi) {
        int nextr = 0;
        double w = wi;
        double y = yi;
        double xi;
        double di;
        double wxi;
        double dpi;
        double xk;
        double _w;
        this.rss_set = false;
        sumy = smartAdd(yi, sumy);
        sumsqy = smartAdd(sumsqy, yi * yi);
        for (int i = 0; i < x.length; i++) {
            if (w == 0.0) {
                return;
            }
            xi = x[i];

            if (xi == 0.0) {
                nextr += nvars - i - 1;
                continue;
            }
            di = d[i];
            wxi = w * xi;
            _w = w;
            if (di != 0.0) {
                dpi = smartAdd(di, wxi * xi);
                final double tmp = wxi * xi / di;
                if (FastMath.abs(tmp) > Precision.EPSILON) {
                    w = (di * w) / dpi;
                }
            } else {
                dpi = wxi * xi;
                w = 0.0;
            }
            d[i] = dpi;
            for (int k = i + 1; k < nvars; k++) {
                xk = x[k];
                x[k] = smartAdd(xk, -xi * r[nextr]);
                if (di != 0.0) {
                    r[nextr] = smartAdd(di * r[nextr], (_w * xi) * xk) / dpi;
                } else {
                    r[nextr] = xk / xi;
                }
                ++nextr;
            }
            xk = y;
            y = smartAdd(xk, -xi * rhs[i]);
            if (di != 0.0) {
                rhs[i] = smartAdd(di * rhs[i], wxi * xk) / dpi;
            } else {
                rhs[i] = xk / xi;
            }
        }
        sserr = smartAdd(sserr, w * y * y);
    }

    /**
     * Adds two numbers a and b such that contamination due to the numerical
     * smallness of one addend does not corrupt the sum: if one addend is below
     * the relative precision of the other, it is dropped. For example, adding
     * 1.0 and 1.0e-20 returns 1.0.
     * @param a first addend
     * @param b second addend
     * @return the sum of a and b
     */
    private double smartAdd(double a, double b) {
        final double _a = FastMath.abs(a);
        final double _b = FastMath.abs(b);
        if (_a > _b) {
            final double eps = _a * Precision.EPSILON;
            if (_b > eps) {
                return a + b;
            }
            return a;
        } else {
            final double eps = _b * Precision.EPSILON;
            if (_a > eps) {
                return a + b;
            }
            return b;
        }
    }

    /**
     * As the name suggests, clear wipes the internals and reorders everything in the
     * canonical order.
     */
    @Override
    public void clear() {
        Arrays.fill(this.d, 0.0);
        Arrays.fill(this.rhs, 0.0);
        Arrays.fill(this.r, 0.0);
        Arrays.fill(this.tol, 0.0);
        Arrays.fill(this.rss, 0.0);
        Arrays.fill(this.work_tolset, 0.0);
        Arrays.fill(this.work_sing, 0.0);
        Arrays.fill(this.x_sing, 0.0);
        Arrays.fill(this.lindep, false);
        for (int i = 0; i < nvars; i++) {
            this.vorder[i] = i;
        }
        this.nobs = 0;
        this.sserr = 0.0;
        this.sumy = 0.0;
        this.sumsqy = 0.0;
        this.rss_set = false;
        this.tol_set = false;
    }

    /**
     * This sets up tolerances for singularity testing.
     */
    private void tolset() {
        int pos;
        double total;
        final double eps = this.epsilon;
        for (int i = 0; i < nvars; i++) {
            this.work_tolset[i] = FastMath.sqrt(d[i]);
        }
        tol[0] = eps * this.work_tolset[0];
        for (int col = 1; col < nvars; col++) {
            pos = col - 1;
            total = work_tolset[col];
            for (int row = 0; row < col; row++) {
                total += FastMath.abs(r[pos]) * work_tolset[row];
                pos += nvars - row - 2;
            }
            tol[col] = eps * total;
        }
        tol_set = true;
    }

    /**
     * The regcf method conducts the linear regression and extracts the
     * parameter vector. Notice that the algorithm can do subset regression
     * with no alteration.
     *
     * @param nreq how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return an array with the estimated slope coefficients
     * @throws MathIllegalArgumentException if {@code nreq} is less than 1
     * or greater than the number of independent variables
     */
    private double[] regcf(int nreq) throws MathIllegalArgumentException {
        int nextr;
        if (nreq < 1) {
            throw new MathIllegalArgumentException(LocalizedStatFormats.NO_REGRESSORS);
        }
        if (nreq > this.nvars) {
            throw new MathIllegalArgumentException(
                    LocalizedStatFormats.TOO_MANY_REGRESSORS, nreq, this.nvars);
        }
        if (!this.tol_set) {
            tolset();
        }
        final double[] ret = new double[nreq];
        boolean rankProblem = false;
        for (int i = nreq - 1; i > -1; i--) {
            if (FastMath.sqrt(d[i]) < tol[i]) {
                ret[i] = 0.0;
                d[i] = 0.0;
                rankProblem = true;
            } else {
                ret[i] = rhs[i];
                nextr = i * (nvars + nvars - i - 1) / 2;
                for (int j = i + 1; j < nreq; j++) {
                    ret[i] = smartAdd(ret[i], -r[nextr] * ret[j]);
                    ++nextr;
                }
            }
        }
        if (rankProblem) {
            for (int i = 0; i < nreq; i++) {
                if (this.lindep[i]) {
                    ret[i] = Double.NaN;
                }
            }
        }
        return ret;
    }

    /**
     * The method which checks for singularities and then eliminates the offending
     * columns.
     */
    private void singcheck() {
        int pos;
        for (int i = 0; i < nvars; i++) {
            work_sing[i] = FastMath.sqrt(d[i]);
        }
        for (int col = 0; col < nvars; col++) {
            // Set elements within R to zero if they are less than tol(col) in
            // absolute value after being scaled by the square root of their row
            // multiplier
            final double temp = tol[col];
            pos = col - 1;
            for (int row = 0; row < col - 1; row++) {
                if (FastMath.abs(r[pos]) * work_sing[row] < temp) {
                    r[pos] = 0.0;
                }
                pos += nvars - row - 2;
            }
            // If diagonal element is near zero, set it to zero, set appropriate
            // element of LINDEP, and use INCLUD to augment the projections in
            // the lower rows of the orthogonalization.
            lindep[col] = false;
            if (work_sing[col] < temp) {
                lindep[col] = true;
                if (col < nvars - 1) {
                    Arrays.fill(x_sing, 0.0);
                    int _pi = col * (nvars + nvars - col - 1) / 2;
                    for (int _xi = col + 1; _xi < nvars; _xi++, _pi++) {
                        x_sing[_xi] = r[_pi];
                        r[_pi] = 0.0;
                    }
                    final double y = rhs[col];
                    final double weight = d[col];
                    d[col] = 0.0;
                    rhs[col] = 0.0;
                    this.include(x_sing, weight, y);
                } else {
                    sserr += d[col] * rhs[col] * rhs[col];
                }
            }
        }
    }

    /**
     * Calculates the sum of squared errors for the full regression
     * and all subsets in the following manner: <pre>
     * rss[] = {
     *   ResidualSumOfSquares_allNvars,
     *   ResidualSumOfSquares_FirstNvars-1,
     *   ResidualSumOfSquares_FirstNvars-2,
     *   ..., ResidualSumOfSquares_FirstVariable} </pre>
     */
    private void ss() {
        double total = sserr;
        rss[nvars - 1] = sserr;
        for (int i = nvars - 1; i > 0; i--) {
            total += d[i] * rhs[i] * rhs[i];
            rss[i - 1] = total;
        }
        rss_set = true;
    }

    /**
     * Calculates the cov matrix assuming only the first nreq variables are
     * included in the calculation. The returned array contains a symmetric
     * matrix stored in lower triangular form. The matrix will have
     * ( nreq + 1 ) * nreq / 2 elements. For illustration <pre>
     * cov =
     * {
     *   cov_00,
     *   cov_10, cov_11,
     *   cov_20, cov_21, cov_22,
     *   ...
     * } </pre>
     *
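     * <p>As a purely illustrative sketch (this helper is not part of the class), the
     * entry for coefficients {@code i} and {@code j} can be read from the packed
     * array as follows:</p>
     * <pre>{@code
     * // index of cov(i, j) in the packed lower-triangular array, 0 <= j <= i
     * int covIndex(int i, int j) {
     *     return i * (i + 1) / 2 + j;
     * }
     * }</pre>
     *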
     * @param nreq how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return an array with the variance covariance of the included
     * regressors in lower triangular form
     */
    private double[] cov(int nreq) {
        if (this.nobs <= nreq) {
            return null; // NOPMD
        }
        double rnk = 0.0;
        for (int i = 0; i < nreq; i++) {
            if (!this.lindep[i]) {
                rnk += 1.0;
            }
        }
        final double var = rss[nreq - 1] / (nobs - rnk);
        final double[] rinv = new double[nreq * (nreq - 1) / 2];
        inverse(rinv, nreq);
        final double[] covmat = new double[nreq * (nreq + 1) / 2];
        Arrays.fill(covmat, Double.NaN);
        int pos2;
        int pos1;
        int start = 0;
        for (int row = 0; row < nreq; row++) {
            pos2 = start;
            if (!this.lindep[row]) {
                for (int col = row; col < nreq; col++) {
                    if (!this.lindep[col]) {
                        pos1 = start + col - row;
                        double total;
                        if (row == col) {
                            total = 1.0 / d[col];
                        } else {
                            total = rinv[pos1 - 1] / d[col];
                        }
                        for (int k = col + 1; k < nreq; k++) {
                            if (!this.lindep[k]) {
                                total += rinv[pos1] * rinv[pos2] / d[k];
                            }
                            ++pos1;
                            ++pos2;
                        }
                        covmat[ (col + 1) * col / 2 + row] = total * var;
                    } else {
                        pos2 += nreq - col - 1;
                    }
                }
            }
            start += nreq - row - 1;
        }
        return covmat;
    }

    /**
     * This internal method calculates the inverse of the upper-triangular portion
     * of the R matrix.
     * @param rinv the storage for the inverse of r
     * @param nreq how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     */
    private void inverse(double[] rinv, int nreq) {
        int pos = nreq * (nreq - 1) / 2 - 1;
        Arrays.fill(rinv, Double.NaN);
        for (int row = nreq - 1; row > 0; --row) {
            if (!this.lindep[row]) {
                final int start = (row - 1) * (nvars + nvars - row) / 2;
                for (int col = nreq; col > row; --col) {
                    int pos1 = start;
                    int pos2 = pos;
                    double total = 0.0;
                    for (int k = row; k < col - 1; k++) {
                        pos2 += nreq - k - 1;
                        if (!this.lindep[k]) {
                            total += -r[pos1] * rinv[pos2];
                        }
                        ++pos1;
                    }
                    rinv[pos] = total - r[pos1];
                    --pos;
                }
            } else {
                pos -= nreq - row;
            }
        }
    }

    /**
     * In the original algorithm only the partial correlations of the regressors
     * are returned to the user. In this implementation, we have <pre>
     * corr =
     * {
     *   corrxx - lower triangular
     *   corrxy - bottom row of the matrix
     * }
     * Replaces subroutines PCORR and COR of:
     * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2 </pre>
     *
     * <p>Calculate partial correlations after the variables in rows
     * 1, 2, ..., IN have been forced into the regression.
     * If IN = 1, and the first row of R represents a constant in the
     * model, then the usual simple correlations are returned.</p>
     *
     * <p>If IN = 0, the value returned in array CORMAT for the correlation
     * of variables Xi &amp; Xj is:</p><pre>
     * sum ( Xi.Xj ) / Sqrt ( sum (Xi^2) . sum (Xj^2) )</pre>
     *
     * <p>On return, array CORMAT contains the upper triangle of the matrix of
     * partial correlations stored by rows, excluding the 1's on the diagonal.
     * e.g. if IN = 2, the consecutive elements returned are:
     * (3,4) (3,5) ... (3,ncol), (4,5) (4,6) ... (4,ncol), etc.
     * Array YCORR stores the partial correlations with the Y-variable
     * starting with YCORR(IN+1) = partial correlation with the variable in
     * position (IN+1). </p>
     *
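     * <p>As a purely illustrative sketch of how to index into the returned array
     * (these helpers are not members of this class), for {@code in <= i < j < nvars}:</p>
     * <pre>{@code
     * // position of the correlation between regressors i and j
     * int xxIndex(int i, int j, int in) {
     *     return (j - 1 - in) * (j - in) / 2 + (i - in);
     * }
     * // position of the correlation between regressor i and the regressand,
     * // stored after the regressor-regressor block
     * int xyIndex(int i, int in, int nvars) {
     *     return (nvars - in) * (nvars - in - 1) / 2 + (i - in);
     * }
     * }</pre>
     *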
     * @param in how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return an array with the partial correlations of the remainder of
     * regressors with each other and the regressand, in lower triangular form
     */
    public double[] getPartialCorrelations(int in) {
        final double[] output = new double[(nvars - in + 1) * (nvars - in) / 2];
        int pos;
        int pos1;
        int pos2;
        final int rms_off = -in;
        final int wrk_off = -(in + 1);
        final double[] rms = new double[nvars - in];
        final double[] work = new double[nvars - in - 1];
        double sumxx;
        double sumxy;
        double sumyy;
        final int offXX = (nvars - in) * (nvars - in - 1) / 2;
        if (in < -1 || in >= nvars) {
            return null; // NOPMD
        }
        final int nvm = nvars - 1;
        final int base_pos = r.length - (nvm - in) * (nvm - in + 1) / 2;
        if (d[in] > 0.0) {
            rms[in + rms_off] = 1.0 / FastMath.sqrt(d[in]);
        }
        for (int col = in + 1; col < nvars; col++) {
            pos = base_pos + col - 1 - in;
            sumxx = d[col];
            for (int row = in; row < col; row++) {
                sumxx += d[row] * r[pos] * r[pos];
                pos += nvars - row - 2;
            }
            if (sumxx > 0.0) {
                rms[col + rms_off] = 1.0 / FastMath.sqrt(sumxx);
            } else {
                rms[col + rms_off] = 0.0;
            }
        }
        sumyy = sserr;
        for (int row = in; row < nvars; row++) {
            sumyy += d[row] * rhs[row] * rhs[row];
        }
        if (sumyy > 0.0) {
            sumyy = 1.0 / FastMath.sqrt(sumyy);
        }
        pos = 0;
        for (int col1 = in; col1 < nvars; col1++) {
            sumxy = 0.0;
            Arrays.fill(work, 0.0);
            pos1 = base_pos + col1 - in - 1;
            for (int row = in; row < col1; row++) {
                pos2 = pos1 + 1;
                for (int col2 = col1 + 1; col2 < nvars; col2++) {
                    work[col2 + wrk_off] += d[row] * r[pos1] * r[pos2];
                    pos2++;
                }
                sumxy += d[row] * r[pos1] * rhs[row];
                pos1 += nvars - row - 2;
            }
            pos2 = pos1 + 1;
            for (int col2 = col1 + 1; col2 < nvars; col2++) {
                work[col2 + wrk_off] += d[col1] * r[pos2];
                ++pos2;
                output[ (col2 - 1 - in) * (col2 - in) / 2 + col1 - in] =
                        work[col2 + wrk_off] * rms[col1 + rms_off] * rms[col2 + rms_off];
                ++pos;
            }
            sumxy += d[col1] * rhs[col1];
            output[col1 + rms_off + offXX] = sumxy * rms[col1 + rms_off] * sumyy;
        }

        return output;
    }

    /**
     * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2.
     * Move variable from position FROM to position TO in an
     * orthogonal reduction produced by AS75.1.
     *
     * @param from initial position
     * @param to destination
     */
    private void vmove(int from, int to) {
        double d1;
        double d2;
        double X;
        double d1new;
        double d2new;
        double cbar;
        double sbar;
        double Y;
        int first;
        int inc;
        int m1;
        int m2;
        int mp1;
        int pos;
        boolean bSkipTo40 = false;
        if (from == to) {
            return;
        }
        if (!this.rss_set) {
            ss();
        }
        final int count;
        if (from < to) {
            first = from;
            inc = 1;
            count = to - from;
        } else {
            first = from - 1;
            inc = -1;
            count = from - to;
        }

        int m = first;
        int idx = 0;
        while (idx < count) {
            m1 = m * (nvars + nvars - m - 1) / 2;
            m2 = m1 + nvars - m - 1;
            mp1 = m + 1;

            d1 = d[m];
            d2 = d[mp1];
            // Special cases.
            if (d1 > this.epsilon || d2 > this.epsilon) {
                X = r[m1];
                if (FastMath.abs(X) * FastMath.sqrt(d1) < tol[mp1]) {
                    X = 0.0;
                }
                if (d1 < this.epsilon || FastMath.abs(X) < this.epsilon) {
                    d[m] = d2;
                    d[mp1] = d1;
                    r[m1] = 0.0;
                    for (int col = m + 2; col < nvars; col++) {
                        ++m1;
                        X = r[m1];
                        r[m1] = r[m2];
                        r[m2] = X;
                        ++m2;
                    }
                    X = rhs[m];
                    rhs[m] = rhs[mp1];
                    rhs[mp1] = X;
                    bSkipTo40 = true;
                    //break;
                } else if (d2 < this.epsilon) {
                    d[m] = d1 * X * X;
                    r[m1] = 1.0 / X;
                    for (int _i = m1 + 1; _i < m1 + nvars - m - 1; _i++) {
                        r[_i] /= X;
                    }
                    rhs[m] /= X;
                    bSkipTo40 = true;
                    //break;
                }
                if (!bSkipTo40) {
                    d1new = d2 + d1 * X * X;
                    cbar = d2 / d1new;
                    sbar = X * d1 / d1new;
                    d2new = d1 * cbar;
                    d[m] = d1new;
                    d[mp1] = d2new;
                    r[m1] = sbar;
                    for (int col = m + 2; col < nvars; col++) {
                        ++m1;
                        Y = r[m1];
                        r[m1] = cbar * r[m2] + sbar * Y;
                        r[m2] = Y - X * r[m2];
                        ++m2;
                    }
                    Y = rhs[m];
                    rhs[m] = cbar * rhs[mp1] + sbar * Y;
                    rhs[mp1] = Y - X * rhs[mp1];
                }
            }
            if (m > 0) {
                pos = m;
                for (int row = 0; row < m; row++) {
                    X = r[pos];
                    r[pos] = r[pos - 1];
                    r[pos - 1] = X;
                    pos += nvars - row - 2;
                }
            }
            // Adjust variable order (VORDER), the tolerances (TOL) and
            // the vector of residual sums of squares (RSS).
            m1 = vorder[m];
            vorder[m] = vorder[mp1];
            vorder[mp1] = m1;
            X = tol[m];
            tol[m] = tol[mp1];
            tol[mp1] = X;
            rss[m] = rss[mp1] + d[mp1] * rhs[mp1] * rhs[mp1];

            m += inc;
            ++idx;
        }
    }

    /**
     * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2
     *
     * <p> Re-order the variables in an orthogonal reduction produced by
     * AS75.1 so that the N variables in LIST start at position POS1,
     * though will not necessarily be in the same order as in LIST.
     * Any variables in VORDER before position POS1 are not moved.
     * Auxiliary routine called: VMOVE. </p>
     *
     * <p>This internal method reorders the regressors.</p>
     *
     * @param list the regressors to move
     * @param pos1 where the list will be placed
     * @return -1 error, 0 everything ok
     */
    private int reorderRegressors(int[] list, int pos1) {
        int next;
        int i;
        int l;
        if (list.length < 1 || list.length > nvars + 1 - pos1) {
            return -1;
        }
        next = pos1;
        i = pos1;
        while (i < nvars) {
            l = vorder[i];
            for (int j = 0; j < list.length; j++) {
                if (l == list[j] && i > next) {
                    this.vmove(i, next);
                    ++next;
                    if (next >= list.length + pos1) {
                        return 0;
                    } else {
                        break;
                    }
                }
            }
            ++i;
        }
        return 0;
    }

    /**
     * Gets the diagonal of the hat matrix, also known as the leverage matrix.
     *
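     * <p>A purely illustrative sketch (a populated {@code regression} instance, as in
     * the class-level example, is assumed):</p>
     * <pre>{@code
     * // leverage of a single observation; pass the regressor values only,
     * // the intercept column (if any) is prepended internally
     * double leverage = regression.getDiagonalOfHatMatrix(new double[] {1.0, 2.5, 0.3});
     * }</pre>
     *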
     * @param row_data the regressor values of the observation whose leverage is sought
     * @return the diagonal element of the hat matrix
     */
    public double getDiagonalOfHatMatrix(double[] row_data) {
        double[] wk = new double[this.nvars];
        int pos;
        double total;

        if (row_data.length > nvars) {
            return Double.NaN;
        }
        double[] xrow;
        if (this.hasIntercept) {
            xrow = new double[row_data.length + 1];
            xrow[0] = 1.0;
            System.arraycopy(row_data, 0, xrow, 1, row_data.length);
        } else {
            xrow = row_data;
        }
        double hii = 0.0;
        for (int col = 0; col < xrow.length; col++) {
            if (FastMath.sqrt(d[col]) < tol[col]) {
                wk[col] = 0.0;
            } else {
                pos = col - 1;
                total = xrow[col];
                for (int row = 0; row < col; row++) {
                    total = smartAdd(total, -wk[row] * r[pos]);
                    pos += nvars - row - 2;
                }
                wk[col] = total;
                hii = smartAdd(hii, (total * total) / d[col]);
            }
        }
        return hii;
    }

    /**
     * Gets the order of the regressors, useful if some type of reordering
     * has been called. Calling regress with an array of regressor indices
     * will trigger a reordering.
     *
     * @return int[] with the current order of the regressors
     */
    public int[] getOrderOfRegressors() {
        return vorder.clone();
    }

    /**
     * Conducts a regression on the data in the model, using all regressors.
     *
     * @return RegressionResults the structure holding all regression results
     * @exception MathIllegalArgumentException - thrown if number of observations is
     * less than the number of variables
     */
    @Override
    public RegressionResults regress() throws MathIllegalArgumentException {
        return regress(this.nvars);
    }

    /**
     * Conducts a regression on the data in the model, using a subset of regressors.
     *
     * @param numberOfRegressors how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return RegressionResults the structure holding all regression results
     * @exception MathIllegalArgumentException - thrown if number of observations is
     * less than the number of variables or number of regressors requested
     * is greater than the regressors in the model
     */
    public RegressionResults regress(int numberOfRegressors) throws MathIllegalArgumentException {
        if (this.nobs <= numberOfRegressors) {
            throw new MathIllegalArgumentException(
                    LocalizedStatFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
                    this.nobs, numberOfRegressors);
        }
        if (numberOfRegressors > this.nvars) {
            throw new MathIllegalArgumentException(
                    LocalizedStatFormats.TOO_MANY_REGRESSORS, numberOfRegressors, this.nvars);
        }

        tolset();
        singcheck();

        double[] beta = this.regcf(numberOfRegressors);

        ss();

        double[] cov = this.cov(numberOfRegressors);

        int rnk = 0;
        for (int i = 0; i < this.lindep.length; i++) {
            if (!this.lindep[i]) {
                ++rnk;
            }
        }

        boolean needsReorder = false;
        for (int i = 0; i < numberOfRegressors; i++) {
            if (this.vorder[i] != i) {
                needsReorder = true;
                break;
            }
        }
        if (!needsReorder) {
            return new RegressionResults(
                    beta, new double[][]{cov}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        } else {
            double[] betaNew = new double[beta.length];
            double[] covNew = new double[cov.length];

            int[] newIndices = new int[beta.length];
            for (int i = 0; i < nvars; i++) {
                for (int j = 0; j < numberOfRegressors; j++) {
                    if (this.vorder[j] == i) {
                        betaNew[i] = beta[j];
                        newIndices[i] = j;
                    }
                }
            }

            int idx1 = 0;
            int idx2;
            int _i;
            int _j;
            for (int i = 0; i < beta.length; i++) {
                _i = newIndices[i];
                for (int j = 0; j <= i; j++, idx1++) {
                    _j = newIndices[j];
                    if (_i > _j) {
                        idx2 = _i * (_i + 1) / 2 + _j;
                    } else {
                        idx2 = _j * (_j + 1) / 2 + _i;
                    }
                    covNew[idx1] = cov[idx2];
                }
            }
            return new RegressionResults(
                    betaNew, new double[][]{covNew}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        }
    }

    /**
     * Conducts a regression on the data in the model, using the regressors in the
     * given array. Calling this method will change the internal order of the regressors,
     * and care is required in interpreting the hat matrix.
     *
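     * <p>A purely illustrative sketch: when the model was built with an intercept,
     * variable index 0 refers to the automatically added constant, so the call below
     * requests the constant plus the first and third regressors.</p>
     * <pre>{@code
     * RegressionResults subset = regression.regress(new int[] {0, 1, 3});
     * }</pre>
     *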
     * @param variablesToInclude array of variables to include in regression
     * @return RegressionResults the structure holding all regression results
     * @exception MathIllegalArgumentException - thrown if number of observations is
     * less than the number of variables, the number of regressors requested
     * is greater than the regressors in the model or a regressor index in
     * regressor array does not exist
     */
    @Override
    public RegressionResults regress(int[] variablesToInclude) throws MathIllegalArgumentException {
        if (variablesToInclude.length > this.nvars) {
            throw new MathIllegalArgumentException(
                    LocalizedStatFormats.TOO_MANY_REGRESSORS, variablesToInclude.length, this.nvars);
        }
        if (this.nobs <= this.nvars) {
            throw new MathIllegalArgumentException(
                    LocalizedStatFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
                    this.nobs, this.nvars);
        }
        Arrays.sort(variablesToInclude);
        int iExclude = 0;
        for (int i = 0; i < variablesToInclude.length; i++) {
            if (variablesToInclude[i] >= this.nvars) {
                throw new MathIllegalArgumentException(
                        LocalizedCoreFormats.INDEX_LARGER_THAN_MAX, variablesToInclude[i], this.nvars);
            }
            if (i > 0 && variablesToInclude[i] == variablesToInclude[i - 1]) {
                variablesToInclude[i] = -1;
                ++iExclude;
            }
        }
        int[] series;
        if (iExclude > 0) {
            int j = 0;
            series = new int[variablesToInclude.length - iExclude];
            for (int i = 0; i < variablesToInclude.length; i++) {
                if (variablesToInclude[i] > -1) {
                    series[j] = variablesToInclude[i];
                    ++j;
                }
            }
        } else {
            series = variablesToInclude;
        }

        reorderRegressors(series, 0);
        tolset();
        singcheck();

        double[] beta = this.regcf(series.length);

        ss();

        double[] cov = this.cov(series.length);

        int rnk = 0;
        for (int i = 0; i < this.lindep.length; i++) {
            if (!this.lindep[i]) {
                ++rnk;
            }
        }

        boolean needsReorder = false;
        for (int i = 0; i < this.nvars; i++) {
            if (this.vorder[i] != series[i]) {
                needsReorder = true;
                break;
            }
        }
        if (!needsReorder) {
            return new RegressionResults(
                    beta, new double[][]{cov}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        } else {
            double[] betaNew = new double[beta.length];
            int[] newIndices = new int[beta.length];
            for (int i = 0; i < series.length; i++) {
                for (int j = 0; j < this.vorder.length; j++) {
                    if (this.vorder[j] == series[i]) {
                        betaNew[i] = beta[j];
                        newIndices[i] = j;
                    }
                }
            }
            double[] covNew = new double[cov.length];
            int idx1 = 0;
            int idx2;
            int _i;
            int _j;
            for (int i = 0; i < beta.length; i++) {
                _i = newIndices[i];
                for (int j = 0; j <= i; j++, idx1++) {
                    _j = newIndices[j];
                    if (_i > _j) {
                        idx2 = _i * (_i + 1) / 2 + _j;
                    } else {
                        idx2 = _j * (_j + 1) / 2 + _i;
                    }
                    covNew[idx1] = cov[idx2];
                }
            }
            return new RegressionResults(
                    betaNew, new double[][]{covNew}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        }
    }
}