View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  /*
19   * This is not the original file distributed by the Apache Software Foundation
20   * It has been modified by the Hipparchus project
21   */
22  
23  package org.hipparchus.linear;
24  
25  import java.io.Serializable;
26  import java.util.Arrays;
27  
28  import org.hipparchus.exception.LocalizedCoreFormats;
29  import org.hipparchus.exception.MathIllegalArgumentException;
30  import org.hipparchus.exception.NullArgumentException;
31  import org.hipparchus.util.FastMath;
32  import org.hipparchus.util.MathUtils;
33  
34  /**
35   * Cache-friendly implementation of RealMatrix using a flat arrays to store
36   * square blocks of the matrix.
37   * <p>
38   * This implementation is specially designed to be cache-friendly. Square blocks are
39   * stored as small arrays and allow efficient traversal of data both in row major direction
40   * and columns major direction, one block at a time. This greatly increases performances
41   * for algorithms that use crossed directions loops like multiplication or transposition.
42   * </p>
43   * <p>
44   * The size of square blocks is a static parameter. It may be tuned according to the cache
45   * size of the target computer processor. As a rule of thumbs, it should be the largest
46   * value that allows three blocks to be simultaneously cached (this is necessary for example
47   * for matrix multiplication). The default value is to use 52x52 blocks which is well suited
48   * for processors with 64k L1 cache (one block holds 2704 values or 21632 bytes). This value
49   * could be lowered to 36x36 for processors with 32k L1 cache.
50   * </p>
51   * <p>
52   * The regular blocks represent {@link #BLOCK_SIZE} x {@link #BLOCK_SIZE} squares. Blocks
53   * at right hand side and bottom side may be smaller to fit matrix dimensions. The square
54   * blocks are flattened in row major order in single dimension arrays which are therefore
55   * {@link #BLOCK_SIZE}<sup>2</sup> elements long for regular blocks. The blocks are themselves
56   * organized in row major order.
57   * </p>
58   * <p>
59   * As an example, for a block size of 52x52, a 100x60 matrix would be stored in 4 blocks.
60   * Block 0 would be a {@code double[2704]} array holding the upper left 52x52 square, block 1
61   * would be a {@code double[416]} array holding the upper right 52x8 rectangle, block 2 would
62   * be a {@code double[2496]} array holding the lower left 48x52 rectangle and block 3 would
63   * be a {@code double[384]} array holding the lower right 48x8 rectangle.
64   * </p>
65   * <p>
66   * The layout complexity overhead versus simple mapping of matrices to java
67   * arrays is negligible for small matrices (about 1%). The gain from cache efficiency leads
68   * to up to 3-fold improvements for matrices of moderate to large size.
69   * </p>
70   */
71  public class BlockRealMatrix extends AbstractRealMatrix implements Serializable {
72      /** Block size. */
73      public static final int BLOCK_SIZE = 52;
74      /** Serializable version identifier */
75      private static final long serialVersionUID = 4991895511313664478L;
76      /** Blocks of matrix entries. */
77      private final double blocks[][];
78      /** Number of rows of the matrix. */
79      private final int rows;
80      /** Number of columns of the matrix. */
81      private final int columns;
82      /** Number of block rows of the matrix. */
83      private final int blockRows;
84      /** Number of block columns of the matrix. */
85      private final int blockColumns;
86  
87      /**
88       * Create a new matrix with the supplied row and column dimensions.
89       *
90       * @param rows  the number of rows in the new matrix
91       * @param columns  the number of columns in the new matrix
92       * @throws MathIllegalArgumentException if row or column dimension is not
93       * positive.
94       */
95      public BlockRealMatrix(final int rows, final int columns)
96          throws MathIllegalArgumentException {
97          super(rows, columns);
98          this.rows = rows;
99          this.columns = columns;
100 
101         // number of blocks
102         blockRows = (rows + BLOCK_SIZE - 1) / BLOCK_SIZE;
103         blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
104 
105         // allocate storage blocks, taking care of smaller ones at right and bottom
106         blocks = createBlocksLayout(rows, columns);
107     }
108 
109     /**
110      * Create a new dense matrix copying entries from raw layout data.
111      * <p>The input array <em>must</em> already be in raw layout.</p>
112      * <p>Calling this constructor is equivalent to call:</p>
113      * <pre>matrix = new BlockRealMatrix(rawData.length, rawData[0].length,
114      *                                   toBlocksLayout(rawData), false);</pre>
115      *
116      * @param rawData data for new matrix, in raw layout
117      * @throws MathIllegalArgumentException if the shape of {@code blockData} is
118      * inconsistent with block layout.
119      * @throws MathIllegalArgumentException if row or column dimension is not
120      * positive.
121      * @see #BlockRealMatrix(int, int, double[][], boolean)
122      */
123     public BlockRealMatrix(final double[][] rawData)
124         throws MathIllegalArgumentException {
125         this(rawData.length, rawData[0].length, toBlocksLayout(rawData), false);
126     }
127 
128     /**
129      * Create a new dense matrix copying entries from block layout data.
130      * <p>The input array <em>must</em> already be in blocks layout.</p>
131      *
132      * @param rows Number of rows in the new matrix.
133      * @param columns Number of columns in the new matrix.
134      * @param blockData data for new matrix
135      * @param copyArray Whether the input array will be copied or referenced.
136      * @throws MathIllegalArgumentException if the shape of {@code blockData} is
137      * inconsistent with block layout.
138      * @throws MathIllegalArgumentException if row or column dimension is not
139      * positive.
140      * @see #createBlocksLayout(int, int)
141      * @see #toBlocksLayout(double[][])
142      * @see #BlockRealMatrix(double[][])
143      */
144     public BlockRealMatrix(final int rows, final int columns,
145                            final double[][] blockData, final boolean copyArray)
146         throws MathIllegalArgumentException {
147         super(rows, columns);
148         this.rows = rows;
149         this.columns = columns;
150 
151         // number of blocks
152         blockRows = (rows + BLOCK_SIZE - 1) / BLOCK_SIZE;
153         blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
154 
155         if (copyArray) {
156             // allocate storage blocks, taking care of smaller ones at right and bottom
157             blocks = new double[blockRows * blockColumns][];
158         } else {
159             // reference existing array
160             blocks = blockData; // NOPMD - array copy is taken care of by parameter
161         }
162 
163         int index = 0;
164         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
165             final int iHeight = blockHeight(iBlock);
166             for (int jBlock = 0; jBlock < blockColumns; ++jBlock, ++index) {
167                 if (blockData[index].length != iHeight * blockWidth(jBlock)) {
168                     throw new MathIllegalArgumentException(LocalizedCoreFormats.DIMENSIONS_MISMATCH,
169                                                            blockData[index].length,
170                                                            iHeight * blockWidth(jBlock));
171                 }
172                 if (copyArray) {
173                     blocks[index] = blockData[index].clone();
174                 }
175             }
176         }
177     }
178 
179     /**
180      * Convert a data array from raw layout to blocks layout.
181      * <p>
182      * Raw layout is the straightforward layout where element at row i and
183      * column j is in array element <code>rawData[i][j]</code>. Blocks layout
184      * is the layout used in {@link BlockRealMatrix} instances, where the matrix
185      * is split in square blocks (except at right and bottom side where blocks may
186      * be rectangular to fit matrix size) and each block is stored in a flattened
187      * one-dimensional array.
188      * </p>
189      * <p>
190      * This method creates an array in blocks layout from an input array in raw layout.
191      * It can be used to provide the array argument of the {@link
192      * #BlockRealMatrix(int, int, double[][], boolean)} constructor.
193      * </p>
194      * @param rawData Data array in raw layout.
195      * @return a new data array containing the same entries but in blocks layout.
196      * @throws MathIllegalArgumentException if {@code rawData} is not rectangular.
197      * @see #createBlocksLayout(int, int)
198      * @see #BlockRealMatrix(int, int, double[][], boolean)
199      */
200     public static double[][] toBlocksLayout(final double[][] rawData)
201         throws MathIllegalArgumentException {
202         final int rows = rawData.length;
203         final int columns = rawData[0].length;
204         final int blockRows = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
205         final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
206 
207         // safety checks
208         for (int i = 0; i < rawData.length; ++i) {
209             final int length = rawData[i].length;
210             if (length != columns) {
211                 throw new MathIllegalArgumentException(LocalizedCoreFormats.DIMENSIONS_MISMATCH,
212                                                        columns, length);
213             }
214         }
215 
216         // convert array
217         final double[][] blocks = new double[blockRows * blockColumns][];
218         int blockIndex = 0;
219         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
220             final int pStart = iBlock * BLOCK_SIZE;
221             final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
222             final int iHeight = pEnd - pStart;
223             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
224                 final int qStart = jBlock * BLOCK_SIZE;
225                 final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
226                 final int jWidth = qEnd - qStart;
227 
228                 // allocate new block
229                 final double[] block = new double[iHeight * jWidth];
230                 blocks[blockIndex] = block;
231 
232                 // copy data
233                 int index = 0;
234                 for (int p = pStart; p < pEnd; ++p) {
235                     System.arraycopy(rawData[p], qStart, block, index, jWidth);
236                     index += jWidth;
237                 }
238                 ++blockIndex;
239             }
240         }
241 
242         return blocks;
243     }
244 
245     /**
246      * Create a data array in blocks layout.
247      * <p>
248      * This method can be used to create the array argument of the {@link
249      * #BlockRealMatrix(int, int, double[][], boolean)} constructor.
250      * </p>
251      * @param rows Number of rows in the new matrix.
252      * @param columns Number of columns in the new matrix.
253      * @return a new data array in blocks layout.
254      * @see #toBlocksLayout(double[][])
255      * @see #BlockRealMatrix(int, int, double[][], boolean)
256      */
257     public static double[][] createBlocksLayout(final int rows, final int columns) {
258         final int blockRows = (rows    + BLOCK_SIZE - 1) / BLOCK_SIZE;
259         final int blockColumns = (columns + BLOCK_SIZE - 1) / BLOCK_SIZE;
260 
261         final double[][] blocks = new double[blockRows * blockColumns][];
262         int blockIndex = 0;
263         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
264             final int pStart = iBlock * BLOCK_SIZE;
265             final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
266             final int iHeight = pEnd - pStart;
267             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
268                 final int qStart = jBlock * BLOCK_SIZE;
269                 final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
270                 final int jWidth = qEnd - qStart;
271                 blocks[blockIndex] = new double[iHeight * jWidth];
272                 ++blockIndex;
273             }
274         }
275 
276         return blocks;
277     }
278 
279     /** {@inheritDoc} */
280     @Override
281     public BlockRealMatrix createMatrix(final int rowDimension,
282                                         final int columnDimension)
283         throws MathIllegalArgumentException {
284         return new BlockRealMatrix(rowDimension, columnDimension);
285     }
286 
287     /** {@inheritDoc} */
288     @Override
289     public BlockRealMatrix copy() {
290         // create an empty matrix
291         BlockRealMatrix copied = new BlockRealMatrix(rows, columns);
292 
293         // copy the blocks
294         for (int i = 0; i < blocks.length; ++i) {
295             System.arraycopy(blocks[i], 0, copied.blocks[i], 0, blocks[i].length);
296         }
297 
298         return copied;
299     }
300 
301     /** {@inheritDoc} */
302     @Override
303     public BlockRealMatrix add(final RealMatrix m)
304         throws MathIllegalArgumentException {
305         if (m instanceof BlockRealMatrix) {
306             return add((BlockRealMatrix) m);
307         } else {
308             // safety check
309             MatrixUtils.checkAdditionCompatible(this, m);
310 
311             final BlockRealMatrix out = new BlockRealMatrix(rows, columns);
312 
313             // perform addition block-wise, to ensure good cache behavior
314             int blockIndex = 0;
315             for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
316                 final int pStart = iBlock * BLOCK_SIZE;
317                 final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
318                 for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
319 
320                     // perform addition on the current block
321                     final double[] outBlock = out.blocks[blockIndex];
322                     final double[] tBlock   = blocks[blockIndex];
323                     final int qStart = jBlock * BLOCK_SIZE;
324                     final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
325                     int k = 0;
326                     for (int p = pStart; p < pEnd; ++p) {
327                         for (int q = qStart; q < qEnd; ++q) {
328                             outBlock[k] = tBlock[k] + m.getEntry(p, q);
329                             ++k;
330                         }
331                     }
332                     // go to next block
333                     ++blockIndex;
334                 }
335             }
336 
337             return out;
338         }
339     }
340 
341     /**
342      * Compute the sum of this matrix and {@code m}.
343      *
344      * @param m Matrix to be added.
345      * @return {@code this} + m.
346      * @throws MathIllegalArgumentException if {@code m} is not the same
347      * size as this matrix.
348      */
349     public BlockRealMatrix add(final BlockRealMatrix m)
350         throws MathIllegalArgumentException {
351         // safety check
352         MatrixUtils.checkAdditionCompatible(this, m);
353 
354         final BlockRealMatrix out = new BlockRealMatrix(rows, columns);
355 
356         // perform addition block-wise, to ensure good cache behavior
357         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
358             final double[] outBlock = out.blocks[blockIndex];
359             final double[] tBlock = blocks[blockIndex];
360             final double[] mBlock = m.blocks[blockIndex];
361             for (int k = 0; k < outBlock.length; ++k) {
362                 outBlock[k] = tBlock[k] + mBlock[k];
363             }
364         }
365 
366         return out;
367     }
368 
369     /** {@inheritDoc} */
370     @Override
371     public BlockRealMatrix subtract(final RealMatrix m)
372         throws MathIllegalArgumentException {
373         if (m instanceof BlockRealMatrix) {
374             return subtract((BlockRealMatrix) m);
375         } else {
376             // safety check
377             MatrixUtils.checkSubtractionCompatible(this, m);
378 
379             final BlockRealMatrix out = new BlockRealMatrix(rows, columns);
380 
381             // perform subtraction block-wise, to ensure good cache behavior
382             int blockIndex = 0;
383             for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
384                 final int pStart = iBlock * BLOCK_SIZE;
385                 final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
386                 for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
387 
388                     // perform subtraction on the current block
389                     final double[] outBlock = out.blocks[blockIndex];
390                     final double[] tBlock = blocks[blockIndex];
391                     final int qStart = jBlock * BLOCK_SIZE;
392                     final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
393                     int k = 0;
394                     for (int p = pStart; p < pEnd; ++p) {
395                         for (int q = qStart; q < qEnd; ++q) {
396                             outBlock[k] = tBlock[k] - m.getEntry(p, q);
397                             ++k;
398                         }
399                     }
400                     // go to next block
401                     ++blockIndex;
402                 }
403             }
404 
405             return out;
406         }
407     }
408 
409     /**
410      * Subtract {@code m} from this matrix.
411      *
412      * @param m Matrix to be subtracted.
413      * @return {@code this} - m.
414      * @throws MathIllegalArgumentException if {@code m} is not the
415      * same size as this matrix.
416      */
417     public BlockRealMatrix subtract(final BlockRealMatrix m)
418         throws MathIllegalArgumentException {
419         // safety check
420         MatrixUtils.checkSubtractionCompatible(this, m);
421 
422         final BlockRealMatrix out = new BlockRealMatrix(rows, columns);
423 
424         // perform subtraction block-wise, to ensure good cache behavior
425         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
426             final double[] outBlock = out.blocks[blockIndex];
427             final double[] tBlock = blocks[blockIndex];
428             final double[] mBlock = m.blocks[blockIndex];
429             for (int k = 0; k < outBlock.length; ++k) {
430                 outBlock[k] = tBlock[k] - mBlock[k];
431             }
432         }
433 
434         return out;
435     }
436 
437     /** {@inheritDoc} */
438     @Override
439     public BlockRealMatrix scalarAdd(final double d) {
440 
441         final BlockRealMatrix out = new BlockRealMatrix(rows, columns);
442 
443         // perform subtraction block-wise, to ensure good cache behavior
444         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
445             final double[] outBlock = out.blocks[blockIndex];
446             final double[] tBlock = blocks[blockIndex];
447             for (int k = 0; k < outBlock.length; ++k) {
448                 outBlock[k] = tBlock[k] + d;
449             }
450         }
451 
452         return out;
453     }
454 
455     /** {@inheritDoc} */
456     @Override
457     public RealMatrix scalarMultiply(final double d) {
458         final BlockRealMatrix out = new BlockRealMatrix(rows, columns);
459 
460         // perform subtraction block-wise, to ensure good cache behavior
461         for (int blockIndex = 0; blockIndex < out.blocks.length; ++blockIndex) {
462             final double[] outBlock = out.blocks[blockIndex];
463             final double[] tBlock = blocks[blockIndex];
464             for (int k = 0; k < outBlock.length; ++k) {
465                 outBlock[k] = tBlock[k] * d;
466             }
467         }
468 
469         return out;
470     }
471 
472     /** {@inheritDoc} */
473     @Override
474     public BlockRealMatrix multiply(final RealMatrix m)
475         throws MathIllegalArgumentException {
476         if (m instanceof BlockRealMatrix) {
477             return multiply((BlockRealMatrix) m);
478         } else {
479             // safety check
480             MatrixUtils.checkMultiplicationCompatible(this, m);
481 
482             final BlockRealMatrix out = new BlockRealMatrix(rows, m.getColumnDimension());
483 
484             // perform multiplication block-wise, to ensure good cache behavior
485             int blockIndex = 0;
486             for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
487                 final int pStart = iBlock * BLOCK_SIZE;
488                 final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
489 
490                 for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
491                     final int qStart = jBlock * BLOCK_SIZE;
492                     final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, m.getColumnDimension());
493 
494                     // select current block
495                     final double[] outBlock = out.blocks[blockIndex];
496 
497                     // perform multiplication on current block
498                     for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
499                         final int kWidth = blockWidth(kBlock);
500                         final double[] tBlock = blocks[iBlock * blockColumns + kBlock];
501                         final int rStart = kBlock * BLOCK_SIZE;
502                         int k = 0;
503                         for (int p = pStart; p < pEnd; ++p) {
504                             final int lStart = (p - pStart) * kWidth;
505                             final int lEnd = lStart + kWidth;
506                             for (int q = qStart; q < qEnd; ++q) {
507                                 double sum = 0;
508                                 int r = rStart;
509                                 for (int l = lStart; l < lEnd; ++l) {
510                                     sum += tBlock[l] * m.getEntry(r++, q);
511                                 }
512                                 outBlock[k] += sum;
513                                 ++k;
514                             }
515                         }
516                     }
517                     // go to next block
518                     ++blockIndex;
519                 }
520             }
521 
522             return out;
523         }
524     }
525 
526     /**
527      * Returns the result of postmultiplying this by {@code m}.
528      *
529      * @param m Matrix to postmultiply by.
530      * @return {@code this} * m.
531      * @throws MathIllegalArgumentException if the matrices are not compatible.
532      */
533     public BlockRealMatrix multiply(BlockRealMatrix m)
534         throws MathIllegalArgumentException {
535         // safety check
536         MatrixUtils.checkMultiplicationCompatible(this, m);
537 
538         final BlockRealMatrix out = new BlockRealMatrix(rows, m.columns);
539 
540         // perform multiplication block-wise, to ensure good cache behavior
541         int blockIndex = 0;
542         for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
543 
544             final int pStart = iBlock * BLOCK_SIZE;
545             final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
546 
547             for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
548                 final int jWidth = out.blockWidth(jBlock);
549                 final int jWidth2 = jWidth  + jWidth;
550                 final int jWidth3 = jWidth2 + jWidth;
551                 final int jWidth4 = jWidth3 + jWidth;
552 
553                 // select current block
554                 final double[] outBlock = out.blocks[blockIndex];
555 
556                 // perform multiplication on current block
557                 for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
558                     final int kWidth = blockWidth(kBlock);
559                     final double[] tBlock = blocks[iBlock * blockColumns + kBlock];
560                     final double[] mBlock = m.blocks[kBlock * m.blockColumns + jBlock];
561                     int k = 0;
562                     for (int p = pStart; p < pEnd; ++p) {
563                         final int lStart = (p - pStart) * kWidth;
564                         final int lEnd   = lStart + kWidth;
565                         for (int nStart = 0; nStart < jWidth; ++nStart) {
566                             double sum = 0;
567                             int l = lStart;
568                             int n = nStart;
569                             while (l < lEnd - 3) {
570                                 sum += tBlock[l] * mBlock[n] +
571                                        tBlock[l + 1] * mBlock[n + jWidth] +
572                                        tBlock[l + 2] * mBlock[n + jWidth2] +
573                                        tBlock[l + 3] * mBlock[n + jWidth3];
574                                 l += 4;
575                                 n += jWidth4;
576                             }
577                             while (l < lEnd) {
578                                 sum += tBlock[l++] * mBlock[n];
579                                 n += jWidth;
580                             }
581                             outBlock[k] += sum;
582                             ++k;
583                         }
584                     }
585                 }
586                 // go to next block
587                 ++blockIndex;
588             }
589         }
590 
591         return out;
592     }
593 
594     /**
595      * Returns the result of postmultiplying {@code this} by {@code m^T}.
596      * @param m matrix to first transpose and second postmultiply by
597      * @return {@code this * m^T}
598      * @throws MathIllegalArgumentException if
599      * {@code columnDimension(this) != columnDimension(m)}
600      * @since 1.3
601      */
602     public BlockRealMatrix multiplyTransposed(BlockRealMatrix m)
603         throws MathIllegalArgumentException {
604         // safety check
605         MatrixUtils.checkSameColumnDimension(this, m);
606 
607         final BlockRealMatrix out = new BlockRealMatrix(rows, m.rows);
608 
609         // perform multiplication block-wise, to ensure good cache behavior
610         int blockIndex = 0;
611         for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
612 
613             final int pStart = iBlock * BLOCK_SIZE;
614             final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
615 
616             for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
617                 final int jWidth = out.blockWidth(jBlock);
618 
619                 // select current block
620                 final double[] outBlock = out.blocks[blockIndex];
621 
622                 // perform multiplication on current block
623                 for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
624                     final int kWidth = blockWidth(kBlock);
625                     final double[] tBlock = blocks[iBlock * blockColumns + kBlock];
626                     final double[] mBlock = m.blocks[jBlock * m.blockColumns + kBlock];
627                     int k = 0;
628                     for (int p = pStart; p < pEnd; ++p) {
629                         final int lStart = (p - pStart) * kWidth;
630                         final int lEnd   = lStart + kWidth;
631                         for (int nStart = 0; nStart < jWidth * kWidth; nStart += kWidth) {
632                             double sum = 0;
633                             int l = lStart;
634                             int n = nStart;
635                             while (l < lEnd - 3) {
636                                 sum += tBlock[l]     * mBlock[n]     +
637                                        tBlock[l + 1] * mBlock[n + 1] +
638                                        tBlock[l + 2] * mBlock[n + 2] +
639                                        tBlock[l + 3] * mBlock[n + 3];
640                                 l += 4;
641                                 n += 4;
642                             }
643                             while (l < lEnd) {
644                                 sum += tBlock[l++] * mBlock[n++];
645                             }
646                             outBlock[k] += sum;
647                             ++k;
648                         }
649                     }
650                 }
651                 // go to next block
652                 ++blockIndex;
653             }
654         }
655 
656         return out;
657     }
658 
659     /** {@inheritDoc} */
660     @Override
661     public BlockRealMatrix multiplyTransposed(final RealMatrix m)
662         throws MathIllegalArgumentException {
663         if (m instanceof BlockRealMatrix) {
664             return multiplyTransposed((BlockRealMatrix) m);
665         } else {
666             // safety check
667             MatrixUtils.checkSameColumnDimension(this, m);
668 
669             final BlockRealMatrix out = new BlockRealMatrix(rows, m.getRowDimension());
670 
671             // perform multiplication block-wise, to ensure good cache behavior
672             int blockIndex = 0;
673             for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
674                 final int pStart = iBlock * BLOCK_SIZE;
675                 final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
676 
677                 for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
678                     final int qStart = jBlock * BLOCK_SIZE;
679                     final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, m.getRowDimension());
680 
681                     // select current block
682                     final double[] outBlock = out.blocks[blockIndex];
683 
684                     // perform multiplication on current block
685                     for (int kBlock = 0; kBlock < blockColumns; ++kBlock) {
686                         final int kWidth = blockWidth(kBlock);
687                         final double[] tBlock = blocks[iBlock * blockColumns + kBlock];
688                         final int rStart = kBlock * BLOCK_SIZE;
689                         int k = 0;
690                         for (int p = pStart; p < pEnd; ++p) {
691                             final int lStart = (p - pStart) * kWidth;
692                             final int lEnd = lStart + kWidth;
693                             for (int q = qStart; q < qEnd; ++q) {
694                                 double sum = 0;
695                                 int r = rStart;
696                                 for (int l = lStart; l < lEnd; ++l) {
697                                     sum += tBlock[l] * m.getEntry(q, r++);
698                                 }
699                                 outBlock[k] += sum;
700                                 ++k;
701                             }
702                         }
703                     }
704                     // go to next block
705                     ++blockIndex;
706                 }
707             }
708 
709             return out;
710         }
711     }
712 
713     /**
714      * Returns the result of postmultiplying {@code this^T} by {@code m}.
715      * @param m matrix to postmultiply by
716      * @return {@code this^T * m}
717      * @throws MathIllegalArgumentException if
718      * {@code columnDimension(this) != columnDimension(m)}
719      * @since 1.3
720      */
721     public BlockRealMatrix transposeMultiply(final BlockRealMatrix m)
722         throws MathIllegalArgumentException {
723         // safety check
724         MatrixUtils.checkSameRowDimension(this, m);
725 
726         final BlockRealMatrix out = new BlockRealMatrix(columns, m.columns);
727 
728         // perform multiplication block-wise, to ensure good cache behavior
729         int blockIndex = 0;
730         for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
731 
732             final int iHeight  = out.blockHeight(iBlock);
733             final int iHeight2 = iHeight  + iHeight;
734             final int iHeight3 = iHeight2 + iHeight;
735             final int iHeight4 = iHeight3 + iHeight;
736             final int pStart   = iBlock * BLOCK_SIZE;
737             final int pEnd     = FastMath.min(pStart + BLOCK_SIZE, columns);
738 
739             for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
740                 final int jWidth  = out.blockWidth(jBlock);
741                 final int jWidth2 = jWidth  + jWidth;
742                 final int jWidth3 = jWidth2 + jWidth;
743                 final int jWidth4 = jWidth3 + jWidth;
744 
745                 // select current block
746                 final double[] outBlock = out.blocks[blockIndex];
747 
748                 // perform multiplication on current block
749                 for (int kBlock = 0; kBlock < blockRows; ++kBlock) {
750                     final int      kHeight = blockHeight(kBlock);
751                     final double[] tBlock  = blocks[kBlock * blockColumns + iBlock];
752                     final double[] mBlock  = m.blocks[kBlock * m.blockColumns + jBlock];
753                     int k = 0;
754                     for (int p = pStart; p < pEnd; ++p) {
755                         final int lStart = p - pStart;
756                         final int lEnd   = lStart + iHeight * kHeight;
757                         for (int nStart = 0; nStart < jWidth; ++nStart) {
758                             double sum = 0;
759                             int l = lStart;
760                             int n = nStart;
761                             while (l < lEnd - iHeight3) {
762                                 sum += tBlock[l]            * mBlock[n] +
763                                        tBlock[l + iHeight]  * mBlock[n + jWidth] +
764                                        tBlock[l + iHeight2] * mBlock[n + jWidth2] +
765                                        tBlock[l + iHeight3] * mBlock[n + jWidth3];
766                                 l += iHeight4;
767                                 n += jWidth4;
768                             }
769                             while (l < lEnd) {
770                                 sum += tBlock[l] * mBlock[n];
771                                 l += iHeight;
772                                 n += jWidth;
773                             }
774                             outBlock[k] += sum;
775                             ++k;
776                         }
777                     }
778                 }
779                 // go to next block
780                 ++blockIndex;
781             }
782         }
783 
784         return out;
785     }
786 
787     /** {@inheritDoc} */
788     @Override
789     public BlockRealMatrix transposeMultiply(final RealMatrix m)
790         throws MathIllegalArgumentException {
791         if (m instanceof BlockRealMatrix) {
792             return transposeMultiply((BlockRealMatrix) m);
793         } else {
794             // safety check
795             MatrixUtils.checkSameRowDimension(this, m);
796 
797             final BlockRealMatrix out = new BlockRealMatrix(columns, m.getColumnDimension());
798 
799             // perform multiplication block-wise, to ensure good cache behavior
800             int blockIndex = 0;
801             for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
802 
803                 final int iHeight = out.blockHeight(iBlock);
804                 final int pStart  = iBlock * BLOCK_SIZE;
805                 final int pEnd    = FastMath.min(pStart + BLOCK_SIZE, columns);
806 
807                 for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
808                     final int qStart = jBlock * BLOCK_SIZE;
809                     final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, m.getColumnDimension());
810 
811                     // select current block
812                     final double[] outBlock = out.blocks[blockIndex];
813 
814                     // perform multiplication on current block
815                     for (int kBlock = 0; kBlock < blockRows; ++kBlock) {
816                         final int      kHeight = blockHeight(kBlock);
817                         final double[] tBlock  = blocks[kBlock * blockColumns + iBlock];
818                         final int      rStart  = kBlock * BLOCK_SIZE;
819                         int k = 0;
820                         for (int p = pStart; p < pEnd; ++p) {
821                             final int lStart = p - pStart;
822                             final int lEnd   = lStart + iHeight * kHeight;
823                             for (int q = qStart; q < qEnd; ++q) {
824                                 double sum = 0;
825                                 int r = rStart;
826                                 for (int l = lStart; l < lEnd; l += iHeight) {
827                                     sum += tBlock[l] * m.getEntry(r++, q);
828                                 }
829                                 outBlock[k] += sum;
830                                 ++k;
831                             }
832                         }
833                     }
834                     // go to next block
835                     ++blockIndex;
836                 }
837             }
838 
839             return out;
840         }
841 
842     }
843 
844     /** {@inheritDoc} */
845     @Override
846     public double[][] getData() {
847         final double[][] data = new double[getRowDimension()][getColumnDimension()];
848         final int lastColumns = columns - (blockColumns - 1) * BLOCK_SIZE;
849 
850         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
851             final int pStart = iBlock * BLOCK_SIZE;
852             final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
853             int regularPos = 0;
854             int lastPos = 0;
855             for (int p = pStart; p < pEnd; ++p) {
856                 final double[] dataP = data[p];
857                 int blockIndex = iBlock * blockColumns;
858                 int dataPos = 0;
859                 for (int jBlock = 0; jBlock < blockColumns - 1; ++jBlock) {
860                     System.arraycopy(blocks[blockIndex++], regularPos, dataP, dataPos, BLOCK_SIZE);
861                     dataPos += BLOCK_SIZE;
862                 }
863                 System.arraycopy(blocks[blockIndex], lastPos, dataP, dataPos, lastColumns);
864                 regularPos += BLOCK_SIZE;
865                 lastPos    += lastColumns;
866             }
867         }
868 
869         return data;
870     }
871 
872     /** {@inheritDoc} */
873     @Override
874     public double getNorm1() {
875         final double[] colSums = new double[BLOCK_SIZE];
876         double maxColSum = 0;
877         for (int jBlock = 0; jBlock < blockColumns; jBlock++) {
878             final int jWidth = blockWidth(jBlock);
879             Arrays.fill(colSums, 0, jWidth, 0.0);
880             for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
881                 final int iHeight = blockHeight(iBlock);
882                 final double[] block = blocks[iBlock * blockColumns + jBlock];
883                 for (int j = 0; j < jWidth; ++j) {
884                     double sum = 0;
885                     for (int i = 0; i < iHeight; ++i) {
886                         sum += FastMath.abs(block[i * jWidth + j]);
887                     }
888                     colSums[j] += sum;
889                 }
890             }
891             for (int j = 0; j < jWidth; ++j) {
892                 maxColSum = FastMath.max(maxColSum, colSums[j]);
893             }
894         }
895         return maxColSum;
896     }
897 
898     /** {@inheritDoc} */
899     @Override
900     public double getNormInfty() {
901         final double[] rowSums = new double[BLOCK_SIZE];
902         double maxRowSum = 0;
903         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
904             final int iHeight = blockHeight(iBlock);
905             Arrays.fill(rowSums, 0, iHeight, 0.0);
906             for (int jBlock = 0; jBlock < blockColumns; jBlock++) {
907                 final int jWidth = blockWidth(jBlock);
908                 final double[] block = blocks[iBlock * blockColumns + jBlock];
909                 for (int i = 0; i < iHeight; ++i) {
910                     double sum = 0;
911                     for (int j = 0; j < jWidth; ++j) {
912                         sum += FastMath.abs(block[i * jWidth + j]);
913                     }
914                     rowSums[i] += sum;
915                 }
916             }
917             for (int i = 0; i < iHeight; ++i) {
918                 maxRowSum = FastMath.max(maxRowSum, rowSums[i]);
919             }
920         }
921         return maxRowSum;
922     }
923 
924     /** {@inheritDoc} */
925     @Override
926     public double getFrobeniusNorm() {
927         double sum2 = 0;
928         for (int blockIndex = 0; blockIndex < blocks.length; ++blockIndex) {
929             for (final double entry : blocks[blockIndex]) {
930                 sum2 += entry * entry;
931             }
932         }
933         return FastMath.sqrt(sum2);
934     }
935 
936     /** {@inheritDoc} */
937     @Override
938     public BlockRealMatrix getSubMatrix(final int startRow, final int endRow,
939                                         final int startColumn,
940                                         final int endColumn)
941         throws MathIllegalArgumentException {
942         // safety checks
943         MatrixUtils.checkSubMatrixIndex(this, startRow, endRow, startColumn, endColumn);
944 
945         // create the output matrix
946         final BlockRealMatrix out =
947             new BlockRealMatrix(endRow - startRow + 1, endColumn - startColumn + 1);
948 
949         // compute blocks shifts
950         final int blockStartRow = startRow / BLOCK_SIZE;
951         final int rowsShift = startRow % BLOCK_SIZE;
952         final int blockStartColumn = startColumn / BLOCK_SIZE;
953         final int columnsShift = startColumn % BLOCK_SIZE;
954 
955         // perform extraction block-wise, to ensure good cache behavior
956         int pBlock = blockStartRow;
957         for (int iBlock = 0; iBlock < out.blockRows; ++iBlock) {
958             final int iHeight = out.blockHeight(iBlock);
959             int qBlock = blockStartColumn;
960             for (int jBlock = 0; jBlock < out.blockColumns; ++jBlock) {
961                 final int jWidth = out.blockWidth(jBlock);
962 
963                 // handle one block of the output matrix
964                 final int outIndex = iBlock * out.blockColumns + jBlock;
965                 final double[] outBlock = out.blocks[outIndex];
966                 final int index = pBlock * blockColumns + qBlock;
967                 final int width = blockWidth(qBlock);
968 
969                 final int heightExcess = iHeight + rowsShift - BLOCK_SIZE;
970                 final int widthExcess = jWidth + columnsShift - BLOCK_SIZE;
971                 if (heightExcess > 0) {
972                     // the submatrix block spans on two blocks rows from the original matrix
973                     if (widthExcess > 0) {
974                         // the submatrix block spans on two blocks columns from the original matrix
975                         final int width2 = blockWidth(qBlock + 1);
976                         copyBlockPart(blocks[index], width,
977                                       rowsShift, BLOCK_SIZE,
978                                       columnsShift, BLOCK_SIZE,
979                                       outBlock, jWidth, 0, 0);
980                         copyBlockPart(blocks[index + 1], width2,
981                                       rowsShift, BLOCK_SIZE,
982                                       0, widthExcess,
983                                       outBlock, jWidth, 0, jWidth - widthExcess);
984                         copyBlockPart(blocks[index + blockColumns], width,
985                                       0, heightExcess,
986                                       columnsShift, BLOCK_SIZE,
987                                       outBlock, jWidth, iHeight - heightExcess, 0);
988                         copyBlockPart(blocks[index + blockColumns + 1], width2,
989                                       0, heightExcess,
990                                       0, widthExcess,
991                                       outBlock, jWidth, iHeight - heightExcess, jWidth - widthExcess);
992                     } else {
993                         // the submatrix block spans on one block column from the original matrix
994                         copyBlockPart(blocks[index], width,
995                                       rowsShift, BLOCK_SIZE,
996                                       columnsShift, jWidth + columnsShift,
997                                       outBlock, jWidth, 0, 0);
998                         copyBlockPart(blocks[index + blockColumns], width,
999                                       0, heightExcess,
1000                                       columnsShift, jWidth + columnsShift,
1001                                       outBlock, jWidth, iHeight - heightExcess, 0);
1002                     }
1003                 } else {
1004                     // the submatrix block spans on one block row from the original matrix
1005                     if (widthExcess > 0) {
1006                         // the submatrix block spans on two blocks columns from the original matrix
1007                         final int width2 = blockWidth(qBlock + 1);
1008                         copyBlockPart(blocks[index], width,
1009                                       rowsShift, iHeight + rowsShift,
1010                                       columnsShift, BLOCK_SIZE,
1011                                       outBlock, jWidth, 0, 0);
1012                         copyBlockPart(blocks[index + 1], width2,
1013                                       rowsShift, iHeight + rowsShift,
1014                                       0, widthExcess,
1015                                       outBlock, jWidth, 0, jWidth - widthExcess);
1016                     } else {
1017                         // the submatrix block spans on one block column from the original matrix
1018                         copyBlockPart(blocks[index], width,
1019                                       rowsShift, iHeight + rowsShift,
1020                                       columnsShift, jWidth + columnsShift,
1021                                       outBlock, jWidth, 0, 0);
1022                     }
1023                }
1024                 ++qBlock;
1025             }
1026             ++pBlock;
1027         }
1028 
1029         return out;
1030     }
1031 
1032     /**
1033      * Copy a part of a block into another one
1034      * <p>This method can be called only when the specified part fits in both
1035      * blocks, no verification is done here.</p>
1036      * @param srcBlock source block
1037      * @param srcWidth source block width ({@link #BLOCK_SIZE} or smaller)
1038      * @param srcStartRow start row in the source block
1039      * @param srcEndRow end row (exclusive) in the source block
1040      * @param srcStartColumn start column in the source block
1041      * @param srcEndColumn end column (exclusive) in the source block
1042      * @param dstBlock destination block
1043      * @param dstWidth destination block width ({@link #BLOCK_SIZE} or smaller)
1044      * @param dstStartRow start row in the destination block
1045      * @param dstStartColumn start column in the destination block
1046      */
1047     private void copyBlockPart(final double[] srcBlock, final int srcWidth,
1048                                final int srcStartRow, final int srcEndRow,
1049                                final int srcStartColumn, final int srcEndColumn,
1050                                final double[] dstBlock, final int dstWidth,
1051                                final int dstStartRow, final int dstStartColumn) {
1052         final int length = srcEndColumn - srcStartColumn;
1053         int srcPos = srcStartRow * srcWidth + srcStartColumn;
1054         int dstPos = dstStartRow * dstWidth + dstStartColumn;
1055         for (int srcRow = srcStartRow; srcRow < srcEndRow; ++srcRow) {
1056             System.arraycopy(srcBlock, srcPos, dstBlock, dstPos, length);
1057             srcPos += srcWidth;
1058             dstPos += dstWidth;
1059         }
1060     }
1061 
1062     /** {@inheritDoc} */
1063     @Override
1064     public void setSubMatrix(final double[][] subMatrix, final int row,
1065                              final int column)
1066         throws MathIllegalArgumentException, NullArgumentException {
1067         // safety checks
1068         MathUtils.checkNotNull(subMatrix);
1069         final int refLength = subMatrix[0].length;
1070         if (refLength == 0) {
1071             throw new MathIllegalArgumentException(LocalizedCoreFormats.AT_LEAST_ONE_COLUMN);
1072         }
1073         final int endRow = row + subMatrix.length - 1;
1074         final int endColumn = column + refLength - 1;
1075         MatrixUtils.checkSubMatrixIndex(this, row, endRow, column, endColumn);
1076         for (final double[] subRow : subMatrix) {
1077             if (subRow.length != refLength) {
1078                 throw new MathIllegalArgumentException(LocalizedCoreFormats.DIMENSIONS_MISMATCH,
1079                                                        refLength, subRow.length);
1080             }
1081         }
1082 
1083         // compute blocks bounds
1084         final int blockStartRow = row / BLOCK_SIZE;
1085         final int blockEndRow = (endRow + BLOCK_SIZE) / BLOCK_SIZE;
1086         final int blockStartColumn = column / BLOCK_SIZE;
1087         final int blockEndColumn = (endColumn + BLOCK_SIZE) / BLOCK_SIZE;
1088 
1089         // perform copy block-wise, to ensure good cache behavior
1090         for (int iBlock = blockStartRow; iBlock < blockEndRow; ++iBlock) {
1091             final int iHeight = blockHeight(iBlock);
1092             final int firstRow = iBlock * BLOCK_SIZE;
1093             final int iStart = FastMath.max(row,    firstRow);
1094             final int iEnd = FastMath.min(endRow + 1, firstRow + iHeight);
1095 
1096             for (int jBlock = blockStartColumn; jBlock < blockEndColumn; ++jBlock) {
1097                 final int jWidth = blockWidth(jBlock);
1098                 final int firstColumn = jBlock * BLOCK_SIZE;
1099                 final int jStart = FastMath.max(column,    firstColumn);
1100                 final int jEnd = FastMath.min(endColumn + 1, firstColumn + jWidth);
1101                 final int jLength = jEnd - jStart;
1102 
1103                 // handle one block, row by row
1104                 final double[] block = blocks[iBlock * blockColumns + jBlock];
1105                 for (int i = iStart; i < iEnd; ++i) {
1106                     System.arraycopy(subMatrix[i - row], jStart - column,
1107                                      block, (i - firstRow) * jWidth + (jStart - firstColumn),
1108                                      jLength);
1109                 }
1110 
1111             }
1112         }
1113     }
1114 
1115     /** {@inheritDoc} */
1116     @Override
1117     public BlockRealMatrix getRowMatrix(final int row)
1118         throws MathIllegalArgumentException {
1119         MatrixUtils.checkRowIndex(this, row);
1120         final BlockRealMatrix out = new BlockRealMatrix(1, columns);
1121 
1122         // perform copy block-wise, to ensure good cache behavior
1123         final int iBlock = row / BLOCK_SIZE;
1124         final int iRow = row - iBlock * BLOCK_SIZE;
1125         int outBlockIndex = 0;
1126         int outIndex = 0;
1127         double[] outBlock = out.blocks[outBlockIndex];
1128         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1129             final int jWidth = blockWidth(jBlock);
1130             final double[] block = blocks[iBlock * blockColumns + jBlock];
1131             final int available = outBlock.length - outIndex;
1132             if (jWidth > available) {
1133                 System.arraycopy(block, iRow * jWidth, outBlock, outIndex, available);
1134                 outBlock = out.blocks[++outBlockIndex];
1135                 System.arraycopy(block, iRow * jWidth, outBlock, 0, jWidth - available);
1136                 outIndex = jWidth - available;
1137             } else {
1138                 System.arraycopy(block, iRow * jWidth, outBlock, outIndex, jWidth);
1139                 outIndex += jWidth;
1140             }
1141         }
1142 
1143         return out;
1144     }
1145 
1146     /** {@inheritDoc} */
1147     @Override
1148     public void setRowMatrix(final int row, final RealMatrix matrix)
1149         throws MathIllegalArgumentException {
1150         if (matrix instanceof BlockRealMatrix) {
1151             setRowMatrix(row, (BlockRealMatrix) matrix);
1152         } else {
1153             super.setRowMatrix(row, matrix);
1154         }
1155     }
1156 
1157     /**
1158      * Sets the entries in row number <code>row</code>
1159      * as a row matrix.  Row indices start at 0.
1160      *
1161      * @param row the row to be set
1162      * @param matrix row matrix (must have one row and the same number of columns
1163      * as the instance)
1164      * @throws MathIllegalArgumentException if the specified row index is invalid.
1165      * @throws MathIllegalArgumentException if the matrix dimensions do
1166      * not match one instance row.
1167      */
1168     public void setRowMatrix(final int row, final BlockRealMatrix matrix)
1169         throws MathIllegalArgumentException {
1170         MatrixUtils.checkRowIndex(this, row);
1171         final int nCols = getColumnDimension();
1172         if ((matrix.getRowDimension() != 1) ||
1173             (matrix.getColumnDimension() != nCols)) {
1174             throw new MathIllegalArgumentException(LocalizedCoreFormats.DIMENSIONS_MISMATCH_2x2,
1175                                                    matrix.getRowDimension(), matrix.getColumnDimension(),
1176                                                    1, nCols);
1177         }
1178 
1179         // perform copy block-wise, to ensure good cache behavior
1180         final int iBlock = row / BLOCK_SIZE;
1181         final int iRow = row - iBlock * BLOCK_SIZE;
1182         int mBlockIndex = 0;
1183         int mIndex = 0;
1184         double[] mBlock = matrix.blocks[mBlockIndex];
1185         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1186             final int jWidth = blockWidth(jBlock);
1187             final double[] block = blocks[iBlock * blockColumns + jBlock];
1188             final int available  = mBlock.length - mIndex;
1189             if (jWidth > available) {
1190                 System.arraycopy(mBlock, mIndex, block, iRow * jWidth, available);
1191                 mBlock = matrix.blocks[++mBlockIndex];
1192                 System.arraycopy(mBlock, 0, block, iRow * jWidth, jWidth - available);
1193                 mIndex = jWidth - available;
1194             } else {
1195                 System.arraycopy(mBlock, mIndex, block, iRow * jWidth, jWidth);
1196                 mIndex += jWidth;
1197            }
1198         }
1199     }
1200 
1201     /** {@inheritDoc} */
1202     @Override
1203     public BlockRealMatrix getColumnMatrix(final int column)
1204         throws MathIllegalArgumentException {
1205         MatrixUtils.checkColumnIndex(this, column);
1206         final BlockRealMatrix out = new BlockRealMatrix(rows, 1);
1207 
1208         // perform copy block-wise, to ensure good cache behavior
1209         final int jBlock = column / BLOCK_SIZE;
1210         final int jColumn = column - jBlock * BLOCK_SIZE;
1211         final int jWidth = blockWidth(jBlock);
1212         int outBlockIndex = 0;
1213         int outIndex = 0;
1214         double[] outBlock = out.blocks[outBlockIndex];
1215         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1216             final int iHeight = blockHeight(iBlock);
1217             final double[] block = blocks[iBlock * blockColumns + jBlock];
1218             for (int i = 0; i < iHeight; ++i) {
1219                 if (outIndex >= outBlock.length) {
1220                     outBlock = out.blocks[++outBlockIndex];
1221                     outIndex = 0;
1222                 }
1223                 outBlock[outIndex++] = block[i * jWidth + jColumn];
1224             }
1225         }
1226 
1227         return out;
1228     }
1229 
1230     /** {@inheritDoc} */
1231     @Override
1232     public void setColumnMatrix(final int column, final RealMatrix matrix)
1233         throws MathIllegalArgumentException {
1234         if (matrix instanceof BlockRealMatrix) {
1235             setColumnMatrix(column, (BlockRealMatrix) matrix);
1236         } else {
1237             super.setColumnMatrix(column, matrix);
1238         }
1239     }
1240 
1241     /**
1242      * Sets the entries in column number <code>column</code>
1243      * as a column matrix.  Column indices start at 0.
1244      *
1245      * @param column the column to be set
1246      * @param matrix column matrix (must have one column and the same number of rows
1247      * as the instance)
1248      * @throws MathIllegalArgumentException if the specified column index is invalid.
1249      * @throws MathIllegalArgumentException if the matrix dimensions do
1250      * not match one instance column.
1251      */
1252     void setColumnMatrix(final int column, final BlockRealMatrix matrix)
1253         throws MathIllegalArgumentException {
1254         MatrixUtils.checkColumnIndex(this, column);
1255         final int nRows = getRowDimension();
1256         if ((matrix.getRowDimension() != nRows) ||
1257             (matrix.getColumnDimension() != 1)) {
1258             throw new MathIllegalArgumentException(LocalizedCoreFormats.DIMENSIONS_MISMATCH_2x2,
1259                                                    matrix.getRowDimension(), matrix.getColumnDimension(),
1260                                                    nRows, 1);
1261         }
1262 
1263         // perform copy block-wise, to ensure good cache behavior
1264         final int jBlock = column / BLOCK_SIZE;
1265         final int jColumn = column - jBlock * BLOCK_SIZE;
1266         final int jWidth = blockWidth(jBlock);
1267         int mBlockIndex = 0;
1268         int mIndex = 0;
1269         double[] mBlock = matrix.blocks[mBlockIndex];
1270         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1271             final int iHeight = blockHeight(iBlock);
1272             final double[] block = blocks[iBlock * blockColumns + jBlock];
1273             for (int i = 0; i < iHeight; ++i) {
1274                 if (mIndex >= mBlock.length) {
1275                     mBlock = matrix.blocks[++mBlockIndex];
1276                     mIndex = 0;
1277                 }
1278                 block[i * jWidth + jColumn] = mBlock[mIndex++];
1279             }
1280         }
1281     }
1282 
1283     /** {@inheritDoc} */
1284     @Override
1285     public RealVector getRowVector(final int row)
1286         throws MathIllegalArgumentException {
1287         MatrixUtils.checkRowIndex(this, row);
1288         final double[] outData = new double[columns];
1289 
1290         // perform copy block-wise, to ensure good cache behavior
1291         final int iBlock = row / BLOCK_SIZE;
1292         final int iRow = row - iBlock * BLOCK_SIZE;
1293         int outIndex = 0;
1294         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1295             final int jWidth = blockWidth(jBlock);
1296             final double[] block = blocks[iBlock * blockColumns + jBlock];
1297             System.arraycopy(block, iRow * jWidth, outData, outIndex, jWidth);
1298             outIndex += jWidth;
1299         }
1300 
1301         return new ArrayRealVector(outData, false);
1302     }
1303 
1304     /** {@inheritDoc} */
1305     @Override
1306     public void setRowVector(final int row, final RealVector vector)
1307         throws MathIllegalArgumentException {
1308         if (vector instanceof ArrayRealVector) {
1309             setRow(row, ((ArrayRealVector) vector).getDataRef());
1310         } else {
1311             super.setRowVector(row, vector);
1312         }
1313     }
1314 
1315     /** {@inheritDoc} */
1316     @Override
1317     public RealVector getColumnVector(final int column)
1318         throws MathIllegalArgumentException {
1319         MatrixUtils.checkColumnIndex(this, column);
1320         final double[] outData = new double[rows];
1321 
1322         // perform copy block-wise, to ensure good cache behavior
1323         final int jBlock = column / BLOCK_SIZE;
1324         final int jColumn = column - jBlock * BLOCK_SIZE;
1325         final int jWidth = blockWidth(jBlock);
1326         int outIndex = 0;
1327         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1328             final int iHeight = blockHeight(iBlock);
1329             final double[] block = blocks[iBlock * blockColumns + jBlock];
1330             for (int i = 0; i < iHeight; ++i) {
1331                 outData[outIndex++] = block[i * jWidth + jColumn];
1332             }
1333         }
1334 
1335         return new ArrayRealVector(outData, false);
1336     }
1337 
1338     /** {@inheritDoc} */
1339     @Override
1340     public void setColumnVector(final int column, final RealVector vector)
1341         throws MathIllegalArgumentException {
1342         if (vector instanceof ArrayRealVector) {
1343             setColumn(column, ((ArrayRealVector) vector).getDataRef());
1344         } else {
1345             super.setColumnVector(column, vector);
1346         }
1347     }
1348 
1349     /** {@inheritDoc} */
1350     @Override
1351     public double[] getRow(final int row) throws MathIllegalArgumentException {
1352         MatrixUtils.checkRowIndex(this, row);
1353         final double[] out = new double[columns];
1354 
1355         // perform copy block-wise, to ensure good cache behavior
1356         final int iBlock = row / BLOCK_SIZE;
1357         final int iRow = row - iBlock * BLOCK_SIZE;
1358         int outIndex = 0;
1359         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1360             final int jWidth     = blockWidth(jBlock);
1361             final double[] block = blocks[iBlock * blockColumns + jBlock];
1362             System.arraycopy(block, iRow * jWidth, out, outIndex, jWidth);
1363             outIndex += jWidth;
1364         }
1365 
1366         return out;
1367     }
1368 
1369     /** {@inheritDoc} */
1370     @Override
1371     public void setRow(final int row, final double[] array)
1372         throws MathIllegalArgumentException {
1373         MatrixUtils.checkRowIndex(this, row);
1374         final int nCols = getColumnDimension();
1375         if (array.length != nCols) {
1376             throw new MathIllegalArgumentException(LocalizedCoreFormats.DIMENSIONS_MISMATCH_2x2,
1377                                                    1, array.length,
1378                                                    1, nCols);
1379         }
1380 
1381         // perform copy block-wise, to ensure good cache behavior
1382         final int iBlock = row / BLOCK_SIZE;
1383         final int iRow = row - iBlock * BLOCK_SIZE;
1384         int outIndex = 0;
1385         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1386             final int jWidth     = blockWidth(jBlock);
1387             final double[] block = blocks[iBlock * blockColumns + jBlock];
1388             System.arraycopy(array, outIndex, block, iRow * jWidth, jWidth);
1389             outIndex += jWidth;
1390         }
1391     }
1392 
1393     /** {@inheritDoc} */
1394     @Override
1395     public double[] getColumn(final int column) throws MathIllegalArgumentException {
1396         MatrixUtils.checkColumnIndex(this, column);
1397         final double[] out = new double[rows];
1398 
1399         // perform copy block-wise, to ensure good cache behavior
1400         final int jBlock  = column / BLOCK_SIZE;
1401         final int jColumn = column - jBlock * BLOCK_SIZE;
1402         final int jWidth  = blockWidth(jBlock);
1403         int outIndex = 0;
1404         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1405             final int iHeight = blockHeight(iBlock);
1406             final double[] block = blocks[iBlock * blockColumns + jBlock];
1407             for (int i = 0; i < iHeight; ++i) {
1408                 out[outIndex++] = block[i * jWidth + jColumn];
1409             }
1410         }
1411 
1412         return out;
1413     }
1414 
1415     /** {@inheritDoc} */
1416     @Override
1417     public void setColumn(final int column, final double[] array)
1418         throws MathIllegalArgumentException {
1419         MatrixUtils.checkColumnIndex(this, column);
1420         final int nRows = getRowDimension();
1421         if (array.length != nRows) {
1422             throw new MathIllegalArgumentException(LocalizedCoreFormats.DIMENSIONS_MISMATCH_2x2,
1423                                                    array.length, 1,
1424                                                    nRows, 1);
1425         }
1426 
1427         // perform copy block-wise, to ensure good cache behavior
1428         final int jBlock  = column / BLOCK_SIZE;
1429         final int jColumn = column - jBlock * BLOCK_SIZE;
1430         final int jWidth = blockWidth(jBlock);
1431         int outIndex = 0;
1432         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1433             final int iHeight = blockHeight(iBlock);
1434             final double[] block = blocks[iBlock * blockColumns + jBlock];
1435             for (int i = 0; i < iHeight; ++i) {
1436                 block[i * jWidth + jColumn] = array[outIndex++];
1437             }
1438         }
1439     }
1440 
1441     /** {@inheritDoc} */
1442     @Override
1443     public double getEntry(final int row, final int column)
1444         throws MathIllegalArgumentException {
1445         MatrixUtils.checkMatrixIndex(this, row, column);
1446         final int iBlock = row / BLOCK_SIZE;
1447         final int jBlock = column / BLOCK_SIZE;
1448         final int k = (row - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1449             (column - jBlock * BLOCK_SIZE);
1450         return blocks[iBlock * blockColumns + jBlock][k];
1451     }
1452 
1453     /** {@inheritDoc} */
1454     @Override
1455     public void setEntry(final int row, final int column, final double value)
1456         throws MathIllegalArgumentException {
1457         MatrixUtils.checkMatrixIndex(this, row, column);
1458         final int iBlock = row / BLOCK_SIZE;
1459         final int jBlock = column / BLOCK_SIZE;
1460         final int k = (row - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1461             (column - jBlock * BLOCK_SIZE);
1462         blocks[iBlock * blockColumns + jBlock][k] = value;
1463     }
1464 
1465     /** {@inheritDoc} */
1466     @Override
1467     public void addToEntry(final int row, final int column,
1468                            final double increment)
1469         throws MathIllegalArgumentException {
1470         MatrixUtils.checkMatrixIndex(this, row, column);
1471         final int iBlock = row    / BLOCK_SIZE;
1472         final int jBlock = column / BLOCK_SIZE;
1473         final int k = (row    - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1474             (column - jBlock * BLOCK_SIZE);
1475         blocks[iBlock * blockColumns + jBlock][k] += increment;
1476     }
1477 
1478     /** {@inheritDoc} */
1479     @Override
1480     public void multiplyEntry(final int row, final int column,
1481                               final double factor)
1482         throws MathIllegalArgumentException {
1483         MatrixUtils.checkMatrixIndex(this, row, column);
1484         final int iBlock = row / BLOCK_SIZE;
1485         final int jBlock = column / BLOCK_SIZE;
1486         final int k = (row - iBlock * BLOCK_SIZE) * blockWidth(jBlock) +
1487             (column - jBlock * BLOCK_SIZE);
1488         blocks[iBlock * blockColumns + jBlock][k] *= factor;
1489     }
1490 
1491     /** {@inheritDoc} */
1492     @Override
1493     public BlockRealMatrix transpose() {
1494         final int nRows = getRowDimension();
1495         final int nCols = getColumnDimension();
1496         final BlockRealMatrix out = new BlockRealMatrix(nCols, nRows);
1497 
1498         // perform transpose block-wise, to ensure good cache behavior
1499         int blockIndex = 0;
1500         for (int iBlock = 0; iBlock < blockColumns; ++iBlock) {
1501             final int pStart = iBlock * BLOCK_SIZE;
1502             final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, columns);
1503             for (int jBlock = 0; jBlock < blockRows; ++jBlock) {
1504                 // transpose current block
1505                 final double[] outBlock = out.blocks[blockIndex];
1506                 final double[] tBlock = blocks[jBlock * blockColumns + iBlock];
1507                 final int qStart = jBlock * BLOCK_SIZE;
1508                 final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, rows);
1509                 int k = 0;
1510                 for (int p = pStart; p < pEnd; ++p) {
1511                     final int lInc = pEnd - pStart;
1512                     int l = p - pStart;
1513                     for (int q = qStart; q < qEnd; ++q) {
1514                         outBlock[k] = tBlock[l];
1515                         ++k;
1516                         l+= lInc;
1517                     }
1518                 }
1519                 // go to next block
1520                 ++blockIndex;
1521             }
1522         }
1523 
1524         return out;
1525     }
1526 
1527     /** {@inheritDoc} */
1528     @Override
1529     public int getRowDimension() {
1530         return rows;
1531     }
1532 
1533     /** {@inheritDoc} */
1534     @Override
1535     public int getColumnDimension() {
1536         return columns;
1537     }
1538 
1539     /** {@inheritDoc} */
1540     @Override
1541     public double[] operate(final double[] v)
1542         throws MathIllegalArgumentException {
1543         if (v.length != columns) {
1544             throw new MathIllegalArgumentException(LocalizedCoreFormats.DIMENSIONS_MISMATCH,
1545                                                    v.length, columns);
1546         }
1547         final double[] out = new double[rows];
1548 
1549         // perform multiplication block-wise, to ensure good cache behavior
1550         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1551             final int pStart = iBlock * BLOCK_SIZE;
1552             final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1553             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1554                 final double[] block  = blocks[iBlock * blockColumns + jBlock];
1555                 final int qStart = jBlock * BLOCK_SIZE;
1556                 final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1557                 int k = 0;
1558                 for (int p = pStart; p < pEnd; ++p) {
1559                     double sum = 0;
1560                     int q = qStart;
1561                     while (q < qEnd - 3) {
1562                         sum += block[k]     * v[q]     +
1563                                block[k + 1] * v[q + 1] +
1564                                block[k + 2] * v[q + 2] +
1565                                block[k + 3] * v[q + 3];
1566                         k += 4;
1567                         q += 4;
1568                     }
1569                     while (q < qEnd) {
1570                         sum += block[k++] * v[q++];
1571                     }
1572                     out[p] += sum;
1573                 }
1574             }
1575         }
1576 
1577         return out;
1578     }
1579 
1580     /** {@inheritDoc} */
1581     @Override
1582     public double[] preMultiply(final double[] v)
1583         throws MathIllegalArgumentException {
1584         if (v.length != rows) {
1585             throw new MathIllegalArgumentException(LocalizedCoreFormats.DIMENSIONS_MISMATCH,
1586                                                    v.length, rows);
1587         }
1588         final double[] out = new double[columns];
1589 
1590         // perform multiplication block-wise, to ensure good cache behavior
1591         for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1592             final int jWidth  = blockWidth(jBlock);
1593             final int jWidth2 = jWidth  + jWidth;
1594             final int jWidth3 = jWidth2 + jWidth;
1595             final int jWidth4 = jWidth3 + jWidth;
1596             final int qStart = jBlock * BLOCK_SIZE;
1597             final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1598             for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1599                 final double[] block  = blocks[iBlock * blockColumns + jBlock];
1600                 final int pStart = iBlock * BLOCK_SIZE;
1601                 final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1602                 for (int q = qStart; q < qEnd; ++q) {
1603                     int k = q - qStart;
1604                     double sum = 0;
1605                     int p = pStart;
1606                     while (p < pEnd - 3) {
1607                         sum += block[k]           * v[p]     +
1608                                block[k + jWidth]  * v[p + 1] +
1609                                block[k + jWidth2] * v[p + 2] +
1610                                block[k + jWidth3] * v[p + 3];
1611                         k += jWidth4;
1612                         p += 4;
1613                     }
1614                     while (p < pEnd) {
1615                         sum += block[k] * v[p++];
1616                         k += jWidth;
1617                     }
1618                     out[q] += sum;
1619                 }
1620             }
1621         }
1622 
1623         return out;
1624     }
1625 
1626     /** {@inheritDoc} */
1627     @Override
1628     public double walkInRowOrder(final RealMatrixChangingVisitor visitor) {
1629         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1630         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1631             final int pStart = iBlock * BLOCK_SIZE;
1632             final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1633             for (int p = pStart; p < pEnd; ++p) {
1634                 for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1635                     final int jWidth = blockWidth(jBlock);
1636                     final int qStart = jBlock * BLOCK_SIZE;
1637                     final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1638                     final double[] block = blocks[iBlock * blockColumns + jBlock];
1639                     int k = (p - pStart) * jWidth;
1640                     for (int q = qStart; q < qEnd; ++q) {
1641                         block[k] = visitor.visit(p, q, block[k]);
1642                         ++k;
1643                     }
1644                 }
1645              }
1646         }
1647         return visitor.end();
1648     }
1649 
1650     /** {@inheritDoc} */
1651     @Override
1652     public double walkInRowOrder(final RealMatrixPreservingVisitor visitor) {
1653         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1654         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1655             final int pStart = iBlock * BLOCK_SIZE;
1656             final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1657             for (int p = pStart; p < pEnd; ++p) {
1658                 for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1659                     final int jWidth = blockWidth(jBlock);
1660                     final int qStart = jBlock * BLOCK_SIZE;
1661                     final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1662                     final double[] block = blocks[iBlock * blockColumns + jBlock];
1663                     int k = (p - pStart) * jWidth;
1664                     for (int q = qStart; q < qEnd; ++q) {
1665                         visitor.visit(p, q, block[k]);
1666                         ++k;
1667                     }
1668                 }
1669              }
1670         }
1671         return visitor.end();
1672     }
1673 
1674     /** {@inheritDoc} */
1675     @Override
1676     public double walkInRowOrder(final RealMatrixChangingVisitor visitor,
1677                                  final int startRow, final int endRow,
1678                                  final int startColumn, final int endColumn)
1679         throws MathIllegalArgumentException {
1680         MatrixUtils.checkSubMatrixIndex(this, startRow, endRow, startColumn, endColumn);
1681         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1682         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1683             final int p0     = iBlock * BLOCK_SIZE;
1684             final int pStart = FastMath.max(startRow, p0);
1685             final int pEnd   = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1686             for (int p = pStart; p < pEnd; ++p) {
1687                 for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1688                     final int jWidth = blockWidth(jBlock);
1689                     final int q0     = jBlock * BLOCK_SIZE;
1690                     final int qStart = FastMath.max(startColumn, q0);
1691                     final int qEnd   = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1692                     final double[] block = blocks[iBlock * blockColumns + jBlock];
1693                     int k = (p - p0) * jWidth + qStart - q0;
1694                     for (int q = qStart; q < qEnd; ++q) {
1695                         block[k] = visitor.visit(p, q, block[k]);
1696                         ++k;
1697                     }
1698                 }
1699              }
1700         }
1701         return visitor.end();
1702     }
1703 
1704     /** {@inheritDoc} */
1705     @Override
1706     public double walkInRowOrder(final RealMatrixPreservingVisitor visitor,
1707                                  final int startRow, final int endRow,
1708                                  final int startColumn, final int endColumn)
1709         throws MathIllegalArgumentException {
1710         MatrixUtils.checkSubMatrixIndex(this, startRow, endRow, startColumn, endColumn);
1711         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1712         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1713             final int p0     = iBlock * BLOCK_SIZE;
1714             final int pStart = FastMath.max(startRow, p0);
1715             final int pEnd   = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1716             for (int p = pStart; p < pEnd; ++p) {
1717                 for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1718                     final int jWidth = blockWidth(jBlock);
1719                     final int q0     = jBlock * BLOCK_SIZE;
1720                     final int qStart = FastMath.max(startColumn, q0);
1721                     final int qEnd   = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1722                     final double[] block = blocks[iBlock * blockColumns + jBlock];
1723                     int k = (p - p0) * jWidth + qStart - q0;
1724                     for (int q = qStart; q < qEnd; ++q) {
1725                         visitor.visit(p, q, block[k]);
1726                         ++k;
1727                     }
1728                 }
1729              }
1730         }
1731         return visitor.end();
1732     }
1733 
1734     /** {@inheritDoc} */
1735     @Override
1736     public double walkInOptimizedOrder(final RealMatrixChangingVisitor visitor) {
1737         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1738         int blockIndex = 0;
1739         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1740             final int pStart = iBlock * BLOCK_SIZE;
1741             final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1742             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1743                 final int qStart = jBlock * BLOCK_SIZE;
1744                 final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1745                 final double[] block = blocks[blockIndex];
1746                 int k = 0;
1747                 for (int p = pStart; p < pEnd; ++p) {
1748                     for (int q = qStart; q < qEnd; ++q) {
1749                         block[k] = visitor.visit(p, q, block[k]);
1750                         ++k;
1751                     }
1752                 }
1753                 ++blockIndex;
1754             }
1755         }
1756         return visitor.end();
1757     }
1758 
1759     /** {@inheritDoc} */
1760     @Override
1761     public double walkInOptimizedOrder(final RealMatrixPreservingVisitor visitor) {
1762         visitor.start(rows, columns, 0, rows - 1, 0, columns - 1);
1763         int blockIndex = 0;
1764         for (int iBlock = 0; iBlock < blockRows; ++iBlock) {
1765             final int pStart = iBlock * BLOCK_SIZE;
1766             final int pEnd   = FastMath.min(pStart + BLOCK_SIZE, rows);
1767             for (int jBlock = 0; jBlock < blockColumns; ++jBlock) {
1768                 final int qStart = jBlock * BLOCK_SIZE;
1769                 final int qEnd   = FastMath.min(qStart + BLOCK_SIZE, columns);
1770                 final double[] block = blocks[blockIndex];
1771                 int k = 0;
1772                 for (int p = pStart; p < pEnd; ++p) {
1773                     for (int q = qStart; q < qEnd; ++q) {
1774                         visitor.visit(p, q, block[k]);
1775                         ++k;
1776                     }
1777                 }
1778                 ++blockIndex;
1779             }
1780         }
1781         return visitor.end();
1782     }
1783 
1784     /** {@inheritDoc} */
1785     @Override
1786     public double walkInOptimizedOrder(final RealMatrixChangingVisitor visitor,
1787                                        final int startRow, final int endRow,
1788                                        final int startColumn,
1789                                        final int endColumn)
1790         throws MathIllegalArgumentException {
1791         MatrixUtils.checkSubMatrixIndex(this, startRow, endRow, startColumn, endColumn);
1792         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1793         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1794             final int p0     = iBlock * BLOCK_SIZE;
1795             final int pStart = FastMath.max(startRow, p0);
1796             final int pEnd   = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1797             for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1798                 final int jWidth = blockWidth(jBlock);
1799                 final int q0     = jBlock * BLOCK_SIZE;
1800                 final int qStart = FastMath.max(startColumn, q0);
1801                 final int qEnd   = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1802                 final double[] block = blocks[iBlock * blockColumns + jBlock];
1803                 for (int p = pStart; p < pEnd; ++p) {
1804                     int k = (p - p0) * jWidth + qStart - q0;
1805                     for (int q = qStart; q < qEnd; ++q) {
1806                         block[k] = visitor.visit(p, q, block[k]);
1807                         ++k;
1808                     }
1809                 }
1810             }
1811         }
1812         return visitor.end();
1813     }
1814 
1815     /** {@inheritDoc} */
1816     @Override
1817     public double walkInOptimizedOrder(final RealMatrixPreservingVisitor visitor,
1818                                        final int startRow, final int endRow,
1819                                        final int startColumn,
1820                                        final int endColumn)
1821         throws MathIllegalArgumentException {
1822         MatrixUtils.checkSubMatrixIndex(this, startRow, endRow, startColumn, endColumn);
1823         visitor.start(rows, columns, startRow, endRow, startColumn, endColumn);
1824         for (int iBlock = startRow / BLOCK_SIZE; iBlock < 1 + endRow / BLOCK_SIZE; ++iBlock) {
1825             final int p0     = iBlock * BLOCK_SIZE;
1826             final int pStart = FastMath.max(startRow, p0);
1827             final int pEnd   = FastMath.min((iBlock + 1) * BLOCK_SIZE, 1 + endRow);
1828             for (int jBlock = startColumn / BLOCK_SIZE; jBlock < 1 + endColumn / BLOCK_SIZE; ++jBlock) {
1829                 final int jWidth = blockWidth(jBlock);
1830                 final int q0     = jBlock * BLOCK_SIZE;
1831                 final int qStart = FastMath.max(startColumn, q0);
1832                 final int qEnd   = FastMath.min((jBlock + 1) * BLOCK_SIZE, 1 + endColumn);
1833                 final double[] block = blocks[iBlock * blockColumns + jBlock];
1834                 for (int p = pStart; p < pEnd; ++p) {
1835                     int k = (p - p0) * jWidth + qStart - q0;
1836                     for (int q = qStart; q < qEnd; ++q) {
1837                         visitor.visit(p, q, block[k]);
1838                         ++k;
1839                     }
1840                 }
1841             }
1842         }
1843         return visitor.end();
1844     }
1845 
1846     /**
1847      * Get the height of a block.
1848      * @param blockRow row index (in block sense) of the block
1849      * @return height (number of rows) of the block
1850      */
1851     private int blockHeight(final int blockRow) {
1852         return (blockRow == blockRows - 1) ? rows - blockRow * BLOCK_SIZE : BLOCK_SIZE;
1853     }
1854 
1855     /**
1856      * Get the width of a block.
1857      * @param blockColumn column index (in block sense) of the block
1858      * @return width (number of columns) of the block
1859      */
1860     private int blockWidth(final int blockColumn) {
1861         return (blockColumn == blockColumns - 1) ? columns - blockColumn * BLOCK_SIZE : BLOCK_SIZE;
1862     }
1863 
1864 }