Matrix transpose in C: blocking (tiling) code example
Example: matrix transpose tiling
// Tiled (blocked) matrix transpose: every element src[row][col] is copied
// to dest[col][row], visiting the matrix one TILE_DIM x TILE_DIM tile at a
// time instead of sweeping whole rows.

// Source extent: rows and columns. The destination is indexed the other
// way round (N_c by N_r).
const int N_r = 56;
const int N_c = 75;

// Edge length of one square tile.
const int TILE_DIM = 16;

// Number of tiles along each axis, rounded up so that a partial tile at the
// bottom/right edge is still visited.
const int outer_Dimr = (N_r - 1) / TILE_DIM + 1;
const int outer_Dimc = (N_c - 1) / TILE_DIM + 1;

// NOTE(review): both pointers are only declared in this excerpt; they must
// refer to allocated row-pointer tables before the loops below execute.
int **dest;
int **src;

// Walk the grid of tiles, row of tiles by row of tiles.
for (int tile_r = 0; tile_r < outer_Dimr; ++tile_r) {
    // Row span of this tile, clipped to the matrix so the last (partial)
    // tile never reaches past row N_r - 1.
    const int row_begin = tile_r * TILE_DIM;
    const int row_end = (row_begin + TILE_DIM < N_r) ? row_begin + TILE_DIM : N_r;

    for (int tile_c = 0; tile_c < outer_Dimc; ++tile_c) {
        // Column span of this tile, clipped the same way against N_c.
        const int col_begin = tile_c * TILE_DIM;
        const int col_end = (col_begin + TILE_DIM < N_c) ? col_begin + TILE_DIM : N_c;

        // Copy the tile. Clipping the spans up front replaces a per-element
        // bounds test; the elements touched and their order are unchanged.
        for (int row = row_begin; row < row_end; ++row) {
            for (int col = col_begin; col < col_end; ++col) {
                dest[col][row] = src[row][col];
            }
        }
    }
}