cache simulator

Published on December 2016 | Categories: Documents | Downloads: 49 | Comments: 0 | Views: 461
of 4
Download PDF   Embed   Report

cache simulator

Comments

Content

/*
 * Forward declarations: transpose_submit dispatches to these two routines,
 * which are defined further down.
 */
void transpose_6464(int M, int N, int A[N][M], int B[M][N]);
void transpose_6167(int M, int N, int A[N][M], int B[M][N]);

/*
 * transpose_submit - write the transpose of the N x M matrix A into the
 * M x N matrix B.
 *
 * A 64x64 input is handled by transpose_6464; every other shape goes
 * through the bounds-checked 8x8 routine transpose_6167.
 */
void transpose_submit(int M, int N, int A[N][M], int B[M][N])
{
    REQUIRES(M > 0);
    REQUIRES(N > 0);

    if (M == 64 && N == 64) {
        transpose_6464(M, N, A, B);
    } else {
        transpose_6167(M, N, A, B);
    }

    ENSURES(is_transpose(M, N, A, B));
}

char transpose_6464_desc[] = "64x64";

/*
 * transpose_6464 - transpose A into B in 8x8 blocks, treating each block as
 * four 4x4 segments.
 *
 * There are no bounds checks: M and N must both be multiples of 8.
 *
 *      block of A            same block, transposed, in B
 *        a | b                      a' | c'
 *        --+--                      ---+---
 *        c | d                      b' | d'
 *
 * (x' is segment x transposed.)  Per block:
 *   1. a is transposed straight into the top-left of the B block.
 *   2. b is transposed but parked in the top-right of the B block, which is
 *      where c' finally belongs.
 *   3. Row by row, the parked b' row is lifted into temporaries, the row is
 *      overwritten with the matching column of c (which transposes c), and
 *      the temporaries are stored into the bottom-left of the B block.
 *   4. d is transposed straight into the bottom-right of the B block.
 *
 * In every step a strip of four values is read into temporaries before any
 * store to B; the original author's intent is to finish with the A row
 * before the writes to B can push it out of the cache.
 */
void transpose_6464(int M, int N, int A[N][M], int B[M][N])
{
    enum { BLOCK = 8, HALF = BLOCK / 2 };
    int i, j, ai, aj, k;    /* loop variables */
    int t0, t1, t2, t3;     /* temporaries for one 1 x HALF strip */

    for (i = 0; i < N; i += BLOCK) {
        for (j = 0; j < M; j += BLOCK) {
            /* Segment a: top-left of A -> top-left of B, transposed. */
            aj = j;
            for (ai = i; ai < i + HALF; ai++) {
                t0 = A[ai][aj];
                t1 = A[ai][aj + 1];
                t2 = A[ai][aj + 2];
                t3 = A[ai][aj + 3];

                B[aj][ai] = t0;
                B[aj + 1][ai] = t1;
                B[aj + 2][ai] = t2;
                B[aj + 3][ai] = t3;
            }

            /*
             * Segment b: top-right of A, transposed, parked in the
             * top-right of B (rows j..j+3, columns i+4..i+7).
             */
            aj = j + HALF;
            for (ai = i; ai < i + HALF; ai++) {
                t0 = A[ai][aj];
                t1 = A[ai][aj + 1];
                t2 = A[ai][aj + 2];
                t3 = A[ai][aj + 3];

                B[aj - HALF][ai + HALF] = t0;
                B[aj - HALF + 1][ai + HALF] = t1;
                B[aj - HALF + 2][ai + HALF] = t2;
                B[aj - HALF + 3][ai + HALF] = t3;
            }

            /* Segments b and c: swap b' down-left while bringing c' in. */
            for (k = 0; k < HALF; k++) {
                /* Lift row k of the parked b'. */
                t0 = B[j + k][i + HALF];
                t1 = B[j + k][i + HALF + 1];
                t2 = B[j + k][i + HALF + 2];
                t3 = B[j + k][i + HALF + 3];

                /* Column k of c becomes row k of c' in the top-right. */
                B[j + k][i + HALF] = A[i + HALF][j + k];
                B[j + k][i + HALF + 1] = A[i + HALF + 1][j + k];
                B[j + k][i + HALF + 2] = A[i + HALF + 2][j + k];
                B[j + k][i + HALF + 3] = A[i + HALF + 3][j + k];

                /* b' is already transposed: copy the row as-is. */
                B[j + HALF + k][i] = t0;
                B[j + HALF + k][i + 1] = t1;
                B[j + HALF + k][i + 2] = t2;
                B[j + HALF + k][i + 3] = t3;
            }

            /* Segment d: bottom-right of A -> bottom-right of B. */
            aj = j + HALF;
            for (ai = i + HALF; ai < i + BLOCK; ai++) {
                t0 = A[ai][aj];
                t1 = A[ai][aj + 1];
                t2 = A[ai][aj + 2];
                t3 = A[ai][aj + 3];

                B[aj][ai] = t0;
                B[aj + 1][ai] = t1;
                B[aj + 2][ai] = t2;
                B[aj + 3][ai] = t3;
            }
        }
    }
}

char transpose_3232_desc[] = "32x32";

/*
 * transpose_3232 - transpose A into B in 8x8 blocks.
 *
 * There are no bounds checks: M and N must both be multiples of 8.  Each
 * row of a block is read completely into temporaries before the matching
 * column of B is written.
 */
void transpose_3232(int M, int N, int A[N][M], int B[M][N])
{
    enum { BLOCK = 8 };
    int dn, m, n;                           /* loop variables */
    int t0, t1, t2, t3, t4, t5, t6, t7;     /* one 1 x BLOCK strip of A */

    for (n = 0; n < N; n += BLOCK) {
        for (m = 0; m < M; m += BLOCK) {
            for (dn = n; dn < n + BLOCK; dn++) {
                t0 = A[dn][m];
                t1 = A[dn][m + 1];
                t2 = A[dn][m + 2];
                t3 = A[dn][m + 3];
                t4 = A[dn][m + 4];
                t5 = A[dn][m + 5];
                t6 = A[dn][m + 6];
                t7 = A[dn][m + 7];

                B[m][dn] = t0;
                B[m + 1][dn] = t1;
                B[m + 2][dn] = t2;
                B[m + 3][dn] = t3;
                B[m + 4][dn] = t4;
                B[m + 5][dn] = t5;
                B[m + 6][dn] = t6;
                B[m + 7][dn] = t7;
            }
        }
    }
}

char transpose_6167_desc[] = "61x67";

/*
 * transpose_6167 - transpose A into B in 8x8 blocks for dimensions that need
 * not be multiples of 8.
 *
 * Blocks on the right and bottom edges are allowed to hang over the matrix;
 * every load and store is guarded so nothing outside A or B is touched.
 *
 * The block origins step by 8 while n < N and m < M.  That visits every
 * block containing at least one in-range element, so the bounds do not need
 * to be rounded up to a multiple of 8 first.
 *
 * NOTE(review): the text this was recovered from ends inside this function;
 * the tail of the store sequence and the closing braces are reconstructed
 * from the visible loop structure - confirm against the original file.
 */
void transpose_6167(int M, int N, int A[N][M], int B[M][N])
{
    enum { BLOCK = 8 };
    int dn, m, n;                           /* loop variables */
    int t0, t1, t2, t3, t4, t5, t6, t7;     /* one 1 x BLOCK strip of A */

    for (n = 0; n < N; n += BLOCK) {
        for (m = 0; m < M; m += BLOCK) {
            for (dn = n; dn < n + BLOCK && dn < N; dn++) {
                /* Load only the columns that exist in A. */
                if (m < M)     { t0 = A[dn][m]; }
                if (m + 1 < M) { t1 = A[dn][m + 1]; }
                if (m + 2 < M) { t2 = A[dn][m + 2]; }
                if (m + 3 < M) { t3 = A[dn][m + 3]; }
                if (m + 4 < M) { t4 = A[dn][m + 4]; }
                if (m + 5 < M) { t5 = A[dn][m + 5]; }
                if (m + 6 < M) { t6 = A[dn][m + 6]; }
                if (m + 7 < M) { t7 = A[dn][m + 7]; }

                /*
                 * Store under the same guards, so a temporary is only
                 * read when the load above actually set it.
                 */
                if (m < M)     { B[m][dn] = t0; }
                if (m + 1 < M) { B[m + 1][dn] = t1; }
                if (m + 2 < M) { B[m + 2][dn] = t2; }
                if (m + 3 < M) { B[m + 3][dn] = t3; }
                if (m + 4 < M) { B[m + 4][dn] = t4; }
                if (m + 5 < M) { B[m + 5][dn] = t5; }
                if (m + 6 < M) { B[m + 6][dn] = t6; }
                if (m + 7 < M) { B[m + 7][dn] = t7; }
            }
        }
    }
}

Sponsor Documents

Or use your account on DocShare.tips

Hide

Forgot your password?

Or register your new account on DocShare.tips

Hide

Lost your password? Please enter your email address. You will receive a link to create a new password.

Back to log-in

Close