实现一个缓存模拟器,读取给定的 trace 文件,输出对应的hit,miss,eviction次数
valid_bit是行的有效位,tag是标记位,lru表示最近最久未使用,lru越大,越久未使用,需要牺牲行时牺牲lru最大的一行
typedef struct
{
int valid_bit;
unsigned long tag;
int lru;
} Cache_line;
typedef struct
{
Cache_line* lines;
} Cache_set;
typedef struct
{
int S;
int E;
Cache_set* sets;
} Cache;
根据S,E,b初始化cache
void init_cache(int S, int E, int b, Cache* cache)
{
cache->S = S;
cache->E = E;
cache->sets = (Cache_set*) malloc(S * sizeof(Cache_set));
for (int i = 0; i < S; i++)
{
cache->sets[i].lines = (Cache_line*) malloc(E * sizeof(Cache_line));
for (int j = 0; j < E; j++)
{
cache->sets[i].lines[j].valid_bit = 0;
cache->sets[i].lines[j].time = 0;
}
}
}
命中需要满足两个条件
int get_hit_index(Cache *cache, int set_index, int tag)
{
for (int i = 0; i < cache->E; i++)
{
if (cache->sets[set_index].lines[i].valid_bit == 1 && cache->sets[set_index].lines[i].tag == tag)
{
return i;
}
}
return -1;
}
若命中则返回命中的索引,否则返回-1
遍历所有行,只要有一行有效位为0则该组未满。
int get_empty_index(Cache *cache, int set_index, int tag)
{
for (int i = 0; i < cache->E; ++i)
{
if (cache->sets[set_index].lines[i].valid_bit == 0)
{
return i;
}
}
return -1;
}
若满则返回-1,未满则返回该行索引
void load(Cache *cache, int set_index, int tag, int verbose)
{
int hit_index = get_hit_index(cache, set_index, tag);
if (hit_index < 0)
{
miss++;
int empty_index = get_empty_index(cache, set_index, tag);
if (verbose)
{
printf("miss ");
}
if (empty_index < 0)
{
eviction++;
if (verbose)
{
printf("eviction ");
}
//比较lru大小
int max_index = 0;
int max = cache->sets[set_index].lines[0].lru;
for (int i = 1; i < cache->E; i++)
{
if (cache->sets[set_index].lines[i].lru > max)
{
max_index = i;
max = cache->sets[set_index].lines[i].lru;
}
}
//牺牲行
cache->sets[set_index].lines[max_index].valid_bit = 1;
cache->sets[set_index].lines[max_index].tag = tag;
cache->sets[set_index].lines[max_index].lru = 0;
}
else
{
for (int i = 0; i < cache->E; i++)
{
//把空行的有效位设为1,标志位设置为请求地址的标记位,lru设为0,其他行的lru+1
if (i == empty_index)
{
cache->sets[set_index].lines[i].valid_bit = 1;
cache->sets[set_index].lines[i].tag = tag;
cache->sets[set_index].lines[i].lru = 0;
}
else
{
cache->sets[set_index].lines[i].lru++;
}
}
}
}
else
{
hit++;
if (verbose)
{
printf("hit ");
}
for (int i = 0; i < cache->E; i++)
{
//命中行的lru改为0,其他行的lru+1
if (i == hit_index)
{
cache->sets[set_index].lines[i].lru = 0;
}
else
{
cache->sets[set_index].lines[i].lru++;
}
}
}
}
//store与load相同
void store(Cache *cache, int set_index, int tag, int verbose)
{
load(cache, set_index, tag, verbose);
}
//modify相当于一次载入+一次储存
void modify(Cache *cache, int set_index, int tag, int verbose)
{
load(cache, set_index, tag, verbose);
store(cache, set_index, tag, verbose);
}
void read_file(int s, int E, int b, char* file, Cache* cache, int verbose)
{
FILE *p = fopen(file, "r");
char opt;
unsigned long address;
int size;
while (fscanf(p, " %c %lx,%d", &opt, &address, &size) > 0)
{
if (opt == 'I')
{
continue;
}
else
{
int tag = address >> (b + s);
int set_index = (address >> b) & ((1 << s) - 1);
if (verbose == 1)
{
printf("%c %lx,%d ", opt, address, size);
}
if (opt == 'S')
{
store(cache, set_index, tag, verbose);
}
if (opt == 'M')
{
modify(cache, set_index, tag, verbose);
}
if (opt == 'L')
{
load(cache, set_index, tag, verbose);
}
if (verbose == 1)
{
printf("\n");
}
}
}
}
main函数需要读取命令参数,使用getopt实现
int main(int argc, char *argv[])
{
int s, S, E, b;
char file[20];
int verbose = 0;
Cache cache;
int ch;
while ((ch = getopt(argc, argv, "vs:E:b:t:")) != -1)
{
switch (ch)
{
case 'v':
verbose = 1;
break;
case 's':
s = atoi(optarg);
S = pow(2, s);
break;
case 'E':
E = atoi(optarg);
break;
case 'b':
b = atoi(optarg);
break;
case 't':
strcpy(file, optarg);
break;
default:
break;
}
}
init_cache(S, E, b, &cache);
read_file(s, E, b, file, &cache, verbose);
printSummary(hit, miss, eviction);
return 0;
}
#include "cachelab.h"
#include <stdio.h>
#include <stdlib.h>
#include <getopt.h>
#include <string.h>
#include <unistd.h>
#include <math.h>
typedef struct
{
int valid_bit;
unsigned long tag;
int lru;
} Cache_line;
typedef struct
{
Cache_line* lines;
} Cache_set;
typedef struct
{
int S;
int E;
Cache_set* sets;
} Cache;
int hit = 0;
int miss = 0;
int eviction = 0;
void init_cache(int S, int E, int b, Cache* cache)
{
cache->S = S;
cache->E = E;
cache->sets = (Cache_set*) malloc(S * sizeof(Cache_set));
for (int i = 0; i < S; i++)
{
cache->sets[i].lines = (Cache_line*) malloc(E * sizeof(Cache_line));
for (int j = 0; j < E; j++)
{
cache->sets[i].lines[j].valid_bit = 0;
cache->sets[i].lines[j].lru = 0;
}
}
}
int get_hit_index(Cache *cache, int set_index, int tag)
{
for (int i = 0; i < cache->E; i++)
{
if (cache->sets[set_index].lines[i].valid_bit == 1 && cache->sets[set_index].lines[i].tag == tag)
{
return i;
}
}
return -1;
}
int get_empty_index(Cache *cache, int set_index, int tag)
{
for (int i = 0; i < cache->E; ++i)
{
if (cache->sets[set_index].lines[i].valid_bit == 0)
{
return i;
}
}
return -1;
}
void load(Cache *cache, int set_index, int tag, int verbose)
{
int hit_index = get_hit_index(cache, set_index, tag);
if (hit_index < 0)
{
miss++;
int empty_index = get_empty_index(cache, set_index, tag);
if (verbose)
{
printf("miss ");
}
if (empty_index < 0)
{
eviction++;
if (verbose)
{
printf("eviction ");
}
int max_index = 0;
int max = cache->sets[set_index].lines[0].lru;
for (int i = 1; i < cache->E; i++)
{
if (cache->sets[set_index].lines[i].lru > max)
{
max_index = i;
max = cache->sets[set_index].lines[i].lru;
}
}
cache->sets[set_index].lines[max_index].valid_bit = 1;
cache->sets[set_index].lines[max_index].tag = tag;
cache->sets[set_index].lines[max_index].lru = 0;
}
else
{
for (int i = 0; i < cache->E; i++)
{
if (i == empty_index)
{
cache->sets[set_index].lines[i].valid_bit = 1;
cache->sets[set_index].lines[i].tag = tag;
cache->sets[set_index].lines[i].lru = 0;
}
else
{
cache->sets[set_index].lines[i].lru++;
}
}
}
}
else
{
hit++;
if (verbose)
{
printf("hit ");
}
for (int i = 0; i < cache->E; i++)
{
if (i == hit_index)
{
cache->sets[set_index].lines[i].lru = 0;
}
else
{
cache->sets[set_index].lines[i].lru++;
}
}
}
}
void store(Cache *cache, int set_index, int tag, int verbose)
{
load(cache, set_index, tag, verbose);
}
void modify(Cache *cache, int set_index, int tag, int verbose)
{
load(cache, set_index, tag, verbose);
store(cache, set_index, tag, verbose);
}
void read_file(int s, int E, int b, char* file, Cache* cache, int verbose)
{
FILE *p = fopen(file, "r");
char opt;
unsigned long address;
int size;
while (fscanf(p, " %c %lx,%d", &opt, &address, &size) > 0)
{
if (opt == 'I')
{
continue;
}
else
{
int tag = address >> (b + s);
int set_index = (address >> b) & ((1 << s) - 1);
if (verbose == 1)
{
printf("%c %lx,%d ", opt, address, size);
}
if (opt == 'S')
{
store(cache, set_index, tag, verbose);
}
if (opt == 'M')
{
modify(cache, set_index, tag, verbose);
}
if (opt == 'L')
{
load(cache, set_index, tag, verbose);
}
if (verbose == 1)
{
printf("\n");
}
}
}
}
int main(int argc, char *argv[])
{
int s, S, E, b;
char file[20];
int verbose = 0;
Cache cache;
int ch;
while ((ch = getopt(argc, argv, "vs:E:b:t:")) != -1)
{
switch (ch)
{
case 'v':
verbose = 1;
break;
case 's':
s = atoi(optarg);
S = pow(2, s);
break;
case 'E':
E = atoi(optarg);
break;
case 'b':
b = atoi(optarg);
break;
case 't':
strcpy(file, optarg);
break;
default:
break;
}
}
init_cache(S, E, b, &cache);
read_file(s, E, b, file, &cache, verbose);
printSummary(hit, miss, eviction);
return 0;
}
gg,差1
找了半天找不出来问题出在哪
我选择死亡
编写一个函数,计算给定矩阵的转置,最小化模拟缓存中的未命中数
测试矩阵的尺寸为32*32、64*64、61*67
测试缓存 s = 5,E = 1,b = 5
32组,一组1行,一个块32字节,存放8个int
for (int i = 0; i < M; i++)
{
for (int j = 0; j < N; j++)
{
B[j][i] = A[i][j];
}
}
矩阵一行32个int,数字代表数据储存的组,缓存32组,可以储存8*32个int,8行填满一个cache,每隔8行出现冲突,造成冲突不命中
把矩阵分成16个8*8的小块,分别转置
if ( M == 32 && N == 32)
{
for (int i = 0; i < N; i += 8)
{
for (int j = 0; j < M; j += 8)
{
for (int k = i; k < i + 8; k++)
{
int temp0 = A[k][j];
int temp1 = A[k][j+1];
int temp2 = A[k][j+2];
int temp3 = A[k][j+3];
int temp4 = A[k][j+4];
int temp5 = A[k][j+5];
int temp6 = A[k][j+6];
int temp7 = A[k][j+7];
B[j][k] = temp0;
B[j+1][k] = temp1;
B[j+2][k] = temp2;
B[j+3][k] = temp3;
B[j+4][k] = temp4;
B[j+5][k] = temp5;
B[j+6][k] = temp6;
B[j+7][k] = temp7;
}
}
}
}
一行64个int,4行填满一个cache,每隔4行出现冲突,造成冲突不命中
分成4*4的试试
if ( M == 64 && N == 64)
{
for (int i = 0; i < N; i += 4)
{
for (int j = 0; j < M; j += 4)
{
for (int k = i; k < i + 4; k++)
{
int temp0 = A[k][j];
int temp1 = A[k][j+1];
int temp2 = A[k][j+2];
int temp3 = A[k][j+3];
B[j][k] = temp0;
B[j+1][k] = temp1;
B[j+2][k] = temp2;
B[j+3][k] = temp3;
}
}
}
}
miss应该在1300以下,1699,gg
不可能用16*16的,还是要用8*8的搞,转置时还是要分成4*4,俺寻思得把8*8分成4个4*4。
然后就不会了
记录一个对b操作,先转置再平移的做法
for (i = 0; i < N; i += 8)
{
for (j = 0; j < M; j += 8)
{
for (k = i; k < i + 4; k++)
{
a0 = A[k][j];
a1 = A[k][j + 1];
a2 = A[k][j + 2];
a3 = A[k][j + 3];
a4 = A[k][j + 4];
a5 = A[k][j + 5];
a6 = A[k][j + 6];
a7 = A[k][j + 7];
B[j][k] = a0;
B[j + 1][k] = a1;
B[j + 2][k] = a2;
B[j + 3][k] = a3;
B[j][k + 4] = a4;
B[j + 1][k + 4] = a5;
B[j + 2][k + 4] = a6;
B[j + 3][k + 4] = a7;
}
for (l = j + 4; l < j + 8; l++)
{
a4 = A[i + 4][l - 4]; // A left-down col
a5 = A[i + 5][l - 4];
a6 = A[i + 6][l - 4];
a7 = A[i + 7][l - 4];
a0 = B[l - 4][i + 4]; // B right-above line
a1 = B[l - 4][i + 5];
a2 = B[l - 4][i + 6];
a3 = B[l - 4][i + 7];
B[l - 4][i + 4] = a4; // set B right-above line
B[l - 4][i + 5] = a5;
B[l - 4][i + 6] = a6;
B[l - 4][i + 7] = a7;
B[l][i] = a0; // set B left-down col
B[l][i + 1] = a1;
B[l][i + 2] = a2;
B[l][i + 3] = a3;
B[l][i + 4] = A[i + 4][l];
B[l][i + 5] = A[i + 5][l];
B[l][i + 6] = A[i + 6][l];
B[l][i + 7] = A[i + 7][l];
}
}
}
非对称矩阵,相差4行不一定冲突,直接分块
试了一下8*8,miss超出2000,16*16可以
if ( M ==61 && N == 67)
{
for (int i = 0; i < N; i += 16)
{
for (int j = 0; j < M; j += 16)
{
for (int k = i; k < i + 16 && k < 67; k++)
{
for (int l = j; l < j + 16 && l < 61; l++)
{
B[l][k] = A[k][l];
}
}
}
}
}
本作品采用知识共享署名-非商业性使用-禁止演绎 4.0 国际许可协议(CC BY-NC-ND 4.0)进行许可。
This work is licensed under the Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International License (CC BY-NC-ND 4.0).