I don't understand why my code for the fully associative cache doesn't match the trace files that I'm given.
The parameters are each cache line is 32 bytes and the total cache size is 16KB.
My implementations for set associative caches of 2,4,8,and 16 all work perfectly (using least recently used replacement policy). But for fully associative, which could also just be described as a set associative of 32, is VERY close to the trace file but not quite. Frankly, I don't know how to debug this one since there's a vast amount of steps (at least the way I did it)
Here's the relevant parts of my code (excuse the inefficiency)
//Fully Associative
int **fullyAssoc;
fullyAssoc = new int*[64]; //where fullyAssoc[0][index] is way 0, fullyAssoc[2][index] is way 1 etc..
int **LRU32;
LRU32 = new int*[32];
for (int i = 0; i < 64; ++i){ //Initialize all entries in fullyAssoc to 0
fullyAssoc[i] = new int[16 * CACHE_LINE / 32];
}
for (int i = 0; i < 16; i++){ //Initialize LRU array
LRU32[0][i] = 0;
LRU32[1][i] = 1;
LRU32[2][i] = 2;
LRU32[3][i] = 3;
LRU32[4][i] = 4;
LRU32[5][i] = 5;
LRU32[6][i] = 6;
LRU32[7][i] = 7;
LRU32[8][i] = 8;
LRU32[9][i] = 9;
LRU32[10][i] = 10;
LRU32[11][i] = 11;
LRU32[12][i] = 12;
LRU32[13][i] = 13;
LRU32[14][i] = 14;
LRU32[15][i] = 15;
LRU32[16][i] = 16;
LRU32[17][i] = 17;
LRU32[18][i] = 18;
LRU32[19][i] = 19;
LRU32[20][i] = 20;
LRU32[21][i] = 21;
LRU32[22][i] = 22;
LRU32[23][i] = 23;
LRU32[24][i] = 24;
LRU32[25][i] = 25;
LRU32[26][i] = 26;
LRU32[27][i] = 27;
LRU32[28][i] = 28;
LRU32[29][i] = 29;
LRU32[30][i] = 30;
LRU32[31][i] = 31;
}
int fullyAssocLRU = 0;
int memCount = 0;
while(getline(fileIn, line)){
stringstream s(line);
s >> instruction >> hex >> address;
int indexFull;
int tagFull;
unsigned long long address, addressFull;
address = address >> 5; //Byte offset
addressFull = address;
indexFull = addressFull % 16;
tagFull = addressFull >> 4;
if (assocCache(fullyAssoc, indexFull, 32, tagFull, LRU32) == 1){
fullyAssocLRU++;
}
}
void LRU_update(int **lru, int index, int way, int ways){
int temp = 0;
int temp2[ways];
int temp_index = 0;
int i = 0;
while(i < ways){
if (lru[i][index] == way/2){
temp = lru[i][index];
i++;
continue;
}
else{
temp2[temp_index] = lru[i][index];
temp_index++;
}
i++;
}
for (int j = 0; j < ways - 1; j++){
lru[j][index] = temp2[j];
}
lru[ways - 1][index] = temp;
}
bool assocCache(int **block, int index, int ways, int tag, int **lru){
bool retVal = false;
for(int i = 0; i < 2*ways; i = i + 2){
if (block[i][index] == 0){
block[i][index] = 1;
block[i+1][index] = tag;
LRU_update(lru, index, i, ways);
return retVal;
}
else{
if (block[i+1][index] == tag){
retVal = true;
LRU_update(lru, index, i, ways);
return retVal;
}
else{
continue;
}
}
}
int head = 2 * lru[0][index];
block[head][index] = 1;
block[head+1][index] = tag;
LRU_update(lru, index, head, ways);
return retVal;
}
The trace files is supposed to be:
837589,1122102; 932528,1122102; 972661,1122102; 1005547,1122102; //For direct mapped
993999,1122102; 999852,1122102; 999315,1122102; 1000092,1122102; //For set associative
1000500,1122102; //For fully associative (LRU)
My output is:
837589,1122102; 932528,1122102; 972661,1122102; 1005547,1122102;
939999,1122102; 999852,1122102; 999315,1122102; 1000092,1122102;
1000228,1122102;
As you can see, for the fully associative one, it's only 272 off the correct output. Why would it be off when switching from 16 ways to 32 ways?
Ah, I mistakenly though a fully associative cache for a 32 line size cache of 16KB cache size is 32 ways, when it's actually 512 ways.
Related
I am not able to utilize FF_CONSTANT force effect. My try code is:
struct ff_effect joy_effect_, joy_effect_2;
if (iwantconstantforce)
{
joy_effect_.id = -1;
joy_effect_.type = FF_CONSTANT;
joy_effect_.direction = 0x0000; // down
joy_effect_.replay.length = 100;
joy_effect_.replay.delay = 0;
joy_effect_.trigger.button = 0;
joy_effect_.trigger.interval = 100;
joy_effect_.u.constant.level = 65535;
joy_effect_.u.constant.envelope.attack_length = joy_effect_.replay.length / 10;
joy_effect_.u.constant.envelope.fade_length = joy_effect_.replay.length / 10;
joy_effect_.u.constant.envelope.attack_level = joy_effect_.u.constant.level / 10;
joy_effect_.u.constant.envelope.fade_level = joy_effect_.u.constant.level / 10;
}
I am able to produce FF_SPRING and FF_DAMPER effects with following codes.
if (youwantdampereffect)
{
joy_effect_.id = -1;
joy_effect_.direction = 0; // down
joy_effect_.type = FF_DAMPER;
joy_effect_.replay.length = 20;
joy_effect_.replay.delay = 0;
joy_effect_.u.condition[0].right_saturation = 65535;
joy_effect_.u.condition[0].left_saturation = 65535;
joy_effect_.u.condition[0].right_coeff = 65535 / 2;
joy_effect_.u.condition[0].left_coeff = 65535 / 2;
joy_effect_.u.condition[0].deadband = 0;
joy_effect_.u.condition[0].center = 0;
int ret = ioctl(ff_fd_, EVIOCSFF, &joy_effect_); // upload the effect
}
if (youwantspringeffect)
{
joy_effect_2.id = -1;
joy_effect_2.direction = 0; // down
joy_effect_2.type = FF_SPRING;
joy_effect_2.replay.length = 20;
joy_effect_2.replay.delay = 0;
joy_effect_2.u.condition[0].right_saturation = 65535 / 2;
joy_effect_2.u.condition[0].left_saturation = 65535 / 2;
joy_effect_2.u.condition[0].right_coeff = 32767;
joy_effect_2.u.condition[0].left_coeff = 32767;
joy_effect_2.u.condition[0].deadband = 0;
joy_effect_2.u.condition[0].center = 0;
int ret = ioctl(ff_fd_, EVIOCSFF, &joy_effect_2); // upload the effect
}
I do not find any info about what is constant force effect feels like or when it makes sense to use it.
Can somebody brief its importance and usage?
Thanks :)
I'm trying to use mnist dataset for neural networks but im getting a Access violation writing location 0x00000000
the code is
for (int i = 0; i < length; i++) {
innerarray = (int8_t*)malloc(width * height);
for (int j = 0; j < width * height; j++) {
int8_t value = 0;
innerarray[j] = value;
}
temparray[i] = innerarray;
}
for (int i = 0; i < length; i++) {
for (int j = 0; j < width * height; j++) {
int8_t grayscale;
rf.read((char*)&grayscale, 1);
temparray[i][j] = grayscale; //error happens here
}
}
variable values:
int length = 10000;
int width = 28;
int height = 28;
The weird thing is it only happen when i >= 2512. Also replacing grayscale with 0 doesn't work. I can hower set temparray[2512][0] to 0 before the last nested for loop.
Like this:
for (int i = 0; i < length; i++) {
innerarray = (int8_t*)malloc(width * height);
for (int j = 0; j < width * height; j++) {
int8_t value = 0;
innerarray[j] = value;
}
temparray[i] = innerarray;
}
temparray[2512][0] = 0; //works
for (int i = 0; i < length; i++) {
for (int j = 0; j < width * height; j++) {
int8_t grayscale;
rf.read((char*)&grayscale, 1);
temparray[i][j] = 0; //error still happens here
}
}
The full code is:
#include<iostream>
#include<fstream>
#include<cstdint>
#include<cstdlib>
#include<array>
using namespace std;
struct images {
int32_t height = 0;
int32_t width = 0;
int32_t magicnumber = 0;
int32_t numberofimages = 0;
int8_t** images[];
void setimages(int8_t** newimages) {
delete[] this->images;
int8_t** images = (int8_t**)malloc(numberofimages);
int8_t* innerarray;
for (int i = 0; i < numberofimages; i++) {
innerarray = (int8_t*)malloc(width * height);
images[i] = innerarray;
}
for (int i = 0; i < numberofimages; i++) {
for (int j = 0; j < width * height; j++) {
images[i][j] = newimages[i][j];
}
}
};
};
struct labels {
int32_t magicnumber = 0;
int32_t numberoflabels = 0;
int8_t labels[];
};
int32_t litleendiantobig(int32_t litle) {//reverse works as well
int32_t big = ((4278190080 & litle) >> 24) + ((255 & litle) << 24) + ((16711680 & litle) >> 8) + ((65280 & litle) << 8);
return big;
}
images loadimages(string filename, int32_t magicalnumber) {
ifstream rf(filename, ios::out | ios::binary);
if (!rf) {
cout << "Cannot open file! " << filename << endl;
exit(1);
}
int32_t magicnumberoffile;
rf.read((char*)&magicnumberoffile, 4);
magicnumberoffile = litleendiantobig(magicnumberoffile);
if (magicalnumber != magicnumberoffile) {
cout << "Wrong magic number!" << endl;
cout << "expected:" << magicalnumber << endl;
cout << "got:" << magicnumberoffile << endl;
exit(1);
}
images img;
int32_t length;
rf.read((char*)&length, 4);
length = litleendiantobig(length);
img.numberofimages = length;
int32_t width;
rf.read((char*)&width, 4);
width = litleendiantobig(width);
img.width = width;
int32_t height;
rf.read((char*)&height, 4);
height = litleendiantobig(height);
img.height = height;
int8_t** temparray = (int8_t**)malloc(length);
int8_t* innerarray;
for (int i = 0; i < length; i++) {
innerarray = (int8_t*)malloc(width * height);
for (int j = 0; j < width * height; j++) {
int8_t value = 0;
innerarray[j] = value;
}
temparray[i] = innerarray;
}
for (int i = 0; i < length; i++) {
for (int j = 0; j < width * height; j++) {
int8_t grayscale;
rf.read((char*)&grayscale, 1);
temparray[i][j] = grayscale; //error happens here
}
}
img.setimages(temparray);
rf.close();
return img;
}
int main() {
images testimages;
loadimages("t10k-images.bin", 2051);
cout << testimages.images;
return 0;
}
I don't now how to solve the problem and can't find it anywhere else. Thanks for helping me out.
Your using malloc has done you in.
int* array = (int*)malloc(width* height); // allocate width * height bytes.
array[i] = x; // Sets the [i] _integer_ of array to x.
// but you allocated space for BYTE size elemennts.
The correct way to allocate integers using malloc:
int* array = (int*)malloc(width* height * sizeof(int)); // allocate width * height ints
Either that or your original intent was to allocate 8 bit pixels. In that case, your pointers should be declared as unsigned char*.
In either case, when coding in C++, types are important, and using operator new to allocate your arrays would have saved you from these troubles.
I want to solve some algorithm problem.
Could you suggest any algorithms working more fast?
*Problem summary
- Find same array of key[200] is same as source array KEY[200]
- Each element of KEY[200] array is random numbers range 1~255
- only 2 file are given.
- You must implement just function find_array() of user_code.cpp
- It is not allowed to edit any other things
- You can use check() function for finding array
- test case is 50, time limit is 10 sec for 50 test case, memory limit is 256MB.
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
extern void find_array(unsigned char key[200]);
unsigned char KEY[200];
int check(unsigned char key[200])
{
int pos = 0;
int equal = 0;
for (int c = 0; c < 200; c++)
{
if (key[c] == KEY[c])
pos++;
}
for (int c1 = 0; c1 < 200; c1++)
{
for (int c2 = 0; c2 < 200; c2++)
{
if(key[c1] == KEY[c2])
equal++;
}
}
return pos * 256 + equal;
}
int main()
{
for (int t = 0; t < 1; t++) //test case 50개
{
for (int i = 0; i < 200; i++)
{
KEY[i] = rand() % 255 + 1; //1~255
}
unsigned char key[200] = { 0, };
find_array(key); //you must implement this function
}
return 0;
}
//user_code.cpp
extern int check(unsigned char key[200]);
//you must implement this function
//below is my code take a long time(about 2sec for each case)
void find_array(unsigned char key[200])
{
unsigned char temp[200];
int result, pos, equal;
for (int k = 0; k < 200; k++)
temp[k] = 0;
for (int i = 0; i < 200; i++)
{
for (int val = 1; val <= 255; val++)
{
temp[i] = val;
result = check(temp);
equal = result % 256;
pos = (result - equal) / 256;
if (pos >= 1)
{
key[i] = val;
temp[i] = 0;
break;
}
}
}
}
I'm having some trouble with multiplying an array (char array in this particular case) by a value.
My code looks like this:
char* tab1 = copy("11");
char t = '2';
int length = strlen(tab1) + 2;
char*result = populate('0', length);
int p_length = strlen(tab1);
for (int j = p_length - 1; j >= 0; j--) {
char* tmp = multiply_chars(tab1[j], t);
v_shove(tmp, j);
char* tmp2 = add_tables(result, tmp);
delete[] result;
result = tmp2;
delete[] tmp;
}
cout << result << endl;
delete[] result;
delete[] tab1;
None of the methods used (that's populate, multiply_chars and add_tables) causes a leak when ran in an infinite loop. I've narrowed the leak to the
char* tmp2 = add_tables(result, tmp);
delete[] result;
result = tmp2;
part, but have no idea why it would happen.
I check for leaks by running snippets in an infinite loop and checking memory usage.
Any help would be appreciated! If need be I'll post the code of the methods used, but decided not to for the sake of brevity here. They all return new cstrings. Also, the t2 variable is there from when I was checking the array by array multiplication, which also leaked - decided to do array by value multiplication first.
(Now, to be completely honest this is one of the methods required for a school project, but it's such a miniscule part of it, that I thought it wouldn't hurt if I asked - the teacher isn't really big on helping with particular code problems)
The functions are:
char * add_tables(const char * table1, const char * table2)
{
char* tmp1 = get_string_trailing("0",table1);
char* tmp2 = get_string_trailing("0", table2);
int l1 = strlen(tmp1), l2 = strlen(tmp2);
if (l1 != l2) {
if (l1 > l2) {
char* t = resize_string(tmp2, l1 - l2, '0');
delete[] tmp2;
tmp2 = t;
}
else {
char* t = resize_string(tmp1, l2 - l1, '0');
delete[] tmp1;
tmp1 = t;
}
}
int length = strlen(tmp1) + 2;
char*result = new char[length];
result[length - 1] = 0;
int buffer = 0;
for (int i = length - 2; i > 0; i--) {
int t = buffer + (tmp1[i-1]-'0') + (tmp2[i-1]-'0');
result[i] = (t% 10)+'0';
buffer = (t - (t % 10))/10;
}
result[0] = buffer + '0';
char* t = get_string_trailing("0", result);
delete[]result;
result = t;
delete[]tmp1;
delete[]tmp2;
return result;
}
void v_shove(char *&c, int i)
{
char* tmp = shove(c, i);
delete[] c;
c = tmp;
}
char * populate(const char populator, int length)
{
char* result = new char[length + 1];
result[length] = 0;
for (int i = 0; i < length; i++) {
result[i] = populator;
}
return result;
}
char * multiply_chars(const char c1,const char c2)
{
char*result = new char[3];
result[2] = 0;
char tmp1 = c1 - '0', tmp2 = c2 - '0';
result[1] = (tmp1*tmp2 % 10) + '0';
result[0] = (tmp1*tmp2 - (tmp1*tmp2 % 10)) / 10 + '0';
char* r = get_string_trailing("0", result);
delete[] result;
result = r;
return result;
}
int get_length_trailing(const char * ignore,const char * table)
{
int length = 0;
int i = 0;
bool flag = true;
while (i < strlen(table)) {
if (flag) {
for (int j = 0; j < strlen(ignore); j++)
if (table[i] == ignore[j])goto BREAKPOINT;
}
flag = false;
length++;
BREAKPOINT:i++;
}
return length;
}
char * get_string_trailing(const char * ignore,const char * table)
{
int result_length = get_length_trailing(ignore, table);
char* result = new char[result_length + 1];
int counter = 0;
int i = 0;
bool flag = true;
while (i < strlen(table)) {
if (flag)
for (int j = 0; j < strlen(ignore); j++)
if (table[i] == ignore[j])goto BREAKPOINT;
flag = false;
result[counter] = table[i];
counter++;
BREAKPOINT:i++;
}
result[result_length] = 0;
if (result_length == 0) return copy("0");
return result;
}
char * shove(const char * table1, int index)
{
char*result = "0";
int length = strlen(table1) + index + 1;
result = new char[length];
result[length - 1] = 0;
if (index > 0) {
for (int i = 0; i < strlen(table1); i++)
result[i] = table1[i];
for (int i = 0; i < index; i++)
result[strlen(table1) + i] = '0';
}
else {
for (int i = 0; i < strlen(result); i++)
result[i] = table1[i];
}
char* t = get_string_trailing("0", result);
delete[] result;
result = t;
return result;
}
There is at least a memory leak in get_string_trailing: if result_length is zero, you return a copy and do not delete result. There are also confusions between "string" (such as "0") and 'char': with double quotes, the terminating string character (\0) is automatically appended to the string, while simple quotes only define a character. So "0" is made of 2 char in memory and can not be stored in a pointer (undefined behavior, overwriting memory).
To summarize: here you are writing C, not learning C++. If you have to deal with C strings (you are writing a low-level pilot in C or your professor still doesn't understand that C and C++ are different languages), at least use the functions of the string.h (in C) / cstring (in C++) header to minimize the chance of memory leak or undefined behavior. If you do not have to use C strings, use std::string and the string manipulation tools of the standard library. Your work will be much easier, and your code much less vulnerable to bugs:
#include <string>
#include <iostream>
using namespace std;
int main()
{
string tab1("11")
string t("2") // never use the single quotes for a string
cout << stoi(tab1) * stoi(t) << endl;
return;
}
That's it!
In a kernel module (2.6.32-358.el6.x86_64) I'd like to print out all the physical addresses, which are mapped into a process' virtual memory. Given task->mm, I attempt to traverse the process' struct page's as follows:
int i, j, k, l;
for (i = 0; i < PTRS_PER_PGD; ++i)
{
pgd_t *pgd = mm->pgd + i;
if (pgd_none(*pgd) || pgd_bad(*pgd))
continue;
for (j = 0; j < PTRS_PER_PUD; ++j)
{
pud_t *pud = (pud_t *)pgd_page_vaddr(*pgd) + j;
if (pud_none(*pud) || pud_bad(*pud))
continue;
for (k = 0; k < PTRS_PER_PMD; ++k)
{
pmd_t *pmd = (pmd_t *)pud_page_vaddr(*pud) + k;
if (pmd_none(*pmd) || pmd_bad(*pmd))
continue;
for (l = 0; l < PTRS_PER_PTE; ++l)
{
pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd) + l;
if (!pte || pte_none(*pte))
continue;
struct page *p = pte_page(*pte);
unsigned long phys = page_to_phys(p);
printk(KERN_NOTICE "addr %lx", phys);
}
}
}
}
The output looks a bit strange (in particular, there are serieses of identical addresses), so I'd like to ask whether the above is correct, in theory.
A better approach would be to traverse process' VMAs and translate each VMA to physical pages/addresses by means of the page directory:
struct vm_area_struct *vma = 0;
unsigned long vpage;
if (task->mm && task->mm->mmap)
for (vma = task->mm->mmap; vma; vma = vma->vm_next)
for (vpage = vma->vm_start; vpage < vma->vm_end; vpage += PAGE_SIZE)
unsigned long phys = virt2phys(task->mm, vpage);
//...
Where virt2phys would look like this:
//...
pgd_t *pgd = pgd_offset(mm, virt);
if (pgd_none(*pgd) || pgd_bad(*pgd))
return 0;
pud = pud_offset(pgd, virt);
if (pud_none(*pud) || pud_bad(*pud))
return 0;
pmd = pmd_offset(pud, virt);
if (pmd_none(*pmd) || pmd_bad(*pmd))
return 0;
if (!(pte = pte_offset_map(pmd, virt)))
return 0;
if (!(page = pte_page(*pte)))
return 0;
phys = page_to_phys(page);
pte_unmap(pte);
return phys;