Я написал программу параллелизма, основанную на алгоритме умножения Штрассена с использованием указателей.
эта программа возвращает результат умножения двух матриц одинакового размера.
когда размер равен 256, программа заполняет около 1 ГБ оперативной памяти, а когда он составляет 512 оперативных памяти, \ y заполняются, и мои окна не работают, то я должен перезапустить.
Я заменял целые указатели на векторы, после чего невероятно уменьшилось использование памяти. Для размера 1024, всего 80 МБ памяти.
Я немного знаю о векторе, который сначала статически связан, затем, если нам нужно больше места во время выполнения, он будет динамически ограничен.
Почему указателям нужно больше места, чем векторам?
это мой первый код:
#include <iostream>
#include<cilk\cilk.h>
#include <cilk/cilk_api.h>
#include<conio.h>
#include<ctime>
#include<string>
#include<random>
#include <Windows.h>
#include <Psapi.h>
#include<vector>using namespace std;
int ** matrix_1;
int ** matrix_2;
#define number_thread:4;
void show(string name, int n, int **show)
{
cout << " matrix " << name << " :" << endl;
for (int i = 0; i < n; i++)
{
for (int j = 0; j < n; j++)
cout << show[i][j] << " ";
cout << endl;
}
}int ** strassen(int n, int **matrix_a, int ** matrix_b)
{
int ** A11;
int ** A12;
int ** A21;
int ** A22;
int ** B11;
int ** B12;
int ** B21;
int ** B22;
int ** result;int **m1, **m2, **m3, ** m4, ** m5, ** m6, ** m7, ** m8;
A11 = new int*[n / 2];
A12 = new int*[n / 2];
A21 = new int*[n / 2];
A22 = new int*[n / 2];
B11 = new int*[n / 2];
B12 = new int*[n / 2];
B21 = new int*[n / 2];
B22 = new int*[n / 2];result = new int *[n];
m1 = new int*[n / 2];
m2 = new int*[n / 2];
m3 = new int*[n / 2];
m4 = new int*[n / 2];
m5 = new int*[n / 2];
m6 = new int*[n / 2];
m7 = new int*[n / 2];
m8 = new int*[n / 2];
cilk_for(int i = 0; i < n / 2; i++)
{
//cout << " value i : " << i << endl;
A11[i] = new int[n / 2];
A12[i] = new int[n / 2];
A21[i] = new int[n / 2];
A22[i] = new int[n / 2];
B11[i] = new int[n / 2];
B12[i] = new int[n / 2];
B21[i] = new int[n / 2];
B22[i] = new int[n / 2];
m1[i] = new int[n / 2];
m2[i] = new int[n / 2];
m3[i] = new int[n / 2];
m4[i] = new int[n / 2];
m5[i] = new int[n / 2];
m6[i] = new int[n / 2];
m7[i] = new int[n / 2];
m8[i] = new int[n / 2];
}
cilk_for(int i = 0; i < n; i++) // matrix result
result[i] = new int[n];if (n == 2)
{
result[0][0] = matrix_a[0][0] * matrix_b[0][0] + matrix_a[0][1] * matrix_b[1][0];
result[0][1] = matrix_a[0][0] * matrix_b[0][1] + matrix_a[0][1] * matrix_b[1][1];
result[1][0] = matrix_a[1][0] * matrix_b[0][0] + matrix_a[1][1] * matrix_b[1][0];
result[1][1] = matrix_a[1][0] * matrix_b[0][1] + matrix_a[1][1] * matrix_b[1][1];
return result;
}
// for (int i = 0; i < n;i++)
cilk_for(int i = 0; i < (n / 2); i++)
{
for (int j = 0; j < (n / 2); j++)
{
A11[i][j] = matrix_a[i][j];
B11[i][j] = matrix_b[i][j];
A12[i][j] = matrix_a[i][j + n / 2];
B12[i][j] = matrix_b[i][j + n / 2];
A21[i][j] = matrix_a[i + n / 2][j];
B21[i][j] = matrix_b[i + n / 2][j];
A22[i][j] = matrix_a[i + n / 2][j + n / 2];
B22[i][j] = matrix_b[i + n / 2][j + n / 2];}
}
/*
show("A11", n / 2, A11);
show("A12", n / 2, A12);
show("A21", n / 2, A21);
show("A22", n / 2, A22);
show("B11", n / 2, B11);
show("B12", n / 2, B12);
show("B21", n / 2, B21);
show("B22", n / 2, B22);*/
// Run By eight_thread
m1 = cilk_spawn(strassen(n / 2, A11, B11));// A11B11
m2 = cilk_spawn(strassen(n / 2, A12, B21));// A12B21
m3 = cilk_spawn(strassen(n / 2, A11, B12));// A11B12
m4 = cilk_spawn(strassen(n / 2, A12, B22));// A12B22
m5 = cilk_spawn(strassen(n / 2, A21, B11));// A21B11
m6 = cilk_spawn(strassen(n / 2, A22, B21));// A22B21
m7 = cilk_spawn(strassen(n / 2, A21, B12));// A21B12
m8 = cilk_spawn(strassen(n / 2, A22, B22));// A22B22cilk_sync;
/*
cout << "****************************\n";
cout << "*********** before add :\n";
show("m1", n / 2, m1);
show("m2", n / 2, m2);
show("m3", n / 2, m3);
show("m4", n / 2, m4);
show("m5", n / 2, m5);
show("m6", n / 2, m6);
show("m7", n / 2, m7);
show("m8", n / 2, m8);*/cilk_for(int i = 0; i < n / 2; i++)
for (int j = 0; j < n / 2; j++)
{
m1[i][j] = m1[i][j] + m2[i][j];
m3[i][j] = m3[i][j] + m4[i][j];
m5[i][j] = m5[i][j] + m6[i][j];
m7[i][j] = m7[i][j] + m8[i][j];
}
/*cout << "after adding hello \n";
show("m1", n / 2, m1);
show("m3", n / 2, m3);
show("m5", n / 2, m5);
show("m7", n / 2, m7);*/cilk_for(int i = 0; i < n; i++)
{
for (int j = 0; j < n; j++)
{
if (i < n / 2 && j < n / 2)
{
result[i][j] = m1[i][j];
}
else if (i < n / 2 && j >= n / 2)
{
result[i][j] = m3[i][j - n / 2];
}
else if (i >= n / 2 && j < n / 2)
{
result[i][j] = m5[i - n / 2][j];
}
else if (i >= n / 2 && j >= n / 2)
{
result[i][j] = m7[i - n / 2][j - n / 2];
}
}
}
/*
cilk_for(int i = 0; i < n / 2; i++)
{
for (int j = 0; j < n / 2; j++)
{
delete A11[i][j];
delete A12[i][j];
delete A21[i][j];
delete A22[i][j];
delete B11[i][j];
delete B12[i][j];
delete B21[i][j];
delete B22[i][j];delete m1[i][j];
delete m2[i][j];
delete m3[i][j];
delete m4[i][j];
delete m5[i][j];
delete m6[i][j];
delete m7[i][j];
delete m8[i][j];*/
/* }
delete[] A11[i];
delete[] A12[i];
delete[] A21[i];
delete[] A22[i];
delete[] B11[i];
delete[] B12[i];
delete[] B21[i];
delete[] B22[i];delete[] m1[i];
delete[] m2[i];
delete[] m3[i];
delete[] m4[i];
delete[] m5[i];
delete[] m6[i];
delete[] m7[i];
delete[] m8[i];
}*/delete[] A11;
delete[] A12;
delete[] A21;
delete[] A22;
delete[] B11;
delete[] B12;
delete[] B21;
delete[] B22;delete[] m1;
delete[] m2;
delete[] m3;
delete[] m4;
delete[] m5;
delete[] m6;
delete[] m7;
delete[] m8;
return result;
}int main()
{
int size;
freopen("in.txt", "r", stdin);
freopen("out.txt", "w", stdout);__cilkrts_set_param("nworkers", "4");
//cout << " please Enter the size OF ur matrix /n";
cin >> size;
matrix_1 = new int*[size];
matrix_2 = new int*[size];
if (size % 2 == 0)
{
//instialize matrix1
//cout << "matrix_1 :" << endl;
for (int i = 0; i < size; i++)
{
matrix_1[i] = new int[size];
for (int j = 0; j < size; j++)
{
matrix_1[i][j] = rand() % 3;
//cin >> matrix_1[i][j];
//cout << matrix_1[i][j] << " ";
}
//cout << endl;
}
//instialize matrix2
//cout << "matrix2_is :\n";
for (int i = 0; i < size; i++)
{
matrix_2[i] = new int[size];
for (int j = 0; j < size; j++)
{
matrix_2[i][j] = rand() % 3;
//cout << matrix_2[i][j]<<" ";
//cin >> matrix_2[i][j];
}
// cout << endl;
}
clock_t begin = clock();matrix_2 = strassen(size, matrix_1, matrix_2);
clock_t end = clock();
double elapsed_secs = double(end - begin) / CLOCKS_PER_SEC;
cout << "*******\ntime is : " << elapsed_secs << endl;
//answer:
/* for (int i = 0; i < size; i++)
{
for (int j = 0; j < size; j++)
{
cout<< matrix_2[i][j]<<" ";
}
cout << endl;
}*/}
else
cout << " we couldnt use strasen ";
cout << "\nTotal Virtual Memory:" << endl;
MEMORYSTATUSEX memInfo;
memInfo.dwLength = sizeof(MEMORYSTATUSEX);
GlobalMemoryStatusEx(&memInfo);
DWORDLONG totalVirtualMem = memInfo.ullTotalPageFile;
printf("%u", totalVirtualMem);
cout << "\nVirtual Memory currently used:" << endl;
// MEMORYSTATUSEX memInfo;
memInfo.dwLength = sizeof(MEMORYSTATUSEX);
GlobalMemoryStatusEx(&memInfo);
DWORDLONG virtualMemUsed = memInfo.ullTotalPageFile - memInfo.ullAvailPageFile;
printf("%u", virtualMemUsed);cout << "\nVirtual Memory currently used by current process:" << endl;
PROCESS_MEMORY_COUNTERS_EX pmc;
GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&pmc, sizeof(pmc));
SIZE_T virtualMemUsedByMe = pmc.PrivateUsage;
printf("%u", virtualMemUsedByMe);
cout << "\nPhysical Memory currently used: " << endl;
//MEMORYSTATUSEX memInfo;
memInfo.dwLength = sizeof(MEMORYSTATUSEX);
GlobalMemoryStatusEx(&memInfo);
DWORDLONG physMemUsed = memInfo.ullTotalPhys - memInfo.ullAvailPhys;
printf("%u", physMemUsed);
cout << endl;
cout << "\nPhysical Memory currently used by current process : " << endl;
// PROCESS_MEMORY_COUNTERS_EX pmc;
GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&pmc, sizeof(pmc));
SIZE_T physMemUsedByMe = pmc.WorkingSetSize;
printf("%u", physMemUsedByMe);
//cout << "memory usage :"<<double(totalVirtualMem) << endl;//_getch();
return 0;
}
Я заменяю весь массив указателей векторами:
#include <iostream>
#include<cilk\cilk.h>
#include <cilk/cilk_api.h>
#include<conio.h>
#include<ctime>
#include<string>
#include<random>
#include <Windows.h>
#include <Psapi.h>
#include<vector>using namespace std;
vector<vector<int> > matrix_1, matrix_2;
//int matrix_1;
//int ** matrix_2;
#define number_thread:4;
void show(string name ,int n, int **show)
{
cout << " matrix " << name<<" :" << endl;
for (int i = 0; i < n; i++)
{
for (int j = 0; j < n; j++)
cout << show[i][j] << " ";
cout << endl;
}
}vector<vector<int>> strassen(int n, vector<vector<int>> matrix_a, vector<vector<int>> matrix_b)
{
vector<vector<int>> A11;
vector<vector<int>> A12;
vector<vector<int>> A21;
vector<vector<int>> A22;
vector<vector<int>> B11;
vector<vector<int>> B12;
vector<vector<int>> B21;
vector<vector<int>> B22;
vector<vector<int>> result;vector<int> help;vector<vector<int>> m1, m2, m3, m4, m5, m6, m7, m8;help.clear();
for (int j = 0; j < n / 2; j++)
{
help.push_back(2);
}for(int i = 0; i < n / 2; i++)
{
A11.push_back(help);
A12.push_back(help);
A21.push_back(help);
A22.push_back(help);
B11.push_back(help);
B12.push_back(help);
B21.push_back(help);
B22.push_back(help);m1.push_back(help);
m2.push_back(help);
m3.push_back(help);
m4.push_back(help);
m5.push_back(help);
m6.push_back(help);
m7.push_back(help);
m8.push_back(help);
}for (int j = 0; j < n / 2; j++)
help.push_back(2);
for(int i = 0; i < n; i++)
{
result.push_back(help);
}
if (n == 2)
{
result[0][0] = matrix_a[0][0] * matrix_b[0][0] + matrix_a[0][1] * matrix_b[1][0];
result[0][1] = matrix_a[0][0] * matrix_b[0][1] + matrix_a[0][1] * matrix_b[1][1];
result[1][0] = matrix_a[1][0] * matrix_b[0][0] + matrix_a[1][1] * matrix_b[1][0];
result[1][1] = matrix_a[1][0] * matrix_b[0][1] + matrix_a[1][1] * matrix_b[1][1];
return result;
}
// for (int i = 0; i < n;i++)
for(int i = 0; i < (n / 2); i++)
{
for(int j = 0; j <( n / 2); j++)
{
A11[i][j] = matrix_a[i][j];
B11[i][j] = matrix_b[i][j];
A12[i][j] = matrix_a[i][j + n / 2];
B12[i][j] = matrix_b[i][j + n / 2];
A21[i][j] = matrix_a[i + n / 2][j];
B21[i][j] = matrix_b[i + n / 2][j];
A22[i][j] = matrix_a[i + n / 2][j + n / 2];
B22[i][j] = matrix_b[i + n / 2][j + n / 2];}
}
/*
show("A11", n / 2, A11);
show("A12", n / 2, A12);
show("A21", n / 2, A21);
show("A22", n / 2, A22);
show("B11", n / 2, B11);
show("B12", n / 2, B12);
show("B21", n / 2, B21);
show("B22", n / 2, B22);*/
// Run By eight_thread
m1 = cilk_spawn(strassen(n / 2, A11, B11));// A11B11
m2 = cilk_spawn(strassen(n / 2, A12, B21));// A12B21
m3 = cilk_spawn(strassen(n / 2, A11, B12));// A11B12
m4 = cilk_spawn(strassen(n / 2, A12, B22));// A12B22
m5 = cilk_spawn(strassen(n / 2, A21, B11));// A21B11
m6 = cilk_spawn(strassen(n / 2, A22, B21));// A22B21
m7 = cilk_spawn(strassen(n / 2, A21, B12));// A21B12
m8 = cilk_spawn(strassen(n / 2, A22, B22));// A22B22cilk_sync;
/*
cout << "****************************\n";
cout << "*********** before add :\n";
show("m1", n / 2, m1);
show("m2", n / 2, m2);
show
("m3", n / 2, m3);
show("m4", n / 2, m4);
show("m5", n / 2, m5);
show("m6", n / 2, m6);
show("m7", n / 2, m7);
show("m8", n / 2, m8);*/for(int i = 0; i < n / 2; i++)
for (int j = 0; j < n / 2; j++)
{
m1[i][j] = m1[i][j] + m2[i][j];
m3[i][j] = m3[i][j] + m4[i][j];
m5[i][j] = m5[i][j] + m6[i][j];
m7[i][j] = m7[i][j] + m8[i][j];
}
/*cout << "after adding hello \n";
show("m1", n / 2, m1);
show("m3", n / 2, m3);
show("m5", n / 2, m5);
show("m7", n / 2, m7);*/for(int i = 0; i < n ; i++)
{
for(int j = 0; j < n ; j++)
{
if (i < n / 2 && j < n / 2)
{
result[i][j] = m1[i][j];
}
else if (i < n / 2 && j >= n / 2)
{
result[i][j] = m3[i][j - n / 2];
}
else if (i >= n / 2 && j < n / 2)
{
result[i][j] = m5[i - n / 2][j];
}
else if (i >= n / 2 && j >= n / 2)
{
result[i][j] = m7[i - n / 2][j - n / 2];
}
}
}/*
cilk_for(int i = 0; i < n / 2; i++)
{
for (int j = 0; j < n / 2; j++)
{
delete A11[i][j];
delete A12[i][j];
delete A21[i][j];
delete A22[i][j];
delete B11[i][j];
delete B12[i][j];
delete B21[i][j];
delete B22[i][j];delete m1[i][j];
delete m2[i][j];
delete m3[i][j];
delete m4[i][j];
delete m5[i][j];
delete m6[i][j];
delete m7[i][j];
delete m8[i][j];*/
/* }
delete[] A11[i];
delete[] A12[i];
delete[] A21[i];
delete[] A22[i];
delete[] B11[i];
delete[] B12[i];
delete[] B21[i];
delete[] B22[i];delete[] m1[i];
delete[] m2[i];
delete[] m3[i];
delete[] m4[i];
delete[] m5[i];
delete[] m6[i];
delete[] m7[i];
delete[] m8[i];
}*//* for (int i = 0; i < n; i++)
{
for (int j = 0; j < n; j++)
{
cout << result[i][j] << " ";
}
cout << endl;
}*/
return result;
}int main()
{
int size;
freopen("in.txt","r",stdin);
freopen("out.txt", "w", stdout);__cilkrts_set_param("nworkers", "1");
//cout << " please Enter the size OF ur matrix /n";
cin >> size;
vector<int> inner;
if (size % 2 == 0)
{
//instialize matrix1
cout << "matrix_1 :" << endl;
for (int i = 0; i < size; i++)
{
inner.clear();
for (int j = 0; j < size; j++)
{
inner.push_back(rand()%3);
//cin >> matrix_1[i][j];
cout << inner[j]<<" ";
}
cout << endl;
matrix_1.push_back(inner);
}
//instialize matrix2
cout << "matrix2_is :\n";
inner.clear();
for (int i = 0; i < size; i++)
{
inner.clear();
//matrix_2[i] = new int[size];
for (int j = 0; j < size; j++)
{
inner.push_back(rand()%3);
cout << inner[j]<<" ";
//cin >> matrix_2[i][j];
}
cout << endl;
matrix_2.push_back(inner);
}
clock_t begin = clock();matrix_2 = strassen(size, matrix_1, matrix_2);
clock_t end = clock();
double elapsed_secs = double(end - begin) / CLOCKS_PER_SEC;
cout << "*******\ntime is : " << elapsed_secs << endl;
//answer:
cout << "answerrr :" << endl;
for (int i = 0; i < size; i++)
{
for (int j = 0; j < size; j++)
{
cout<< matrix_2[i][j]<<" ";
}
cout << endl;
}}
else
cout << " we couldnt use strasen ";
cout << "\nTotal Virtual Memory:" << endl;
MEMORYSTATUSEX memInfo;
memInfo.dwLength = sizeof(MEMORYSTATUSEX);
GlobalMemoryStatusEx(&memInfo);
DWORDLONG totalVirtualMem = memInfo.ullTotalPageFile;
printf("%u", totalVirtualMem);
cout << "\nVirtual Memory currently used:" << endl;
// MEMORYSTATUSEX memInfo;
memInfo.dwLength = sizeof(MEMORYSTATUSEX);
GlobalMemoryStatusEx(&memInfo);
DWORDLONG virtualMemUsed = memInfo.ullTotalPageFile - memInfo.ullAvailPageFile;
printf("%u", virtualMemUsed);cout << "\nVirtual Memory currently used by current process:" << endl;
PROCESS_MEMORY_COUNTERS_EX pmc;
GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&pmc, sizeof(pmc));
SIZE_T virtualMemUsedByMe = pmc.PrivateUsage;
printf("%u", virtualMemUsedByMe);
cout << "\nPhysical Memory currently used: " << endl;
//MEMORYSTATUSEX memInfo;
memInfo.dwLength = sizeof(MEMORYSTATUSEX);
GlobalMemoryStatusEx(&memInfo);
DWORDLONG physMemUsed = memInfo.ullTotalPhys - memInfo.ullAvailPhys;
printf("%u", physMemUsed);
cout << endl;
cout << "\nPhysical Memory currently used by current process : " << endl;
// PROCESS_MEMORY_COUNTERS_EX pmc;
GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&pmc, sizeof(pmc));
SIZE_T physMemUsedByMe = pmc.WorkingSetSize;
printf("%u", physMemUsedByMe);
//cout << "memory usage :"<<double(totalVirtualMem) << endl;//_getch();
return 0;
}
Я собираюсь догадаться, что это распределение вопрос. Выделение из OS
кажется, довольно много времени от того, что я видел.
Просто предположение, но, возможно, std::vector
по умолчанию распределитель захватывает гораздо больший непрерывный блок памяти из OS
и опирается на то, чтобы удовлетворить меньшие вектор распределение?
Этот ответ может дать некоторое представление:
https://stackoverflow.com/a/29659791/3807729
Мне удалось сократить время, необходимое для запуска тестовой программы, просто выделив, а затем освободив большой std::vector
перед запуском операции синхронизации.
Я предполагаю, что C++
система времени выполнения (в некоторых реализациях) может удерживать память, полученную от OS
даже после того, как он был освобожден, потому что захватывая небольшие куски из OS
каждый раз намного дороже.
На ум приходят две вероятные причины: