level 1
FranciscoLYK
楼主
#define LEFT_ROW 1000
#define LEFT_COL 2400
#define RIGHT_ROW 2400
#define RIGHT_COL 1000
#define RES_ROW 1000
#define RES_COL 1000
#define LENGTH 32
__global__ void multiMatrix(int** left, int** right, int** result) {
int x_pos = blockIdx.x * blockDim.x + threadIdx.x;
int y_pos = blockIdx.y * blockDim.y + threadIdx.y;
if (x_pos < LEFT_COL && y_pos < RIGHT_ROW) {
for (int k = 0; k < LEFT_ROW; k++) {
result[x_pos][y_pos] += left[x_pos][k] * right[k][y_pos];
}
}
}
int main(void)
{
int** arr_left;
int** arr_right;
int** cpu_result;
int** gpu_result;
cudaMallocManaged((void**)&arr_left, sizeof(int) * LEFT_ROW);
cudaMallocManaged((void**)&arr_right, sizeof(int) * RIGHT_ROW);
cudaMallocManaged((void**)&gpu_result, sizeof(int) * RES_ROW);
for (int i = 0; i < LEFT_ROW; i++) {
cudaMallocManaged((void**)&arr_left[i], sizeof(int) * LEFT_COL);
}
for (int i = 0; i < RIGHT_ROW; i++) {
cudaMallocManaged((void**)&arr_right[i], sizeof(int) * RIGHT_COL);
}
for (int i = 0; i < RES_ROW; i++) {
cudaMallocManaged((void**)&gpu_result[i], sizeof(int) * RES_COL);
}
for (int i = 0; i < LEFT_ROW; i++) {
for (int j = 0; j < LEFT_COL; j++) {
arr_left[i][j] = 1;
}
}
for (int i = 0; i < RIGHT_ROW; i++) {
for (int j = 0; j < RIGHT_COL; j++) {
arr_right[i][j] = 1;
}
}
for (int i = 0; i < RES_ROW; i++) {
for (int j = 0; j < RES_COL; j++) {
gpu_result[i][j] = 0;
}
}
dim3 blockSz(LENGTH, LENGTH);
dim3 gridSz((LEFT_COL + LENGTH - 1) / LENGTH, (RIGHT_ROW + LENGTH - 1) / LENGTH);
multiMatrix << <gridSz, blockSz >> > (arr_left, arr_right, gpu_result);
cudaDeviceSynchronize();
//这里打印运行报错抛异常了
printf("%d\n", gpu_result);
for (int i = 0; i < LEFT_ROW; i++) {
cudaFree(arr_left+i);
}
for (int i = 0; i < RIGHT_ROW; i++) {
cudaFree(arr_right+i);
}
for (int i = 0; i < RES_ROW; i++) {
cudaFree(gpu_result+i);
}
cudaFree(arr_left);
cudaFree(arr_right);
cudaFree(gpu_result);
return 0;
}
想问下为啥printf那里会抛异常阿,我把printf核函数运行前也是没问题的,求教
2024年05月05日 14点05分
1
#define LEFT_COL 2400
#define RIGHT_ROW 2400
#define RIGHT_COL 1000
#define RES_ROW 1000
#define RES_COL 1000
#define LENGTH 32
__global__ void multiMatrix(int** left, int** right, int** result) {
int x_pos = blockIdx.x * blockDim.x + threadIdx.x;
int y_pos = blockIdx.y * blockDim.y + threadIdx.y;
if (x_pos < LEFT_COL && y_pos < RIGHT_ROW) {
for (int k = 0; k < LEFT_ROW; k++) {
result[x_pos][y_pos] += left[x_pos][k] * right[k][y_pos];
}
}
}
int main(void)
{
int** arr_left;
int** arr_right;
int** cpu_result;
int** gpu_result;
cudaMallocManaged((void**)&arr_left, sizeof(int) * LEFT_ROW);
cudaMallocManaged((void**)&arr_right, sizeof(int) * RIGHT_ROW);
cudaMallocManaged((void**)&gpu_result, sizeof(int) * RES_ROW);
for (int i = 0; i < LEFT_ROW; i++) {
cudaMallocManaged((void**)&arr_left[i], sizeof(int) * LEFT_COL);
}
for (int i = 0; i < RIGHT_ROW; i++) {
cudaMallocManaged((void**)&arr_right[i], sizeof(int) * RIGHT_COL);
}
for (int i = 0; i < RES_ROW; i++) {
cudaMallocManaged((void**)&gpu_result[i], sizeof(int) * RES_COL);
}
for (int i = 0; i < LEFT_ROW; i++) {
for (int j = 0; j < LEFT_COL; j++) {
arr_left[i][j] = 1;
}
}
for (int i = 0; i < RIGHT_ROW; i++) {
for (int j = 0; j < RIGHT_COL; j++) {
arr_right[i][j] = 1;
}
}
for (int i = 0; i < RES_ROW; i++) {
for (int j = 0; j < RES_COL; j++) {
gpu_result[i][j] = 0;
}
}
dim3 blockSz(LENGTH, LENGTH);
dim3 gridSz((LEFT_COL + LENGTH - 1) / LENGTH, (RIGHT_ROW + LENGTH - 1) / LENGTH);
multiMatrix << <gridSz, blockSz >> > (arr_left, arr_right, gpu_result);
cudaDeviceSynchronize();
//这里打印运行报错抛异常了
printf("%d\n", gpu_result);
for (int i = 0; i < LEFT_ROW; i++) {
cudaFree(arr_left+i);
}
for (int i = 0; i < RIGHT_ROW; i++) {
cudaFree(arr_right+i);
}
for (int i = 0; i < RES_ROW; i++) {
cudaFree(gpu_result+i);
}
cudaFree(arr_left);
cudaFree(arr_right);
cudaFree(gpu_result);
return 0;
}
想问下为啥printf那里会抛异常阿,我把printf核函数运行前也是没问题的,求教