How to optimize and speed up matrix multiplication in C++?


Problem description

This is an optimized implementation of matrix multiplication; the routine performs the operation C := C + A * B, where A, B, and C are n-by-n matrices stored in column-major format. On exit, A and B retain their input values.

void matmul_optimized(int n, int *A, int *B, int *C)
{
    // The matrices are treated bitwise: over GF(2), multiplication of terms
    // is the & operator and (carry-free) addition is the ^ operator.
    int i, j, k;
    int cij;
    for (i = 0; i < n; ++i) {
        for (j = 0; j < n; ++j) {
            cij = C[i + j * n]; // accumulate into a scalar so the inner loop
                                // does no repeated index arithmetic on C
            for (k = 0; k < n; ++k) {
                cij ^= A[i + k * n] & B[k + j * n]; // product via &, sum via ^
            }
            C[i + j * n] = cij; // write the final result back into C
        }
    }
}

How do I speed up this matrix multiplication further, based on the above function/method?

This function has been tested on matrices up to 2048 by 2048.

The function matmul_optimized is exercised alongside a reference matmul in the driver below.

#include <stdio.h>
#include <stdlib.h>

#include "cpucycles.c"
#include "helper_functions.c"
#include "matmul_reference.c"
#include "matmul_optimized.c"

int main()
{
    int i, j;
    int n = 1024; // Number of rows or columns in the square matrices
    int *A, *B;   // Input matrices
    int *C1, *C2; // Output matrices from the reference and optimized implementations

    // Performance and correctness measurement declarations
    long int CLOCK_start, CLOCK_end, CLOCK_total, CLOCK_ref, CLOCK_opt;
    long int COUNTER, REPEAT = 5;
    int difference;
    float speedup;

    // Allocate memory for the matrices
    A = malloc(n * n * sizeof(int));
    B = malloc(n * n * sizeof(int));
    C1 = malloc(n * n * sizeof(int));
    C2 = malloc(n * n * sizeof(int));

    // Fill bits in A, B, C1
    fill(A, n * n);
    fill(B, n * n);
    fill(C1, n * n);

    // Initialize C2 = C1
    for (i = 0; i < n; i++)
        for (j = 0; j < n; j++)
            C2[i * n + j] = C1[i * n + j];

    // Measure performance of the reference implementation
    CLOCK_total = 0;
    for (COUNTER = 0; COUNTER < REPEAT; COUNTER++)
    {
        CLOCK_start = cpucycles();
        matmul_reference(n, A, B, C1);
        CLOCK_end = cpucycles();
        CLOCK_total = CLOCK_total + CLOCK_end - CLOCK_start;
    }
    CLOCK_ref = CLOCK_total / REPEAT;
    printf("n=%d Avg cycle count for reference implementation = %ld\n", n, CLOCK_ref);

    // Measure performance of the optimized implementation
    CLOCK_total = 0;
    for (COUNTER = 0; COUNTER < REPEAT; COUNTER++)
    {
        CLOCK_start = cpucycles();
        matmul_optimized(n, A, B, C2);
        CLOCK_end = cpucycles();
        CLOCK_total = CLOCK_total + CLOCK_end - CLOCK_start;
    }
    CLOCK_opt = CLOCK_total / REPEAT;
    printf("n=%d Avg cycle count for optimized implementation = %ld\n", n, CLOCK_opt);

    speedup = (float)CLOCK_ref / (float)CLOCK_opt;

    // Check correctness by comparing C1 and C2
    difference = 0;
    for (i = 0; i < n; i++)
        for (j = 0; j < n; j++)
            if (C1[i * n + j] != C2[i * n + j])
                difference++; // count mismatches; a signed sum of differences could cancel to zero

    if (difference == 0)
        printf("Speedup factor = %.2f\n", speedup);
    if (difference != 0)
        printf("Reference and optimized implementations do not match\n");

    //print(C2, n);

    free(A);
    free(B);
    free(C1);
    free(C2);
    return 0;
}

Answer

Optimizing matrix-matrix multiplication requires careful attention to a number of issues:

  • First, you need to be able to use vector instructions. Only vector instructions can access the parallelism inherent in the architecture. So either your compiler needs to be able to map to vector instructions automatically, or you have to do so by hand, for example by calling the vector intrinsic library for AVX-2 instructions (for x86 architectures).
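For this particular XOR/AND kernel, one way to help the compiler vectorize (my sketch, not part of the original answer; the function name is hypothetical) is to interchange the loops into j-k-i order, so the innermost loop runs over i, which is unit-stride in the column-major A and C:

```c
/* Hypothetical loop-interchanged variant. With j-k-i ordering the innermost
 * loop touches A and C at unit stride (column-major storage), a pattern that
 * compilers like gcc/clang at -O3 can typically map to SIMD and/xor ops. */
void matmul_jki(int n, const int *A, const int *B, int *C)
{
    for (int j = 0; j < n; ++j) {
        for (int k = 0; k < n; ++k) {
            int b = B[k + j * n];           /* invariant in the inner loop */
            for (int i = 0; i < n; ++i)     /* unit stride over A and C    */
                C[i + j * n] ^= A[i + k * n] & b;
        }
    }
}
```

Alternatively, the same inner loop can be written by hand with AVX2 intrinsics (`_mm256_loadu_si256`, `_mm256_and_si256`, `_mm256_xor_si256`), at the cost of portability.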

Next, you need to pay careful attention to the memory hierarchy. If you don't, your performance can easily drop to less than 5% of peak.
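As an illustration of that point (my sketch, not the answer's code), the standard remedy is cache blocking: partition the loops into tiles so a tile of B and a strip of C are reused while still resident in cache. The function name is hypothetical and the tile size is only a placeholder to be tuned per machine:

```c
/* Hypothetical cache-blocked version of the XOR/AND multiply.
 * BLK is a tuning knob; 64 is a guess, tune it against your L1/L2 sizes.
 * Assumes column-major storage, as in the question. */
enum { BLK = 64 };

static int min_int(int a, int b) { return a < b ? a : b; }

void matmul_blocked(int n, const int *A, const int *B, int *C)
{
    for (int jj = 0; jj < n; jj += BLK)           /* tile the j loop */
        for (int kk = 0; kk < n; kk += BLK)       /* tile the k loop */
            for (int j = jj; j < min_int(jj + BLK, n); ++j)
                for (int k = kk; k < min_int(kk + BLK, n); ++k) {
                    int b = B[k + j * n];         /* reused from cache */
                    for (int i = 0; i < n; ++i)
                        C[i + j * n] ^= A[i + k * n] & b;
                }
}
```

Each (j, k) pair is still visited exactly once, and XOR accumulation is order-independent, so the result is identical to the untiled loop.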

Once you do this right, you will hopefully have broken the computation up into computational chunks small enough that you can also parallelize it via OpenMP or pthreads.
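A sketch of that last step (my illustration, with a hypothetical function name): because each column of C is computed independently, the outer j loop can be parallelized with a single OpenMP pragma. Compile with -fopenmp; without it the pragma is ignored and the code simply runs serially:

```c
/* Hypothetical OpenMP parallelization over columns of C. Each thread owns a
 * disjoint set of columns, so the loop body needs no synchronization. */
void matmul_parallel(int n, const int *A, const int *B, int *C)
{
    #pragma omp parallel for schedule(static)
    for (int j = 0; j < n; ++j)
        for (int k = 0; k < n; ++k) {
            int b = B[k + j * n];
            for (int i = 0; i < n; ++i)
                C[i + j * n] ^= A[i + k * n] & b;
        }
}
```

In a tuned implementation you would parallelize over the blocked loops rather than this simple form, so each thread works on cache-sized tiles.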

A document that carefully steps through what is required can be found at http://www.cs.utexas.edu/users/flame/laff/pfhp/LAFF-On-PfHP.html. (It is very much a work in progress.) At the end of it all, you will have an implementation that gets close to the performance attained by high-performance libraries like Intel's Math Kernel Library (MKL) or the BLAS-like Library Instantiation Software (BLIS).

(And, actually, you CAN then also effectively incorporate Strassen's algorithm. But that is another story, told in Unit 3.5.3 of those notes.)

You may find the following thread relevant: How does BLAS get such extreme performance?
