arm mat mult f32 8c source


CMSIS DSP Software Library: arm_mat_mult_f32.c Source File Main Page Modules Data Structures Files Examples File List Globals arm_mat_mult_f32.c Go to the documentation of this file.00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 29. November 2010 00005 * $Revision: V1.0.3 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_mat_mult_f32.c 00009 * 00010 * Description: Floating-point matrix multiplication. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3 00013 * 00014 * Version 1.0.3 2010/11/29 00015 * Re-organized the CMSIS folders and updated documentation. 00016 * 00017 * Version 1.0.2 2010/11/11 00018 * Documentation updated. 00019 * 00020 * Version 1.0.1 2010/10/05 00021 * Production release and review comments incorporated. 00022 * 00023 * Version 1.0.0 2010/09/20 00024 * Production release and review comments incorporated. 00025 * 00026 * Version 0.0.5 2010/04/26 00027 * incorporated review comments and updated with latest CMSIS layer 00028 * 00029 * Version 0.0.3 2010/03/10 00030 * Initial version 00031 * -------------------------------------------------------------------- */ 00032 00033 #include "arm_math.h" 00034 00070 arm_status arm_mat_mult_f32( 00071 const arm_matrix_instance_f32 * pSrcA, 00072 const arm_matrix_instance_f32 * pSrcB, 00073 arm_matrix_instance_f32 * pDst) 00074 { 00075 float32_t *pIn1 = pSrcA->pData; /* input data matrix pointer A */ 00076 float32_t *pIn2 = pSrcB->pData; /* input data matrix pointer B */ 00077 float32_t *pInA = pSrcA->pData; /* input data matrix pointer A */ 00078 // float32_t *pSrcB = pSrcB->pData; /* input data matrix pointer B */ 00079 float32_t *pOut = pDst->pData; /* output data matrix pointer */ 00080 float32_t *px; /* Temporary output data matrix pointer */ 00081 float32_t sum; /* Accumulator */ 00082 uint16_t numRowsA = pSrcA->numRows; /* number of rows of input matrix A */ 00083 
uint16_t numColsB = pSrcB->numCols; /* number of columns of input matrix B */ 00084 uint16_t numColsA = pSrcA->numCols; /* number of columns of input matrix A */ 00085 uint16_t col, i = 0u, j, row = numRowsA, colCnt; /* loop counters */ 00086 arm_status status; /* status of matrix multiplication */ 00087 00088 #ifdef ARM_MATH_MATRIX_CHECK 00089 /* Check for matrix mismatch condition */ 00090 if((pSrcA->numCols != pSrcB->numRows) || 00091 (pSrcA->numRows != pDst->numRows) || (pSrcB->numCols != pDst->numCols)) 00092 { 00093 00094 /* Set status as ARM_MATH_SIZE_MISMATCH */ 00095 status = ARM_MATH_SIZE_MISMATCH; 00096 } 00097 else 00098 #endif 00099 { 00100 /* The following loop performs the dot-product of each row in pSrcA with each column in pSrcB */ 00101 /* row loop */ 00102 do 00103 { 00104 /* Output pointer is set to starting address of the row being processed */ 00105 px = pOut + i; 00106 00107 /* For every row wise process, the column loop counter is to be initiated */ 00108 col = numColsB; 00109 00110 /* For every row wise process, the pIn2 pointer is set 00111 ** to the starting address of the pSrcB data */ 00112 pIn2 = pSrcB->pData; 00113 00114 j = 0u; 00115 00116 /* column loop */ 00117 do 00118 { 00119 /* Set the variable sum, that acts as accumulator, to zero */ 00120 sum = 0.0f; 00121 00122 /* Initiate the pointer pIn1 to point to the starting address of the column being processed */ 00123 pIn1 = pInA; 00124 00125 /* Apply loop unrolling and compute 4 MACs simultaneously. */ 00126 colCnt = numColsA >> 2; 00127 00128 /* matrix multiplication */ 00129 while(colCnt > 0u) 00130 { 00131 /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... 
+ a(m,p)*b(p,n) */ 00132 sum += *pIn1++ * (*pIn2); 00133 pIn2 += numColsB; 00134 sum += *pIn1++ * (*pIn2); 00135 pIn2 += numColsB; 00136 sum += *pIn1++ * (*pIn2); 00137 pIn2 += numColsB; 00138 sum += *pIn1++ * (*pIn2); 00139 pIn2 += numColsB; 00140 00141 /* Decrement the loop count */ 00142 colCnt--; 00143 } 00144 00145 /* If the columns of pSrcA is not a multiple of 4, compute any remaining MACs here. 00146 ** No loop unrolling is used. */ 00147 colCnt = numColsA % 0x4u; 00148 00149 while(colCnt > 0u) 00150 { 00151 /* c(m,n) = a(1,1)*b(1,1) + a(1,2) * b(2,1) + .... + a(m,p)*b(p,n) */ 00152 sum += *pIn1++ * (*pIn2); 00153 pIn2 += numColsB; 00154 00155 /* Decrement the loop counter */ 00156 colCnt--; 00157 } 00158 00159 /* Store the result in the destination buffer */ 00160 *px++ = sum; 00161 00162 /* Update the pointer pIn2 to point to the starting address of the next column */ 00163 j++; 00164 pIn2 = pSrcB->pData + j; 00165 00166 /* Decrement the column loop counter */ 00167 col--; 00168 00169 } while(col > 0u); 00170 00171 /* Update the pointer pInA to point to the starting address of the next row */ 00172 i = i + numColsB; 00173 pInA = pInA + numColsA; 00174 00175 /* Decrement the row loop counter */ 00176 row--; 00177 00178 } while(row > 0u); 00179 00180 /* Set status as ARM_MATH_SUCCESS */ 00181 status = ARM_MATH_SUCCESS; 00182 } 00183 00184 /* Return to application */ 00185 return (status); 00186 } 00187  All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines Generated on Mon Nov 29 2010 17:19:57 for CMSIS DSP Software Library by  1.7.2

Wyszukiwarka

Podobne podstrony:
arm mat mult q15 8c source
arm mat mult f32 8c
arm mat trans f32 8c source
arm mat mult q31 8c source
arm mat add f32 8c source
arm mat sub f32 8c source
arm mat inverse f32 8c source
arm mat scale f32 8c source
arm mat init f32 8c source
arm mult f32 8c source
arm mat mult fast q15 8c source
arm mat mult fast q31 8c source
arm mat mult fast q15 8c
arm mat add q31 8c source
arm mat mult q15 8c
arm cmplx mag f32 8c source
arm fir interpolate f32 8c source
arm mat sub q15 8c source
arm mat scale q15 8c source

więcej podobnych podstron