arm correlate q7 8c source


CMSIS DSP Software Library: arm_correlate_q7.c Source File Main Page Modules Data Structures Files Examples File List Globals arm_correlate_q7.c Go to the documentation of this file.00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 29. November 2010 00005 * $Revision: V1.0.3 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_correlate_q7.c 00009 * 00010 * Description: Process function for Q7 Correlation. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3 00013 * 00014 * Version 1.0.3 2010/11/29 00015 * Re-organized the CMSIS folders and updated documentation. 00016 * 00017 * Version 1.0.2 2010/11/11 00018 * Documentation updated. 00019 * 00020 * Version 1.0.1 2010/10/05 00021 * Production release and review comments incorporated. 00022 * 00023 * Version 1.0.0 2010/09/20 00024 * Production release and review comments incorporated 00025 * 00026 * Version 0.0.7 2010/06/10 00027 * Misra-C changes done 00028 * 00029 * -------------------------------------------------------------------- */ 00030 00031 #include "arm_math.h" 00032 00062 void arm_correlate_q7( 00063 q7_t * pSrcA, 00064 uint32_t srcALen, 00065 q7_t * pSrcB, 00066 uint32_t srcBLen, 00067 q7_t * pDst) 00068 { 00069 q7_t *pIn1; /* inputA pointer */ 00070 q7_t *pIn2; /* inputB pointer */ 00071 q7_t *pOut = pDst; /* output pointer */ 00072 q7_t *px; /* Intermediate inputA pointer */ 00073 q7_t *py; /* Intermediate inputB pointer */ 00074 q7_t *pSrc1; /* Intermediate pointers */ 00075 q31_t sum, acc0, acc1, acc2, acc3; /* Accumulators */ 00076 q31_t input1, input2; /* temporary variables */ 00077 q15_t in1, in2; /* temporary variables */ 00078 q7_t x0, x1, x2, x3, c0, c1; /* temporary variables for holding input and coefficient values */ 00079 uint32_t j, k = 0u, count, blkCnt, outBlockSize, blockSize1, blockSize2, blockSize3; /* loop counter */ 00080 int32_t inc = 1; 00081 00082 00083 /* The algorithm implementation is based on the lengths of the inputs. */ 00084 /* srcB is always made to slide across srcA. */ 00085 /* So srcBLen is always considered as shorter or equal to srcALen */ 00086 /* But CORR(x, y) is reverse of CORR(y, x) */ 00087 /* So, when srcBLen > srcALen, output pointer is made to point to the end of the output buffer */ 00088 /* and the destination pointer modifier, inc is set to -1 */ 00089 /* If srcALen > srcBLen, zero pad has to be done to srcB to make the two inputs of same length */ 00090 /* But to improve the performance, 00091 * we include zeroes in the output instead of zero padding either of the the inputs*/ 00092 /* If srcALen > srcBLen, 00093 * (srcALen - srcBLen) zeroes has to included in the starting of the output buffer */ 00094 /* If srcALen < srcBLen, 00095 * (srcALen - srcBLen) zeroes has to included in the ending of the output buffer */ 00096 if(srcALen >= srcBLen) 00097 { 00098 /* Initialization of inputA pointer */ 00099 pIn1 = (pSrcA); 00100 00101 /* Initialization of inputB pointer */ 00102 pIn2 = (pSrcB); 00103 00104 /* Number of output samples is calculated */ 00105 outBlockSize = (2u * srcALen) - 1u; 00106 00107 /* When srcALen > srcBLen, zero padding is done to srcB 00108 * to make their lengths equal. 00109 * Instead, (outBlockSize - (srcALen + srcBLen - 1)) 00110 * number of output samples are made zero */ 00111 j = outBlockSize - (srcALen + (srcBLen - 1u)); 00112 00113 while(j > 0u) 00114 { 00115 /* Zero is stored in the destination buffer */ 00116 *pOut++ = 0; 00117 00118 /* Decrement the loop counter */ 00119 j--; 00120 } 00121 00122 } 00123 else 00124 { 00125 /* Initialization of inputA pointer */ 00126 pIn1 = (pSrcB); 00127 00128 /* Initialization of inputB pointer */ 00129 pIn2 = (pSrcA); 00130 00131 /* srcBLen is always considered as shorter or equal to srcALen */ 00132 j = srcBLen; 00133 srcBLen = srcALen; 00134 srcALen = j; 00135 00136 /* CORR(x, y) = Reverse order(CORR(y, x)) */ 00137 /* Hence set the destination pointer to point to the last output sample */ 00138 pOut = pDst + ((srcALen + srcBLen) - 2u); 00139 00140 /* Destination address modifier is set to -1 */ 00141 inc = -1; 00142 00143 } 00144 00145 /* The function is internally 00146 * divided into three parts according to the number of multiplications that has to be 00147 * taken place between inputA samples and inputB samples. In the first part of the 00148 * algorithm, the multiplications increase by one for every iteration. 00149 * In the second part of the algorithm, srcBLen number of multiplications are done. 00150 * In the third part of the algorithm, the multiplications decrease by one 00151 * for every iteration.*/ 00152 /* The algorithm is implemented in three stages. 00153 * The loop counters of each stage is initiated here. */ 00154 blockSize1 = srcBLen - 1u; 00155 blockSize2 = srcALen - (srcBLen - 1u); 00156 blockSize3 = blockSize1; 00157 00158 /* -------------------------- 00159 * Initializations of stage1 00160 * -------------------------*/ 00161 00162 /* sum = x[0] * y[srcBlen - 1] 00163 * sum = x[0] * y[srcBlen - 2] + x[1] * y[srcBlen - 1] 00164 * .... 00165 * sum = x[0] * y[0] + x[1] * y[1] +...+ x[srcBLen - 1] * y[srcBLen - 1] 00166 */ 00167 00168 /* In this stage the MAC operations are increased by 1 for every iteration. 00169 The count variable holds the number of MAC operations performed */ 00170 count = 1u; 00171 00172 /* Working pointer of inputA */ 00173 px = pIn1; 00174 00175 /* Working pointer of inputB */ 00176 pSrc1 = pIn2 + (srcBLen - 1u); 00177 py = pSrc1; 00178 00179 /* ------------------------ 00180 * Stage1 process 00181 * ----------------------*/ 00182 00183 /* The first stage starts here */ 00184 while(blockSize1 > 0u) 00185 { 00186 /* Accumulator is made zero for every iteration */ 00187 sum = 0; 00188 00189 /* Apply loop unrolling and compute 4 MACs simultaneously. */ 00190 k = count >> 2; 00191 00192 /* First part of the processing with loop unrolling. Compute 4 MACs at a time. 00193 ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 00194 while(k > 0u) 00195 { 00196 /* x[0] , x[1] */ 00197 in1 = (q15_t) * px++; 00198 in2 = (q15_t) * px++; 00199 input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 00200 00201 /* y[srcBLen - 4] , y[srcBLen - 3] */ 00202 in1 = (q15_t) * py++; 00203 in2 = (q15_t) * py++; 00204 input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 00205 00206 /* x[0] * y[srcBLen - 4] */ 00207 /* x[1] * y[srcBLen - 3] */ 00208 sum = __SMLAD(input1, input2, sum); 00209 00210 /* x[2] , x[3] */ 00211 in1 = (q15_t) * px++; 00212 in2 = (q15_t) * px++; 00213 input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 00214 00215 /* y[srcBLen - 2] , y[srcBLen - 1] */ 00216 in1 = (q15_t) * py++; 00217 in2 = (q15_t) * py++; 00218 input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 00219 00220 /* x[2] * y[srcBLen - 2] */ 00221 /* x[3] * y[srcBLen - 1] */ 00222 sum = __SMLAD(input1, input2, sum); 00223 00224 00225 /* Decrement the loop counter */ 00226 k--; 00227 } 00228 00229 /* If the count is not a multiple of 4, compute any remaining MACs here. 00230 ** No loop unrolling is used. */ 00231 k = count % 0x4u; 00232 00233 while(k > 0u) 00234 { 00235 /* Perform the multiply-accumulates */ 00236 /* x[0] * y[srcBLen - 1] */ 00237 sum += (q31_t) ((q15_t) * px++ * *py++); 00238 00239 /* Decrement the loop counter */ 00240 k--; 00241 } 00242 00243 /* Store the result in the accumulator in the destination buffer. */ 00244 *pOut = (q7_t) (__SSAT(sum >> 7, 8)); 00245 /* Destination pointer is updated according to the address modifier, inc */ 00246 pOut += inc; 00247 00248 /* Update the inputA and inputB pointers for next MAC calculation */ 00249 py = pSrc1 - count; 00250 px = pIn1; 00251 00252 /* Increment the MAC count */ 00253 count++; 00254 00255 /* Decrement the loop counter */ 00256 blockSize1--; 00257 } 00258 00259 /* -------------------------- 00260 * Initializations of stage2 00261 * ------------------------*/ 00262 00263 /* sum = x[0] * y[0] + x[1] * y[1] +...+ x[srcBLen-1] * y[srcBLen-1] 00264 * sum = x[1] * y[0] + x[2] * y[1] +...+ x[srcBLen] * y[srcBLen-1] 00265 * .... 00266 * sum = x[srcALen-srcBLen-2] * y[0] + x[srcALen-srcBLen-1] * y[1] +...+ x[srcALen-1] * y[srcBLen-1] 00267 */ 00268 00269 /* Working pointer of inputA */ 00270 px = pIn1; 00271 00272 /* Working pointer of inputB */ 00273 py = pIn2; 00274 00275 /* count is index by which the pointer pIn1 to be incremented */ 00276 count = 1u; 00277 00278 /* ------------------- 00279 * Stage2 process 00280 * ------------------*/ 00281 00282 /* Stage2 depends on srcBLen as in this stage srcBLen number of MACS are performed. 00283 * So, to loop unroll over blockSize2, 00284 * srcBLen should be greater than or equal to 4 */ 00285 if(srcBLen >= 4u) 00286 { 00287 /* Loop unroll over blockSize2, by 4 */ 00288 blkCnt = blockSize2 >> 2u; 00289 00290 while(blkCnt > 0u) 00291 { 00292 /* Set all accumulators to zero */ 00293 acc0 = 0; 00294 acc1 = 0; 00295 acc2 = 0; 00296 acc3 = 0; 00297 00298 /* read x[0], x[1], x[2] samples */ 00299 x0 = *px++; 00300 x1 = *px++; 00301 x2 = *px++; 00302 00303 /* Apply loop unrolling and compute 4 MACs simultaneously. */ 00304 k = srcBLen >> 2u; 00305 00306 /* First part of the processing with loop unrolling. Compute 4 MACs at a time. 00307 ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 00308 do 00309 { 00310 /* Read y[0] sample */ 00311 c0 = *py++; 00312 /* Read y[1] sample */ 00313 c1 = *py++; 00314 00315 /* Read x[3] sample */ 00316 x3 = *px++; 00317 00318 /* x[0] and x[1] are packed */ 00319 in1 = (q15_t) x0; 00320 in2 = (q15_t) x1; 00321 00322 input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 00323 00324 /* y[0] and y[1] are packed */ 00325 in1 = (q15_t) c0; 00326 in2 = (q15_t) c1; 00327 00328 input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 00329 00330 /* acc0 += x[0] * y[0] + x[1] * y[1] */ 00331 acc0 = __SMLAD(input1, input2, acc0); 00332 00333 /* x[1] and x[2] are packed */ 00334 in1 = (q15_t) x1; 00335 in2 = (q15_t) x2; 00336 00337 input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 00338 00339 /* acc1 += x[1] * y[0] + x[2] * y[1] */ 00340 acc1 = __SMLAD(input1, input2, acc1); 00341 00342 /* x[2] and x[3] are packed */ 00343 in1 = (q15_t) x2; 00344 in2 = (q15_t) x3; 00345 00346 input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 00347 00348 /* acc2 += x[2] * y[0] + x[3] * y[1] */ 00349 acc2 = __SMLAD(input1, input2, acc2); 00350 00351 /* Read x[4] sample */ 00352 x0 = *(px++); 00353 00354 /* x[3] and x[4] are packed */ 00355 in1 = (q15_t) x3; 00356 in2 = (q15_t) x0; 00357 00358 input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 00359 00360 /* acc3 += x[3] * y[0] + x[4] * y[1] */ 00361 acc3 = __SMLAD(input1, input2, acc3); 00362 00363 /* Read y[2] sample */ 00364 c0 = *py++; 00365 /* Read y[3] sample */ 00366 c1 = *py++; 00367 00368 /* Read x[5] sample */ 00369 x1 = *px++; 00370 00371 /* x[2] and x[3] are packed */ 00372 in1 = (q15_t) x2; 00373 in2 = (q15_t) x3; 00374 00375 input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 00376 00377 /* y[2] and y[3] are packed */ 00378 in1 = (q15_t) c0; 00379 in2 = (q15_t) c1; 00380 00381 input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 00382 00383 /* acc0 += x[2] * y[2] + x[3] * y[3] */ 00384 acc0 = __SMLAD(input1, input2, acc0); 00385 00386 /* x[3] and x[4] are packed */ 00387 in1 = (q15_t) x3; 00388 in2 = (q15_t) x0; 00389 00390 input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 00391 00392 /* acc1 += x[3] * y[2] + x[4] * y[3] */ 00393 acc1 = __SMLAD(input1, input2, acc1); 00394 00395 /* x[4] and x[5] are packed */ 00396 in1 = (q15_t) x0; 00397 in2 = (q15_t) x1; 00398 00399 input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 00400 00401 /* acc2 += x[4] * y[2] + x[5] * y[3] */ 00402 acc2 = __SMLAD(input1, input2, acc2); 00403 00404 /* Read x[6] sample */ 00405 x2 = *px++; 00406 00407 /* x[5] and x[6] are packed */ 00408 in1 = (q15_t) x1; 00409 in2 = (q15_t) x2; 00410 00411 input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 00412 00413 /* acc3 += x[5] * y[2] + x[6] * y[3] */ 00414 acc3 = __SMLAD(input1, input2, acc3); 00415 00416 } while(--k); 00417 00418 /* If the srcBLen is not a multiple of 4, compute any remaining MACs here. 00419 ** No loop unrolling is used. */ 00420 k = srcBLen % 0x4u; 00421 00422 while(k > 0u) 00423 { 00424 /* Read y[4] sample */ 00425 c0 = *py++; 00426 00427 /* Read x[7] sample */ 00428 x3 = *px++; 00429 00430 /* Perform the multiply-accumulates */ 00431 /* acc0 += x[4] * y[4] */ 00432 acc0 += ((q15_t) x0 * c0); 00433 /* acc1 += x[5] * y[4] */ 00434 acc1 += ((q15_t) x1 * c0); 00435 /* acc2 += x[6] * y[4] */ 00436 acc2 += ((q15_t) x2 * c0); 00437 /* acc3 += x[7] * y[4] */ 00438 acc3 += ((q15_t) x3 * c0); 00439 00440 /* Reuse the present samples for the next MAC */ 00441 x0 = x1; 00442 x1 = x2; 00443 x2 = x3; 00444 00445 /* Decrement the loop counter */ 00446 k--; 00447 } 00448 00449 /* Store the result in the accumulator in the destination buffer. */ 00450 *pOut = (q7_t) (__SSAT(acc0 >> 7, 8)); 00451 /* Destination pointer is updated according to the address modifier, inc */ 00452 pOut += inc; 00453 00454 *pOut = (q7_t) (__SSAT(acc1 >> 7, 8)); 00455 pOut += inc; 00456 00457 *pOut = (q7_t) (__SSAT(acc2 >> 7, 8)); 00458 pOut += inc; 00459 00460 *pOut = (q7_t) (__SSAT(acc3 >> 7, 8)); 00461 pOut += inc; 00462 00463 /* Update the inputA and inputB pointers for next MAC calculation */ 00464 px = pIn1 + (count * 4u); 00465 py = pIn2; 00466 00467 /* Increment the pointer pIn1 index, count by 1 */ 00468 count++; 00469 00470 /* Decrement the loop counter */ 00471 blkCnt--; 00472 } 00473 00474 /* If the blockSize2 is not a multiple of 4, compute any remaining output samples here. 00475 ** No loop unrolling is used. */ 00476 blkCnt = blockSize2 % 0x4u; 00477 00478 while(blkCnt > 0u) 00479 { 00480 /* Accumulator is made zero for every iteration */ 00481 sum = 0; 00482 00483 /* Apply loop unrolling and compute 4 MACs simultaneously. */ 00484 k = srcBLen >> 2u; 00485 00486 /* First part of the processing with loop unrolling. Compute 4 MACs at a time. 00487 ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 00488 while(k > 0u) 00489 { 00490 /* Reading two inputs of SrcA buffer and packing */ 00491 in1 = (q15_t) * px++; 00492 in2 = (q15_t) * px++; 00493 input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 00494 00495 /* Reading two inputs of SrcB buffer and packing */ 00496 in1 = (q15_t) * py++; 00497 in2 = (q15_t) * py++; 00498 input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 00499 00500 /* Perform the multiply-accumulates */ 00501 sum = __SMLAD(input1, input2, sum); 00502 00503 /* Reading two inputs of SrcA buffer and packing */ 00504 in1 = (q15_t) * px++; 00505 in2 = (q15_t) * px++; 00506 input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 00507 00508 /* Reading two inputs of SrcB buffer and packing */ 00509 in1 = (q15_t) * py++; 00510 in2 = (q15_t) * py++; 00511 input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 00512 00513 /* Perform the multiply-accumulates */ 00514 sum = __SMLAD(input1, input2, sum); 00515 00516 /* Decrement the loop counter */ 00517 k--; 00518 } 00519 00520 /* If the srcBLen is not a multiple of 4, compute any remaining MACs here. 00521 ** No loop unrolling is used. */ 00522 k = srcBLen % 0x4u; 00523 00524 while(k > 0u) 00525 { 00526 /* Perform the multiply-accumulates */ 00527 sum += ((q15_t) * px++ * *py++); 00528 00529 /* Decrement the loop counter */ 00530 k--; 00531 } 00532 00533 /* Store the result in the accumulator in the destination buffer. */ 00534 *pOut = (q7_t) (__SSAT(sum >> 7, 8)); 00535 /* Destination pointer is updated according to the address modifier, inc */ 00536 pOut += inc; 00537 00538 /* Update the inputA and inputB pointers for next MAC calculation */ 00539 px = pIn1 + count; 00540 py = pIn2; 00541 00542 /* Increment the pointer pIn1 index, count by 1 */ 00543 count++; 00544 00545 /* Decrement the loop counter */ 00546 blkCnt--; 00547 } 00548 } 00549 else 00550 { 00551 /* If the srcBLen is not a multiple of 4, 00552 * the blockSize2 loop cannot be unrolled by 4 */ 00553 blkCnt = blockSize2; 00554 00555 while(blkCnt > 0u) 00556 { 00557 /* Accumulator is made zero for every iteration */ 00558 sum = 0; 00559 00560 /* Loop over srcBLen */ 00561 k = srcBLen; 00562 00563 while(k > 0u) 00564 { 00565 /* Perform the multiply-accumulate */ 00566 sum += ((q15_t) * px++ * *py++); 00567 00568 /* Decrement the loop counter */ 00569 k--; 00570 } 00571 00572 /* Store the result in the accumulator in the destination buffer. */ 00573 *pOut = (q7_t) (__SSAT(sum >> 7, 8)); 00574 /* Destination pointer is updated according to the address modifier, inc */ 00575 pOut += inc; 00576 00577 /* Update the inputA and inputB pointers for next MAC calculation */ 00578 px = pIn1 + count; 00579 py = pIn2; 00580 00581 /* Increment the MAC count */ 00582 count++; 00583 00584 /* Decrement the loop counter */ 00585 blkCnt--; 00586 } 00587 } 00588 00589 /* -------------------------- 00590 * Initializations of stage3 00591 * -------------------------*/ 00592 00593 /* sum += x[srcALen-srcBLen+1] * y[0] + x[srcALen-srcBLen+2] * y[1] +...+ x[srcALen-1] * y[srcBLen-1] 00594 * sum += x[srcALen-srcBLen+2] * y[0] + x[srcALen-srcBLen+3] * y[1] +...+ x[srcALen-1] * y[srcBLen-1] 00595 * .... 00596 * sum += x[srcALen-2] * y[0] + x[srcALen-1] * y[1] 00597 * sum += x[srcALen-1] * y[0] 00598 */ 00599 00600 /* In this stage the MAC operations are decreased by 1 for every iteration. 00601 The count variable holds the number of MAC operations performed */ 00602 count = srcBLen - 1u; 00603 00604 /* Working pointer of inputA */ 00605 pSrc1 = pIn1 + (srcALen - (srcBLen - 1u)); 00606 px = pSrc1; 00607 00608 /* Working pointer of inputB */ 00609 py = pIn2; 00610 00611 /* ------------------- 00612 * Stage3 process 00613 * ------------------*/ 00614 00615 while(blockSize3 > 0u) 00616 { 00617 /* Accumulator is made zero for every iteration */ 00618 sum = 0; 00619 00620 /* Apply loop unrolling and compute 4 MACs simultaneously. */ 00621 k = count >> 2u; 00622 00623 /* First part of the processing with loop unrolling. Compute 4 MACs at a time. 00624 ** a second loop below computes MACs for the remaining 1 to 3 samples. */ 00625 while(k > 0u) 00626 { 00627 /* x[srcALen - srcBLen + 1] , x[srcALen - srcBLen + 2] */ 00628 in1 = (q15_t) * px++; 00629 in2 = (q15_t) * px++; 00630 input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 00631 00632 /* y[0] , y[1] */ 00633 in1 = (q15_t) * py++; 00634 in2 = (q15_t) * py++; 00635 input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 00636 00637 /* sum += x[srcALen - srcBLen + 1] * y[0] */ 00638 /* sum += x[srcALen - srcBLen + 2] * y[1] */ 00639 sum = __SMLAD(input1, input2, sum); 00640 00641 /* x[srcALen - srcBLen + 3] , x[srcALen - srcBLen + 4] */ 00642 in1 = (q15_t) * px++; 00643 in2 = (q15_t) * px++; 00644 input1 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 00645 00646 /* y[2] , y[3] */ 00647 in1 = (q15_t) * py++; 00648 in2 = (q15_t) * py++; 00649 input2 = ((q31_t) in1 & 0x0000FFFF) | ((q31_t) in2 << 16); 00650 00651 /* sum += x[srcALen - srcBLen + 3] * y[2] */ 00652 /* sum += x[srcALen - srcBLen + 4] * y[3] */ 00653 sum = __SMLAD(input1, input2, sum); 00654 00655 /* Decrement the loop counter */ 00656 k--; 00657 } 00658 00659 /* If the count is not a multiple of 4, compute any remaining MACs here. 00660 ** No loop unrolling is used. */ 00661 k = count % 0x4u; 00662 00663 while(k > 0u) 00664 { 00665 /* Perform the multiply-accumulates */ 00666 sum += ((q15_t) * px++ * *py++); 00667 00668 /* Decrement the loop counter */ 00669 k--; 00670 } 00671 00672 /* Store the result in the accumulator in the destination buffer. */ 00673 *pOut = (q7_t) (__SSAT(sum >> 7, 8)); 00674 /* Destination pointer is updated according to the address modifier, inc */ 00675 pOut += inc; 00676 00677 /* Update the inputA and inputB pointers for next MAC calculation */ 00678 px = ++pSrc1; 00679 py = pIn2; 00680 00681 /* Decrement the MAC count */ 00682 count--; 00683 00684 /* Decrement the loop counter */ 00685 blockSize3--; 00686 } 00687 00688 } 00689  All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Defines Generated on Mon Nov 29 2010 17:19:56 for CMSIS DSP Software Library by  1.7.2

Wyszukiwarka

Podobne podstrony:
arm correlate q7?
arm ?s q7? source
arm shift q7? source
arm correlate ?2? source
arm offset q7? source
arm ?d q7? source
arm correlate q31? source
arm negate q7? source
arm scale q7? source
arm conv q7? source
arm mult q7? source
arm min q7? source
arm fir q7? source
arm sub q7? source
arm power q7? source
arm fill q7? source
arm copy q7? source
arm correlate q15? source
arm mean q7? source

więcej podobnych podstron