CMSIS-NN  Version 3.1.0
CMSIS NN Software Library
 All Data Structures Files Functions Variables Enumerations Enumerator Macros Groups Pages
Basic Math Functions for Neural Network Computation

Functions

void arm_nn_accumulate_q7_to_q15 (q15_t *pDst, const q7_t *pSrc, uint32_t length)
 Converts the elements from a q7 vector and accumulate to a q15 vector. More...
 
void arm_nn_add_q7 (const q7_t *input, q31_t *output, uint32_t block_size)
 Non-saturating addition of elements of a q7 vector. More...
 
q7_t * arm_nn_depthwise_conv_nt_t_padded_s8 (const q7_t *lhs, const q7_t *rhs, const int32_t input_offset, const uint16_t num_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t out_offset, const int32_t activation_min, const int32_t activation_max, const uint16_t row_x_col, const int32_t *const output_bias, q7_t *out)
 Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in padded cases where the padding is -lhs_offset(Range: int8). Dimensions are the same for lhs and rhs. More...
 
q7_t * arm_nn_depthwise_conv_nt_t_s8 (const q7_t *lhs, const q7_t *rhs, const int32_t input_offset, const uint16_t num_ch, const int32_t *out_shift, const int32_t *out_mult, const int32_t out_offset, const int32_t activation_min, const int32_t activation_max, const uint16_t row_x_col, const int32_t *const output_bias, q7_t *out)
 Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in non-padded cases. Dimensions are the same for lhs and rhs. More...
 
arm_status arm_nn_mat_mul_core_1x_s8 (int32_t row_elements, const int8_t *row_base, const int8_t *col_base, int32_t *const sum_col, int32_t *const output)
 General Matrix-multiplication without requantization for one row & one column. More...
 
int8_t * arm_nn_mat_mul_core_4x_s8 (const int32_t row_elements, const int32_t offset, const int8_t *row_base, const int8_t *col_base_ref, const int32_t out_ch, const cmsis_nn_conv_params *conv_params, const cmsis_nn_per_channel_quant_params *quant_params, const int32_t *bias, int8_t *output)
 Matrix-multiplication with requantization & activation function for four rows and one column. More...
 
arm_status arm_nn_mat_mult_nt_t_s8 (const q7_t *lhs, const q7_t *rhs, const q31_t *bias, q7_t *dst, const int32_t *dst_multipliers, const int32_t *dst_shifts, const int32_t lhs_rows, const int32_t rhs_rows, const int32_t rhs_cols, const int32_t lhs_offset, const int32_t dst_offset, const int32_t activation_min, const int32_t activation_max)
 General Matrix-multiplication function with per-channel requantization. This function assumes: More...
 
void arm_nn_mult_q15 (q15_t *pSrcA, q15_t *pSrcB, q15_t *pDst, const uint16_t out_shift, uint32_t blockSize)
 Q15 vector multiplication with variable output shifts. More...
 
void arm_nn_mult_q7 (q7_t *pSrcA, q7_t *pSrcB, q7_t *pDst, const uint16_t out_shift, uint32_t blockSize)
 Q7 vector multiplication with variable output shifts. More...
 
arm_status arm_nn_vec_mat_mult_t_s16 (const q15_t *lhs, const q7_t *rhs, const q63_t *bias, q15_t *dst, const int32_t dst_multiplier, const int32_t dst_shift, const int32_t rhs_cols, const int32_t rhs_rows, const int32_t activation_min, const int32_t activation_max)
 s16 Vector by Matrix (transposed) multiplication More...
 
arm_status arm_nn_vec_mat_mult_t_s8 (const q7_t *lhs, const q7_t *rhs, const q31_t *bias, q7_t *dst, const int32_t lhs_offset, const int32_t rhs_offset, const int32_t dst_offset, const int32_t dst_multiplier, const int32_t dst_shift, const int32_t rhs_cols, const int32_t rhs_rows, const int32_t activation_min, const int32_t activation_max, const int32_t address_offset)
 s8 Vector by Matrix (transposed) multiplication More...
 
arm_status arm_nn_vec_mat_mult_t_svdf_s8 (const q7_t *lhs, const q7_t *rhs, q15_t *dst, const int32_t lhs_offset, const int32_t rhs_offset, const int32_t dst_offset, const int32_t dst_multiplier, const int32_t dst_shift, const int32_t rhs_cols, const int32_t rhs_rows, const int32_t activation_min, const int32_t activation_max)
 s8 Vector by Matrix (transposed) multiplication with s16 output More...
 

Description

Basic Math Functions for Neural Network Computation

Function Documentation

void arm_nn_accumulate_q7_to_q15 ( q15_t *  dst,
const q7_t *  src,
uint32_t  block_size 
)
Parameters
[in]*srcpoints to the q7 input vector
[out]*dstpoints to the q15 output vector
[in]block_sizelength of the input vector
Description:

The equation used for the conversion process is:

 dst[n] += (q15_t) src[n] ;   0 <= n < block_size.

References arm_nn_read_q15x2(), arm_nn_read_q7x4_ia(), and arm_nn_write_q15x2_ia().

void arm_nn_add_q7 ( const q7_t *  input,
q31_t *  output,
uint32_t  block_size 
)
Parameters
[in]*inputPointer to the q7 input vector
[out]*outputPointer to the q31 output variable.
[in]block_sizelength of the input vector
Description:

2^24 samples can be added without saturating the result.

The equation used for the addition process is:

 sum = input[0] + input[1] + .. + input[block_size -1]

References arm_nn_read_q7x4_ia().

q7_t* arm_nn_depthwise_conv_nt_t_padded_s8 ( const q7_t *  lhs,
const q7_t *  rhs,
const int32_t  lhs_offset,
const uint16_t  num_ch,
const int32_t *  out_shift,
const int32_t *  out_mult,
const int32_t  out_offset,
const int32_t  activation_min,
const int32_t  activation_max,
const uint16_t  row_x_col,
const int32_t *const  output_bias,
q7_t *  out 
)
Parameters
[in]lhsInput left-hand side matrix
[in]rhsInput right-hand side matrix (transposed)
[in]lhs_offsetLHS matrix offset(input offset). Range: -127 to 128
[in]num_chNumber of channels in LHS/RHS
[in]out_shiftPer channel output shift. Length of vector is equal to number of channels
[in]out_multPer channel output multiplier. Length of vector is equal to number of channels
[in]out_offsetOffset to be added to the output values. Range: -127 to 128
[in]activation_minMinimum value to clamp the output to. Range: int8
[in]activation_maxMaximum value to clamp the output to. Range: int8
[in]row_x_col(row_dimension * col_dimension) of LHS/RHS matrix
[in]output_biasPer channel output bias. Length of vector is equal to number of channels
[in]outOutput pointer
Returns
The function returns one of the two
  • Updated output pointer if an implementation is available
  • NULL if no implementation is available.
Note
If the number of channels is not a multiple of 4, up to 3 elements outside the boundary will be read for each of the following:
  • Output shift
  • Output multiplier
  • Output bias
  • rhs

Referenced by arm_depthwise_conv_s8_opt().

q7_t* arm_nn_depthwise_conv_nt_t_s8 ( const q7_t *  lhs,
const q7_t *  rhs,
const int32_t  lhs_offset,
const uint16_t  num_ch,
const int32_t *  out_shift,
const int32_t *  out_mult,
const int32_t  out_offset,
const int32_t  activation_min,
const int32_t  activation_max,
const uint16_t  row_x_col,
const int32_t *const  output_bias,
q7_t *  out 
)
Parameters
[in]lhsInput left-hand side matrix
[in]rhsInput right-hand side matrix (transposed)
[in]lhs_offsetLHS matrix offset(input offset). Range: -127 to 128
[in]num_chNumber of channels in LHS/RHS
[in]out_shiftPer channel output shift. Length of vector is equal to number of channels.
[in]out_multPer channel output multiplier. Length of vector is equal to number of channels.
[in]out_offsetOffset to be added to the output values. Range: -127 to 128
[in]activation_minMinimum value to clamp the output to. Range: int8
[in]activation_maxMaximum value to clamp the output to. Range: int8
[in]row_x_col(row_dimension * col_dimension) of LHS/RHS matrix
[in]output_biasPer channel output bias. Length of vector is equal to number of channels.
[in]outOutput pointer
Returns
The function returns one of the two
  • Updated output pointer if an implementation is available
  • NULL if no implementation is available.
Note
If the number of channels is not a multiple of 4, up to 3 elements outside the boundary will be read for each of the following:
  • Output shift
  • Output multiplier
  • Output bias
  • rhs

Referenced by arm_depthwise_conv_s8_opt().

arm_status arm_nn_mat_mul_core_1x_s8 ( int32_t  row_elements,
const int8_t *  row_base,
const int8_t *  col_base,
int32_t *const  sum_col,
int32_t *const  output 
)
Parameters
[in]row_elementsnumber of row elements
[in]row_basepointer to row operand
[in]col_basepointer to col operand
[out]sum_colpointer to store sum of column elements
[out]outputpointer to store result of multiply-accumulate
Returns
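The function returns an arm_status code. The multiply-accumulated result of the row by column is written to *output, and the sum of the column elements is written to *sum_col.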

Pseudo-code:

 *output = 0
 *sum_col = 0
 for (i = 0; i < row_elements; i++)
     *output += row_base[i] * col_base[i]
     *sum_col += col_base[i]

Referenced by arm_convolve_1_x_n_s8(), and arm_convolve_1x1_s8_fast().

int8_t* arm_nn_mat_mul_core_4x_s8 ( const int32_t  row_elements,
const int32_t  offset,
const int8_t *  row_base,
const int8_t *  col_base,
const int32_t  out_ch,
const cmsis_nn_conv_params *  conv_params,
const cmsis_nn_per_channel_quant_params *  quant_params,
const int32_t *  bias,
int8_t *  output 
)
Parameters
[in]row_elementsnumber of row elements
[in]offsetoffset between rows. Can be the same as row_elements, e.g. in a 1x1 conv scenario with a stride of 1.
[in]row_basepointer to row operand
[in]col_basepointer to col operand
[in]out_chNumber of output channels
[in]conv_paramsPointer to convolution parameters like offsets and activation values
[in]quant_paramsPointer to per-channel quantization parameters
[in]biasPointer to per-channel bias
[out]outputPointer to output where int8 results are stored.
Returns
The function returns the updated output pointer or NULL if implementation is not available.

Compliant with the TFLM int8 specification. MVE implementation only.

References cmsis_nn_conv_params::activation, cmsis_nn_conv_params::input_offset, cmsis_nn_activation::max, cmsis_nn_activation::min, cmsis_nn_per_channel_quant_params::multiplier, cmsis_nn_conv_params::output_offset, and cmsis_nn_per_channel_quant_params::shift.

Referenced by arm_convolve_1_x_n_s8(), arm_convolve_1x1_s8_fast(), and arm_convolve_s8().

arm_status arm_nn_mat_mult_nt_t_s8 ( const q7_t *  lhs,
const q7_t *  rhs,
const q31_t *  bias,
q7_t *  dst,
const int32_t *  dst_multipliers,
const int32_t *  dst_shifts,
const int32_t  lhs_rows,
const int32_t  rhs_rows,
const int32_t  rhs_cols,
const int32_t  lhs_offset,
const int32_t  dst_offset,
const int32_t  activation_min,
const int32_t  activation_max 
)
This function assumes:
  • LHS input matrix NOT transposed (nt)
  • RHS input matrix transposed (t)
Note
This operation also performs the broadcast bias addition before the requantization
Parameters
[in]lhsPointer to the LHS input matrix
[in]rhsPointer to the RHS input matrix
[in]biasPointer to the bias vector. The length of this vector is equal to the number of output columns (or RHS input rows)
[out]dstPointer to the output matrix with "m" rows and "n" columns
[in]dst_multipliersPointer to the multipliers vector needed for the per-channel requantization. The length of this vector is equal to the number of output columns (or RHS input rows)
[in]dst_shiftsPointer to the shifts vector needed for the per-channel requantization. The length of this vector is equal to the number of output columns (or RHS input rows)
[in]lhs_rowsNumber of LHS input rows
[in]rhs_rowsNumber of RHS input rows
[in]rhs_colsNumber of LHS/RHS input columns
[in]lhs_offsetOffset to be applied to the LHS input value
[in]dst_offsetOffset to be applied to the output result
[in]activation_minMinimum value to clamp down the output. Range : int8
[in]activation_maxMaximum value to clamp up the output. Range : int8
Returns
The function returns ARM_MATH_SUCCESS

References arm_nn_read_q7x4(), arm_nn_read_q7x4_ia(), arm_nn_requantize(), MAX, and MIN.

Referenced by arm_convolve_1x1_s8_fast().

void arm_nn_mult_q15 ( q15_t *  pSrcA,
q15_t *  pSrcB,
q15_t *  pDst,
const uint16_t  out_shift,
uint32_t  blockSize 
)

q15 vector multiplication with variable output shifts

Parameters
[in]*pSrcApointer to the first input vector
[in]*pSrcBpointer to the second input vector
[out]*pDstpointer to the output vector
[in]out_shiftamount of right-shift for output
[in]blockSizenumber of samples in each vector

Scaling and Overflow Behavior:

The function uses saturating arithmetic. Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.

References NN_ROUND.

void arm_nn_mult_q7 ( q7_t *  pSrcA,
q7_t *  pSrcB,
q7_t *  pDst,
const uint16_t  out_shift,
uint32_t  blockSize 
)

q7 vector multiplication with variable output shifts

Parameters
[in]*pSrcApointer to the first input vector
[in]*pSrcBpointer to the second input vector
[out]*pDstpointer to the output vector
[in]out_shiftamount of right-shift for output
[in]blockSizenumber of samples in each vector

Scaling and Overflow Behavior:

The function uses saturating arithmetic. Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.

References NN_ROUND.

arm_status arm_nn_vec_mat_mult_t_s16 ( const q15_t *  lhs,
const q7_t *  rhs,
const q63_t *  bias,
q15_t *  dst,
const int32_t  dst_multiplier,
const int32_t  dst_shift,
const int32_t  rhs_cols,
const int32_t  rhs_rows,
const int32_t  activation_min,
const int32_t  activation_max 
)
Parameters
[in]lhsInput left-hand side vector
[in]rhsInput right-hand side matrix (transposed)
[in]biasInput bias
[out]dstOutput vector
[in]dst_multiplierOutput multiplier
[in]dst_shiftOutput shift
[in]rhs_colsNumber of columns in the right-hand side input matrix
[in]rhs_rowsNumber of rows in the right-hand side input matrix
[in]activation_minMinimum value to clamp the output to. Range: int16
[in]activation_maxMaximum value to clamp the output to. Range: int16
Returns
The function returns ARM_MATH_SUCCESS

References arm_nn_read_q15x2_ia(), arm_nn_requantize_s64(), MAX, and MIN.

Referenced by arm_fully_connected_s16().

arm_status arm_nn_vec_mat_mult_t_s8 ( const q7_t *  lhs,
const q7_t *  rhs,
const q31_t *  bias,
q7_t *  dst,
const int32_t  lhs_offset,
const int32_t  rhs_offset,
const int32_t  dst_offset,
const int32_t  dst_multiplier,
const int32_t  dst_shift,
const int32_t  rhs_cols,
const int32_t  rhs_rows,
const int32_t  activation_min,
const int32_t  activation_max,
const int32_t  address_offset 
)
Parameters
[in]lhsInput left-hand side vector
[in]rhsInput right-hand side matrix (transposed)
[in]biasInput bias
[out]dstOutput vector
[in]lhs_offsetOffset to be added to the input values of the left-hand side vector. Range: -127 to 128
[in]rhs_offsetNot used
[in]dst_offsetOffset to be added to the output values. Range: -127 to 128
[in]dst_multiplierOutput multiplier
[in]dst_shiftOutput shift
[in]rhs_colsNumber of columns in the right-hand side input matrix
[in]rhs_rowsNumber of rows in the right-hand side input matrix
[in]activation_minMinimum value to clamp the output to. Range: int8
[in]activation_maxMaximum value to clamp the output to. Range: int8
[in]address_offsetMemory position offset for dst. First output is stored at 'dst', the second at 'dst + address_offset' and so on. Default value is typically 1.
Returns
The function returns ARM_MATH_SUCCESS

References arm_nn_read_q7x4_ia(), arm_nn_requantize(), MAX, and MIN.

Referenced by arm_fully_connected_s8(), and arm_svdf_s8().

arm_status arm_nn_vec_mat_mult_t_svdf_s8 ( const q7_t *  lhs,
const q7_t *  rhs,
q15_t *  dst,
const int32_t  lhs_offset,
const int32_t  rhs_offset,
const int32_t  scatter_offset,
const int32_t  dst_multiplier,
const int32_t  dst_shift,
const int32_t  rhs_cols,
const int32_t  rhs_rows,
const int32_t  activation_min,
const int32_t  activation_max 
)
Parameters
[in]lhsInput left-hand side vector
[in]rhsInput right-hand side matrix (transposed)
[out]dstOutput vector
[in]lhs_offsetOffset to be added to the input values of the left-hand side vector. Range: -127 to 128
[in]rhs_offsetNot used
[in]scatter_offsetAddress offset for dst. First output is stored at 'dst', the second at 'dst + scatter_offset' and so on.
[in]dst_multiplierOutput multiplier
[in]dst_shiftOutput shift
[in]rhs_colsNumber of columns in the right-hand side input matrix
[in]rhs_rowsNumber of rows in the right-hand side input matrix
[in]activation_minMinimum value to clamp the output to. Range: int16
[in]activation_maxMaximum value to clamp the output to. Range: int16
Returns
The function returns ARM_MATH_SUCCESS

References arm_nn_read_q7x4_ia(), arm_nn_requantize(), MAX, MIN, and NN_Q31_MAX.

Referenced by arm_svdf_state_s16_s8().