CMSIS-NN
Version 1.2.0
CMSIS NN Software Library
|
Functions | |
void | arm_avgpool_s8 (const int dim_src_height, const int dim_src_width, const int dim_dst_height, const int dim_dst_width, const int stride_height, const int stride_width, const int dim_kernel_height, const int dim_kernel_width, const int padding_height, const int padding_width, const int act_min, const int act_max, const int ch_src, int8_t *src, int16_t *bufferA, int8_t *dst) |
s8 average pooling function More... | |
int32_t | arm_avgpool_s8_get_buffer_size (const int dim_dst_width, const int ch_src) |
Get the required buffer size for S8 average pooling function. More... | |
void | arm_max_pool_s8 (const uint16_t input_y, const uint16_t input_x, const uint16_t output_y, const uint16_t output_x, const uint16_t stride_y, const uint16_t stride_x, const uint16_t kernel_y, const uint16_t kernel_x, const uint16_t pad_y, const uint16_t pad_x, const int8_t act_min, const int8_t act_max, const uint16_t channel_in, int8_t *input, int16_t *tmp_buffer, int8_t *output) |
s8 pure C max pooling function More... | |
void | arm_max_pool_s8_opt (const uint16_t input_y, const uint16_t input_x, const uint16_t output_y, const uint16_t output_x, const uint16_t stride_y, const uint16_t stride_x, const uint16_t kernel_y, const uint16_t kernel_x, const uint16_t pad_y, const uint16_t pad_x, const int8_t act_min, const int8_t act_max, const uint16_t depth, int8_t *input, int16_t *tmp_buffer, int8_t *output) |
s8 DSP optimized max pooling function More... | |
void | arm_maxpool_q7_HWC (q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const uint16_t dim_im_out, q7_t *bufferA, q7_t *Im_out) |
Q7 max pooling function. More... | |
void | arm_avepool_q7_HWC (q7_t *Im_in, const uint16_t dim_im_in, const uint16_t ch_im_in, const uint16_t dim_kernel, const uint16_t padding, const uint16_t stride, const uint16_t dim_im_out, q7_t *bufferA, q7_t *Im_out) |
Q7 average pooling function. More... | |
Perform pooling functions, including max pooling and average pooling.
void arm_avepool_q7_HWC | ( | q7_t * | Im_in, |
const uint16_t | dim_im_in, | ||
const uint16_t | ch_im_in, | ||
const uint16_t | dim_kernel, | ||
const uint16_t | padding, | ||
const uint16_t | stride, | ||
const uint16_t | dim_im_out, | ||
q7_t * | bufferA, | ||
q7_t * | Im_out | ||
) |
[in,out] | Im_in | pointer to input tensor |
[in] | dim_im_in | input tensor dimension |
[in] | ch_im_in | number of input tensor channels |
[in] | dim_kernel | filter kernel size |
[in] | padding | padding sizes |
[in] | stride | pooling stride |
[in] | dim_im_out | output tensor dimension |
[in,out] | bufferA | pointer to buffer space for input |
[in,out] | Im_out | pointer to output tensor |
Buffer size:
bufferA size: 2*dim_im_out*ch_im_in
The pooling function is implemented as split x-pooling then y-pooling.
This pooling function is input-destructive. Input data is undefined after calling this function.
References arm_q7_to_q15_no_shift().
void arm_avgpool_s8 | ( | const int | dim_src_height, |
const int | dim_src_width, | ||
const int | dim_dst_height, | ||
const int | dim_dst_width, | ||
const int | stride_height, | ||
const int | stride_width, | ||
const int | dim_kernel_height, | ||
const int | dim_kernel_width, | ||
const int | padding_height, | ||
const int | padding_width, | ||
const int | act_min, | ||
const int | act_max, | ||
const int | ch_src, | ||
int8_t * | src, | ||
int16_t * | bufferA, | ||
int8_t * | dst | ||
) |
[in] | dim_src_height | input tensor height |
[in] | dim_src_width | input tensor width |
[in] | dim_dst_height | output tensor height |
[in] | dim_dst_width | output tensor width |
[in] | stride_height | stride along y |
[in] | stride_width | stride along x |
[in] | dim_kernel_height | filter kernel size along y |
[in] | dim_kernel_width | filter kernel size along x |
[in] | padding_height | padding size along y |
[in] | padding_width | padding size along x |
[in] | act_min | Min clamping |
[in] | act_max | Max clamping |
[in] | ch_src | number of input tensor channels |
[in,out] | src | pointer to input tensor |
[in] | bufferA | temporary buffer used for optimization; it is necessary when both ARM_MATH_LOOPUNROLL and ARM_MATH_DSP are defined. Required space: (ch_src * dim_dst_width) * sizeof(q15_t) bytes. Use arm_avgpool_s8_get_buffer_size() to get the size. |
[in,out] | dst | pointer to output tensor |
References arm_nn_accumulate_q7_to_q15(), arm_q7_to_q15_no_shift(), MAX, and MIN.
int32_t arm_avgpool_s8_get_buffer_size | ( | const int | dim_dst_width, |
const int | ch_src | ||
) |
[in] | dim_dst_width | output tensor width |
[in] | ch_src | number of input tensor channels |
void arm_max_pool_s8 | ( | const uint16_t | input_y, |
const uint16_t | input_x, | ||
const uint16_t | output_y, | ||
const uint16_t | output_x, | ||
const uint16_t | stride_y, | ||
const uint16_t | stride_x, | ||
const uint16_t | kernel_y, | ||
const uint16_t | kernel_x, | ||
const uint16_t | pad_y, | ||
const uint16_t | pad_x, | ||
const int8_t | act_min, | ||
const int8_t | act_max, | ||
const uint16_t | channel_in, | ||
int8_t * | input, | ||
int16_t * | tmp_buffer, | ||
int8_t * | output | ||
) |
[in] | input_y | input tensor dimension along y |
[in] | input_x | input tensor dimension along x |
[in] | output_y | output tensor dimension along y |
[in] | output_x | output tensor dimension along x |
[in] | stride_y | stride along y |
[in] | stride_x | stride along x |
[in] | kernel_y | filter kernel size along y |
[in] | kernel_x | filter kernel size along x |
[in] | pad_y | padding size along y |
[in] | pad_x | padding size along x |
[in] | act_min | Activation min. Lower limit to clamp output to. Range: int8 |
[in] | act_max | Activation max. Upper limit to clamp output to. Range: int8 |
[in] | channel_in | number of input channels |
[in] | input | pointer to input tensor |
[in] | tmp_buffer | Not used. |
[in,out] | output | pointer to output tensor |
Referenced by arm_max_pool_s8_opt().
void arm_max_pool_s8_opt | ( | const uint16_t | input_y, |
const uint16_t | input_x, | ||
const uint16_t | output_y, | ||
const uint16_t | output_x, | ||
const uint16_t | stride_y, | ||
const uint16_t | stride_x, | ||
const uint16_t | kernel_y, | ||
const uint16_t | kernel_x, | ||
const uint16_t | pad_y, | ||
const uint16_t | pad_x, | ||
const int8_t | act_min, | ||
const int8_t | act_max, | ||
const uint16_t | depth, | ||
int8_t * | input, | ||
int16_t * | tmp_buffer, | ||
int8_t * | output | ||
) |
[in] | input_y | input tensor dimension along y |
[in] | input_x | input tensor dimension along x |
[in] | output_y | output tensor dimension along y |
[in] | output_x | output tensor dimension along x |
[in] | stride_y | stride along y |
[in] | stride_x | stride along x |
[in] | kernel_y | filter kernel size along y |
[in] | kernel_x | filter kernel size along x |
[in] | pad_y | padding size along y |
[in] | pad_x | padding size along x |
[in] | act_min | Activation min. Lower limit to clamp output to. Range: int8 |
[in] | act_max | Activation max. Upper limit to clamp output to. Range: int8 |
[in] | depth | number of input channels |
[in] | input | pointer to input tensor |
[in] | tmp_buffer | Not used. |
[in,out] | output | pointer to output tensor |
This optimized implementation is recommended when depth is >= 4 and dimensions are large.
References arm_max_pool_s8().
void arm_maxpool_q7_HWC | ( | q7_t * | Im_in, |
const uint16_t | dim_im_in, | ||
const uint16_t | ch_im_in, | ||
const uint16_t | dim_kernel, | ||
const uint16_t | padding, | ||
const uint16_t | stride, | ||
const uint16_t | dim_im_out, | ||
q7_t * | bufferA, | ||
q7_t * | Im_out | ||
) |
[in,out] | Im_in | pointer to input tensor |
[in] | dim_im_in | input tensor dimension |
[in] | ch_im_in | number of input tensor channels |
[in] | dim_kernel | filter kernel size |
[in] | padding | padding sizes |
[in] | stride | pooling stride |
[in] | dim_im_out | output tensor dimension |
[in,out] | bufferA | Not used |
[in,out] | Im_out | pointer to output tensor |
The pooling function is implemented as split x-pooling then y-pooling.
This pooling function is input-destructive. Input data is undefined after calling this function.
Referenced by main().