TPU-KERNEL_Technical_Reference_Manual
Introduction
TPU Architecture
Memory Types
TPU Working Mode
TPU Programming
Host Side
Device Side
Basic Definitions
Rounding Mode
Common Functions
Synchronization Functions
Utils Functions
GDMA Functions
Basic BDC Functions
Data Conversion and Rounding Functions
Unary Functions
Binary Functions
Floating Point Binary Functions
Fixed Point Binary Functions
Compare and Select Functions
Floating Point Matrix Functions
Fixed Point Matrix Functions
Floating Point Neural Network Functions
Fixed Point Neural Network Functions
Activation Functions
Scatter and Gather Functions
Special Functions
Quantization Functions
HAU Functions
TPU-KERNEL_Technical_Reference_Manual
»
Index
Index
A | B | C | D | F | G | L | N | R | S | T | U | V
A
addr (C++ member)
addr_t (C++ type)
ALIGN (C macro)
B
bf16 (C++ member)
bfloat16 (C++ union)
bits (C++ member), [1]
C
context (C++ member)
D
data_type_t (C++ enum)
dim2 (C++ class)
dim2::h (C++ member)
dim2::w (C++ member)
dim4 (C++ class)
dim4::c (C++ member)
dim4::h (C++ member)
dim4::n (C++ member)
dim4::w (C++ member)
DIV_UP (C macro)
DT_BFP16 (C++ enumerator)
DT_FP16 (C++ enumerator)
DT_FP32 (C++ enumerator)
DT_INT16 (C++ enumerator)
DT_INT32 (C++ enumerator)
DT_INT8 (C++ enumerator)
DT_UINT16 (C++ enumerator)
DT_UINT32 (C++ enumerator)
DT_UINT8 (C++ enumerator)
F
f16 (C++ member)
f32 (C++ member)
float16 (C++ union)
G
global_addr_t (C++ type)
L
l2_sram_addr_t (C++ type)
local_addr_t (C++ type)
LOCAL_MEM_SIZE (C macro)
N
NPU_NUM (C macro)
R
RM_DOWN (C++ enumerator)
RM_HALF_AWAY_FROM_ZERO (C++ enumerator)
RM_HALF_DOWN (C++ enumerator)
RM_HALF_TO_EVEN (C++ enumerator)
RM_HALF_UP (C++ enumerator)
RM_TOWARDS_ZERO (C++ enumerator)
RM_UP (C++ enumerator)
rounding_mode_t (C++ enum)
S
s16 (C++ member)
s32 (C++ member)
s8 (C++ member)
SCALAR (C++ enumerator)
scalar_t (C++ member), (C++ union)
system_addr_t (C++ type)
T
TENSOR (C++ enumerator)
tpu_aligned_feature_size (C++ function)
tpu_aligned_stride (C++ function)
tpu_bank_index (C++ function)
tpu_bank_num (C++ function)
tpu_bdc_abs (C++ function)
tpu_bdc_and (C++ function)
tpu_bdc_and_C (C++ function)
tpu_bdc_arithmetic_sequence_bcast (C++ function)
tpu_bdc_arithmetic_sequence_distribute (C++ function)
tpu_bdc_arithmetic_sequence_general (C++ function)
tpu_bdc_arithmetic_shift (C++ function)
tpu_bdc_arithmetic_shift_C (C++ function)
tpu_bdc_batch_bcast_h_gather (C++ function)
tpu_bdc_batch_bcast_h_gather_exception (C++ function)
tpu_bdc_batch_bcast_h_scatter (C++ function)
tpu_bdc_batch_bcast_w_gather (C++ function)
tpu_bdc_batch_bcast_w_gather_exception (C++ function)
tpu_bdc_batch_bcast_w_mask_select (C++ function)
tpu_bdc_batch_bcast_w_scatter (C++ function)
tpu_bdc_cast (C++ function)
tpu_bdc_cpy (C++ function)
tpu_bdc_cpy_cross_npu (C++ function)
tpu_bdc_cw_trans (C++ function)
tpu_bdc_equal (C++ function)
tpu_bdc_equal_C (C++ function)
tpu_bdc_equal_select (C++ function)
tpu_bdc_fp32_arccos (C++ function)
tpu_bdc_fp32_arcsin (C++ function)
tpu_bdc_fp32_C_div (C++ function)
tpu_bdc_fp32_cos (C++ function)
tpu_bdc_fp32_cot (C++ function)
tpu_bdc_fp32_dequant (C++ function)
tpu_bdc_fp32_div (C++ function)
tpu_bdc_fp32_div_C (C++ function)
tpu_bdc_fp32_elu (C++ function)
tpu_bdc_fp32_erf (C++ function)
tpu_bdc_fp32_erfc (C++ function)
tpu_bdc_fp32_exp (C++ function)
tpu_bdc_fp32_expm1 (C++ function)
tpu_bdc_fp32_gelu (C++ function)
tpu_bdc_fp32_gelu_fast (C++ function)
tpu_bdc_fp32_log (C++ function)
tpu_bdc_fp32_log1p (C++ function)
tpu_bdc_fp32_logx (C++ function)
tpu_bdc_fp32_mac (C++ function)
tpu_bdc_fp32_mac_C (C++ function)
tpu_bdc_fp32_mish (C++ function)
tpu_bdc_fp32_mm (C++ function)
tpu_bdc_fp32_mm_left_const (C++ function)
tpu_bdc_fp32_mm_left_trans (C++ function)
tpu_bdc_fp32_pc_dequant (C++ function)
tpu_bdc_fp32_pc_requant (C++ function)
tpu_bdc_fp32_pow (C++ function)
tpu_bdc_fp32_pow_C (C++ function), [1]
tpu_bdc_fp32_reciprocal (C++ function)
tpu_bdc_fp32_requant (C++ function)
tpu_bdc_fp32_rsqrt (C++ function)
tpu_bdc_fp32_selu (C++ function)
tpu_bdc_fp32_sigmoid (C++ function)
tpu_bdc_fp32_silu (C++ function)
tpu_bdc_fp32_sin (C++ function)
tpu_bdc_fp32_softplus (C++ function)
tpu_bdc_fp32_sqrt (C++ function)
tpu_bdc_fp32_tan (C++ function)
tpu_bdc_fp32_tanh (C++ function)
tpu_bdc_fp32_tunable_C_div (C++ function)
tpu_bdc_fp32_tunable_div (C++ function)
tpu_bdc_fp32_tunable_div_C (C++ function)
tpu_bdc_fp32_tunable_reciprocal (C++ function)
tpu_bdc_fp32_tunable_rsqrt (C++ function)
tpu_bdc_fp32_tunable_sqrt (C++ function)
tpu_bdc_fp32_vc_div (C++ function)
tpu_bdc_fp_add (C++ function)
tpu_bdc_fp_add_bias_sqr (C++ function)
tpu_bdc_fp_add_C (C++ function)
tpu_bdc_fp_add_C_sqr (C++ function)
tpu_bdc_fp_avg_pool2d (C++ function)
tpu_bdc_fp_bias (C++ function)
tpu_bdc_fp_C_sub (C++ function)
tpu_bdc_fp_ceil (C++ function)
tpu_bdc_fp_conv2d (C++ function)
tpu_bdc_fp_conv2d_for_deconv2d (C++ function)
tpu_bdc_fp_conv2d_kernel_const (C++ function)
tpu_bdc_fp_depthwise2d (C++ function)
tpu_bdc_fp_diff_abs (C++ function)
tpu_bdc_fp_diff_abs_C (C++ function)
tpu_bdc_fp_floor (C++ function)
tpu_bdc_fp_hsigmoid (C++ function)
tpu_bdc_fp_hswish (C++ function)
tpu_bdc_fp_ins_avg_pool2d (C++ function)
tpu_bdc_fp_max_pool2d (C++ function)
tpu_bdc_fp_mm (C++ function)
tpu_bdc_fp_mm_all_trans (C++ function)
tpu_bdc_fp_mm_L_const (C++ function)
tpu_bdc_fp_mm_L_const_all_trans (C++ function)
tpu_bdc_fp_mm_L_const_R_trans (C++ function)
tpu_bdc_fp_mm_R_const (C++ function)
tpu_bdc_fp_mm_R_const_all_trans (C++ function)
tpu_bdc_fp_mm_R_trans (C++ function)
tpu_bdc_fp_mul (C++ function)
tpu_bdc_fp_mul_C (C++ function)
tpu_bdc_fp_round (C++ function)
tpu_bdc_fp_scale (C++ function)
tpu_bdc_fp_scale_bias (C++ function)
tpu_bdc_fp_scale_bias_C (C++ function)
tpu_bdc_fp_sub (C++ function)
tpu_bdc_fp_sub_bias_sqr (C++ function)
tpu_bdc_fp_sub_C (C++ function)
tpu_bdc_fp_sub_C_sqr (C++ function)
tpu_bdc_fp_taylor (C++ function)
tpu_bdc_fp_vc_add (C++ function)
tpu_bdc_fp_vc_mul (C++ function)
tpu_bdc_fp_vc_sub (C++ function)
tpu_bdc_greater (C++ function)
tpu_bdc_greater_C (C++ function)
tpu_bdc_greater_equal (C++ function)
tpu_bdc_greater_equal_C (C++ function)
tpu_bdc_greater_select (C++ function)
tpu_bdc_hw_gather (C++ function)
tpu_bdc_hw_gather_exception (C++ function)
tpu_bdc_hw_scatter (C++ function)
tpu_bdc_int8_avg_pool2d (C++ function)
tpu_bdc_int8_mac (C++ function)
tpu_bdc_int8_mac_C (C++ function)
tpu_bdc_int8_max_pool2d (C++ function)
tpu_bdc_int8_mm (C++ function)
tpu_bdc_int8_mm_L_const (C++ function)
tpu_bdc_int8_mm_L_trans (C++ function)
tpu_bdc_int8_pc_zp_mm (C++ function)
tpu_bdc_int8_pc_zp_mm_all_trans (C++ function)
tpu_bdc_int8_pc_zp_mm_L_const (C++ function)
tpu_bdc_int8_pc_zp_mm_L_const_all_trans (C++ function)
tpu_bdc_int8_pc_zp_mm_L_const_R_trans (C++ function)
tpu_bdc_int8_pc_zp_mm_R_const (C++ function)
tpu_bdc_int8_pc_zp_mm_R_const_all_trans (C++ function)
tpu_bdc_int8_pc_zp_mm_R_trans (C++ function)
tpu_bdc_int8_zp_mm (C++ function)
tpu_bdc_int8_zp_mm_all_trans (C++ function)
tpu_bdc_int8_zp_mm_L_const (C++ function)
tpu_bdc_int8_zp_mm_L_const_all_trans (C++ function)
tpu_bdc_int8_zp_mm_L_const_R_trans (C++ function)
tpu_bdc_int8_zp_mm_R_const (C++ function)
tpu_bdc_int8_zp_mm_R_const_all_trans (C++ function)
tpu_bdc_int8_zp_mm_R_trans (C++ function)
tpu_bdc_int_add (C++ function)
tpu_bdc_int_add_C (C++ function)
tpu_bdc_int_C_sub (C++ function)
tpu_bdc_int_dequant (C++ function)
tpu_bdc_int_max_C (C++ function)
tpu_bdc_int_min_C (C++ function)
tpu_bdc_int_mm (C++ function)
tpu_bdc_int_mm_L_const (C++ function)
tpu_bdc_int_mm_L_trans (C++ function)
tpu_bdc_int_mul (C++ function)
tpu_bdc_int_mul_C (C++ function)
tpu_bdc_int_pc_dequant (C++ function)
tpu_bdc_int_pc_requant (C++ function)
tpu_bdc_int_pcs_add (C++ function)
tpu_bdc_int_pcs_add_C (C++ function)
tpu_bdc_int_pcs_C_sub (C++ function)
tpu_bdc_int_pcs_mm (C++ function)
tpu_bdc_int_pcs_mm_L_const (C++ function)
tpu_bdc_int_pcs_mm_L_trans (C++ function)
tpu_bdc_int_pcs_mul (C++ function)
tpu_bdc_int_pcs_mul_C (C++ function)
tpu_bdc_int_pcs_sub (C++ function)
tpu_bdc_int_pcs_sub_C (C++ function)
tpu_bdc_int_requant (C++ function)
tpu_bdc_int_sub (C++ function)
tpu_bdc_int_sub_C (C++ function)
tpu_bdc_int_vc_add (C++ function)
tpu_bdc_int_vc_mul (C++ function)
tpu_bdc_int_vc_sub (C++ function)
tpu_bdc_less (C++ function)
tpu_bdc_less_C (C++ function)
tpu_bdc_less_equal (C++ function)
tpu_bdc_less_equal_C (C++ function)
tpu_bdc_less_select (C++ function)
tpu_bdc_load_fp32_arcsin_coeff (C++ function)
tpu_bdc_load_fp32_cos_coeff (C++ function)
tpu_bdc_load_fp32_erf_coeff (C++ function)
tpu_bdc_load_fp32_exp_coeff (C++ function)
tpu_bdc_load_fp32_exp_table (C++ function)
tpu_bdc_load_fp32_log_coeff (C++ function)
tpu_bdc_load_fp32_sin_coeff (C++ function)
tpu_bdc_load_fp32_tan_coeff (C++ function)
tpu_bdc_logical_shift (C++ function)
tpu_bdc_logical_shift_C (C++ function)
tpu_bdc_max (C++ function)
tpu_bdc_max_C (C++ function)
tpu_bdc_maximum_greater_select (C++ function)
tpu_bdc_min (C++ function)
tpu_bdc_min_C (C++ function)
tpu_bdc_minimum_less_select (C++ function)
tpu_bdc_neg (C++ function)
tpu_bdc_not (C++ function)
tpu_bdc_not_equal (C++ function)
tpu_bdc_not_equal_C (C++ function)
tpu_bdc_npu_bcast (C++ function)
tpu_bdc_or (C++ function)
tpu_bdc_or_C (C++ function)
tpu_bdc_prelu (C++ function)
tpu_bdc_relu (C++ function)
tpu_bdc_set_C (C++ function)
tpu_bdc_sign (C++ function)
tpu_bdc_table_lookup (C++ function)
tpu_bdc_vc_and (C++ function)
tpu_bdc_vc_equal (C++ function)
tpu_bdc_vc_greater (C++ function)
tpu_bdc_vc_greater_equal (C++ function)
tpu_bdc_vc_less (C++ function)
tpu_bdc_vc_less_equal (C++ function)
tpu_bdc_vc_max (C++ function)
tpu_bdc_vc_min (C++ function)
tpu_bdc_vc_not_equal (C++ function)
tpu_bdc_vc_or (C++ function)
tpu_bdc_vc_xor (C++ function)
tpu_bdc_w_gather (C++ function)
tpu_bdc_w_gather_exception (C++ function)
tpu_bdc_w_scatter (C++ function)
tpu_bdc_wc_trans (C++ function)
tpu_bdc_xor (C++ function)
tpu_bdc_xor_C (C++ function)
tpu_channle_num_per_npu (C++ function)
tpu_compact_stride (C++ function)
tpu_continuous_stride (C++ function)
tpu_data_type_bits (C++ function)
tpu_data_type_size (C++ function)
tpu_eu_num (C++ function)
tpu_flush_cache (C++ function)
tpu_gdma_channel_bcast_L2L (C++ function)
tpu_gdma_channel_bcast_S2L (C++ function)
tpu_gdma_compact_L2S (C++ function)
tpu_gdma_compact_nc_trans_L2S (C++ function)
tpu_gdma_compact_nc_trans_S2L (C++ function)
tpu_gdma_compact_S2L (C++ function)
tpu_gdma_compress_normal_max_bytes (C++ function)
tpu_gdma_compress_RACU_L2S (C++ function), [1]
tpu_gdma_compress_RACU_max_meta_bytes (C++ function)
tpu_gdma_compress_RACU_max_racu_bytes (C++ function)
tpu_gdma_cpy_cw_trans_L2L (C++ function)
tpu_gdma_cpy_cw_trans_L2S (C++ function)
tpu_gdma_cpy_cw_trans_S2L (C++ function)
tpu_gdma_cpy_cw_trans_S2S (C++ function)
tpu_gdma_cpy_L2L (C++ function)
tpu_gdma_cpy_L2S (C++ function)
tpu_gdma_cpy_nc_trans_L2L (C++ function)
tpu_gdma_cpy_nc_trans_L2S (C++ function)
tpu_gdma_cpy_nc_trans_S2L (C++ function)
tpu_gdma_cpy_nc_trans_S2S (C++ function)
tpu_gdma_cpy_S2L (C++ function)
tpu_gdma_cpy_S2S (C++ function)
tpu_gdma_decompress_normal_S2L (C++ function)
tpu_gdma_general_cpy_L2S (C++ function)
tpu_gdma_general_cpy_S2L (C++ function)
tpu_gdma_h_gather_L2L (C++ function)
tpu_gdma_h_gather_L2S (C++ function)
tpu_gdma_h_gather_S2L (C++ function)
tpu_gdma_h_gather_S2S (C++ function)
tpu_gdma_h_scatter_L2L (C++ function)
tpu_gdma_h_scatter_L2S (C++ function)
tpu_gdma_h_scatter_S2L (C++ function)
tpu_gdma_h_scatter_S2S (C++ function)
tpu_gdma_mask_select_L2S (C++ function), [1]
tpu_gdma_matrix_L2S (C++ function)
tpu_gdma_matrix_S2L (C++ function)
tpu_gdma_matrix_trans_L2S (C++ function)
tpu_gdma_matrix_trans_S2L (C++ function)
tpu_gdma_nonzero_L2S (C++ function), [1]
tpu_gdma_reverse_L2L (C++ function)
tpu_gdma_reverse_L2S (C++ function)
tpu_gdma_reverse_S2L (C++ function)
tpu_gdma_reverse_S2S (C++ function)
tpu_gdma_set_C_local (C++ function)
tpu_gdma_set_C_system (C++ function)
tpu_gdma_system_cpy (C++ function)
tpu_gdma_vector_L2S (C++ function)
tpu_gdma_vector_S2L (C++ function)
tpu_global_mem_addr (C++ function)
tpu_hau_line_gather (C++ function)
tpu_hau_sort (C++ function)
tpu_hau_sort_natural_index (C++ function)
tpu_hau_sort_specific_index (C++ function)
tpu_initialize (C++ function)
tpu_invalidate_cache (C++ function)
tpu_is_parallel_state (C++ function)
tpu_kernel_launch_async (C++ function)
tpu_kernel_launch_sync (C++ function)
tpu_kernel_sync (C++ function)
tpu_l2_sram_addr (C++ function)
tpu_l2_sram_get_start_addr (C++ function)
tpu_l2_sram_size (C++ function)
tpu_line_aligned_stride (C++ function)
tpu_local_mem_addr (C++ function)
tpu_local_mem_addr_unified (C++ function)
tpu_local_mem_get_start_addr (C++ function)
tpu_local_mem_size_per_npu (C++ function)
tpu_npu_index (C++ function)
tpu_npu_num (C++ function)
tpu_parallel_end (C++ function)
tpu_parallel_start (C++ function)
tpu_poll (C++ function)
type (C++ member)
U
u16 (C++ member)
u32 (C++ member)
u8 (C++ member)
V
var_context_t (C++ union)
var_type_t (C++ enum)
variable_t (C++ class)
VECTOR (C++ enumerator)