[go: up one dir, main page]

Skip to content

Commit

Permalink
delete some unused kernel
Browse files Browse the repository at this point in the history
  • Loading branch information
chenqy4933 committed Oct 10, 2023
1 parent 673ce0e commit 405d866
Show file tree
Hide file tree
Showing 3 changed files with 0 additions and 432 deletions.
41 changes: 0 additions & 41 deletions src/kern/optimized/arm/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -204,47 +204,6 @@ TaskSet llm_matmul_compute_int4_float(
return TaskSet{{task1, M}, {task2, N}};
}

//! Matmul of int4-quantized weights with a float feature map, using the packed
//! weight kernel that produces 8 output columns per call.
//!
//! src0 is the quantized weights: values are stored in blocks of 32 and each
//! block shares one scale. src0 layout is {N, K}, src1 (the feature map) layout
//! is {M, K}, and dst is {M, N}. bias may be nullptr; otherwise it is offset by
//! the output column.
//!
//! The returned TaskSet has two stages:
//!   - task1, M sub-tasks: quantize one row of src1 into workspace per index.
//!   - task2, N / 8 sub-tasks: for one group of 8 output columns, run the packed
//!     dot kernel against every quantized row.
//!
//! workspace must hold M quantized rows, i.e. M * weight_q80_stride bytes; size
//! is the number of bytes available in workspace.
//!
//! NOTE(review): only N / 8 column groups are scheduled, so when N is not a
//! multiple of 8 the trailing columns of dst are never written — confirm that
//! callers guarantee N % 8 == 0.
TaskSet llm_matmul_compute_int4_float_packed(
        float* dst, const void* src0, const float* bias, const float* src1, uint32_t M,
        uint32_t N, uint32_t K, void* workspace, uint32_t size) {
    uint32_t weight_q40_stride =
            K * dtype_in_byte(DType::Int4) / dtype_block_size(DType::Int4);
    uint32_t weight_q80_stride =
            K * dtype_in_byte(DType::Int8) / dtype_block_size(DType::Int8);
    //! task1 stores row m at byte offset m * weight_q80_stride, so the workspace
    //! has to cover all M quantized rows (not just K floats). The product is
    //! formed in size_t so it cannot wrap in 32 bits.
    INFER_ASSERT(
            static_cast<size_t>(M) * weight_q80_stride <= size,
            "workspace is not enough.");
    //! Quantize the input and store it in workspace: the input is smaller than
    //! the weights, so quantizing it reduces the memory traffic.
    auto task1 = [=](const TaskId& id) {
        for (uint32_t m = id.start; m < id.end; m++) {
            BlockQ80* q_src1 = reinterpret_cast<BlockQ80*>(
                    static_cast<uint8_t*>(workspace) +
                    static_cast<size_t>(m) * weight_q80_stride);
            quantize_row_q8_0(src1 + static_cast<size_t>(m) * K, q_src1, K);
        }
    };
    int8_t* q_src = static_cast<int8_t*>(workspace);
    auto task2 = [=](const TaskId& id) {
        for (uint32_t n = id.start; n < id.end; n++) {
            //! Each task index n covers 8 consecutive weight rows / output columns.
            const size_t col = static_cast<size_t>(n) * 8;
            const void* q_weight =
                    static_cast<const uint8_t*>(src0) + col * weight_q40_stride;
            for (uint32_t m = 0; m < M; m++) {
                int8_t* src = q_src + static_cast<size_t>(m) * weight_q80_stride;
                float* dst_ptr = dst + static_cast<size_t>(m) * N + col;
                const float* bias_ptr = bias ? bias + col : nullptr;
                //! NOTE(review): without __ARM_FEATURE_DOTPROD nothing is computed
                //! here and dst is left untouched — confirm this kernel is only
                //! selected on targets with the dot-product extension.
#if defined(__ARM_FEATURE_DOTPROD)
                vec_vec_dot_q40_with_q80_packed(K, q_weight, src, dst_ptr, bias_ptr);
                // vec_vec_dot_q40_with_q80_packed_asm(K, q_weight, src, dst_ptr,
                // bias_ptr);
#endif
            }
        }
    };
    return TaskSet{{task1, M}, {task2, N / 8}};
}

//! Returns the workspace size in bytes: one float for every element of an
//! {M, K} matrix. The first (unnamed) argument and N do not affect the result.
size_t llm_matmul_get_workspace_float(uint32_t, uint32_t M, uint32_t N, uint32_t K) {
    const size_t bytes_per_row = sizeof(float) * K;
    return bytes_per_row * M;
}
Expand Down
1 change: 0 additions & 1 deletion src/kern/optimized/arm/kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ PartialImplementKernel(
// Kernel registration list: each PartialImplementKernel(Id, fn) entry names a
// kernel id together with an llm_* function.
// NOTE(review): presumably the macro binds the id to that function as its
// implementation for this backend — confirm against the macro definition.
PartialImplementKernel(RmsNormFloat, llm_rms_norm_compute_float);
PartialImplementKernel(EmbeddingGetInt4Float, llm_embedding_get_int4_float);
PartialImplementKernel(MatmulInt4Float, llm_matmul_compute_int4_float);
PartialImplementKernel(MatmulInt4FloatPacked, llm_matmul_compute_int4_float_packed);
PartialImplementKernel(
MatmulWithHeadStrideFloat, llm_matmul_compute_with_head_stride_float);
PartialImplementKernel(HeadBatchedMatmulFloat, llm_head_batched_matmul_compute_float);
Expand Down
Loading

0 comments on commit 405d866

Please sign in to comment.