Spaces:
Running
Running
whisper : extend information in whisper_print_timings()
Browse files
- whisper.cpp +23 -4
whisper.cpp
CHANGED
|
@@ -474,6 +474,12 @@ struct whisper_context {
|
|
| 474 |
int64_t t_decode_us = 0;
|
| 475 |
int64_t t_start_us = 0;
|
| 476 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 477 |
ggml_type wtype; // weight type (FP32 or FP16)
|
| 478 |
|
| 479 |
whisper_mel mel;
|
|
@@ -1620,6 +1626,7 @@ static bool whisper_encode(
|
|
| 1620 |
ggml_free(ctx0);
|
| 1621 |
|
| 1622 |
wctx.t_encode_us += ggml_time_us() - t_start_us;
|
|
|
|
| 1623 |
|
| 1624 |
return true;
|
| 1625 |
}
|
|
@@ -1993,6 +2000,7 @@ static bool whisper_decode(
|
|
| 1993 |
ggml_free(ctx0);
|
| 1994 |
|
| 1995 |
wctx.t_decode_us += ggml_time_us() - t_start_us;
|
|
|
|
| 1996 |
|
| 1997 |
return true;
|
| 1998 |
}
|
|
@@ -2644,12 +2652,17 @@ whisper_token whisper_token_transcribe(void) {
|
|
| 2644 |
void whisper_print_timings(struct whisper_context * ctx) {
|
| 2645 |
const int64_t t_end_us = ggml_time_us();
|
| 2646 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2647 |
fprintf(stderr, "\n");
|
|
|
|
| 2648 |
fprintf(stderr, "%s: load time = %8.2f ms\n", __func__, ctx->t_load_us/1000.0f);
|
| 2649 |
fprintf(stderr, "%s: mel time = %8.2f ms\n", __func__, ctx->t_mel_us/1000.0f);
|
| 2650 |
-
fprintf(stderr, "%s: sample time = %8.2f ms\n", __func__, ctx->t_sample_us/1000.0f);
|
| 2651 |
-
fprintf(stderr, "%s: encode time = %8.2f ms / %.2f ms per
|
| 2652 |
-
fprintf(stderr, "%s: decode time = %8.2f ms / %.2f ms per
|
| 2653 |
fprintf(stderr, "%s: total time = %8.2f ms\n", __func__, (t_end_us - ctx->t_start_us)/1000.0f);
|
| 2654 |
}
|
| 2655 |
|
|
@@ -3004,7 +3017,7 @@ static void whisper_process_logits(
|
|
| 3004 |
}
|
| 3005 |
|
| 3006 |
static whisper_token_data whisper_sample_token(
|
| 3007 |
-
|
| 3008 |
const whisper_decoder & decoder,
|
| 3009 |
bool best) {
|
| 3010 |
whisper_token_data result = {
|
|
@@ -3059,6 +3072,8 @@ static whisper_token_data whisper_sample_token(
|
|
| 3059 |
result.pt = result.p;
|
| 3060 |
}
|
| 3061 |
|
|
|
|
|
|
|
| 3062 |
return result;
|
| 3063 |
}
|
| 3064 |
|
|
@@ -3127,6 +3142,8 @@ static std::vector<whisper_token_data> whisper_sample_token_topk(
|
|
| 3127 |
}
|
| 3128 |
}
|
| 3129 |
|
|
|
|
|
|
|
| 3130 |
return result;
|
| 3131 |
}
|
| 3132 |
|
|
@@ -3726,6 +3743,7 @@ int whisper_full(
|
|
| 3726 |
__func__, j, decoder.sequence.entropy, params.entropy_thold);
|
| 3727 |
|
| 3728 |
decoder.failed = true;
|
|
|
|
| 3729 |
|
| 3730 |
continue;
|
| 3731 |
}
|
|
@@ -3747,6 +3765,7 @@ int whisper_full(
|
|
| 3747 |
|
| 3748 |
if (decoder.failed || decoder.sequence.avg_logprobs < params.logprob_thold) {
|
| 3749 |
success = false;
|
|
|
|
| 3750 |
}
|
| 3751 |
|
| 3752 |
if (success) {
|
|
|
|
| 474 |
int64_t t_decode_us = 0;
|
| 475 |
int64_t t_start_us = 0;
|
| 476 |
|
| 477 |
+
int32_t n_sample = 0; // number of tokens sampled
|
| 478 |
+
int32_t n_encode = 0; // number of encoder calls
|
| 479 |
+
int32_t n_decode = 0; // number of decoder calls
|
| 480 |
+
int32_t n_fail_p = 0; // number of logprob threshold failures
|
| 481 |
+
int32_t n_fail_h = 0; // number of entropy threshold failures
|
| 482 |
+
|
| 483 |
ggml_type wtype; // weight type (FP32 or FP16)
|
| 484 |
|
| 485 |
whisper_mel mel;
|
|
|
|
| 1626 |
ggml_free(ctx0);
|
| 1627 |
|
| 1628 |
wctx.t_encode_us += ggml_time_us() - t_start_us;
|
| 1629 |
+
wctx.n_encode++;
|
| 1630 |
|
| 1631 |
return true;
|
| 1632 |
}
|
|
|
|
| 2000 |
ggml_free(ctx0);
|
| 2001 |
|
| 2002 |
wctx.t_decode_us += ggml_time_us() - t_start_us;
|
| 2003 |
+
wctx.n_decode++;
|
| 2004 |
|
| 2005 |
return true;
|
| 2006 |
}
|
|
|
|
| 2652 |
void whisper_print_timings(struct whisper_context * ctx) {
|
| 2653 |
const int64_t t_end_us = ggml_time_us();
|
| 2654 |
|
| 2655 |
+
const int32_t n_sample = std::max(1, ctx->n_sample);
|
| 2656 |
+
const int32_t n_encode = std::max(1, ctx->n_encode);
|
| 2657 |
+
const int32_t n_decode = std::max(1, ctx->n_decode);
|
| 2658 |
+
|
| 2659 |
fprintf(stderr, "\n");
|
| 2660 |
+
fprintf(stderr, "%s: fallbacks = %3d p / %3d h\n", __func__, ctx->n_fail_p, ctx->n_fail_h);
|
| 2661 |
fprintf(stderr, "%s: load time = %8.2f ms\n", __func__, ctx->t_load_us/1000.0f);
|
| 2662 |
fprintf(stderr, "%s: mel time = %8.2f ms\n", __func__, ctx->t_mel_us/1000.0f);
|
| 2663 |
+
fprintf(stderr, "%s: sample time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f*ctx->t_sample_us, n_sample, 1e-3f*ctx->t_sample_us/n_sample);
|
| 2664 |
+
fprintf(stderr, "%s: encode time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f*ctx->t_encode_us, n_encode, 1e-3f*ctx->t_encode_us/n_encode);
|
| 2665 |
+
fprintf(stderr, "%s: decode time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f*ctx->t_decode_us, n_decode, 1e-3f*ctx->t_decode_us/n_decode);
|
| 2666 |
fprintf(stderr, "%s: total time = %8.2f ms\n", __func__, (t_end_us - ctx->t_start_us)/1000.0f);
|
| 2667 |
}
|
| 2668 |
|
|
|
|
| 3017 |
}
|
| 3018 |
|
| 3019 |
static whisper_token_data whisper_sample_token(
|
| 3020 |
+
whisper_context & ctx,
|
| 3021 |
const whisper_decoder & decoder,
|
| 3022 |
bool best) {
|
| 3023 |
whisper_token_data result = {
|
|
|
|
| 3072 |
result.pt = result.p;
|
| 3073 |
}
|
| 3074 |
|
| 3075 |
+
ctx.n_sample++;
|
| 3076 |
+
|
| 3077 |
return result;
|
| 3078 |
}
|
| 3079 |
|
|
|
|
| 3142 |
}
|
| 3143 |
}
|
| 3144 |
|
| 3145 |
+
ctx.n_sample++;
|
| 3146 |
+
|
| 3147 |
return result;
|
| 3148 |
}
|
| 3149 |
|
|
|
|
| 3743 |
__func__, j, decoder.sequence.entropy, params.entropy_thold);
|
| 3744 |
|
| 3745 |
decoder.failed = true;
|
| 3746 |
+
ctx->n_fail_h++;
|
| 3747 |
|
| 3748 |
continue;
|
| 3749 |
}
|
|
|
|
| 3765 |
|
| 3766 |
if (decoder.failed || decoder.sequence.avg_logprobs < params.logprob_thold) {
|
| 3767 |
success = false;
|
| 3768 |
+
ctx->n_fail_p++;
|
| 3769 |
}
|
| 3770 |
|
| 3771 |
if (success) {
|