ggerganov HF Staff committed on
Commit
7e75e6d
·
unverified ·
1 Parent(s): 1391b88

whisper : extend information in whisper_print_timings()

Browse files
Files changed (1) hide show
  1. whisper.cpp +23 -4
whisper.cpp CHANGED
@@ -474,6 +474,12 @@ struct whisper_context {
474
  int64_t t_decode_us = 0;
475
  int64_t t_start_us = 0;
476
 
 
 
 
 
 
 
477
  ggml_type wtype; // weight type (FP32 or FP16)
478
 
479
  whisper_mel mel;
@@ -1620,6 +1626,7 @@ static bool whisper_encode(
1620
  ggml_free(ctx0);
1621
 
1622
  wctx.t_encode_us += ggml_time_us() - t_start_us;
 
1623
 
1624
  return true;
1625
  }
@@ -1993,6 +2000,7 @@ static bool whisper_decode(
1993
  ggml_free(ctx0);
1994
 
1995
  wctx.t_decode_us += ggml_time_us() - t_start_us;
 
1996
 
1997
  return true;
1998
  }
@@ -2644,12 +2652,17 @@ whisper_token whisper_token_transcribe(void) {
2644
  void whisper_print_timings(struct whisper_context * ctx) {
2645
  const int64_t t_end_us = ggml_time_us();
2646
 
 
 
 
 
2647
  fprintf(stderr, "\n");
 
2648
  fprintf(stderr, "%s: load time = %8.2f ms\n", __func__, ctx->t_load_us/1000.0f);
2649
  fprintf(stderr, "%s: mel time = %8.2f ms\n", __func__, ctx->t_mel_us/1000.0f);
2650
- fprintf(stderr, "%s: sample time = %8.2f ms\n", __func__, ctx->t_sample_us/1000.0f);
2651
- fprintf(stderr, "%s: encode time = %8.2f ms / %.2f ms per layer\n", __func__, ctx->t_encode_us/1000.0f, ctx->t_encode_us/1000.0f/ctx->model.hparams.n_audio_layer);
2652
- fprintf(stderr, "%s: decode time = %8.2f ms / %.2f ms per layer\n", __func__, ctx->t_decode_us/1000.0f, ctx->t_decode_us/1000.0f/ctx->model.hparams.n_text_layer);
2653
  fprintf(stderr, "%s: total time = %8.2f ms\n", __func__, (t_end_us - ctx->t_start_us)/1000.0f);
2654
  }
2655
 
@@ -3004,7 +3017,7 @@ static void whisper_process_logits(
3004
  }
3005
 
3006
  static whisper_token_data whisper_sample_token(
3007
- const whisper_context & ctx,
3008
  const whisper_decoder & decoder,
3009
  bool best) {
3010
  whisper_token_data result = {
@@ -3059,6 +3072,8 @@ static whisper_token_data whisper_sample_token(
3059
  result.pt = result.p;
3060
  }
3061
 
 
 
3062
  return result;
3063
  }
3064
 
@@ -3127,6 +3142,8 @@ static std::vector<whisper_token_data> whisper_sample_token_topk(
3127
  }
3128
  }
3129
 
 
 
3130
  return result;
3131
  }
3132
 
@@ -3726,6 +3743,7 @@ int whisper_full(
3726
  __func__, j, decoder.sequence.entropy, params.entropy_thold);
3727
 
3728
  decoder.failed = true;
 
3729
 
3730
  continue;
3731
  }
@@ -3747,6 +3765,7 @@ int whisper_full(
3747
 
3748
  if (decoder.failed || decoder.sequence.avg_logprobs < params.logprob_thold) {
3749
  success = false;
 
3750
  }
3751
 
3752
  if (success) {
 
474
  int64_t t_decode_us = 0;
475
  int64_t t_start_us = 0;
476
 
477
+ int32_t n_sample = 0; // number of tokens sampled
478
+ int32_t n_encode = 0; // number of encoder calls
479
+ int32_t n_decode = 0; // number of decoder calls
480
+ int32_t n_fail_p = 0; // number of logprob threshold failures
481
+ int32_t n_fail_h = 0; // number of entropy threshold failures
482
+
483
  ggml_type wtype; // weight type (FP32 or FP16)
484
 
485
  whisper_mel mel;
 
1626
  ggml_free(ctx0);
1627
 
1628
  wctx.t_encode_us += ggml_time_us() - t_start_us;
1629
+ wctx.n_encode++;
1630
 
1631
  return true;
1632
  }
 
2000
  ggml_free(ctx0);
2001
 
2002
  wctx.t_decode_us += ggml_time_us() - t_start_us;
2003
+ wctx.n_decode++;
2004
 
2005
  return true;
2006
  }
 
2652
  void whisper_print_timings(struct whisper_context * ctx) {
2653
  const int64_t t_end_us = ggml_time_us();
2654
 
2655
+ const int32_t n_sample = std::max(1, ctx->n_sample);
2656
+ const int32_t n_encode = std::max(1, ctx->n_encode);
2657
+ const int32_t n_decode = std::max(1, ctx->n_decode);
2658
+
2659
  fprintf(stderr, "\n");
2660
+ fprintf(stderr, "%s: fallbacks = %3d p / %3d h\n", __func__, ctx->n_fail_p, ctx->n_fail_h);
2661
  fprintf(stderr, "%s: load time = %8.2f ms\n", __func__, ctx->t_load_us/1000.0f);
2662
  fprintf(stderr, "%s: mel time = %8.2f ms\n", __func__, ctx->t_mel_us/1000.0f);
2663
+ fprintf(stderr, "%s: sample time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f*ctx->t_sample_us, n_sample, 1e-3f*ctx->t_sample_us/n_sample);
2664
+ fprintf(stderr, "%s: encode time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f*ctx->t_encode_us, n_encode, 1e-3f*ctx->t_encode_us/n_encode);
2665
+ fprintf(stderr, "%s: decode time = %8.2f ms / %5d runs (%8.2f ms per run)\n", __func__, 1e-3f*ctx->t_decode_us, n_decode, 1e-3f*ctx->t_decode_us/n_decode);
2666
  fprintf(stderr, "%s: total time = %8.2f ms\n", __func__, (t_end_us - ctx->t_start_us)/1000.0f);
2667
  }
2668
 
 
3017
  }
3018
 
3019
  static whisper_token_data whisper_sample_token(
3020
+ whisper_context & ctx,
3021
  const whisper_decoder & decoder,
3022
  bool best) {
3023
  whisper_token_data result = {
 
3072
  result.pt = result.p;
3073
  }
3074
 
3075
+ ctx.n_sample++;
3076
+
3077
  return result;
3078
  }
3079
 
 
3142
  }
3143
  }
3144
 
3145
+ ctx.n_sample++;
3146
+
3147
  return result;
3148
  }
3149
 
 
3743
  __func__, j, decoder.sequence.entropy, params.entropy_thold);
3744
 
3745
  decoder.failed = true;
3746
+ ctx->n_fail_h++;
3747
 
3748
  continue;
3749
  }
 
3765
 
3766
  if (decoder.failed || decoder.sequence.avg_logprobs < params.logprob_thold) {
3767
  success = false;
3768
+ ctx->n_fail_p++;
3769
  }
3770
 
3771
  if (success) {