@@ -1905,10 +1905,10 @@ void llama_sample_top_p(struct llama_context * ctx, llama_token_data_array * can
         return;
     }

-    const int64_t t_start_sample_us = ggml_time_us();
-
     llama_sample_softmax(ctx, candidates);

+    const int64_t t_start_sample_us = ggml_time_us();
+
     // Compute the cumulative probabilities
     float cum_sum = 0.0f;
     size_t last_idx = candidates->size;
@@ -1937,9 +1937,8 @@ void llama_sample_tail_free(struct llama_context * ctx, llama_token_data_array *
         return;
     }

-    const int64_t t_start_sample_us = ggml_time_us();
-
     llama_sample_softmax(nullptr, candidates);
+    const int64_t t_start_sample_us = ggml_time_us();

     // Compute the first and second derivatives
     std::vector<float> first_derivatives(candidates->size - 1);
@@ -1991,11 +1990,11 @@ void llama_sample_typical(struct llama_context * ctx, llama_token_data_array * c
         return;
     }

-    const int64_t t_start_sample_us = ggml_time_us();
-
     // Compute the softmax of logits and calculate entropy
     llama_sample_softmax(nullptr, candidates);

+    const int64_t t_start_sample_us = ggml_time_us();
+
     float entropy = 0.0f;
     for (size_t i = 0; i < candidates->size; ++i) {
         entropy += -candidates->data[i].p * logf(candidates->data[i].p);
@@ -2164,13 +2163,11 @@ llama_token llama_sample_token_mirostat(struct llama_context * ctx, llama_token_

     if (ctx) {
         ctx->t_sample_us += ggml_time_us() - t_start_sample_us;
-        ctx->n_sample++;
     }
     return X;
 }

 llama_token llama_sample_token_mirostat_v2(struct llama_context * ctx, llama_token_data_array * candidates, float tau, float eta, float * mu) {
-    assert(ctx);
     int64_t t_start_sample_us;
     t_start_sample_us = ggml_time_us();
@@ -2185,13 +2182,14 @@ llama_token llama_sample_token_mirostat_v2(struct llama_context * ctx, llama_tok
         candidates->size = 1;
     }

+    if (ctx) {
+        ctx->t_sample_us += ggml_time_us() - t_start_sample_us;
+    }
+
     // Normalize the probabilities of the remaining words
     llama_sample_softmax(ctx, candidates);

     // Sample the next word X from the remaining words
-    if (ctx) {
-        ctx->t_sample_us += ggml_time_us() - t_start_sample_us;
-    }
     llama_token X = llama_sample_token(ctx, candidates);
     t_start_sample_us = ggml_time_us();
0 commit comments