@@ -926,14 +926,26 @@ int main(int argc, char ** argv) {
926
926
res.set_content (ss.str (), " text/vtt" );
927
927
} else if (params.response_format == vjson_format) {
928
928
/* try to match openai/whisper's Python format */
929
- std::string results = output_str (ctx, params, pcmf32s);
929
+ std::string results = output_str (ctx, params, pcmf32s);
930
+ // Get language probabilities
931
+ std::vector<float > lang_probs (whisper_lang_max_id () + 1 , 0 .0f );
932
+ const auto detected_lang_id = whisper_lang_auto_detect (ctx, 0 , params.n_threads , lang_probs.data ());
930
933
json jres = json{
931
934
{" task" , params.translate ? " translate" : " transcribe" },
932
935
{" language" , whisper_lang_str_full (whisper_full_lang_id (ctx))},
933
936
{" duration" , float (pcmf32.size ())/WHISPER_SAMPLE_RATE},
934
937
{" text" , results},
935
- {" segments" , json::array ()}
938
+ {" segments" , json::array ()},
939
+ {" detected_language" , whisper_lang_str_full (detected_lang_id)},
940
+ {" detected_language_probability" , lang_probs[detected_lang_id]},
941
+ {" language_probabilities" , json::object ()}
936
942
};
943
+ // Add all language probabilities
944
+ for (int i = 0 ; i <= whisper_lang_max_id (); ++i) {
945
+ if (lang_probs[i] > 0 .001f ) { // Only include non-negligible probabilities
946
+ jres[" language_probabilities" ][whisper_lang_str (i)] = lang_probs[i];
947
+ }
948
+ }
937
949
const int n_segments = whisper_full_n_segments (ctx);
938
950
for (int i = 0 ; i < n_segments; ++i)
939
951
{
0 commit comments