From 9816f62cd9113715f9d97396ae049fb17096f5e1 Mon Sep 17 00:00:00 2001
From: Daniel Bevenius
Date: Fri, 18 Apr 2025 21:20:11 +0200
Subject: [PATCH 1/2] coreml : skip model load in convert-whisper-to-coreml.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit adds the `skip_model_load` argument to the `convert_encoder`
and `convert_decoder` functions in the `convert-whisper-to-coreml.py`
file.

The motivation for this is that loading the model is only needed if one
intends to perform inference on it after conversion. Skipping the load
also seems to avoid an issue with larger models, where the following
error is thrown:
```console
Running MIL backend_neuralnetwork pipeline: 100%|█████████| 9/9 [00:00<00:00, 35.44 passes/s]
Translating MIL ==> NeuralNetwork Ops: 100%|███████████| 5641/5641 [03:31<00:00, 26.65 ops/s]
Traceback (most recent call last):
  File "/Users/danbev/work/ai/whisper-work/models/convert-whisper-to-coreml.py", line 322, in <module>
    encoder = convert_encoder(hparams, encoder, quantize=args.quantize)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/danbev/work/ai/whisper-work/models/convert-whisper-to-coreml.py", line 255, in convert_encoder
    model = ct.convert(
            ^^^^^^^^^^^
  File "/Users/danbev/work/ai/whisper-work/venv/lib/python3.11/site-packages/coremltools/converters/_converters_entry.py", line 635, in convert
    mlmodel = mil_convert(
              ^^^^^^^^^^^^
  File "/Users/danbev/work/ai/whisper-work/venv/lib/python3.11/site-packages/coremltools/converters/mil/converter.py", line 186, in mil_convert
    return _mil_convert(
           ^^^^^^^^^^^^^
  File "/Users/danbev/work/ai/whisper-work/venv/lib/python3.11/site-packages/coremltools/converters/mil/converter.py", line 245, in _mil_convert
    return modelClass(
           ^^^^^^^^^^^
  File "/Users/danbev/work/ai/whisper-work/venv/lib/python3.11/site-packages/coremltools/models/model.py", line 489, in __init__
    self.__proxy__, self._spec, self._framework_error = self._get_proxy_and_spec(
                                                        ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/danbev/work/ai/whisper-work/venv/lib/python3.11/site-packages/coremltools/models/model.py", line 550, in _get_proxy_and_spec
    _MLModelProxy(
ValueError: basic_string
```

Refs: https://github.com/ggml-org/whisper.cpp/issues/3012
---
 models/convert-whisper-to-coreml.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/models/convert-whisper-to-coreml.py b/models/convert-whisper-to-coreml.py
index 3876a2874c4..4d37b428094 100644
--- a/models/convert-whisper-to-coreml.py
+++ b/models/convert-whisper-to-coreml.py
@@ -257,7 +257,8 @@ def convert_encoder(hparams, model, quantize=False):
         convert_to="neuralnetwork",
         inputs=[ct.TensorType(name="logmel_data", shape=input_shape)],
         outputs=[ct.TensorType(name="output")],
-        compute_units=ct.ComputeUnit.ALL
+        compute_units=ct.ComputeUnit.ALL,
+        skip_model_load=True,
     )
 
     if quantize:
@@ -282,7 +283,8 @@ def convert_decoder(hparams, model, quantize=False):
         inputs=[
             ct.TensorType(name="token_data", shape=tokens_shape, dtype=int),
             ct.TensorType(name="audio_data", shape=audio_shape)
-        ]
+        ],
+        skip_model_load=True,
     )
 
     if quantize:

From 350f1d9d4552b6c74d6b5d7381ff9483a705789b Mon Sep 17 00:00:00 2001
From: Daniel Bevenius
Date: Sun, 20 Apr 2025 15:47:32 +0200
Subject: [PATCH 2/2] coreml : set convert_to="mlprogram" in convert

This commit updates the conversion process for Whisper models to use the
"mlprogram" format instead of "neuralnetwork".
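Not part of the patch itself, but as a rough, self-contained sketch of the
coremltools call pattern this commit switches to (the `DummyEncoder` module
and the shapes below are illustrative assumptions, not code from the script):

```python
# Editor's sketch only -- not the script's code. DummyEncoder and the
# shapes are stand-ins chosen to mirror the encoder conversion call.
import torch
import coremltools as ct

class DummyEncoder(torch.nn.Module):
    def forward(self, x):
        return torch.relu(x)

input_shape = (1, 80, 3000)  # log-mel style input
traced_model = torch.jit.trace(DummyEncoder().eval(), torch.randn(input_shape))

model = ct.convert(
    traced_model,
    convert_to="mlprogram",  # ML Program backend instead of the protobuf-based "neuralnetwork"
    inputs=[ct.TensorType(name="logmel_data", shape=input_shape)],
    outputs=[ct.TensorType(name="output")],
    compute_units=ct.ComputeUnit.ALL,
)
model.save("dummy-encoder.mlpackage")  # ML Programs are saved as .mlpackage bundles
```

One practical difference is packaging: an ML Program is saved as an
`.mlpackage` bundle rather than a single protobuf `.mlmodel` file.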
The motivation for this change is that when using the "neuralnetwork"
format the underlying model produced is based on protobuf, and my
understanding is that this format has limitations, such as the sizes of
strings and the complexity of the model. Currently, converting larger
models such as large-v3 fails, while smaller models convert successfully.

The "mlprogram" format is a more recent addition to CoreML and is
designed to be more flexible and powerful, allowing for more complex
models and larger data types. This seems to work for larger and smaller
models alike, and unless there are considerations that I'm not aware of,
I think this is what we should be using moving forward.
---
 models/convert-whisper-to-coreml.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/models/convert-whisper-to-coreml.py b/models/convert-whisper-to-coreml.py
index 4d37b428094..66827b6d420 100644
--- a/models/convert-whisper-to-coreml.py
+++ b/models/convert-whisper-to-coreml.py
@@ -254,11 +254,10 @@ def convert_encoder(hparams, model, quantize=False):
 
     model = ct.convert(
         traced_model,
-        convert_to="neuralnetwork",
+        convert_to="mlprogram",
         inputs=[ct.TensorType(name="logmel_data", shape=input_shape)],
         outputs=[ct.TensorType(name="output")],
         compute_units=ct.ComputeUnit.ALL,
-        skip_model_load=True,
     )
 
     if quantize:
@@ -279,12 +278,11 @@ def convert_decoder(hparams, model, quantize=False):
 
     model = ct.convert(
         traced_model,
-        convert_to="neuralnetwork",
+        convert_to="mlprogram",
         inputs=[
             ct.TensorType(name="token_data", shape=tokens_shape, dtype=int),
             ct.TensorType(name="audio_data", shape=audio_shape)
         ],
-        skip_model_load=True,
     )
 
     if quantize:
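Editor's addendum (not part of either patch): once conversion succeeds, the
load-and-predict step that `skip_model_load` deferred in the first patch can
be exercised from Python roughly as below. The `.mlpackage` path and the 128
mel bins assumed for large-v3 are illustrative only, and whisper.cpp itself
loads the compiled model through its own Core ML code rather than this API.

```python
# Editor's sketch: load a converted encoder and run a prediction with the
# coremltools Python API (requires macOS). Path and mel-bin count are assumed.
import numpy as np
import coremltools as ct

mlmodel = ct.models.MLModel("coreml-encoder-large-v3.mlpackage")
logmel_data = np.random.rand(1, 128, 3000).astype(np.float32)
outputs = mlmodel.predict({"logmel_data": logmel_data})
print(outputs["output"].shape)
```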