Add pytest for multi-graph and fix minor issues

dimdano · dimdano · commit 7fbf439febd1 · 2025-03-04T15:53:05.000+01:00
diff --git a/docs/ir/multimodelgraph.rst b/docs/ir/multimodelgraph.rst
@@ -68,7 +68,7 @@ This allows modular design flows and easier debugging of large models.
 ``compile`` method
 ==================
 
-Compiles all the individual ``ModelGraph`` subgraphs within the ``MultiModelGraph``.
+Compiles all the individual ``ModelGraph`` subgraphs within the ``MultiModelGraph``. It also compiles a chained bridge file that links all the subgraphs together; this bridge file is what the ``predict`` method runs.
 
 .. code-block:: python
 
@@ -97,7 +97,7 @@ The returned ``report`` contains data from each subgraph's build and, if stitchi
 ``predict`` method
 ==================
 
-Performs a forward pass through the chained sub-models using the C-simulation (``sim='csim'``). Data is automatically passed from one subgraph's output to the next subgraph's input. For large stitched designs, you can also leverage RTL simulation (``sim='rtl'``) to perform the forward pass at the register-transfer level. In this case, a Verilog testbench is dynamically generated and executed against the stitched IP design, providing behavioral simulation to accurately verify latency and output at the hardware level.
+Performs a forward pass through the chained bridge file using C-simulation (``sim='csim'``). Data is automatically passed from one subgraph's output to the next subgraph's input. For large stitched designs, you can also leverage RTL simulation (``sim='rtl'``) to perform the forward pass at the register-transfer level. In this case, a Verilog testbench is dynamically generated and executed against the stitched IP design, providing behavioral simulation to accurately verify latency and output at the hardware level. Note that RTL simulation does not support batch prediction: the input data must contain a single sample, i.e. a batch dimension of size 1.
 
 .. code-block:: python
 
@@ -126,3 +126,12 @@ Summary
 --------------------------
 
 The ``MultiModelGraph`` class is a tool for modular hardware design. By splitting a large neural network into multiple subgraphs, building each independently, and then stitching them together, you gain flexibility, parallelism, and facilitate hierarchical design, incremental optimization, and integrated system-level simulations.
+
+--------------------------
+Other Notes
+--------------------------
+
+* Branch Splitting Limitation: Splitting in the middle of a branched architecture (e.g., ResNet skip connections or multi-path networks) is currently unsupported. Also, each split subgraph must have a single input and a single output.
+* Handling Multiple NN Inputs & Outputs: The final NN output can support multiple output layers. However, for networks with multiple input layers, proper synchronization is required to drive inputs—especially for stream interfaces. A fork-join mechanism in the Verilog testbench can help manage input synchronization effectively.
+* RTL Simulation Issue: RTL simulation of stitched IPs with ``io_type='io_parallel'`` and a split at the flatten layer leads to improper simulation behavior and should be avoided.
+* Array Partitioning for Parallel I/O: For ``io_parallel`` interfaces, all IPs must use the ``partition`` pragma instead of ``reshape``.
diff --git a/hls4ml/backends/vitis/vitis_backend.py b/hls4ml/backends/vitis/vitis_backend.py
@@ -223,7 +223,7 @@ def build_stitched_design(
             stitched_report = aggregate_graph_reports(graph_reports)
 
         if sim_stitched_design:
-            testbench_output = read_testbench_log(testbench_log_path)
+            testbench_output = read_testbench_log(testbench_log_path, nn_config['outputs'])
             stitched_report['BehavSimResults'] = testbench_output['BehavSimResults']
             stitched_report['StitchedDesignReport']['BestLatency'] = testbench_output['BestLatency']
             stitched_report['StitchedDesignReport']['WorstLatency'] = testbench_output['WorstLatency']
diff --git a/hls4ml/model/graph.py b/hls4ml/model/graph.py
@@ -1,6 +1,7 @@
 import concurrent.futures
 import copy
 import ctypes
+import uuid
 import importlib.util
 import os
 import platform
@@ -1020,6 +1021,7 @@ def __init__(self, graphs):
         self._initialize_config(graphs[0])
         self._bind_modelgraph_methods()
         self._initialize_io_attributes(graphs)
+        self._update_pragmas()
 
     def _initialize_config(self, first_graph):
         self.config = copy.copy(first_graph.config)
@@ -1055,7 +1057,7 @@ def _update_project_config(self, first_graph):
         original_output_dir = first_graph.config.get_output_dir().partition('/graph')[0]
         self.config.config['OutputDir'] = os.path.join(original_output_dir, 'stitched')
         self.config.config['StitchedProjectName'] = 'vivado_stitched_design'
-        self.config.config['Stamp'] = '64616e'
+        self.config.config['Stamp'] = self._make_stamp()
 
     def __getitem__(self, index):
         return self.graphs[index]
@@ -1223,6 +1225,20 @@ def _print_status(self, status):
         status_str = ' | '.join(f'{proj}: {status_icons.get(stat, "?")}' for proj, stat in status.items())
         print(status_str, flush=True)
 
+    def _update_pragmas(self):
+        """
+        Replace the 'reshape' pragma with 'partition' on every entry of each graph's output_vars, printing each change.
+        """
+        for g in self.graphs:
+            for layer_name in g.output_vars:
+                if hasattr(g.output_vars[layer_name], 'pragma'):
+                    layer_pragma = g.output_vars[layer_name].pragma
+                    if isinstance(layer_pragma, str) and layer_pragma == 'reshape':  # non-string pragmas are left as-is
+                        g.output_vars[layer_name].pragma = 'partition'
+                        print(f"Updating pragma in Layer '{layer_name}' from 'reshape' to 'partition'.")
+                else:
+                    print(f"Layer '{layer_name}' does not have a 'pragma' attribute.")  # reported only, not an error
+
     def _assert_consistent_pragmas(self):
         """
         Ensure all graphs have the same pragma in their input and output layers.
@@ -1251,7 +1267,12 @@ def _assert_consistent_pragmas(self):
                 raise ValueError(
                     f"Pragma mismatch in graph {idx}:\n" f"Expected: {ref_pragmas}\n" f"Found: {current_pragmas}"
                 )
-
+            
+    def _make_stamp(self):
+        """Return a random 8-character stamp: the tail of a freshly generated UUID4 string."""
+        length = 8  # number of trailing characters kept from the UUID text
+        return str(uuid.uuid4())[-length:]
+    
     def _replace_logos(self):
         spec = importlib.util.find_spec("hls4ml")
         hls4ml_path = os.path.dirname(spec.origin)
diff --git a/hls4ml/utils/simulation_utils.py b/hls4ml/utils/simulation_utils.py
@@ -5,57 +5,6 @@
 import pandas as pd
 from lxml import etree
 
-
-def parse_component_xml(component_xml_path):
-    """
-    Parse the given component.xml file and return structured information
-    about the input and output ports.
-
-    Returns:
-        inputs (list): A list of dicts, each containing 'name', 'direction', and 'width' for input ports.
-        outputs (list): A list of dicts, each containing 'name', 'direction', and 'width' for output ports.
-    """
-    if not os.path.exists(component_xml_path):
-        raise FileNotFoundError(f"component.xml not found at {component_xml_path}")
-
-    # Parse the XML file
-    tree = etree.parse(component_xml_path)
-    root = tree.getroot()
-
-    # Define the namespaces
-    ns = {
-        'spirit': 'http://www.spiritconsortium.org/XMLSchema/SPIRIT/1685-2009',
-        'xilinx': 'http://www.xilinx.com',
-        'xsi': 'http://www.w3.org/2001/XMLSchema-instance',
-    }
-
-    # Extract ports
-    ports = root.findall('.//spirit:model/spirit:ports/spirit:port', namespaces=ns)
-    inputs = []
-    outputs = []
-
-    for port in ports:
-        name = port.find('spirit:name', namespaces=ns).text
-        wire = port.find('spirit:wire', namespaces=ns)
-        if wire is not None:
-            direction = wire.find('spirit:direction', namespaces=ns).text
-            vector = wire.find('spirit:vector', namespaces=ns)
-            if vector is not None:
-                left = vector.find('spirit:left', namespaces=ns).text
-                right = vector.find('spirit:right', namespaces=ns).text
-                width = abs(int(left) - int(right)) + 1
-            else:
-                width = 1
-
-            port_info = {'name': name, 'direction': direction, 'width': width}
-            if direction == 'in':
-                inputs.append(port_info)
-            elif direction == 'out':
-                outputs.append(port_info)
-
-    return inputs, outputs
-
-
 def write_verilog_testbench(nn_config, testbench_output_path):
     """
     Generate a Verilog testbench for a given neural network configuration.
@@ -552,8 +501,7 @@ def prepare_testbench_input(data, fifo_depth, batch_size):
     data_reshaped = data_arr.reshape((fifo_depth, batch_size))
     return data_reshaped
 
-
-def read_testbench_log(testbench_log_path):
+def read_testbench_log(testbench_log_path, outputs):
     """
     Reads the testbench log file and returns a dictionary
     """
@@ -569,8 +517,13 @@ def read_testbench_log(testbench_log_path):
 
         sim_dict = {'BestLatency': int(BestLatency), 'WorstLatency': int(WorstLatency), 'BehavSimResults': []}
 
-        grouped = output_df.groupby('output_name')
-        for name, group in grouped:
+        ordered_output_names = [entry['name'] for entry in outputs]
+        for name in ordered_output_names:
+            group = output_df[output_df['output_name'] == name]
+            if group.empty:
+                print(f"Warning: Expected output '{name}' not found in testbench log.")
+                continue
+
             indices = group['index'].astype(int)
             values = group['value'].astype(float)
             array = np.zeros(max(indices) + 1, dtype=np.float64)
diff --git a/test/pytest/test_multi_graph.py b/test/pytest/test_multi_graph.py
@@ -0,0 +1,94 @@
+from pathlib import Path
+import numpy as np
+import pytest
+import tensorflow as tf
+from tensorflow.keras.layers import Input, Conv2D, Activation, MaxPooling2D, Flatten, Dense
+import hls4ml
+
+test_root_path = Path(__file__).parent
+
+def create_test_model():
+    """
+    Build a small Keras CNN with a single input and two Dense output heads ('dense1' and 'dense2').
+    It covers corner cases such as two output layers and offers a variety of layers to serve as splitting points.
+    """
+    inp = Input(shape=(4, 4, 3), name='input_layer')
+    x = Conv2D(4, (3, 3), padding='same', name='conv1')(inp)
+    x = Activation('relu', name='relu1')(x)
+    x = MaxPooling2D((2, 2), name='pool1')(x)
+    x = Flatten(name='flatten')(x)
+    x = Dense(16, activation='relu', name='dense_common')(x)
+    output1 = Dense(5, activation='relu', name='dense1')(x)
+    output2 = Dense(5, activation='relu', name='dense2')(x)
+    model = tf.keras.Model(inputs=inp, outputs=[output1, output2])
+    
+    return model
+
+@pytest.mark.parametrize('io_type', ['io_parallel', 'io_stream'])
+@pytest.mark.parametrize('strategy', ['latency'])
+@pytest.mark.parametrize('granularity', ['model', 'name'])
+@pytest.mark.parametrize('split_layers', [
+    ('pool1', 'dense_common'),
+    ('relu1', 'flatten')
+])
+def test_multimodelgraph_predict(split_layers, io_type, strategy, granularity):
+    """
+    Tests the multi-graph splitting and stitching process.
+    - Verifies that C-simulation predictions of the monolithic and multi-graph versions match (atol=1e-5).
+    - When granularity='name', an HLS build and stitched RTL simulation follow (skipped for io_parallel split at flatten).
+    - The RTL simulation outputs for the first sample are compared against the C-simulation predictions (atol=0.3).
+    """
+    backend = 'vitis'
+    model = create_test_model()
+    model.compile(optimizer='adam', loss='categorical_crossentropy')
+    X_input = np.random.rand(5, 4, 4, 3).astype(np.float32)  # 5 random samples matching the (4, 4, 3) model input
+    keras_pred = model.predict(X_input)
+
+    config = hls4ml.utils.config_from_keras_model(model, granularity=granularity, default_precision='ap_fixed<32,16>')
+    config['Model']['Strategy'] = strategy
+
+    output_dir_mono = str(test_root_path / f"hls4mlprj_mono_{granularity}_{'_'.join(split_layers)}_{io_type}_{strategy}")
+    output_dir_multi = str(test_root_path / f"hls4mlprj_multi_{granularity}_{'_'.join(split_layers)}_{io_type}_{strategy}")
+
+    # --- Monolithic HLS conversion (no split) ---
+    hls_model_mono = hls4ml.converters.convert_from_keras_model(
+        model,
+        hls_config=config,
+        output_dir=output_dir_mono,
+        backend=backend,
+        io_type=io_type
+    )
+    hls_model_mono.compile()
+    pred_mono = hls_model_mono.predict(X_input)
+
+    # --- Multi-model conversion with split ---
+    hls_model_multi = hls4ml.converters.convert_from_keras_model(
+        model,
+        hls_config=config,
+        output_dir=output_dir_multi,
+        backend=backend,
+        io_type=io_type,
+        split_layer_names=list(split_layers)
+    )
+    hls_model_multi.compile()
+    pred_multi = hls_model_multi.predict(X_input)
+
+    assert hasattr(hls_model_multi, 'graphs'), "Multi-model graph missing 'graphs' attribute."
+    assert len(hls_model_multi.graphs) == 3, f"Expected 3 subgraphs, got {len(hls_model_multi.graphs)}"
+
+    for mono_out, multi_out in zip(pred_mono, pred_multi):
+        np.testing.assert_allclose(multi_out, mono_out, rtol=0, atol=1e-5)
+    
+    if granularity == 'name':
+        if io_type == 'io_parallel' and split_layers == ('relu1', 'flatten'):
+            pytest.skip("Skipping RTL simulation for io_parallel with split layer at flatten due to improper simulation behavior.")
+
+        # --- Build the HLS project, stitch the IPs and run the stitched RTL simulation ---
+        hls_model_multi.build(csim=False, cosim=False, vsynth=False, export=True, 
+                        stitch_design=True, sim_stitched_design=True, export_stitched_design=True)
+
+        # test only the first sample, as batch prediction is not supported for stitched RTL simulations
+        inp = np.expand_dims(X_input[0], axis=0)
+        sim_results = hls_model_multi.predict(inp, sim = 'rtl')
+        for sim_out, pred_out in zip(sim_results, list([pred_multi[0][0], pred_multi[1][0]])):  # first sample per output
+            np.testing.assert_allclose(sim_out, pred_out, rtol=0, atol=0.3)