From c6e21944045ce8526bf2aee56c98905e77877272 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Fri, 18 Apr 2025 22:11:08 -0500 Subject: [PATCH 1/7] expression profiling --- activitysim/abm/models/summarize.py | 17 ++- activitysim/cli/run.py | 7 +- activitysim/core/assign.py | 49 +++--- activitysim/core/configuration/base.py | 9 ++ activitysim/core/configuration/top.py | 20 +++ activitysim/core/expressions.py | 1 + activitysim/core/interaction_simulate.py | 28 +++- activitysim/core/simulate.py | 46 ++++-- activitysim/core/timing.py | 185 +++++++++++++++++++++++ 9 files changed, 324 insertions(+), 38 deletions(-) create mode 100644 activitysim/core/timing.py diff --git a/activitysim/abm/models/summarize.py b/activitysim/abm/models/summarize.py index e842393fda..c877c636cd 100644 --- a/activitysim/abm/models/summarize.py +++ b/activitysim/abm/models/summarize.py @@ -4,11 +4,12 @@ import logging import os +from pathlib import Path import numpy as np import pandas as pd -from activitysim.core import expressions, workflow +from activitysim.core import expressions, timing, workflow from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable from activitysim.core.los import Network_LOS @@ -353,6 +354,12 @@ def summarize( } ) + if state.settings.expression_profile: + perf_log_file = Path(trace_label + ".log") + else: + perf_log_file = None + performance_timer = timing.EvalTiming(perf_log_file) + for i, row in spec.iterrows(): out_file = row["Output"] expr = row["Expression"] @@ -361,15 +368,19 @@ def summarize( if out_file.startswith("_"): logger.debug(f"Temp Variable: {expr} -> {out_file}") - locals_d[out_file] = eval(expr, globals(), locals_d) + with performance_timer.time_expression(expr): + locals_d[out_file] = eval(expr, globals(), locals_d) continue logger.debug(f"Summary: {expr} -> {out_file}.csv") - resultset = eval(expr, globals(), locals_d) + with performance_timer.time_expression(expr): + resultset = eval(expr, globals(), locals_d) resultset.to_csv( state.get_output_file_path( os.path.join(output_location, f"{out_file}.csv") ), index=False, ) + + performance_timer.write_log(state) diff --git a/activitysim/cli/run.py b/activitysim/cli/run.py index 03258be4b5..f910702155 100644 --- a/activitysim/cli/run.py +++ b/activitysim/cli/run.py @@ -11,7 +11,7 @@ import numpy as np -from activitysim.core import chunk, config, mem, tracing, workflow +from activitysim.core import chunk, config, mem, timing, tracing, workflow from activitysim.core.configuration import FileSystem, Settings logger = logging.getLogger(__name__) @@ -423,6 +423,11 @@ def run(args): if memory_sidecar_process: memory_sidecar_process.stop() + if state.settings.expression_profile: + # generate a summary of slower expression evaluation times + # across all models and write to a file + timing.AnalyzeEvalTiming(state).to_html() + return 0 diff --git a/activitysim/core/assign.py b/activitysim/core/assign.py index 4054b2aec9..8c5513416c 100644 --- a/activitysim/core/assign.py +++ b/activitysim/core/assign.py @@ -5,11 +5,12 @@ import logging from builtins import object, zip from collections import OrderedDict +from pathlib import Path import numpy as np import pandas as pd -from activitysim.core import chunk, util, workflow +from activitysim.core import chunk, timing, util, workflow logger = logging.getLogger(__name__) @@ -246,6 +247,12 @@ def to_series(x): assert assignment_expressions.shape[0] > 0 + if state.settings.expression_profile: + perf_log_file = Path(trace_label + ".log") + else: + perf_log_file = 
None + performance_timer = timing.EvalTiming(perf_log_file) + trace_assigned_locals = trace_results = None if trace_rows is not None: # convert to numpy array so we can slice ndarrays as well as series @@ -282,24 +289,25 @@ def to_series(x): n_randoms += 1 assignment_expressions.loc[expression_idx, "expression"] = expression if n_randoms: - try: - random_draws = state.get_rn_generator().normal_for_df( - df, broadcast=True, size=n_randoms - ) - except RuntimeError: - pass - else: - _locals_dict["random_draws"] = random_draws + with performance_timer.time_expression(""): + try: + random_draws = state.get_rn_generator().normal_for_df( + df, broadcast=True, size=n_randoms + ) + except RuntimeError: + pass + else: + _locals_dict["random_draws"] = random_draws - def rng_lognormal(random_draws, mu, sigma, broadcast=True, scale=False): - if scale: - x = 1 + ((sigma * sigma) / (mu * mu)) - mu = np.log(mu / (np.sqrt(x))) - sigma = np.sqrt(np.log(x)) - assert broadcast - return np.exp(random_draws * sigma + mu) + def rng_lognormal(random_draws, mu, sigma, broadcast=True, scale=False): + if scale: + x = 1 + ((sigma * sigma) / (mu * mu)) + mu = np.log(mu / (np.sqrt(x))) + sigma = np.sqrt(np.log(x)) + assert broadcast + return np.exp(random_draws * sigma + mu) - _locals_dict["rng_lognormal"] = rng_lognormal + _locals_dict["rng_lognormal"] = rng_lognormal sharrow_enabled = state.settings.sharrow @@ -327,7 +335,8 @@ def rng_lognormal(random_draws, mu, sigma, broadcast=True, scale=False): if is_temp_singular(target) or is_throwaway(target): try: - x = eval(expression, globals(), _locals_dict) + with performance_timer.time_expression(expression): + x = eval(expression, globals(), _locals_dict) except Exception as err: logger.error( "assign_variables error: %s: %s", type(err).__name__, str(err) @@ -355,7 +364,8 @@ def rng_lognormal(random_draws, mu, sigma, broadcast=True, scale=False): # FIXME should whitelist globals for security? globals_dict = {} - expr_values = to_series(eval(expression, globals_dict, _locals_dict)) + with performance_timer.time_expression(expression): + expr_values = to_series(eval(expression, globals_dict, _locals_dict)) if sharrow_enabled: if isinstance(expr_values.dtype, pd.api.types.CategoricalDtype): @@ -430,4 +440,5 @@ def rng_lognormal(random_draws, mu, sigma, broadcast=True, scale=False): inplace=True, ) + performance_timer.write_log(state) return variables, trace_results, trace_assigned_locals diff --git a/activitysim/core/configuration/base.py b/activitysim/core/configuration/base.py index ec27fee994..9ce7f38555 100644 --- a/activitysim/core/configuration/base.py +++ b/activitysim/core/configuration/base.py @@ -234,6 +234,15 @@ def pandas_option_context(self): else: yield + performance_log: Path | None = None + """Log runtime performance to this file. + + The runtime performance log shows the time taken to evaluate each + expression in the specification files. It is useful for debugging + performance issues with complex expressions. + + If set to None (the default), no performance logging will be done.""" + def subcomponent_settings(self, subcomponent: str) -> ComputeSettings: """Get the sharrow settings for a particular subcomponent.""" return ComputeSettings( diff --git a/activitysim/core/configuration/top.py b/activitysim/core/configuration/top.py index 024f878a46..fa5978a6fc 100644 --- a/activitysim/core/configuration/top.py +++ b/activitysim/core/configuration/top.py @@ -537,6 +537,26 @@ def _check_store_skims_in_shm(self): own that pollutes the collected data. 
""" + expression_profile: bool | None = None + """ + Track the runtime of each individual expression in each spec file. + + .. versionadded:: 1.4 + + The default value of this setting is `None`, which sets no global control + of expression profiling, and allows this feature to be turned on or off + for individual components. If set to `True`, all components will have + expression profiling enabled, and the outputs will be written to files named + based on the trace label unless explicitly set in the `compute_settings` for + individual components. If set to `False`, all components will have expression + profiling disabled. + + This is generally a developer-only feature and not needed for regular usage. + It will add some overhead to the model run, which is only valuable if you + expect the review the expression runtimes with an eye towards improving them. + Production model users should typically have this set explicitly to `False`. + """ + benchmarking: bool = False """ Flag this model run as a benchmarking run. diff --git a/activitysim/core/expressions.py b/activitysim/core/expressions.py index 413636d3f4..9d21d39139 100644 --- a/activitysim/core/expressions.py +++ b/activitysim/core/expressions.py @@ -139,6 +139,7 @@ def compute_columns( df, _locals_dict, trace_rows=state.tracing.trace_targets(df), + trace_label=trace_label, ) if trace_results is not None: diff --git a/activitysim/core/interaction_simulate.py b/activitysim/core/interaction_simulate.py index 1089de3edc..6d9f2a86d6 100644 --- a/activitysim/core/interaction_simulate.py +++ b/activitysim/core/interaction_simulate.py @@ -7,12 +7,13 @@ from builtins import zip from collections import OrderedDict from datetime import timedelta +from pathlib import Path from typing import Mapping import numpy as np import pandas as pd -from activitysim.core import chunk, logit, simulate, tracing, util, workflow +from activitysim.core import chunk, logit, simulate, timing, tracing, util, workflow from activitysim.core.configuration.base import ComputeSettings logger = logging.getLogger(__name__) @@ -262,6 +263,18 @@ def to_series(x): exprs = spec.index labels = spec.index + # init a performance timer if needed + if ( + state.settings.expression_profile + and compute_settings.performance_log is None + ): + perf_log_file = Path(trace_label + ".log") + elif state.settings.expression_profile is False: + perf_log_file = None + else: + perf_log_file = compute_settings.performance_log + performance_timer = timing.EvalTiming(perf_log_file) + with compute_settings.pandas_option_context(): for expr, label, coefficient in zip(exprs, labels, spec.iloc[:, 0]): try: @@ -269,7 +282,8 @@ def to_series(x): if expr.startswith("_"): target = expr[: expr.index("@")] rhs = expr[expr.index("@") + 1 :] - v = to_series(eval(rhs, globals(), locals_d)) + with performance_timer.time_expression(expr): + v = to_series(eval(rhs, globals(), locals_d)) # update locals to allows us to ref previously assigned targets locals_d[target] = v @@ -284,10 +298,11 @@ def to_series(x): # they have a non-zero dummy coefficient to avoid being removed from spec as NOPs continue - if expr.startswith("@"): - v = to_series(eval(expr[1:], globals(), locals_d)) - else: - v = df.eval(expr, resolvers=[locals_d]) + with performance_timer.time_expression(expr): + if expr.startswith("@"): + v = to_series(eval(expr[1:], globals(), locals_d)) + else: + v = df.eval(expr, resolvers=[locals_d]) if check_for_variability and v.std() == 0: logger.info( @@ -402,6 +417,7 @@ def to_series(x): trace_label, 
"eval.trace_eval_results", trace_eval_results ) + performance_timer.write_log(state) chunk_sizer.log_df(trace_label, "v", None) chunk_sizer.log_df( trace_label, "eval.utilities", None diff --git a/activitysim/core/simulate.py b/activitysim/core/simulate.py index fcffcf6066..7c8b09e937 100644 --- a/activitysim/core/simulate.py +++ b/activitysim/core/simulate.py @@ -21,6 +21,7 @@ configuration, logit, pathbuilder, + timing, tracing, util, workflow, @@ -627,6 +628,17 @@ def eval_utilities( if utilities is None or estimator or sharrow_enabled == "test": trace_label = tracing.extend_trace_label(trace_label, "eval_utils") + if ( + state.settings.expression_profile + and compute_settings.performance_log is None + ): + perf_log_file = Path(trace_label + ".log") + elif state.settings.expression_profile is False: + perf_log_file = None + else: + perf_log_file = compute_settings.performance_log + performance_timer = timing.EvalTiming(perf_log_file) + # avoid altering caller's passed-in locals_d parameter (they may be looping) locals_dict = assign.local_utilities(state) @@ -653,10 +665,13 @@ def eval_utilities( with warnings.catch_warnings(record=True) as w: # Cause all warnings to always be triggered. warnings.simplefilter("always") - if expr.startswith("@"): - expression_value = eval(expr[1:], globals_dict, locals_dict) - else: - expression_value = choosers.eval(expr) + with performance_timer.time_expression(expr): + if expr.startswith("@"): + expression_value = eval( + expr[1:], globals_dict, locals_dict + ) + else: + expression_value = choosers.eval(expr) if len(w) > 0: for wrn in w: @@ -685,6 +700,7 @@ def eval_utilities( expression_values[i] = expression_value i += 1 + performance_timer.write_log(state) chunk_sizer.log_df(trace_label, "expression_values", expression_values) if estimator: @@ -846,7 +862,9 @@ def eval_utilities( return utilities -def eval_variables(state: workflow.State, exprs, df, locals_d=None): +def eval_variables( + state: workflow.State, exprs, df, locals_d=None, trace_label: str | None = None +): """ Evaluate a set of variable expressions from a spec in the context of a given data table. @@ -873,6 +891,9 @@ def eval_variables(state: workflow.State, exprs, df, locals_d=None): locals_d : Dict This is a dictionary of local variables that will be the environment for an evaluation of an expression that begins with @ + trace_label : str + The trace label to use for performance logging. If None, performance + logging is not activated. 
Returns ------- @@ -907,13 +928,20 @@ def to_array(x): return a + if state.settings.expression_profile and trace_label: + perf_log_file = Path(trace_label + ".log") + else: + perf_log_file = None + performance_timer = timing.EvalTiming(perf_log_file) + values = OrderedDict() for expr in exprs: try: - if expr.startswith("@"): - expr_values = to_array(eval(expr[1:], globals_dict, locals_dict)) - else: - expr_values = to_array(df.eval(expr)) + with performance_timer.time_expression(expr): + if expr.startswith("@"): + expr_values = to_array(eval(expr[1:], globals_dict, locals_dict)) + else: + expr_values = to_array(df.eval(expr)) # read model spec should ensure uniqueness, otherwise we should uniquify assert expr not in values values[expr] = expr_values diff --git a/activitysim/core/timing.py b/activitysim/core/timing.py new file mode 100644 index 0000000000..dd1b014e12 --- /dev/null +++ b/activitysim/core/timing.py @@ -0,0 +1,185 @@ +from __future__ import annotations + +from contextlib import contextmanager +from pathlib import Path +from time import time_ns +from typing import TYPE_CHECKING + +import pandas as pd + +from .util import si_units + +if TYPE_CHECKING: + from .workflow import State + + +class NoTiming: + """Class that does no timing, serves as the default. + + This class is kept as simple as possible to avoid unnecessary overhead when + no timing is requested. + """ + + @contextmanager + def time_expression(self, expression: str): + """Context manager to time an expression.""" + yield + + def write_log(self, state: State) -> None: + """Write the log to a file.""" + pass + + +class EvalTiming(NoTiming): + def __new__(cls, log: Path | None = None, **kwargs): + if log is None: + return NoTiming() + else: + return super().__new__(cls) + + def __init__(self, log: Path | None = None, *, overwrite: bool = False): + """ + Timing class to log the time taken to evaluate expressions. + + Parameters + ---------- + log : Path | None, default None + Path to the log file. If None, no logging is done. If this is an + absolute path, the log file is created there. If this is just a + simple filename or a relative path, the log file is created relative + in or relative to the usual logging directory. + overwrite : bool, default False + If True, overwrite the log file if it already exists. If False, + create a new log file with a unique name. + """ + self.log_file = log + self.overwrite = overwrite + self.elapsed_times = {} + + @contextmanager + def time_expression(self, expression: str): + """Context manager to time an expression. + + Parameters + ---------- + expression : str + The expression to be timed. This is used as the key in the log file. + """ + + # when performance logging is not enabled, do nothing + if self.log_file is None: + yield + return + + # when performance logging is enabled, we track the time it takes to evaluate + # the expression and store it + start_time = time_ns() + yield + end_time = time_ns() + elapsed_time = end_time - start_time + if expression in self.elapsed_times: + self.elapsed_times[expression] += elapsed_time + else: + self.elapsed_times[expression] = elapsed_time + + def write_log(self, state: State) -> None: + """Write the log to a file. + + Parameters + ---------- + state : State + The state object containing configuration information. This is used + to determine the path for the log file, when the log file is not + given as an absolute path. 
+ """ + if self.log_file is None: + return + + if self.log_file.is_absolute(): + filename = self.log_file + else: + filename = state.get_log_file_path( + str(Path("expr-performance") / self.log_file) + ) + + # if the log file already exists and overwrite is false, create a new file + proposed_filename = filename + n = 0 + while not self.overwrite and proposed_filename.exists(): + n += 1 + proposed_filename = filename.with_stem(filename.stem + f"-{n}") + filename = proposed_filename + + # ensure the parent directory exists + filename.parent.mkdir(parents=True, exist_ok=True) + + # Determine the scale for the elapsed times. We want to use an appropriate + # timescale for the elapsed times, which provides useful information without + # reporting excessive precision. + # If the smallest elapsed time is greater than 1 second, use seconds. + # If the smallest elapsed time is greater than 1 millisecond, use milliseconds. + # Otherwise, use microseconds, no one should care about nanoseconds. + min_t = 1_000_000_000 + for t in self.elapsed_times.values(): + if t < min_t: + min_t = t + if min_t > 1_000_000_000: + scale = 1_000_000_000 + label = "Time (sec)" + elif min_t > 1_000_000: + scale = 1_000_000 + label = "Time (msec)" + else: + scale = 1_000 + label = "Time (µsec)" + + # The timing log is written in a tab-separated format, with times in the + # first column so they are easy to scan through for anomalies. + with open(filename, "w") as f: + f.write(f"{label:11}\tExpression\n") + for expression, elapsed_time in self.elapsed_times.items(): + t = int(elapsed_time / scale) + f.write(f"{t: 11d}\t{expression}\n") + + +class AnalyzeEvalTiming: + """ + Class to analyze the timing of expressions. + """ + + def __init__(self, state: State): + self.log_dir = state.get_log_file_path(str(Path("expr-performance"))) + raw_data = {} + for f in self.log_dir.glob("*.log"): + df = pd.read_csv(f, sep="\t") + if "(msec)" in df.columns[0]: + df.columns = ["Time (µsec)"] + df.columns[1:].tolist() + df.iloc[:, 0] = df.iloc[:, 0].astype(int) * 1_000 + elif "(sec)" in df.columns[0]: + df.columns = ["Time (µsec)"] + df.columns[1:].tolist() + df.iloc[:, 0] = df.iloc[:, 0].astype(int) * 1_000_000 + else: + df.iloc[:, 0] = df.iloc[:, 0].astype(int) + raw_data[str(f.stem)] = df + d = pd.concat(raw_data, names=["Component"]).reset_index() + self.data = d[["Time (µsec)", "Component", "Expression"]] + self.data.sort_values(by=["Time (µsec)"], ascending=[False], inplace=True) + + def to_html( + self, filename: str | Path = "expression-timing.html", cutoff_secs=0.1 + ) -> None: + """Write the data to an HTML file. + + Parameters + ---------- + filename : str | Path + The name of the file to write the HTML to. If a relative path is given, + it will be written in the log directory. + cutoff_secs : float + The cutoff time in seconds. Only expressions with a runtime greater than + this will be included in the HTML file. This is used to avoid writing a + huge report full of expressions that run plenty fast. 
+ """ + self.data[self.data["Time (µsec)"] >= cutoff_secs * 1e6].to_html( + self.log_dir.joinpath(filename), index=False + ) From f1e19710c7c899db4ca7e603e50debfa4589a8b4 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Sat, 19 Apr 2025 13:11:58 -0500 Subject: [PATCH 2/7] two reports --- activitysim/cli/run.py | 4 +- activitysim/core/timing.py | 155 +++++++++++++++++++++++++++++++++++-- 2 files changed, 152 insertions(+), 7 deletions(-) diff --git a/activitysim/cli/run.py b/activitysim/cli/run.py index f910702155..2b924f3408 100644 --- a/activitysim/cli/run.py +++ b/activitysim/cli/run.py @@ -426,7 +426,9 @@ def run(args): if state.settings.expression_profile: # generate a summary of slower expression evaluation times # across all models and write to a file - timing.AnalyzeEvalTiming(state).to_html() + analyze = timing.AnalyzeEvalTiming(state) + analyze.component_report() + analyze.subcomponent_report() return 0 diff --git a/activitysim/core/timing.py b/activitysim/core/timing.py index dd1b014e12..edead45a88 100644 --- a/activitysim/core/timing.py +++ b/activitysim/core/timing.py @@ -3,7 +3,7 @@ from contextlib import contextmanager from pathlib import Path from time import time_ns -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Literal import pandas as pd @@ -142,6 +142,24 @@ def write_log(self, state: State) -> None: f.write(f"{t: 11d}\t{expression}\n") +def write_sortable_table(df: pd.DataFrame, filename: str | Path) -> None: + html_table = df.to_html(classes="sortable", index=False) + html_content = f""" + + + + + + + {html_table} + + + + """ + with open(filename, "w") as f: + f.write(html_content) + + class AnalyzeEvalTiming: """ Class to analyze the timing of expressions. @@ -162,11 +180,18 @@ def __init__(self, state: State): df.iloc[:, 0] = df.iloc[:, 0].astype(int) raw_data[str(f.stem)] = df d = pd.concat(raw_data, names=["Component"]).reset_index() - self.data = d[["Time (µsec)", "Component", "Expression"]] + + # break trace labels into components and subcomponents + d["Subcomponent"] = d["Component"].str.split(".", 1).str[1] + d["Component"] = d["Component"].str.split(".", 1).str[0] + self.data = d[["Time (µsec)", "Component", "Subcomponent", "Expression"]] + self.data.sort_values(by=["Time (µsec)"], ascending=[False], inplace=True) - def to_html( - self, filename: str | Path = "expression-timing.html", cutoff_secs=0.1 + def subcomponent_report( + self, + filename: str | Path = "expression-timing-subcomponents.html", + cutoff_secs=0.1, ) -> None: """Write the data to an HTML file. @@ -180,6 +205,124 @@ def to_html( this will be included in the HTML file. This is used to avoid writing a huge report full of expressions that run plenty fast. 
""" - self.data[self.data["Time (µsec)"] >= cutoff_secs * 1e6].to_html( - self.log_dir.joinpath(filename), index=False + + # include only expressions that took longer than cutoff_secs + df = self.data[self.data["Time (µsec)"] >= cutoff_secs * 1e6] + + # convert the time to seconds + df["Time (µsec)"] /= 1e6 + df.rename(columns={"Time (µsec)": "Time (sec)"}, inplace=True) + + # format and write the report to HTML + df = ( + df.style.format( + { + "Time (sec)": lambda x: f"{x:.3f}", + } + ) + .background_gradient( + axis=0, gmap=df["Time (sec)"], cmap="YlOrRd", subset=["Time (sec)"] + ) + .hide(axis="index") + .set_table_styles([{"selector": "th", "props": [("text-align", "left")]}]) + .set_properties(**{"padding": "0 5px"}, subset=["Time (sec)"]) + ) + df.to_html(self.log_dir.joinpath(filename), index=False) + + def component_report_data(self, cutoff_secs: float = 0.1): + """ + Return the data for the component report. + """ + df = ( + self.data.groupby(["Component", "Expression"]) + .agg({"Time (µsec)": "sum"}) + .reset_index()[["Time (µsec)", "Component", "Expression"]] + .sort_values(by=["Time (µsec)"], ascending=[False]) ) + + # include only expressions that took longer than cutoff_secs + df = df[df["Time (µsec)"] >= cutoff_secs * 1e6] + + # convert the time to seconds + df["Time (µsec)"] /= 1e6 + df.rename(columns={"Time (µsec)": "Time (sec)"}, inplace=True) + return df + + def component_report( + self, + filename: str | Path = "expression-timing-components.html", + cutoff_secs=0.1, + style: Literal["simple", "grid"] = "simple", + ) -> None: + """Write component-level aggregations to an HTML file. + + This will aggregate the expression timings by component, which may better + reveal expressions that are more problematic because they are evaluated + multiple times. + + Parameters + ---------- + filename : str | Path + The name of the file to write the HTML to. If a relative path is given, + it will be written in the log directory. + cutoff_secs : float + The cutoff time in seconds. Only expressions with a runtime greater than + this will be included in the HTML file. This is used to avoid writing a + huge report full of expressions that run plenty fast. + style : "simple" | "grid", default "simple" + The style of the report. Either "simple" or "grid". "simple" is a + simple HTML table, "grid" is a JavaScript data grid. + """ + + df = self.component_report_data(cutoff_secs=cutoff_secs) + + if style == "simple": + # format and write the report to HTML in a simple table + df = ( + df.style.format( + { + "Time (sec)": lambda x: f"{x:.3f}", + } + ) + .background_gradient( + axis=0, gmap=df["Time (sec)"], cmap="YlOrRd", subset=["Time (sec)"] + ) + .hide(axis="index") + .set_table_styles( + [{"selector": "th", "props": [("text-align", "left")]}] + ) + .set_properties(**{"padding": "0 5px"}, subset=["Time (sec)"]) + ) + df.to_html(self.log_dir.joinpath(filename), index=False) + elif style == "grid": + template = """ + + + + + + +
+ + + + """ + with open(self.log_dir.joinpath(filename), "w") as f: + f.write(template.replace("<>", df.to_json(orient="records"))) + else: + raise ValueError(f"Unknown style {style}. Must be 'simple' or 'grid'.") From 770b3363f25a359bbabebcac0510575fe6146618 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Sat, 19 Apr 2025 13:33:04 -0500 Subject: [PATCH 3/7] make simple report sortable --- activitysim/core/timing.py | 342 ++++++++++++++++++++++++++++++++++++- 1 file changed, 341 insertions(+), 1 deletion(-) diff --git a/activitysim/core/timing.py b/activitysim/core/timing.py index edead45a88..333ab32ae3 100644 --- a/activitysim/core/timing.py +++ b/activitysim/core/timing.py @@ -293,7 +293,28 @@ def component_report( ) .set_properties(**{"padding": "0 5px"}, subset=["Time (sec)"]) ) - df.to_html(self.log_dir.joinpath(filename), index=False) + dat = df.to_html(index=False) + dat = dat.replace(" + + + + + Expression Timing Report + + + + {dat} + + """ + with open(self.log_dir.joinpath(filename), "w") as f: + f.write(template) elif style == "grid": template = """ @@ -326,3 +347,322 @@ def component_report( f.write(template.replace("<>", df.to_json(orient="records"))) else: raise ValueError(f"Unknown style {style}. Must be 'simple' or 'grid'.") + + +# Code below is from https://github.com/HubSpot/sortable +# Copyright (C) 2013 Adam Schwartz, http://adamschwartz.co +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+ + +SORTABLE_JS = """ +(function() { + var SELECTOR, addEventListener, clickEvents, numberRegExp, sortable, touchDevice, trimRegExp; + + SELECTOR = 'table[data-sortable]'; + + numberRegExp = /^-?[£$¤]?[\d,.]+%?$/; + + trimRegExp = /^\s+|\s+$/g; + + clickEvents = ['click']; + + touchDevice = 'ontouchstart' in document.documentElement; + + if (touchDevice) { + clickEvents.push('touchstart'); + } + + addEventListener = function(el, event, handler) { + if (el.addEventListener != null) { + return el.addEventListener(event, handler, false); + } else { + return el.attachEvent("on" + event, handler); + } + }; + + sortable = { + init: function(options) { + var table, tables, _i, _len, _results; + if (options == null) { + options = {}; + } + if (options.selector == null) { + options.selector = SELECTOR; + } + tables = document.querySelectorAll(options.selector); + _results = []; + for (_i = 0, _len = tables.length; _i < _len; _i++) { + table = tables[_i]; + _results.push(sortable.initTable(table)); + } + return _results; + }, + initTable: function(table) { + var i, th, ths, _i, _len, _ref; + if (((_ref = table.tHead) != null ? _ref.rows.length : void 0) !== 1) { + return; + } + if (table.getAttribute('data-sortable-initialized') === 'true') { + return; + } + table.setAttribute('data-sortable-initialized', 'true'); + ths = table.querySelectorAll('th'); + for (i = _i = 0, _len = ths.length; _i < _len; i = ++_i) { + th = ths[i]; + if (th.getAttribute('data-sortable') !== 'false') { + sortable.setupClickableTH(table, th, i); + } + } + return table; + }, + setupClickableTH: function(table, th, i) { + var eventName, onClick, type, _i, _len, _results; + type = sortable.getColumnType(table, i); + onClick = function(e) { + var compare, item, newSortedDirection, position, row, rowArray, sorted, sortedDirection, tBody, ths, value, _compare, _i, _j, _k, _l, _len, _len1, _len2, _len3, _len4, _m, _ref, _ref1; + if (e.handled !== true) { + e.handled = true; + } else { + return false; + } + sorted = this.getAttribute('data-sorted') === 'true'; + sortedDirection = this.getAttribute('data-sorted-direction'); + if (sorted) { + newSortedDirection = sortedDirection === 'ascending' ? 
'descending' : 'ascending'; + } else { + newSortedDirection = type.defaultSortDirection; + } + ths = this.parentNode.querySelectorAll('th'); + for (_i = 0, _len = ths.length; _i < _len; _i++) { + th = ths[_i]; + th.setAttribute('data-sorted', 'false'); + th.removeAttribute('data-sorted-direction'); + } + this.setAttribute('data-sorted', 'true'); + this.setAttribute('data-sorted-direction', newSortedDirection); + tBody = table.tBodies[0]; + rowArray = []; + if (!sorted) { + if (type.compare != null) { + _compare = type.compare; + } else { + _compare = function(a, b) { + return b - a; + }; + } + compare = function(a, b) { + if (a[0] === b[0]) { + return a[2] - b[2]; + } + if (type.reverse) { + return _compare(b[0], a[0]); + } else { + return _compare(a[0], b[0]); + } + }; + _ref = tBody.rows; + for (position = _j = 0, _len1 = _ref.length; _j < _len1; position = ++_j) { + row = _ref[position]; + value = sortable.getNodeValue(row.cells[i]); + if (type.comparator != null) { + value = type.comparator(value); + } + rowArray.push([value, row, position]); + } + rowArray.sort(compare); + for (_k = 0, _len2 = rowArray.length; _k < _len2; _k++) { + row = rowArray[_k]; + tBody.appendChild(row[1]); + } + } else { + _ref1 = tBody.rows; + for (_l = 0, _len3 = _ref1.length; _l < _len3; _l++) { + item = _ref1[_l]; + rowArray.push(item); + } + rowArray.reverse(); + for (_m = 0, _len4 = rowArray.length; _m < _len4; _m++) { + row = rowArray[_m]; + tBody.appendChild(row); + } + } + if (typeof window['CustomEvent'] === 'function') { + return typeof table.dispatchEvent === "function" ? table.dispatchEvent(new CustomEvent('Sortable.sorted', { + bubbles: true + })) : void 0; + } + }; + _results = []; + for (_i = 0, _len = clickEvents.length; _i < _len; _i++) { + eventName = clickEvents[_i]; + _results.push(addEventListener(th, eventName, onClick)); + } + return _results; + }, + getColumnType: function(table, i) { + var row, specified, text, type, _i, _j, _len, _len1, _ref, _ref1, _ref2; + specified = (_ref = table.querySelectorAll('th')[i]) != null ? 
_ref.getAttribute('data-sortable-type') : void 0; + if (specified != null) { + return sortable.typesObject[specified]; + } + _ref1 = table.tBodies[0].rows; + for (_i = 0, _len = _ref1.length; _i < _len; _i++) { + row = _ref1[_i]; + text = sortable.getNodeValue(row.cells[i]); + _ref2 = sortable.types; + for (_j = 0, _len1 = _ref2.length; _j < _len1; _j++) { + type = _ref2[_j]; + if (type.match(text)) { + return type; + } + } + } + return sortable.typesObject.alpha; + }, + getNodeValue: function(node) { + var dataValue; + if (!node) { + return ''; + } + dataValue = node.getAttribute('data-value'); + if (dataValue !== null) { + return dataValue; + } + if (typeof node.innerText !== 'undefined') { + return node.innerText.replace(trimRegExp, ''); + } + return node.textContent.replace(trimRegExp, ''); + }, + setupTypes: function(types) { + var type, _i, _len, _results; + sortable.types = types; + sortable.typesObject = {}; + _results = []; + for (_i = 0, _len = types.length; _i < _len; _i++) { + type = types[_i]; + _results.push(sortable.typesObject[type.name] = type); + } + return _results; + } + }; + + sortable.setupTypes([ + { + name: 'numeric', + defaultSortDirection: 'descending', + match: function(a) { + return a.match(numberRegExp); + }, + comparator: function(a) { + return parseFloat(a.replace(/[^0-9.-]/g, ''), 10) || 0; + } + }, { + name: 'date', + defaultSortDirection: 'ascending', + reverse: true, + match: function(a) { + return !isNaN(Date.parse(a)); + }, + comparator: function(a) { + return Date.parse(a) || 0; + } + }, { + name: 'alpha', + defaultSortDirection: 'ascending', + match: function() { + return true; + }, + compare: function(a, b) { + return a.localeCompare(b); + } + } + ]); + + setTimeout(sortable.init, 0); + + if (typeof define === 'function' && define.amd) { + define(function() { + return sortable; + }); + } else if (typeof exports !== 'undefined') { + module.exports = sortable; + } else { + window.Sortable = sortable; + } + +}).call(this); +""" + +SORTABLE_CSS = """ +/* line 2, ../sass/_sortable.sass */ +table[data-sortable] { + border-collapse: collapse; + border-spacing: 0; +} +/* line 6, ../sass/_sortable.sass */ +table[data-sortable] th { + vertical-align: bottom; + font-weight: bold; +} +/* line 10, ../sass/_sortable.sass */ +table[data-sortable] th, table[data-sortable] td { + text-align: left; + padding: 10px; +} +/* line 14, ../sass/_sortable.sass */ +table[data-sortable] th:not([data-sortable="false"]) { + -webkit-user-select: none; + -moz-user-select: none; + -ms-user-select: none; + -o-user-select: none; + user-select: none; + -webkit-tap-highlight-color: rgba(0, 0, 0, 0); + -webkit-touch-callout: none; + cursor: pointer; +} +/* line 26, ../sass/_sortable.sass */ +table[data-sortable] th:after { + content: ""; + visibility: hidden; + display: inline-block; + vertical-align: inherit; + height: 0; + width: 0; + border-width: 5px; + border-style: solid; + border-color: transparent; + margin-right: 1px; + margin-left: 10px; + float: right; +} +/* line 40, ../sass/_sortable.sass */ +table[data-sortable] th[data-sorted="true"]:after { + visibility: visible; +} +/* line 43, ../sass/_sortable.sass */ +table[data-sortable] th[data-sorted-direction="descending"]:after { + border-top-color: inherit; + margin-top: 8px; +} +/* line 47, ../sass/_sortable.sass */ +table[data-sortable] th[data-sorted-direction="ascending"]:after { + border-bottom-color: inherit; + margin-top: 3px; +}""" From 136a12c586e72d2fe0c683000ebdd663754ac64c Mon Sep 17 00:00:00 2001 From: Jeff Newman 
Date: Sat, 19 Apr 2025 13:47:54 -0500 Subject: [PATCH 4/7] change default to grid --- activitysim/core/timing.py | 91 +++++++++++++++++++++++++++++++------- 1 file changed, 75 insertions(+), 16 deletions(-) diff --git a/activitysim/core/timing.py b/activitysim/core/timing.py index 333ab32ae3..140b0bacd7 100644 --- a/activitysim/core/timing.py +++ b/activitysim/core/timing.py @@ -192,6 +192,7 @@ def subcomponent_report( self, filename: str | Path = "expression-timing-subcomponents.html", cutoff_secs=0.1, + style: Literal["grid", "simple"] = "grid", ) -> None: """Write the data to an HTML file. @@ -204,6 +205,9 @@ def subcomponent_report( The cutoff time in seconds. Only expressions with a runtime greater than this will be included in the HTML file. This is used to avoid writing a huge report full of expressions that run plenty fast. + style : "simple" | "grid", default "simple" + The style of the report. Either "simple" or "grid". "simple" is a + simple HTML table, "grid" is a JavaScript data grid. """ # include only expressions that took longer than cutoff_secs @@ -213,21 +217,76 @@ def subcomponent_report( df["Time (µsec)"] /= 1e6 df.rename(columns={"Time (µsec)": "Time (sec)"}, inplace=True) - # format and write the report to HTML - df = ( - df.style.format( - { - "Time (sec)": lambda x: f"{x:.3f}", - } - ) - .background_gradient( - axis=0, gmap=df["Time (sec)"], cmap="YlOrRd", subset=["Time (sec)"] + if style == "simple": + # format and write the report to HTML + df = ( + df.style.format( + { + "Time (sec)": lambda x: f"{x:.3f}", + } + ) + .background_gradient( + axis=0, gmap=df["Time (sec)"], cmap="YlOrRd", subset=["Time (sec)"] + ) + .hide(axis="index") + .set_table_styles( + [{"selector": "th", "props": [("text-align", "left")]}] + ) + .set_properties(**{"padding": "0 5px"}, subset=["Time (sec)"]) ) - .hide(axis="index") - .set_table_styles([{"selector": "th", "props": [("text-align", "left")]}]) - .set_properties(**{"padding": "0 5px"}, subset=["Time (sec)"]) - ) - df.to_html(self.log_dir.joinpath(filename), index=False) + dat = df.to_html(index=False) + dat = dat.replace("
+ + + + + Expression Timing Report + + + + {dat} + + """ + with open(self.log_dir.joinpath(filename), "w") as f: + f.write(template) + elif style == "grid": + template = """ + + + + + + +
+ + + + """ + with open(self.log_dir.joinpath(filename), "w") as f: + f.write(template.replace("<>", df.to_json(orient="records"))) def component_report_data(self, cutoff_secs: float = 0.1): """ @@ -252,7 +311,7 @@ def component_report( self, filename: str | Path = "expression-timing-components.html", cutoff_secs=0.1, - style: Literal["simple", "grid"] = "simple", + style: Literal["grid", "simple"] = "grid", ) -> None: """Write component-level aggregations to an HTML file. @@ -333,7 +392,7 @@ def component_report( columnDefs: [ { field: "Time (sec)", flex: 1, sort: "desc" }, { field: "Component", flex: 2, filter: true }, - { field: "Expression", flex: 7, filter: true, sortable: false, wrapText: true, autoHeight: true } + { field: "Expression", flex: 7, filter: true, sortable: false, wrapText: true, autoHeight: true, editable: true } ] }; // Your Javascript code to create the Data Grid From 642b4860236f701cbb03b310398f01d15516685c Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Sat, 19 Apr 2025 13:56:01 -0500 Subject: [PATCH 5/7] configurable expression_profile_cutoff --- activitysim/core/configuration/top.py | 15 +++++++++++++++ activitysim/core/timing.py | 25 ++++++++++++++++++++----- 2 files changed, 35 insertions(+), 5 deletions(-) diff --git a/activitysim/core/configuration/top.py b/activitysim/core/configuration/top.py index fa5978a6fc..2f3363e7a8 100644 --- a/activitysim/core/configuration/top.py +++ b/activitysim/core/configuration/top.py @@ -557,6 +557,21 @@ def _check_store_skims_in_shm(self): Production model users should typically have this set explicitly to `False`. """ + expression_profile_cutoff: float = 0.1 + """ + Minimum runtime for an expression to be included in the expression profile. + + .. versionadded:: 1.4 + + Expressions that take less than this amount of time to evaluate will not be + included in the summary report of expression profiling generated at the end + of a model run. For large scale models, this value can be increased to make + the report file smaller, as only the largest values will typically be of + interest. + + This setting has no effect if :py:attr:`expression_profile` is not `True`. + """ + benchmarking: bool = False """ Flag this model run as a benchmarking run. diff --git a/activitysim/core/timing.py b/activitysim/core/timing.py index 140b0bacd7..2d1daf331b 100644 --- a/activitysim/core/timing.py +++ b/activitysim/core/timing.py @@ -167,6 +167,7 @@ class AnalyzeEvalTiming: def __init__(self, state: State): self.log_dir = state.get_log_file_path(str(Path("expr-performance"))) + self.default_cutoff = state.settings.expression_profile_cutoff raw_data = {} for f in self.log_dir.glob("*.log"): df = pd.read_csv(f, sep="\t") @@ -191,7 +192,7 @@ def __init__(self, state: State): def subcomponent_report( self, filename: str | Path = "expression-timing-subcomponents.html", - cutoff_secs=0.1, + cutoff_secs: float | None = None, style: Literal["grid", "simple"] = "grid", ) -> None: """Write the data to an HTML file. @@ -201,14 +202,17 @@ def subcomponent_report( filename : str | Path The name of the file to write the HTML to. If a relative path is given, it will be written in the log directory. - cutoff_secs : float + cutoff_secs : float, optional The cutoff time in seconds. Only expressions with a runtime greater than this will be included in the HTML file. This is used to avoid writing a - huge report full of expressions that run plenty fast. + huge report full of expressions that run plenty fast. 
If not provided, + the default cutoff time from the settings is used. style : "simple" | "grid", default "simple" The style of the report. Either "simple" or "grid". "simple" is a simple HTML table, "grid" is a JavaScript data grid. """ + if cutoff_secs is None: + cutoff_secs = self.default_cutoff # include only expressions that took longer than cutoff_secs df = self.data[self.data["Time (µsec)"] >= cutoff_secs * 1e6] @@ -288,10 +292,21 @@ def subcomponent_report( with open(self.log_dir.joinpath(filename), "w") as f: f.write(template.replace("<>", df.to_json(orient="records"))) - def component_report_data(self, cutoff_secs: float = 0.1): + def component_report_data(self, cutoff_secs: float | None = None): """ Return the data for the component report. + + Parameters + ---------- + cutoff_secs : float, optional + The cutoff time in seconds. Only expressions with a runtime greater than + this will be included in the report. This is used to avoid writing a + huge report full of expressions that run plenty fast. If not provided, + the default cutoff time from the settings is used. """ + if cutoff_secs is None: + cutoff_secs = self.default_cutoff + df = ( self.data.groupby(["Component", "Expression"]) .agg({"Time (µsec)": "sum"}) @@ -310,7 +325,7 @@ def component_report_data(self, cutoff_secs: float = 0.1): def component_report( self, filename: str | Path = "expression-timing-components.html", - cutoff_secs=0.1, + cutoff_secs: float | None = None, style: Literal["grid", "simple"] = "grid", ) -> None: """Write component-level aggregations to an HTML file. From 877f279ff0ae2583c0da695fb6d096c6db4b9503 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Sat, 19 Apr 2025 18:50:51 -0500 Subject: [PATCH 6/7] works with MP --- activitysim/core/timing.py | 62 +++++++++++++++++++++++++++----------- 1 file changed, 45 insertions(+), 17 deletions(-) diff --git a/activitysim/core/timing.py b/activitysim/core/timing.py index 2d1daf331b..8c38827b0f 100644 --- a/activitysim/core/timing.py +++ b/activitysim/core/timing.py @@ -165,29 +165,53 @@ class AnalyzeEvalTiming: Class to analyze the timing of expressions. 
""" - def __init__(self, state: State): + def _read_log(self, log_file: Path) -> pd.DataFrame: + """Read the log file and return a DataFrame.""" + df = pd.read_csv(log_file, sep="\t") + if "(msec)" in df.columns[0]: + df.columns = ["Time (µsec)"] + df.columns[1:].tolist() + df.iloc[:, 0] = df.iloc[:, 0].astype(int) * 1_000 + elif "(sec)" in df.columns[0]: + df.columns = ["Time (µsec)"] + df.columns[1:].tolist() + df.iloc[:, 0] = df.iloc[:, 0].astype(int) * 1_000_000 + else: + df.iloc[:, 0] = df.iloc[:, 0].astype(int) + return df + + def __init__(self, state: State, collect_mp: bool = True) -> None: self.log_dir = state.get_log_file_path(str(Path("expr-performance"))) self.default_cutoff = state.settings.expression_profile_cutoff raw_data = {} for f in self.log_dir.glob("*.log"): - df = pd.read_csv(f, sep="\t") - if "(msec)" in df.columns[0]: - df.columns = ["Time (µsec)"] + df.columns[1:].tolist() - df.iloc[:, 0] = df.iloc[:, 0].astype(int) * 1_000 - elif "(sec)" in df.columns[0]: - df.columns = ["Time (µsec)"] + df.columns[1:].tolist() - df.iloc[:, 0] = df.iloc[:, 0].astype(int) * 1_000_000 - else: - df.iloc[:, 0] = df.iloc[:, 0].astype(int) - raw_data[str(f.stem)] = df - d = pd.concat(raw_data, names=["Component"]).reset_index() + raw_data[str(f.stem)] = self._read_log(f) + + if raw_data: + d = pd.concat(raw_data, names=["Component"]).reset_index() + d["Proc"] = "main" + else: + d = None + + if collect_mp: + raw_data = {} + mp_log_dirs = state.get_log_file_path(".").glob("*-expr-performance") + for mp_log_dir in mp_log_dirs: + subproc_name = "-".join(mp_log_dir.stem.split("-")[:-2]) + for f in mp_log_dir.glob("*.log"): + raw_data[subproc_name, str(f.stem)] = self._read_log(f) + if raw_data: + d_mp = pd.concat(raw_data, names=["Proc", "Component"]).reset_index() + if d is None: + d = d_mp + else: + d = pd.concat([d, d_mp]) # break trace labels into components and subcomponents d["Subcomponent"] = d["Component"].str.split(".", 1).str[1] d["Component"] = d["Component"].str.split(".", 1).str[0] - self.data = d[["Time (µsec)", "Component", "Subcomponent", "Expression"]] - - self.data.sort_values(by=["Time (µsec)"], ascending=[False], inplace=True) + self.data = d[ + ["Time (µsec)", "Proc", "Component", "Subcomponent", "Expression"] + ] + self.data = self.data.sort_values(by=["Time (µsec)"], ascending=[False]) def subcomponent_report( self, @@ -215,11 +239,11 @@ def subcomponent_report( cutoff_secs = self.default_cutoff # include only expressions that took longer than cutoff_secs - df = self.data[self.data["Time (µsec)"] >= cutoff_secs * 1e6] + df = self.data[self.data["Time (µsec)"] >= cutoff_secs * 1e6].copy() # convert the time to seconds df["Time (µsec)"] /= 1e6 - df.rename(columns={"Time (µsec)": "Time (sec)"}, inplace=True) + df = df.rename(columns={"Time (µsec)": "Time (sec)"}) if style == "simple": # format and write the report to HTML @@ -258,6 +282,7 @@ def subcomponent_report( {dat} """ + self.log_dir.joinpath(filename).parent.mkdir(parents=True, exist_ok=True) with open(self.log_dir.joinpath(filename), "w") as f: f.write(template) elif style == "grid": @@ -289,6 +314,7 @@ def subcomponent_report( """ + self.log_dir.joinpath(filename).parent.mkdir(parents=True, exist_ok=True) with open(self.log_dir.joinpath(filename), "w") as f: f.write(template.replace("<>", df.to_json(orient="records"))) @@ -387,6 +413,7 @@ def component_report( {dat} """ + self.log_dir.joinpath(filename).parent.mkdir(parents=True, exist_ok=True) with open(self.log_dir.joinpath(filename), "w") as f: 
f.write(template) elif style == "grid": @@ -417,6 +444,7 @@ def component_report( """ + self.log_dir.joinpath(filename).parent.mkdir(parents=True, exist_ok=True) with open(self.log_dir.joinpath(filename), "w") as f: f.write(template.replace("<>", df.to_json(orient="records"))) else: From 25564f0a999fccadcc4ea19774a939fad8ef4468 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Sat, 19 Apr 2025 19:28:49 -0500 Subject: [PATCH 7/7] documentation --- .../users-guide/performance/expr-profiling.md | 102 ++++++++++++++++++ docs/users-guide/performance/index.md | 1 + 2 files changed, 103 insertions(+) create mode 100644 docs/users-guide/performance/expr-profiling.md diff --git a/docs/users-guide/performance/expr-profiling.md b/docs/users-guide/performance/expr-profiling.md new file mode 100644 index 0000000000..09914983f1 --- /dev/null +++ b/docs/users-guide/performance/expr-profiling.md @@ -0,0 +1,102 @@ +# Expression Profiling + +Part of the appeal of ActivitySim is in its flexibility: it is possible to craft +a massive variety of mathematical forms and relationships through creative use +of the expressions found in most component spec files. But as we have all learned +from Spider-Man, with great power comes great responsibility. Users can write +arbitrary code in spec files, and the runtime performance of ActivitySim will +depend on the parsimony and efficiency of that code. + +Sometimes these spec files can be large, and it may be difficult to determine +simply by inspection which expressions in a given spec file are faster or slower. +ActivitySim now offers an expression-level profiling tool to assist in diagnosing +performance problems that arise from inefficient spec files. + +```{important} +At this time, +expression profiling only works for the evaluation of expressions in "legacy" mode. +It does not work in "sharrow" mode, as the compiled expressions run with sharrow +are not run in a serial fashion and are not able to be profiled in the same way. +``` + +## Profiling an Entire Model Run + +The simplest way to use the expression profiler is to set the +[`expression_profile`](activitysim.core.configuration.Settings.expression_profile) +configuration setting in the top level model settings (typically `settings.yaml`): + +```yaml +expression_profile: true +``` + +This will cause the profiler to be activated for all expressions in the model, +across all components. This includes expressions in the spec files, as well as +expressions in all preprocessors and annotators. Each time the expressions in +any spec file are evaluated, the profiler will record the time taken to evaluate +each expression. An "expr-performance" subdirectory will be created in the model's +logging directory, and a new log file will be created each time the expressions in +any spec file are evaluated. The file is named according to the `trace_label` found +where the expressions are being evaluated. It will include a list of all the evaluated +expressions from the spec file, along with the time taken to evaluate each expression. +For multi-processed models, each subprocess will create its own log file directory, +similar to the logging directory structure for the other model components. + +## Summary Outputs + +At the end of a model run where the `expression_profile` setting is active, +ActivitySim will also create a pair of summary files in the "expr-performance" +subdirectory. 
The first is named "expression-timing-subcomponents.html",
+and contains a simple concatenation of the runtimes of
+expressions in the various subcomponents stored in the log files,
+filtered to only include expressions that took a notable amount of time.
+By default, this threshold is 0.1 seconds, but it can be changed by setting the
+[`expression_profile_cutoff`](activitysim.core.configuration.Settings.expression_profile_cutoff)
+configuration setting in the model settings.
+
+The second file, "expression-timing-components.html", shows an aggregate
+summary of the runtimes for each expression,
+aggregated across all the log files. The aggregation is by model component and
+expression, so that this summary includes the total time taken to evaluate each
+expression within each model component, recognizing that identical expressions
+may be evaluated multiple times in different model subcomponents (e.g. across
+different purposes, or tour numbers, etc.). This more aggregated summary is
+typically the one that will be most useful for identifying expressions that
+provide the most overall potential for performance improvement via streamlining.
+
+Users should note that the expression profiler is not a substitute for good coding
+practices. It will not necessarily identify all performance problems, and it is not
+able to suggest improvements to the expressions. It is simply a tool to help users
+identify which expressions are taking the most time to evaluate, and therefore
+which expressions are the best candidates for improvement.
+
+Also, users should understand that the expression profiler is not directly measuring the
+computational complexity of the expressions, but rather the time taken to evaluate
+the expressions. This time can be affected by a number of factors, including the
+complexity of the expression, the size of the data being processed, and whether
+there are other processes running on the machine at the same time competing for
+resources. For multiprocessing model runs, those other processes may include
+the other ActivitySim subprocesses, which may lead to surprising results.
+
+There is also no adjustment made for parallelization of the expression evaluations.
+For example, if the same expression is evaluated in parallel across 8 processes on
+a machine with 8 cores, and each process takes 0.1 seconds to evaluate the expression,
+the profiler will still show that the expression took 0.8 seconds to evaluate, even
+though the total wall clock time taken to evaluate the expression across all processes
+was only 0.1 seconds.
+
+Profiling expressions also adds some overhead to the model run, so users should
+be careful about using the profiler in production runs. It is recommended to turn off
+the profiler in production runs, and to use it only for debugging and development.
+
+## Profiling Individual Components
+
+The expression profiler can also be used to profile individual components, rather
+than the entire model. This is done by setting the `compute_settings.performance_log`
+attribute for the component in the model settings. This attribute can be set to the
+filename where the profiler log file should be written, which will override
+the default behavior of writing the log file to the "expr-performance" subdirectory.
+This feature only works for components that are run in a single process, and which
+have a `compute_settings` attribute.
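+
+As a minimal sketch, the setting might be added to a component's settings file
+like this (the component file name and log file name here are illustrative only,
+not part of any shipped configuration):
+
+```yaml
+# e.g. in a hypothetical component settings file such as tour_mode_choice.yaml
+compute_settings:
+  # write this component's expression timings to this file, instead of the
+  # default file named after the component's trace label
+  performance_log: tour-mode-choice-expressions.log
+```
+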
It is generally not recommended to use this +feature unless a specific component is suspected of having atypical performance +problems, as it will not provide the same summary reporting as profiling the entire +model. diff --git a/docs/users-guide/performance/index.md b/docs/users-guide/performance/index.md index 2f722f621d..ab807125e1 100644 --- a/docs/users-guide/performance/index.md +++ b/docs/users-guide/performance/index.md @@ -23,6 +23,7 @@ These techniques are the focus of this section. Chunking to Reduce Peak Memory Usage Compiling with Sharrow Skim Data Format + Expression Profiling ``` ## Checklist for Performance Tuning