From c6e21944045ce8526bf2aee56c98905e77877272 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Fri, 18 Apr 2025 22:11:08 -0500 Subject: [PATCH 1/7] expression profiling --- activitysim/abm/models/summarize.py | 17 ++- activitysim/cli/run.py | 7 +- activitysim/core/assign.py | 49 +++--- activitysim/core/configuration/base.py | 9 ++ activitysim/core/configuration/top.py | 20 +++ activitysim/core/expressions.py | 1 + activitysim/core/interaction_simulate.py | 28 +++- activitysim/core/simulate.py | 46 ++++-- activitysim/core/timing.py | 185 +++++++++++++++++++++++ 9 files changed, 324 insertions(+), 38 deletions(-) create mode 100644 activitysim/core/timing.py diff --git a/activitysim/abm/models/summarize.py b/activitysim/abm/models/summarize.py index e842393fda..c877c636cd 100644 --- a/activitysim/abm/models/summarize.py +++ b/activitysim/abm/models/summarize.py @@ -4,11 +4,12 @@ import logging import os +from pathlib import Path import numpy as np import pandas as pd -from activitysim.core import expressions, workflow +from activitysim.core import expressions, timing, workflow from activitysim.core.configuration.base import PreprocessorSettings, PydanticReadable from activitysim.core.los import Network_LOS @@ -353,6 +354,12 @@ def summarize( } ) + if state.settings.expression_profile: + perf_log_file = Path(trace_label + ".log") + else: + perf_log_file = None + performance_timer = timing.EvalTiming(perf_log_file) + for i, row in spec.iterrows(): out_file = row["Output"] expr = row["Expression"] @@ -361,15 +368,19 @@ def summarize( if out_file.startswith("_"): logger.debug(f"Temp Variable: {expr} -> {out_file}") - locals_d[out_file] = eval(expr, globals(), locals_d) + with performance_timer.time_expression(expr): + locals_d[out_file] = eval(expr, globals(), locals_d) continue logger.debug(f"Summary: {expr} -> {out_file}.csv") - resultset = eval(expr, globals(), locals_d) + with performance_timer.time_expression(expr): + resultset = eval(expr, globals(), locals_d) resultset.to_csv( state.get_output_file_path( os.path.join(output_location, f"{out_file}.csv") ), index=False, ) + + performance_timer.write_log(state) diff --git a/activitysim/cli/run.py b/activitysim/cli/run.py index 03258be4b5..f910702155 100644 --- a/activitysim/cli/run.py +++ b/activitysim/cli/run.py @@ -11,7 +11,7 @@ import numpy as np -from activitysim.core import chunk, config, mem, tracing, workflow +from activitysim.core import chunk, config, mem, timing, tracing, workflow from activitysim.core.configuration import FileSystem, Settings logger = logging.getLogger(__name__) @@ -423,6 +423,11 @@ def run(args): if memory_sidecar_process: memory_sidecar_process.stop() + if state.settings.expression_profile: + # generate a summary of slower expression evaluation times + # across all models and write to a file + timing.AnalyzeEvalTiming(state).to_html() + return 0 diff --git a/activitysim/core/assign.py b/activitysim/core/assign.py index 4054b2aec9..8c5513416c 100644 --- a/activitysim/core/assign.py +++ b/activitysim/core/assign.py @@ -5,11 +5,12 @@ import logging from builtins import object, zip from collections import OrderedDict +from pathlib import Path import numpy as np import pandas as pd -from activitysim.core import chunk, util, workflow +from activitysim.core import chunk, timing, util, workflow logger = logging.getLogger(__name__) @@ -246,6 +247,12 @@ def to_series(x): assert assignment_expressions.shape[0] > 0 + if state.settings.expression_profile: + perf_log_file = Path(trace_label + ".log") + else: + perf_log_file = 
None + performance_timer = timing.EvalTiming(perf_log_file) + trace_assigned_locals = trace_results = None if trace_rows is not None: # convert to numpy array so we can slice ndarrays as well as series @@ -282,24 +289,25 @@ def to_series(x): n_randoms += 1 assignment_expressions.loc[expression_idx, "expression"] = expression if n_randoms: - try: - random_draws = state.get_rn_generator().normal_for_df( - df, broadcast=True, size=n_randoms - ) - except RuntimeError: - pass - else: - _locals_dict["random_draws"] = random_draws + with performance_timer.time_expression(""): + try: + random_draws = state.get_rn_generator().normal_for_df( + df, broadcast=True, size=n_randoms + ) + except RuntimeError: + pass + else: + _locals_dict["random_draws"] = random_draws - def rng_lognormal(random_draws, mu, sigma, broadcast=True, scale=False): - if scale: - x = 1 + ((sigma * sigma) / (mu * mu)) - mu = np.log(mu / (np.sqrt(x))) - sigma = np.sqrt(np.log(x)) - assert broadcast - return np.exp(random_draws * sigma + mu) + def rng_lognormal(random_draws, mu, sigma, broadcast=True, scale=False): + if scale: + x = 1 + ((sigma * sigma) / (mu * mu)) + mu = np.log(mu / (np.sqrt(x))) + sigma = np.sqrt(np.log(x)) + assert broadcast + return np.exp(random_draws * sigma + mu) - _locals_dict["rng_lognormal"] = rng_lognormal + _locals_dict["rng_lognormal"] = rng_lognormal sharrow_enabled = state.settings.sharrow @@ -327,7 +335,8 @@ def rng_lognormal(random_draws, mu, sigma, broadcast=True, scale=False): if is_temp_singular(target) or is_throwaway(target): try: - x = eval(expression, globals(), _locals_dict) + with performance_timer.time_expression(expression): + x = eval(expression, globals(), _locals_dict) except Exception as err: logger.error( "assign_variables error: %s: %s", type(err).__name__, str(err) @@ -355,7 +364,8 @@ def rng_lognormal(random_draws, mu, sigma, broadcast=True, scale=False): # FIXME should whitelist globals for security? globals_dict = {} - expr_values = to_series(eval(expression, globals_dict, _locals_dict)) + with performance_timer.time_expression(expression): + expr_values = to_series(eval(expression, globals_dict, _locals_dict)) if sharrow_enabled: if isinstance(expr_values.dtype, pd.api.types.CategoricalDtype): @@ -430,4 +440,5 @@ def rng_lognormal(random_draws, mu, sigma, broadcast=True, scale=False): inplace=True, ) + performance_timer.write_log(state) return variables, trace_results, trace_assigned_locals diff --git a/activitysim/core/configuration/base.py b/activitysim/core/configuration/base.py index ec27fee994..9ce7f38555 100644 --- a/activitysim/core/configuration/base.py +++ b/activitysim/core/configuration/base.py @@ -234,6 +234,15 @@ def pandas_option_context(self): else: yield + performance_log: Path | None = None + """Log runtime performance to this file. + + The runtime performance log shows the time taken to evaluate each + expression in the specification files. It is useful for debugging + performance issues with complex expressions. + + If set to None (the default), no performance logging will be done.""" + def subcomponent_settings(self, subcomponent: str) -> ComputeSettings: """Get the sharrow settings for a particular subcomponent.""" return ComputeSettings( diff --git a/activitysim/core/configuration/top.py b/activitysim/core/configuration/top.py index 024f878a46..fa5978a6fc 100644 --- a/activitysim/core/configuration/top.py +++ b/activitysim/core/configuration/top.py @@ -537,6 +537,26 @@ def _check_store_skims_in_shm(self): own that pollutes the collected data. 
""" + expression_profile: bool | None = None + """ + Track the runtime of each individual expression in each spec file. + + .. versionadded:: 1.4 + + The default value of this setting is `None`, which sets no global control + of expression profiling, and allows this feature to be turned on or off + for individual components. If set to `True`, all components will have + expression profiling enabled, and the outputs will be written to files named + based on the trace label unless explicitly set in the `compute_settings` for + individual components. If set to `False`, all components will have expression + profiling disabled. + + This is generally a developer-only feature and not needed for regular usage. + It will add some overhead to the model run, which is only valuable if you + expect the review the expression runtimes with an eye towards improving them. + Production model users should typically have this set explicitly to `False`. + """ + benchmarking: bool = False """ Flag this model run as a benchmarking run. diff --git a/activitysim/core/expressions.py b/activitysim/core/expressions.py index 413636d3f4..9d21d39139 100644 --- a/activitysim/core/expressions.py +++ b/activitysim/core/expressions.py @@ -139,6 +139,7 @@ def compute_columns( df, _locals_dict, trace_rows=state.tracing.trace_targets(df), + trace_label=trace_label, ) if trace_results is not None: diff --git a/activitysim/core/interaction_simulate.py b/activitysim/core/interaction_simulate.py index 1089de3edc..6d9f2a86d6 100644 --- a/activitysim/core/interaction_simulate.py +++ b/activitysim/core/interaction_simulate.py @@ -7,12 +7,13 @@ from builtins import zip from collections import OrderedDict from datetime import timedelta +from pathlib import Path from typing import Mapping import numpy as np import pandas as pd -from activitysim.core import chunk, logit, simulate, tracing, util, workflow +from activitysim.core import chunk, logit, simulate, timing, tracing, util, workflow from activitysim.core.configuration.base import ComputeSettings logger = logging.getLogger(__name__) @@ -262,6 +263,18 @@ def to_series(x): exprs = spec.index labels = spec.index + # init a performance timer if needed + if ( + state.settings.expression_profile + and compute_settings.performance_log is None + ): + perf_log_file = Path(trace_label + ".log") + elif state.settings.expression_profile is False: + perf_log_file = None + else: + perf_log_file = compute_settings.performance_log + performance_timer = timing.EvalTiming(perf_log_file) + with compute_settings.pandas_option_context(): for expr, label, coefficient in zip(exprs, labels, spec.iloc[:, 0]): try: @@ -269,7 +282,8 @@ def to_series(x): if expr.startswith("_"): target = expr[: expr.index("@")] rhs = expr[expr.index("@") + 1 :] - v = to_series(eval(rhs, globals(), locals_d)) + with performance_timer.time_expression(expr): + v = to_series(eval(rhs, globals(), locals_d)) # update locals to allows us to ref previously assigned targets locals_d[target] = v @@ -284,10 +298,11 @@ def to_series(x): # they have a non-zero dummy coefficient to avoid being removed from spec as NOPs continue - if expr.startswith("@"): - v = to_series(eval(expr[1:], globals(), locals_d)) - else: - v = df.eval(expr, resolvers=[locals_d]) + with performance_timer.time_expression(expr): + if expr.startswith("@"): + v = to_series(eval(expr[1:], globals(), locals_d)) + else: + v = df.eval(expr, resolvers=[locals_d]) if check_for_variability and v.std() == 0: logger.info( @@ -402,6 +417,7 @@ def to_series(x): trace_label, 
"eval.trace_eval_results", trace_eval_results ) + performance_timer.write_log(state) chunk_sizer.log_df(trace_label, "v", None) chunk_sizer.log_df( trace_label, "eval.utilities", None diff --git a/activitysim/core/simulate.py b/activitysim/core/simulate.py index fcffcf6066..7c8b09e937 100644 --- a/activitysim/core/simulate.py +++ b/activitysim/core/simulate.py @@ -21,6 +21,7 @@ configuration, logit, pathbuilder, + timing, tracing, util, workflow, @@ -627,6 +628,17 @@ def eval_utilities( if utilities is None or estimator or sharrow_enabled == "test": trace_label = tracing.extend_trace_label(trace_label, "eval_utils") + if ( + state.settings.expression_profile + and compute_settings.performance_log is None + ): + perf_log_file = Path(trace_label + ".log") + elif state.settings.expression_profile is False: + perf_log_file = None + else: + perf_log_file = compute_settings.performance_log + performance_timer = timing.EvalTiming(perf_log_file) + # avoid altering caller's passed-in locals_d parameter (they may be looping) locals_dict = assign.local_utilities(state) @@ -653,10 +665,13 @@ def eval_utilities( with warnings.catch_warnings(record=True) as w: # Cause all warnings to always be triggered. warnings.simplefilter("always") - if expr.startswith("@"): - expression_value = eval(expr[1:], globals_dict, locals_dict) - else: - expression_value = choosers.eval(expr) + with performance_timer.time_expression(expr): + if expr.startswith("@"): + expression_value = eval( + expr[1:], globals_dict, locals_dict + ) + else: + expression_value = choosers.eval(expr) if len(w) > 0: for wrn in w: @@ -685,6 +700,7 @@ def eval_utilities( expression_values[i] = expression_value i += 1 + performance_timer.write_log(state) chunk_sizer.log_df(trace_label, "expression_values", expression_values) if estimator: @@ -846,7 +862,9 @@ def eval_utilities( return utilities -def eval_variables(state: workflow.State, exprs, df, locals_d=None): +def eval_variables( + state: workflow.State, exprs, df, locals_d=None, trace_label: str | None = None +): """ Evaluate a set of variable expressions from a spec in the context of a given data table. @@ -873,6 +891,9 @@ def eval_variables(state: workflow.State, exprs, df, locals_d=None): locals_d : Dict This is a dictionary of local variables that will be the environment for an evaluation of an expression that begins with @ + trace_label : str + The trace label to use for performance logging. If None, performance + logging is not activated. 
Returns ------- @@ -907,13 +928,20 @@ def to_array(x): return a + if state.settings.expression_profile and trace_label: + perf_log_file = Path(trace_label + ".log") + else: + perf_log_file = None + performance_timer = timing.EvalTiming(perf_log_file) + values = OrderedDict() for expr in exprs: try: - if expr.startswith("@"): - expr_values = to_array(eval(expr[1:], globals_dict, locals_dict)) - else: - expr_values = to_array(df.eval(expr)) + with performance_timer.time_expression(expr): + if expr.startswith("@"): + expr_values = to_array(eval(expr[1:], globals_dict, locals_dict)) + else: + expr_values = to_array(df.eval(expr)) # read model spec should ensure uniqueness, otherwise we should uniquify assert expr not in values values[expr] = expr_values diff --git a/activitysim/core/timing.py b/activitysim/core/timing.py new file mode 100644 index 0000000000..dd1b014e12 --- /dev/null +++ b/activitysim/core/timing.py @@ -0,0 +1,185 @@ +from __future__ import annotations + +from contextlib import contextmanager +from pathlib import Path +from time import time_ns +from typing import TYPE_CHECKING + +import pandas as pd + +from .util import si_units + +if TYPE_CHECKING: + from .workflow import State + + +class NoTiming: + """Class that does no timing, serves as the default. + + This class is kept as simple as possible to avoid unnecessary overhead when + no timing is requested. + """ + + @contextmanager + def time_expression(self, expression: str): + """Context manager to time an expression.""" + yield + + def write_log(self, state: State) -> None: + """Write the log to a file.""" + pass + + +class EvalTiming(NoTiming): + def __new__(cls, log: Path | None = None, **kwargs): + if log is None: + return NoTiming() + else: + return super().__new__(cls) + + def __init__(self, log: Path | None = None, *, overwrite: bool = False): + """ + Timing class to log the time taken to evaluate expressions. + + Parameters + ---------- + log : Path | None, default None + Path to the log file. If None, no logging is done. If this is an + absolute path, the log file is created there. If this is just a + simple filename or a relative path, the log file is created relative + in or relative to the usual logging directory. + overwrite : bool, default False + If True, overwrite the log file if it already exists. If False, + create a new log file with a unique name. + """ + self.log_file = log + self.overwrite = overwrite + self.elapsed_times = {} + + @contextmanager + def time_expression(self, expression: str): + """Context manager to time an expression. + + Parameters + ---------- + expression : str + The expression to be timed. This is used as the key in the log file. + """ + + # when performance logging is not enabled, do nothing + if self.log_file is None: + yield + return + + # when performance logging is enabled, we track the time it takes to evaluate + # the expression and store it + start_time = time_ns() + yield + end_time = time_ns() + elapsed_time = end_time - start_time + if expression in self.elapsed_times: + self.elapsed_times[expression] += elapsed_time + else: + self.elapsed_times[expression] = elapsed_time + + def write_log(self, state: State) -> None: + """Write the log to a file. + + Parameters + ---------- + state : State + The state object containing configuration information. This is used + to determine the path for the log file, when the log file is not + given as an absolute path. 
+ """ + if self.log_file is None: + return + + if self.log_file.is_absolute(): + filename = self.log_file + else: + filename = state.get_log_file_path( + str(Path("expr-performance") / self.log_file) + ) + + # if the log file already exists and overwrite is false, create a new file + proposed_filename = filename + n = 0 + while not self.overwrite and proposed_filename.exists(): + n += 1 + proposed_filename = filename.with_stem(filename.stem + f"-{n}") + filename = proposed_filename + + # ensure the parent directory exists + filename.parent.mkdir(parents=True, exist_ok=True) + + # Determine the scale for the elapsed times. We want to use an appropriate + # timescale for the elapsed times, which provides useful information without + # reporting excessive precision. + # If the smallest elapsed time is greater than 1 second, use seconds. + # If the smallest elapsed time is greater than 1 millisecond, use milliseconds. + # Otherwise, use microseconds, no one should care about nanoseconds. + min_t = 1_000_000_000 + for t in self.elapsed_times.values(): + if t < min_t: + min_t = t + if min_t > 1_000_000_000: + scale = 1_000_000_000 + label = "Time (sec)" + elif min_t > 1_000_000: + scale = 1_000_000 + label = "Time (msec)" + else: + scale = 1_000 + label = "Time (µsec)" + + # The timing log is written in a tab-separated format, with times in the + # first column so they are easy to scan through for anomalies. + with open(filename, "w") as f: + f.write(f"{label:11}\tExpression\n") + for expression, elapsed_time in self.elapsed_times.items(): + t = int(elapsed_time / scale) + f.write(f"{t: 11d}\t{expression}\n") + + +class AnalyzeEvalTiming: + """ + Class to analyze the timing of expressions. + """ + + def __init__(self, state: State): + self.log_dir = state.get_log_file_path(str(Path("expr-performance"))) + raw_data = {} + for f in self.log_dir.glob("*.log"): + df = pd.read_csv(f, sep="\t") + if "(msec)" in df.columns[0]: + df.columns = ["Time (µsec)"] + df.columns[1:].tolist() + df.iloc[:, 0] = df.iloc[:, 0].astype(int) * 1_000 + elif "(sec)" in df.columns[0]: + df.columns = ["Time (µsec)"] + df.columns[1:].tolist() + df.iloc[:, 0] = df.iloc[:, 0].astype(int) * 1_000_000 + else: + df.iloc[:, 0] = df.iloc[:, 0].astype(int) + raw_data[str(f.stem)] = df + d = pd.concat(raw_data, names=["Component"]).reset_index() + self.data = d[["Time (µsec)", "Component", "Expression"]] + self.data.sort_values(by=["Time (µsec)"], ascending=[False], inplace=True) + + def to_html( + self, filename: str | Path = "expression-timing.html", cutoff_secs=0.1 + ) -> None: + """Write the data to an HTML file. + + Parameters + ---------- + filename : str | Path + The name of the file to write the HTML to. If a relative path is given, + it will be written in the log directory. + cutoff_secs : float + The cutoff time in seconds. Only expressions with a runtime greater than + this will be included in the HTML file. This is used to avoid writing a + huge report full of expressions that run plenty fast. 
+ """ + self.data[self.data["Time (µsec)"] >= cutoff_secs * 1e6].to_html( + self.log_dir.joinpath(filename), index=False + ) From f1e19710c7c899db4ca7e603e50debfa4589a8b4 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Sat, 19 Apr 2025 13:11:58 -0500 Subject: [PATCH 2/7] two reports --- activitysim/cli/run.py | 4 +- activitysim/core/timing.py | 155 +++++++++++++++++++++++++++++++++++-- 2 files changed, 152 insertions(+), 7 deletions(-) diff --git a/activitysim/cli/run.py b/activitysim/cli/run.py index f910702155..2b924f3408 100644 --- a/activitysim/cli/run.py +++ b/activitysim/cli/run.py @@ -426,7 +426,9 @@ def run(args): if state.settings.expression_profile: # generate a summary of slower expression evaluation times # across all models and write to a file - timing.AnalyzeEvalTiming(state).to_html() + analyze = timing.AnalyzeEvalTiming(state) + analyze.component_report() + analyze.subcomponent_report() return 0 diff --git a/activitysim/core/timing.py b/activitysim/core/timing.py index dd1b014e12..edead45a88 100644 --- a/activitysim/core/timing.py +++ b/activitysim/core/timing.py @@ -3,7 +3,7 @@ from contextlib import contextmanager from pathlib import Path from time import time_ns -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Literal import pandas as pd @@ -142,6 +142,24 @@ def write_log(self, state: State) -> None: f.write(f"{t: 11d}\t{expression}\n") +def write_sortable_table(df: pd.DataFrame, filename: str | Path) -> None: + html_table = df.to_html(classes="sortable", index=False) + html_content = f""" + + + + + + + {html_table} + + + + """ + with open(filename, "w") as f: + f.write(html_content) + + class AnalyzeEvalTiming: """ Class to analyze the timing of expressions. @@ -162,11 +180,18 @@ def __init__(self, state: State): df.iloc[:, 0] = df.iloc[:, 0].astype(int) raw_data[str(f.stem)] = df d = pd.concat(raw_data, names=["Component"]).reset_index() - self.data = d[["Time (µsec)", "Component", "Expression"]] + + # break trace labels into components and subcomponents + d["Subcomponent"] = d["Component"].str.split(".", 1).str[1] + d["Component"] = d["Component"].str.split(".", 1).str[0] + self.data = d[["Time (µsec)", "Component", "Subcomponent", "Expression"]] + self.data.sort_values(by=["Time (µsec)"], ascending=[False], inplace=True) - def to_html( - self, filename: str | Path = "expression-timing.html", cutoff_secs=0.1 + def subcomponent_report( + self, + filename: str | Path = "expression-timing-subcomponents.html", + cutoff_secs=0.1, ) -> None: """Write the data to an HTML file. @@ -180,6 +205,124 @@ def to_html( this will be included in the HTML file. This is used to avoid writing a huge report full of expressions that run plenty fast. 
""" - self.data[self.data["Time (µsec)"] >= cutoff_secs * 1e6].to_html( - self.log_dir.joinpath(filename), index=False + + # include only expressions that took longer than cutoff_secs + df = self.data[self.data["Time (µsec)"] >= cutoff_secs * 1e6] + + # convert the time to seconds + df["Time (µsec)"] /= 1e6 + df.rename(columns={"Time (µsec)": "Time (sec)"}, inplace=True) + + # format and write the report to HTML + df = ( + df.style.format( + { + "Time (sec)": lambda x: f"{x:.3f}", + } + ) + .background_gradient( + axis=0, gmap=df["Time (sec)"], cmap="YlOrRd", subset=["Time (sec)"] + ) + .hide(axis="index") + .set_table_styles([{"selector": "th", "props": [("text-align", "left")]}]) + .set_properties(**{"padding": "0 5px"}, subset=["Time (sec)"]) + ) + df.to_html(self.log_dir.joinpath(filename), index=False) + + def component_report_data(self, cutoff_secs: float = 0.1): + """ + Return the data for the component report. + """ + df = ( + self.data.groupby(["Component", "Expression"]) + .agg({"Time (µsec)": "sum"}) + .reset_index()[["Time (µsec)", "Component", "Expression"]] + .sort_values(by=["Time (µsec)"], ascending=[False]) ) + + # include only expressions that took longer than cutoff_secs + df = df[df["Time (µsec)"] >= cutoff_secs * 1e6] + + # convert the time to seconds + df["Time (µsec)"] /= 1e6 + df.rename(columns={"Time (µsec)": "Time (sec)"}, inplace=True) + return df + + def component_report( + self, + filename: str | Path = "expression-timing-components.html", + cutoff_secs=0.1, + style: Literal["simple", "grid"] = "simple", + ) -> None: + """Write component-level aggregations to an HTML file. + + This will aggregate the expression timings by component, which may better + reveal expressions that are more problematic because they are evaluated + multiple times. + + Parameters + ---------- + filename : str | Path + The name of the file to write the HTML to. If a relative path is given, + it will be written in the log directory. + cutoff_secs : float + The cutoff time in seconds. Only expressions with a runtime greater than + this will be included in the HTML file. This is used to avoid writing a + huge report full of expressions that run plenty fast. + style : "simple" | "grid", default "simple" + The style of the report. Either "simple" or "grid". "simple" is a + simple HTML table, "grid" is a JavaScript data grid. + """ + + df = self.component_report_data(cutoff_secs=cutoff_secs) + + if style == "simple": + # format and write the report to HTML in a simple table + df = ( + df.style.format( + { + "Time (sec)": lambda x: f"{x:.3f}", + } + ) + .background_gradient( + axis=0, gmap=df["Time (sec)"], cmap="YlOrRd", subset=["Time (sec)"] + ) + .hide(axis="index") + .set_table_styles( + [{"selector": "th", "props": [("text-align", "left")]}] + ) + .set_properties(**{"padding": "0 5px"}, subset=["Time (sec)"]) + ) + df.to_html(self.log_dir.joinpath(filename), index=False) + elif style == "grid": + template = """ + + + + + + +
+ + + + """ + with open(self.log_dir.joinpath(filename), "w") as f: + f.write(template.replace("<>", df.to_json(orient="records"))) + else: + raise ValueError(f"Unknown style {style}. Must be 'simple' or 'grid'.") From 770b3363f25a359bbabebcac0510575fe6146618 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Sat, 19 Apr 2025 13:33:04 -0500 Subject: [PATCH 3/7] make simple report sortable --- activitysim/core/timing.py | 342 ++++++++++++++++++++++++++++++++++++- 1 file changed, 341 insertions(+), 1 deletion(-) diff --git a/activitysim/core/timing.py b/activitysim/core/timing.py index edead45a88..333ab32ae3 100644 --- a/activitysim/core/timing.py +++ b/activitysim/core/timing.py @@ -293,7 +293,28 @@ def component_report( ) .set_properties(**{"padding": "0 5px"}, subset=["Time (sec)"]) ) - df.to_html(self.log_dir.joinpath(filename), index=False) + dat = df.to_html(index=False) + dat = dat.replace(" + + + + + Expression Timing Report + + + + {dat} + + """ + with open(self.log_dir.joinpath(filename), "w") as f: + f.write(template) elif style == "grid": template = """ @@ -326,3 +347,322 @@ def component_report( f.write(template.replace("<>", df.to_json(orient="records"))) else: raise ValueError(f"Unknown style {style}. Must be 'simple' or 'grid'.") + + +# Code below is from https://github.com/HubSpot/sortable +# Copyright (C) 2013 Adam Schwartz, http://adamschwartz.co +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+ + +SORTABLE_JS = """ +(function() { + var SELECTOR, addEventListener, clickEvents, numberRegExp, sortable, touchDevice, trimRegExp; + + SELECTOR = 'table[data-sortable]'; + + numberRegExp = /^-?[£$¤]?[\d,.]+%?$/; + + trimRegExp = /^\s+|\s+$/g; + + clickEvents = ['click']; + + touchDevice = 'ontouchstart' in document.documentElement; + + if (touchDevice) { + clickEvents.push('touchstart'); + } + + addEventListener = function(el, event, handler) { + if (el.addEventListener != null) { + return el.addEventListener(event, handler, false); + } else { + return el.attachEvent("on" + event, handler); + } + }; + + sortable = { + init: function(options) { + var table, tables, _i, _len, _results; + if (options == null) { + options = {}; + } + if (options.selector == null) { + options.selector = SELECTOR; + } + tables = document.querySelectorAll(options.selector); + _results = []; + for (_i = 0, _len = tables.length; _i < _len; _i++) { + table = tables[_i]; + _results.push(sortable.initTable(table)); + } + return _results; + }, + initTable: function(table) { + var i, th, ths, _i, _len, _ref; + if (((_ref = table.tHead) != null ? _ref.rows.length : void 0) !== 1) { + return; + } + if (table.getAttribute('data-sortable-initialized') === 'true') { + return; + } + table.setAttribute('data-sortable-initialized', 'true'); + ths = table.querySelectorAll('th'); + for (i = _i = 0, _len = ths.length; _i < _len; i = ++_i) { + th = ths[i]; + if (th.getAttribute('data-sortable') !== 'false') { + sortable.setupClickableTH(table, th, i); + } + } + return table; + }, + setupClickableTH: function(table, th, i) { + var eventName, onClick, type, _i, _len, _results; + type = sortable.getColumnType(table, i); + onClick = function(e) { + var compare, item, newSortedDirection, position, row, rowArray, sorted, sortedDirection, tBody, ths, value, _compare, _i, _j, _k, _l, _len, _len1, _len2, _len3, _len4, _m, _ref, _ref1; + if (e.handled !== true) { + e.handled = true; + } else { + return false; + } + sorted = this.getAttribute('data-sorted') === 'true'; + sortedDirection = this.getAttribute('data-sorted-direction'); + if (sorted) { + newSortedDirection = sortedDirection === 'ascending' ? 
'descending' : 'ascending'; + } else { + newSortedDirection = type.defaultSortDirection; + } + ths = this.parentNode.querySelectorAll('th'); + for (_i = 0, _len = ths.length; _i < _len; _i++) { + th = ths[_i]; + th.setAttribute('data-sorted', 'false'); + th.removeAttribute('data-sorted-direction'); + } + this.setAttribute('data-sorted', 'true'); + this.setAttribute('data-sorted-direction', newSortedDirection); + tBody = table.tBodies[0]; + rowArray = []; + if (!sorted) { + if (type.compare != null) { + _compare = type.compare; + } else { + _compare = function(a, b) { + return b - a; + }; + } + compare = function(a, b) { + if (a[0] === b[0]) { + return a[2] - b[2]; + } + if (type.reverse) { + return _compare(b[0], a[0]); + } else { + return _compare(a[0], b[0]); + } + }; + _ref = tBody.rows; + for (position = _j = 0, _len1 = _ref.length; _j < _len1; position = ++_j) { + row = _ref[position]; + value = sortable.getNodeValue(row.cells[i]); + if (type.comparator != null) { + value = type.comparator(value); + } + rowArray.push([value, row, position]); + } + rowArray.sort(compare); + for (_k = 0, _len2 = rowArray.length; _k < _len2; _k++) { + row = rowArray[_k]; + tBody.appendChild(row[1]); + } + } else { + _ref1 = tBody.rows; + for (_l = 0, _len3 = _ref1.length; _l < _len3; _l++) { + item = _ref1[_l]; + rowArray.push(item); + } + rowArray.reverse(); + for (_m = 0, _len4 = rowArray.length; _m < _len4; _m++) { + row = rowArray[_m]; + tBody.appendChild(row); + } + } + if (typeof window['CustomEvent'] === 'function') { + return typeof table.dispatchEvent === "function" ? table.dispatchEvent(new CustomEvent('Sortable.sorted', { + bubbles: true + })) : void 0; + } + }; + _results = []; + for (_i = 0, _len = clickEvents.length; _i < _len; _i++) { + eventName = clickEvents[_i]; + _results.push(addEventListener(th, eventName, onClick)); + } + return _results; + }, + getColumnType: function(table, i) { + var row, specified, text, type, _i, _j, _len, _len1, _ref, _ref1, _ref2; + specified = (_ref = table.querySelectorAll('th')[i]) != null ? 
_ref.getAttribute('data-sortable-type') : void 0; + if (specified != null) { + return sortable.typesObject[specified]; + } + _ref1 = table.tBodies[0].rows; + for (_i = 0, _len = _ref1.length; _i < _len; _i++) { + row = _ref1[_i]; + text = sortable.getNodeValue(row.cells[i]); + _ref2 = sortable.types; + for (_j = 0, _len1 = _ref2.length; _j < _len1; _j++) { + type = _ref2[_j]; + if (type.match(text)) { + return type; + } + } + } + return sortable.typesObject.alpha; + }, + getNodeValue: function(node) { + var dataValue; + if (!node) { + return ''; + } + dataValue = node.getAttribute('data-value'); + if (dataValue !== null) { + return dataValue; + } + if (typeof node.innerText !== 'undefined') { + return node.innerText.replace(trimRegExp, ''); + } + return node.textContent.replace(trimRegExp, ''); + }, + setupTypes: function(types) { + var type, _i, _len, _results; + sortable.types = types; + sortable.typesObject = {}; + _results = []; + for (_i = 0, _len = types.length; _i < _len; _i++) { + type = types[_i]; + _results.push(sortable.typesObject[type.name] = type); + } + return _results; + } + }; + + sortable.setupTypes([ + { + name: 'numeric', + defaultSortDirection: 'descending', + match: function(a) { + return a.match(numberRegExp); + }, + comparator: function(a) { + return parseFloat(a.replace(/[^0-9.-]/g, ''), 10) || 0; + } + }, { + name: 'date', + defaultSortDirection: 'ascending', + reverse: true, + match: function(a) { + return !isNaN(Date.parse(a)); + }, + comparator: function(a) { + return Date.parse(a) || 0; + } + }, { + name: 'alpha', + defaultSortDirection: 'ascending', + match: function() { + return true; + }, + compare: function(a, b) { + return a.localeCompare(b); + } + } + ]); + + setTimeout(sortable.init, 0); + + if (typeof define === 'function' && define.amd) { + define(function() { + return sortable; + }); + } else if (typeof exports !== 'undefined') { + module.exports = sortable; + } else { + window.Sortable = sortable; + } + +}).call(this); +""" + +SORTABLE_CSS = """ +/* line 2, ../sass/_sortable.sass */ +table[data-sortable] { + border-collapse: collapse; + border-spacing: 0; +} +/* line 6, ../sass/_sortable.sass */ +table[data-sortable] th { + vertical-align: bottom; + font-weight: bold; +} +/* line 10, ../sass/_sortable.sass */ +table[data-sortable] th, table[data-sortable] td { + text-align: left; + padding: 10px; +} +/* line 14, ../sass/_sortable.sass */ +table[data-sortable] th:not([data-sortable="false"]) { + -webkit-user-select: none; + -moz-user-select: none; + -ms-user-select: none; + -o-user-select: none; + user-select: none; + -webkit-tap-highlight-color: rgba(0, 0, 0, 0); + -webkit-touch-callout: none; + cursor: pointer; +} +/* line 26, ../sass/_sortable.sass */ +table[data-sortable] th:after { + content: ""; + visibility: hidden; + display: inline-block; + vertical-align: inherit; + height: 0; + width: 0; + border-width: 5px; + border-style: solid; + border-color: transparent; + margin-right: 1px; + margin-left: 10px; + float: right; +} +/* line 40, ../sass/_sortable.sass */ +table[data-sortable] th[data-sorted="true"]:after { + visibility: visible; +} +/* line 43, ../sass/_sortable.sass */ +table[data-sortable] th[data-sorted-direction="descending"]:after { + border-top-color: inherit; + margin-top: 8px; +} +/* line 47, ../sass/_sortable.sass */ +table[data-sortable] th[data-sorted-direction="ascending"]:after { + border-bottom-color: inherit; + margin-top: 3px; +}""" From 136a12c586e72d2fe0c683000ebdd663754ac64c Mon Sep 17 00:00:00 2001 From: Jeff Newman 
Date: Sat, 19 Apr 2025 13:47:54 -0500 Subject: [PATCH 4/7] change default to grid --- activitysim/core/timing.py | 91 +++++++++++++++++++++++++++++++------- 1 file changed, 75 insertions(+), 16 deletions(-) diff --git a/activitysim/core/timing.py b/activitysim/core/timing.py index 333ab32ae3..140b0bacd7 100644 --- a/activitysim/core/timing.py +++ b/activitysim/core/timing.py @@ -192,6 +192,7 @@ def subcomponent_report( self, filename: str | Path = "expression-timing-subcomponents.html", cutoff_secs=0.1, + style: Literal["grid", "simple"] = "grid", ) -> None: """Write the data to an HTML file. @@ -204,6 +205,9 @@ def subcomponent_report( The cutoff time in seconds. Only expressions with a runtime greater than this will be included in the HTML file. This is used to avoid writing a huge report full of expressions that run plenty fast. + style : "simple" | "grid", default "simple" + The style of the report. Either "simple" or "grid". "simple" is a + simple HTML table, "grid" is a JavaScript data grid. """ # include only expressions that took longer than cutoff_secs @@ -213,21 +217,76 @@ def subcomponent_report( df["Time (µsec)"] /= 1e6 df.rename(columns={"Time (µsec)": "Time (sec)"}, inplace=True) - # format and write the report to HTML - df = ( - df.style.format( - { - "Time (sec)": lambda x: f"{x:.3f}", - } - ) - .background_gradient( - axis=0, gmap=df["Time (sec)"], cmap="YlOrRd", subset=["Time (sec)"] + if style == "simple": + # format and write the report to HTML + df = ( + df.style.format( + { + "Time (sec)": lambda x: f"{x:.3f}", + } + ) + .background_gradient( + axis=0, gmap=df["Time (sec)"], cmap="YlOrRd", subset=["Time (sec)"] + ) + .hide(axis="index") + .set_table_styles( + [{"selector": "th", "props": [("text-align", "left")]}] + ) + .set_properties(**{"padding": "0 5px"}, subset=["Time (sec)"]) ) - .hide(axis="index") - .set_table_styles([{"selector": "th", "props": [("text-align", "left")]}]) - .set_properties(**{"padding": "0 5px"}, subset=["Time (sec)"]) - ) - df.to_html(self.log_dir.joinpath(filename), index=False) + dat = df.to_html(index=False) + dat = dat.replace("
+ + + + + Expression Timing Report + + + + {dat} + + """ + with open(self.log_dir.joinpath(filename), "w") as f: + f.write(template) + elif style == "grid": + template = """ + + + + + + +
+ + + + """ + with open(self.log_dir.joinpath(filename), "w") as f: + f.write(template.replace("<>", df.to_json(orient="records"))) def component_report_data(self, cutoff_secs: float = 0.1): """ @@ -252,7 +311,7 @@ def component_report( self, filename: str | Path = "expression-timing-components.html", cutoff_secs=0.1, - style: Literal["simple", "grid"] = "simple", + style: Literal["grid", "simple"] = "grid", ) -> None: """Write component-level aggregations to an HTML file. @@ -333,7 +392,7 @@ def component_report( columnDefs: [ { field: "Time (sec)", flex: 1, sort: "desc" }, { field: "Component", flex: 2, filter: true }, - { field: "Expression", flex: 7, filter: true, sortable: false, wrapText: true, autoHeight: true } + { field: "Expression", flex: 7, filter: true, sortable: false, wrapText: true, autoHeight: true, editable: true } ] }; // Your Javascript code to create the Data Grid From 642b4860236f701cbb03b310398f01d15516685c Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Sat, 19 Apr 2025 13:56:01 -0500 Subject: [PATCH 5/7] configurable expression_profile_cutoff --- activitysim/core/configuration/top.py | 15 +++++++++++++++ activitysim/core/timing.py | 25 ++++++++++++++++++++----- 2 files changed, 35 insertions(+), 5 deletions(-) diff --git a/activitysim/core/configuration/top.py b/activitysim/core/configuration/top.py index fa5978a6fc..2f3363e7a8 100644 --- a/activitysim/core/configuration/top.py +++ b/activitysim/core/configuration/top.py @@ -557,6 +557,21 @@ def _check_store_skims_in_shm(self): Production model users should typically have this set explicitly to `False`. """ + expression_profile_cutoff: float = 0.1 + """ + Minimum runtime for an expression to be included in the expression profile. + + .. versionadded:: 1.4 + + Expressions that take less than this amount of time to evaluate will not be + included in the summary report of expression profiling generated at the end + of a model run. For large scale models, this value can be increased to make + the report file smaller, as only the largest values will typically be of + interest. + + This setting has no effect if :py:attr:`expression_profile` is not `True`. + """ + benchmarking: bool = False """ Flag this model run as a benchmarking run. diff --git a/activitysim/core/timing.py b/activitysim/core/timing.py index 140b0bacd7..2d1daf331b 100644 --- a/activitysim/core/timing.py +++ b/activitysim/core/timing.py @@ -167,6 +167,7 @@ class AnalyzeEvalTiming: def __init__(self, state: State): self.log_dir = state.get_log_file_path(str(Path("expr-performance"))) + self.default_cutoff = state.settings.expression_profile_cutoff raw_data = {} for f in self.log_dir.glob("*.log"): df = pd.read_csv(f, sep="\t") @@ -191,7 +192,7 @@ def __init__(self, state: State): def subcomponent_report( self, filename: str | Path = "expression-timing-subcomponents.html", - cutoff_secs=0.1, + cutoff_secs: float | None = None, style: Literal["grid", "simple"] = "grid", ) -> None: """Write the data to an HTML file. @@ -201,14 +202,17 @@ def subcomponent_report( filename : str | Path The name of the file to write the HTML to. If a relative path is given, it will be written in the log directory. - cutoff_secs : float + cutoff_secs : float, optional The cutoff time in seconds. Only expressions with a runtime greater than this will be included in the HTML file. This is used to avoid writing a - huge report full of expressions that run plenty fast. + huge report full of expressions that run plenty fast. 
If not provided, + the default cutoff time from the settings is used. style : "simple" | "grid", default "simple" The style of the report. Either "simple" or "grid". "simple" is a simple HTML table, "grid" is a JavaScript data grid. """ + if cutoff_secs is None: + cutoff_secs = self.default_cutoff # include only expressions that took longer than cutoff_secs df = self.data[self.data["Time (µsec)"] >= cutoff_secs * 1e6] @@ -288,10 +292,21 @@ def subcomponent_report( with open(self.log_dir.joinpath(filename), "w") as f: f.write(template.replace("<>", df.to_json(orient="records"))) - def component_report_data(self, cutoff_secs: float = 0.1): + def component_report_data(self, cutoff_secs: float | None = None): """ Return the data for the component report. + + Parameters + ---------- + cutoff_secs : float, optional + The cutoff time in seconds. Only expressions with a runtime greater than + this will be included in the report. This is used to avoid writing a + huge report full of expressions that run plenty fast. If not provided, + the default cutoff time from the settings is used. """ + if cutoff_secs is None: + cutoff_secs = self.default_cutoff + df = ( self.data.groupby(["Component", "Expression"]) .agg({"Time (µsec)": "sum"}) @@ -310,7 +325,7 @@ def component_report_data(self, cutoff_secs: float = 0.1): def component_report( self, filename: str | Path = "expression-timing-components.html", - cutoff_secs=0.1, + cutoff_secs: float | None = None, style: Literal["grid", "simple"] = "grid", ) -> None: """Write component-level aggregations to an HTML file. From 877f279ff0ae2583c0da695fb6d096c6db4b9503 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Sat, 19 Apr 2025 18:50:51 -0500 Subject: [PATCH 6/7] works with MP --- activitysim/core/timing.py | 62 +++++++++++++++++++++++++++----------- 1 file changed, 45 insertions(+), 17 deletions(-) diff --git a/activitysim/core/timing.py b/activitysim/core/timing.py index 2d1daf331b..8c38827b0f 100644 --- a/activitysim/core/timing.py +++ b/activitysim/core/timing.py @@ -165,29 +165,53 @@ class AnalyzeEvalTiming: Class to analyze the timing of expressions. 
""" - def __init__(self, state: State): + def _read_log(self, log_file: Path) -> pd.DataFrame: + """Read the log file and return a DataFrame.""" + df = pd.read_csv(log_file, sep="\t") + if "(msec)" in df.columns[0]: + df.columns = ["Time (µsec)"] + df.columns[1:].tolist() + df.iloc[:, 0] = df.iloc[:, 0].astype(int) * 1_000 + elif "(sec)" in df.columns[0]: + df.columns = ["Time (µsec)"] + df.columns[1:].tolist() + df.iloc[:, 0] = df.iloc[:, 0].astype(int) * 1_000_000 + else: + df.iloc[:, 0] = df.iloc[:, 0].astype(int) + return df + + def __init__(self, state: State, collect_mp: bool = True) -> None: self.log_dir = state.get_log_file_path(str(Path("expr-performance"))) self.default_cutoff = state.settings.expression_profile_cutoff raw_data = {} for f in self.log_dir.glob("*.log"): - df = pd.read_csv(f, sep="\t") - if "(msec)" in df.columns[0]: - df.columns = ["Time (µsec)"] + df.columns[1:].tolist() - df.iloc[:, 0] = df.iloc[:, 0].astype(int) * 1_000 - elif "(sec)" in df.columns[0]: - df.columns = ["Time (µsec)"] + df.columns[1:].tolist() - df.iloc[:, 0] = df.iloc[:, 0].astype(int) * 1_000_000 - else: - df.iloc[:, 0] = df.iloc[:, 0].astype(int) - raw_data[str(f.stem)] = df - d = pd.concat(raw_data, names=["Component"]).reset_index() + raw_data[str(f.stem)] = self._read_log(f) + + if raw_data: + d = pd.concat(raw_data, names=["Component"]).reset_index() + d["Proc"] = "main" + else: + d = None + + if collect_mp: + raw_data = {} + mp_log_dirs = state.get_log_file_path(".").glob("*-expr-performance") + for mp_log_dir in mp_log_dirs: + subproc_name = "-".join(mp_log_dir.stem.split("-")[:-2]) + for f in mp_log_dir.glob("*.log"): + raw_data[subproc_name, str(f.stem)] = self._read_log(f) + if raw_data: + d_mp = pd.concat(raw_data, names=["Proc", "Component"]).reset_index() + if d is None: + d = d_mp + else: + d = pd.concat([d, d_mp]) # break trace labels into components and subcomponents d["Subcomponent"] = d["Component"].str.split(".", 1).str[1] d["Component"] = d["Component"].str.split(".", 1).str[0] - self.data = d[["Time (µsec)", "Component", "Subcomponent", "Expression"]] - - self.data.sort_values(by=["Time (µsec)"], ascending=[False], inplace=True) + self.data = d[ + ["Time (µsec)", "Proc", "Component", "Subcomponent", "Expression"] + ] + self.data = self.data.sort_values(by=["Time (µsec)"], ascending=[False]) def subcomponent_report( self, @@ -215,11 +239,11 @@ def subcomponent_report( cutoff_secs = self.default_cutoff # include only expressions that took longer than cutoff_secs - df = self.data[self.data["Time (µsec)"] >= cutoff_secs * 1e6] + df = self.data[self.data["Time (µsec)"] >= cutoff_secs * 1e6].copy() # convert the time to seconds df["Time (µsec)"] /= 1e6 - df.rename(columns={"Time (µsec)": "Time (sec)"}, inplace=True) + df = df.rename(columns={"Time (µsec)": "Time (sec)"}) if style == "simple": # format and write the report to HTML @@ -258,6 +282,7 @@ def subcomponent_report( {dat} """ + self.log_dir.joinpath(filename).parent.mkdir(parents=True, exist_ok=True) with open(self.log_dir.joinpath(filename), "w") as f: f.write(template) elif style == "grid": @@ -289,6 +314,7 @@ def subcomponent_report( """ + self.log_dir.joinpath(filename).parent.mkdir(parents=True, exist_ok=True) with open(self.log_dir.joinpath(filename), "w") as f: f.write(template.replace("<>", df.to_json(orient="records"))) @@ -387,6 +413,7 @@ def component_report( {dat} """ + self.log_dir.joinpath(filename).parent.mkdir(parents=True, exist_ok=True) with open(self.log_dir.joinpath(filename), "w") as f: 
f.write(template) elif style == "grid": @@ -417,6 +444,7 @@ def component_report( """ + self.log_dir.joinpath(filename).parent.mkdir(parents=True, exist_ok=True) with open(self.log_dir.joinpath(filename), "w") as f: f.write(template.replace("<>", df.to_json(orient="records"))) else: From 25564f0a999fccadcc4ea19774a939fad8ef4468 Mon Sep 17 00:00:00 2001 From: Jeff Newman Date: Sat, 19 Apr 2025 19:28:49 -0500 Subject: [PATCH 7/7] documentation --- .../users-guide/performance/expr-profiling.md | 102 ++++++++++++++++++ docs/users-guide/performance/index.md | 1 + 2 files changed, 103 insertions(+) create mode 100644 docs/users-guide/performance/expr-profiling.md diff --git a/docs/users-guide/performance/expr-profiling.md b/docs/users-guide/performance/expr-profiling.md new file mode 100644 index 0000000000..09914983f1 --- /dev/null +++ b/docs/users-guide/performance/expr-profiling.md @@ -0,0 +1,102 @@ +# Expression Profiling + +Part of the appeal of ActivitySim is in its flexibility: it is possible to craft +a massive variety of mathematical forms and relationships through creative use +of the expressions found in most component spec files. But as we have all learned +from Spider-Man, with great power comes great responsibility. Users can write +arbitrary code in spec files, and the runtime performance of ActivitySim will +depend on the parsimony and efficiency of that code. + +Sometimes these spec files can be large, and it may be difficult to determine +simply by inspection which expressions in a given spec file are faster or slower. +ActivitySim now offers an expression-level profiling tool to assist in diagnosing +performance problems that arise from inefficient spec files. + +```{important} +At this time, +expression profiling only works for the evaluation of expressions in "legacy" mode. +It does not work in "sharrow" mode, as the compiled expressions run with sharrow +are not run in a serial fashion and are not able to be profiled in the same way. +``` + +## Profiling an Entire Model Run + +The simplest way to use the expression profiler is to set the +[`expression_profile`](activitysim.core.configuration.Settings.expression_profile) +configuration setting in the top level model settings (typically `settings.yaml`): + +```yaml +expression_profile: true +``` + +This will cause the profiler to be activated for all expressions in the model, +across all components. This includes expressions in the spec files, as well as +expressions in all preprocessors and annotators. Each time the expressions in +any spec file are evaluated, the profiler will record the time taken to evaluate +each expression. An "expr-performance" subdirectory will be created in the model's +logging directory, and a new log file will be created each time the expressions in +any spec file are evaluated. The file is named according to the `trace_label` found +where the expressions are being evaluated. It will include a list of all the evaluated +expressions from the spec file, along with the time taken to evaluate each expression. +For multi-processed models, each subprocess will create its own log file directory, +similar to the logging directory structure for the other model components. + +## Summary Outputs + +At the end of a model run where the `expression_profile` setting is active, +ActivitySim will also create a pair of summary files in the "expr-performance" +subdirectory. 
The first is named "expression-timing-subcomponents.html",
+and contains a simple concatenation of the runtimes of
+expressions in the various subcomponents stored in the log files,
+filtered to only include expressions that took a notable amount of time.
+By default, this threshold is 0.1 seconds, but it can be changed by setting the
+[`expression_profile_cutoff`](activitysim.core.configuration.Settings.expression_profile_cutoff)
+configuration setting in the model settings.
+
+The second file, "expression-timing-components.html", shows an aggregate
+summary of the runtimes for each expression,
+aggregated across all the log files. The aggregation is by model component and
+expression, so that this summary includes the total time taken to evaluate each
+expression within each model component, recognizing that identical expressions
+may be evaluated multiple times in different model subcomponents (e.g. across
+different purposes, or tour numbers, etc.). This more aggregated summary is
+typically the one that will be most useful for identifying expressions that
+provide the most overall potential for performance improvement via streamlining.
+
+Users should note that the expression profiler is not a substitute for good coding
+practices. It will not necessarily identify all performance problems, and it is not
+able to suggest improvements to the expressions. It is simply a tool to help users
+identify which expressions are taking the most time to evaluate, and therefore
+which expressions are the best candidates for improvement.
+
+Also, users should understand that the expression profiler is not directly measuring the
+computational complexity of the expressions, but rather the time taken to evaluate
+the expressions. This time can be affected by a number of factors, including the
+complexity of the expression, the size of the data being processed, and whether
+there are other processes running on the machine at the same time competing for
+resources. For multiprocessing model runs, those other processes may include
+the other ActivitySim subprocesses, which may lead to surprising results.
+
+There is also no adjustment made for parallelization of the expression evaluations.
+For example, if the same expression is evaluated in parallel across 8 processes on
+a machine with 8 cores, and each process takes 0.1 seconds to evaluate the expression,
+the profiler will still show that the expression took 0.8 seconds to evaluate, even
+though the total wall clock time taken to evaluate the expression across all processes
+was only 0.1 seconds.
+
+Profiling expressions also adds some overhead to the model run, so users should
+be careful about using the profiler in production runs. It is recommended to turn off
+the profiler in production runs, and to use it only for debugging and development.
+
+## Profiling Individual Components
+
+The expression profiler can also be used to profile individual components, rather
+than the entire model. This is done by setting the `compute_settings.performance_log`
+attribute for the component in the model settings. This attribute can be set to the
+filename where the profiler log file should be written, which will override
+the default behavior of writing the log file to the "expr-performance" subdirectory.
+This feature only works for components that are run in a single process, and which
+have a `compute_settings` attribute.
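+
+As a minimal sketch, the setting might be added to a component's settings file
+like this (the component file name and log file name here are illustrative only,
+not part of any shipped configuration):
+
+```yaml
+# e.g. in a hypothetical component settings file such as tour_mode_choice.yaml
+compute_settings:
+  # write this component's expression timings to this file, instead of the
+  # default file named after the component's trace label
+  performance_log: tour-mode-choice-expressions.log
+```
+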
It is generally not recommended to use this +feature unless a specific component is suspected of having atypical performance +problems, as it will not provide the same summary reporting as profiling the entire +model. diff --git a/docs/users-guide/performance/index.md b/docs/users-guide/performance/index.md index 2f722f621d..ab807125e1 100644 --- a/docs/users-guide/performance/index.md +++ b/docs/users-guide/performance/index.md @@ -23,6 +23,7 @@ These techniques are the focus of this section. Chunking to Reduce Peak Memory Usage Compiling with Sharrow Skim Data Format + Expression Profiling ``` ## Checklist for Performance Tuning