From ccc2a6a1e08663488257cab06834f879b13e3060 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Wed, 5 Oct 2022 13:20:40 +0200 Subject: [PATCH 01/84] Move scripts for incremental benchmarking from the analyzer to the bench repo. These scripts were written by Sarah Tilscher. Co-authored-by: Sarah Tilscher <66023521+stilscher@users.noreply.github.com> --- .../incremental/benchmarking/efficiency.py | 234 +++++++++++++++ scripts/incremental/benchmarking/plot.py | 144 +++++++++ scripts/incremental/benchmarking/precision.py | 277 ++++++++++++++++++ .../incremental/benchmarking/requirements.txt | 7 + scripts/incremental/benchmarking/stats.py | 65 ++++ scripts/incremental/benchmarking/utils.py | 246 ++++++++++++++++ .../incremental/build/build_compdb_zstd.sh | 3 + 7 files changed, 976 insertions(+) create mode 100644 scripts/incremental/benchmarking/efficiency.py create mode 100644 scripts/incremental/benchmarking/plot.py create mode 100644 scripts/incremental/benchmarking/precision.py create mode 100644 scripts/incremental/benchmarking/requirements.txt create mode 100644 scripts/incremental/benchmarking/stats.py create mode 100644 scripts/incremental/benchmarking/utils.py create mode 100755 scripts/incremental/build/build_compdb_zstd.sh diff --git a/scripts/incremental/benchmarking/efficiency.py b/scripts/incremental/benchmarking/efficiency.py new file mode 100644 index 000000000..c6abb95a0 --- /dev/null +++ b/scripts/incremental/benchmarking/efficiency.py @@ -0,0 +1,234 @@ +from pydriller import Repository, Git +import utils +import psutil +import multiprocessing as mp +import os +import subprocess +import itertools +import shutil +import json +from datetime import datetime +import sys +import pandas as pd + +################################################################################ +# Usage: python3 incremental_smallcommits.py +# Executing the script will overwrite the directory 'result_efficiency' in the cwd. 
+# The script for building the compilation database is assumed to be found in the analyzers script directory and the +# config file is assumed to be found in the conf directory of the analyzers repository. +# The single test runs are mapped to processors according to the coremapping. The one specified in the section below +# should work for Intel machines, otherwise you might need to adapt it according to the description. +if len(sys.argv) != 3: + print("Wrong number of parameters.\nUse script like this: python3 parallel_benchmarking.py ") + exit() +result_dir = os.path.join(os.getcwd(), 'result_efficiency') +maxCLOC = 50 # can be deactivated with None +url = "https://github.com/facebook/zstd" +repo_name = "zstd" +build_compdb = "build_compdb_zstd.sh" +conf_base = "zstd-race-baseline" # very minimal: "zstd-minimal" +conf_incrpost = "zstd-race-incrpostsolver" +begin = datetime(2021,8,1) +to = datetime(2022,2,1) # minimal subset: datetime(2021,8,4) +diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"] +analyzer_dir = sys.argv[1] +only_collect_results = False # can be turned on to collect results, if data collection was aborted before the creation of result tables +################################################################################ +try: + numcores = int(sys.argv[2]) +except ValueError: + print("Parameter should be a number.\nUse script like this: python3 parallel_benchmarking.py ") + exit() +avail_phys_cores = psutil.cpu_count(logical=False) +allowedcores = avail_phys_cores - 1 +if not only_collect_results and numcores > allowedcores: + print("Not enough physical cores on this machine (exist: ", avail_phys_cores, " allowed: ", allowedcores, ")") + exit() +# For equal load distribution, choose a processes to core mapping, +# use only physical cores and have an equal number of processes per cache. +# The layout of physical/logical cores and sharing of caches is machine dependent. To find out use: 'lscpu --all --extended'. 
+# For our test server: +coremapping1 = [i for i in range(numcores - numcores//2)] +coremapping2 = [i for i in range(avail_phys_cores//2, avail_phys_cores//2 + numcores//2)] +coremapping = [coremapping1[i//2] if i%2==0 else coremapping2[i//2] for i in range(len(coremapping1) + len(coremapping2))] +################################################################################ + +def filter_commits_false_pred(repo_path): + def pred(c): + relCLOC = utils.calculateRelCLOC(repo_path, c, diff_exclude) + return relCLOC == 0 or (maxCLOC is not None and relCLOC > maxCLOC) + return pred + +def analyze_small_commits_in_repo(cwd, outdir, from_c, to_c): + count_analyzed = 0 + count_skipped = 0 + count_failed = 0 + analyzed_commits = {} + repo_path = os.path.join(cwd, repo_name) + + for commit in itertools.islice(itertools.filterfalse(filter_commits_false_pred(repo_path), Repository(url, since=begin, to=to, only_no_merge=True, clone_repo_to=cwd).traverse_commits()), from_c, to_c): + gr = Git(repo_path) + + #print("\n" + commit.hash) + #print('changed LOC: ', commit.lines) + #print('merge commit: ', commit.merge) + + # skip merge commits and commits that have no or less than maxCLOC of relevant code changes + relCLOC = utils.calculateRelCLOC(repo_path, commit, diff_exclude) # use this to filter commits by actually relevant changes + #print("relCLOC: ", relCLOC) + if relCLOC == 0 or (maxCLOC is not None and relCLOC > maxCLOC): + #print('Skip this commit: merge commit or too many relevant changed LOC') + count_skipped+=1 + continue + + # analyze + try_num = from_c + count_analyzed + count_failed + 1 + outtry = os.path.join(outdir, str(try_num)) + parent = gr.get_commit(commit.parents[0]) + #print('Analyze this commit incrementally. 
#', try_num) + + utils.reset_incremental_data(os.path.join(cwd, 'incremental_data')) + failed = True + try: + #print('Starting from parent', str(parent.hash), ".") + outparent = os.path.join(outtry, 'parent') + os.makedirs(outparent) + add_options = ['--disable', 'incremental.load', '--enable', 'incremental.save'] + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, parent.hash, outparent, conf_base, add_options) + + #print('And now analyze', str(commit.hash), 'incrementally.') + outchild = os.path.join(outtry, 'child') + os.makedirs(outchild) + add_options = ['--enable', 'incremental.load', '--disable', 'incremental.save'] + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchild, conf_base, add_options) + + #print('And again incremental, this time with incremental postsolver') + outchildincrpost = os.path.join(outtry, 'child-incr-post') + os.makedirs(outchildincrpost) + add_options = ['--enable', 'incremental.load', '--disable', 'incremental.save'] + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchildincrpost, conf_incrpost, add_options) + + #print('And again incremental, this time with incremental postsolver and reluctant') + outchildrel = os.path.join(outtry, 'child-rel') + os.makedirs(outchildrel) + add_options = ['--enable', 'incremental.load', '--disable', 'incremental.save', '--enable', 'incremental.reluctant.enabled'] + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchildrel, conf_incrpost, add_options) + + count_analyzed+=1 + failed = False + except subprocess.CalledProcessError as e: + print('Aborted because command ', e.cmd, 'failed.') + count_failed+=1 + os.makedirs(outtry, exist_ok=True) + with open(os.path.join(outtry,'commit_properties.log'), "w+") as file: + json.dump({"hash": commit.hash, "parent_hash": parent.hash, "CLOC": commit.lines, "relCLOC": relCLOC, "failed": failed}, file) + analyzed_commits[try_num]=(str(commit.hash)[:6], 
def collect_data(outdir):
    """Gather the results of all analysis runs below `outdir` into a dict of columns.

    Each numbered subdirectory of `outdir` corresponds to one analyzed commit and
    contains the analyzer logs of the four runs (parent non-incremental, child
    incremental, child incremental + incr. postsolver, child incremental +
    reluctant) plus a commit_properties.log with commit metadata. Failed commits
    contribute zeros for the runtime/warning columns so all columns stay the
    same length.

    Returns a dict mapping column name -> list of per-commit values, suitable
    for pandas.DataFrame construction.
    """
    data = {"Commit": [], "Failed?": [], "Changed LOC": [], "Relevant changed LOC": [],
            "Changed/Added/Removed functions": [],
            utils.header_runtime_parent: [], utils.header_runtime_incr_child: [],
            utils.header_runtime_incr_posts_child: [], utils.header_runtime_incr_posts_rel_child: [],
            "Change in number of race warnings": []}
    for t in os.listdir(outdir):
        parentlog = os.path.join(outdir, t, 'parent', utils.analyzerlog)
        childlog = os.path.join(outdir, t, 'child', utils.analyzerlog)
        childpostslog = os.path.join(outdir, t, 'child-incr-post', utils.analyzerlog)
        childpostsrellog = os.path.join(outdir, t, 'child-rel', utils.analyzerlog)
        commit_prop_log = os.path.join(outdir, t, 'commit_properties.log')
        # Use a context manager so the metadata file handle is closed
        # deterministically (the original json.load(open(...)) leaked it).
        # The original also converted t with int() here, but never used the result.
        with open(commit_prop_log, "r") as f:
            commit_prop = json.load(f)
        data["Changed LOC"].append(commit_prop["CLOC"])
        data["Relevant changed LOC"].append(commit_prop["relCLOC"])
        data["Failed?"].append(commit_prop["failed"])
        data["Commit"].append(commit_prop["hash"][:7])
        if commit_prop["failed"]:
            # Failed runs produced no analyzer logs; fill with zeros to keep the
            # columns aligned with the metadata columns appended above.
            data[utils.header_runtime_parent].append(0)
            data[utils.header_runtime_incr_child].append(0)
            data[utils.header_runtime_incr_posts_child].append(0)
            data[utils.header_runtime_incr_posts_rel_child].append(0)
            data["Changed/Added/Removed functions"].append(0)
            data["Change in number of race warnings"].append(0)
            continue
        parent_info = utils.extract_from_analyzer_log(parentlog)
        child_info = utils.extract_from_analyzer_log(childlog)
        child_posts_info = utils.extract_from_analyzer_log(childpostslog)
        child_posts_rel_info = utils.extract_from_analyzer_log(childpostsrellog)
        data["Changed/Added/Removed functions"].append(
            int(child_info["changed"]) + int(child_info["added"]) + int(child_info["removed"]))
        data[utils.header_runtime_parent].append(float(parent_info["runtime"]))
        data[utils.header_runtime_incr_child].append(float(child_info["runtime"]))
        data[utils.header_runtime_incr_posts_child].append(float(child_posts_info["runtime"]))
        data[utils.header_runtime_incr_posts_rel_child].append(float(child_posts_rel_info["runtime"]))
        # Bug fix: convert BOTH warning counts to int before subtracting. The
        # original computed int(child_info["race_warnings"] - int(...)), which
        # raises TypeError because the extracted log fields are strings (cf. the
        # int() conversions for changed/added/removed above).
        data["Change in number of race warnings"].append(
            int(child_info["race_warnings"]) - int(parent_info["race_warnings"]))
    return data
processes.append(p) + # time.sleep(random.randint(5,60)) # add random delay between process creation to try to reduce interference + else: + runperprocess(coremapping[i], start, end) + os.chdir(result_dir) + + for p in processes: + p.join() + +def merge_results(): + filename = "results.csv" + frames = [] + for process_dir in os.listdir("."): + path = os.path.join(process_dir, filename) + if os.path.exists(path): + t = pd.read_csv(path, index_col=0, sep=";") + frames.append(t) + if len(frames) > 0: + df = pd.concat(frames) + #df.sort_index(inplace=True, key=lambda idx: idx.map(lambda x: int(x.split(":")[0]))) + df.to_csv('total_results.csv', sep=";") + + +if not only_collect_results: + os.mkdir(result_dir) +os.chdir(result_dir) + +analyze_chunks_of_commits_in_parallel() +merge_results() diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py new file mode 100644 index 000000000..4720fd56a --- /dev/null +++ b/scripts/incremental/benchmarking/plot.py @@ -0,0 +1,144 @@ +import utils +import os +import shutil + +def cummulative_distr_compare2(outdir, result_csv_filename): + num_bins = 2000 + outfile_nonincr_vs_incr = "figure_cum_distr_incr.pdf" + outfile_incr_vs_incrrel = "figure_cum_distr_rel.pdf" + df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) + + data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_parent, utils.header_runtime_incr_child]) + datanonincr = {"values": data[0], "label": "Non-incremental analysis of parent commit"} + dataincr = {"values": data[1], "label": "Incremental analysis of commit"} + utils.cummulative_distr_plot([datanonincr, dataincr], base, outfile_nonincr_vs_incr) + + data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_incr_child, utils.header_runtime_incr_posts_rel_child]) + dataincr = {"values": data[0], "label": "Incremental analysis of commit"} + datarelincr = {"values": data[1], "label": "Reluctant 
incremental analysis of commit"} + utils.cummulative_distr_plot([dataincr, datarelincr], base, outfile_incr_vs_incrrel, logscale=True) + +def cummulative_distr_all3(outdir, result_csv_filename): + num_bins = 2000 + outfile_nonincr_vs_incr = "figure_cum_distr_all3.pdf" + df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) + + data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_parent, utils.header_runtime_incr_child, utils.header_runtime_incr_posts_rel_child]) + datanonincr = {"values": data[0], "label": "Non-incremental analysis of parent commit"} + dataincr = {"values": data[1], "label": "Incremental analysis of commit"} + datarelincr = {"values": data[2], "label": "Reluctant incremental analysis of commit"} + utils.cummulative_distr_plot([datanonincr, dataincr, datarelincr], base, outfile_nonincr_vs_incr, figsize=(6,4), logscale=True) + +def distribution_absdiff_plot(title, result_csv_filename, outdir, cutoffs_incr=None, cutoffs_rel=None): + df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) + + # plot incremental vs non-incremental + diff = df.loc[:,utils.header_runtime_parent] - df.loc[:,utils.header_runtime_incr_child] + utils.hist_plot(diff, 20, title, 'Improvement in s (incremental compared to non-incremental)', 'Number of Commits', os.path.join(outdir, "figure_absdiff_distr_incr.pdf"), cutoffs_incr) + + # plot reluctant vs. 
basic incremental + diff = df.loc[:,utils.header_runtime_incr_child] - df.loc[:,utils.header_runtime_incr_posts_rel_child] + utils.hist_plot(diff, 2, title, 'Improvement in s (reluctant compared to incremental)', 'Number of Commits', os.path.join(outdir, "figure_absdiff_distr_rel.pdf"), cutoffs_rel) + +def distribution_reldiff_plot(title, result_csv_filename, outdir, cutoffs_incr=None, cutoffs_rel=None): + df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) + + # plot incremental vs non-incremental + print(df[utils.header_runtime_incr_child].astype('float')) + diff = 1 - df[utils.header_runtime_incr_child].astype('float') / df[utils.header_runtime_parent].astype('float') + utils.hist_plot(diff, 0.01, title, "Relative Improvement in s (incremental compared to non-incremental)", 'Number of Commits', os.path.join(outdir, "figure_reldiff_distr_incr.pdf"), cutoffs_incr) + + # plot reluctant vs. basic incremental + diff = 1 - df.loc[:,utils.header_runtime_incr_posts_rel_child] / df.loc[:,utils.header_runtime_incr_child] + utils.hist_plot(diff, 0.005, title, 'Relative Improvement (reluctant compared to incremental)', 'Number of Commits', os.path.join(outdir, "figure_reldiff_distr_rel.pdf"), cutoffs_rel) + +def paper_efficiency_graphs(dir_results, csv_filename, outdir, filterRelCLOC=False, filterDetectedChanges=False): + df = utils.get_cleaned_filtered_data(os.path.join(dir_results,csv_filename), filterRelCLOC=filterRelCLOC, filterDetectedChanges=filterDetectedChanges) + diff1 = 1 - df[utils.header_runtime_incr_child].astype('float') / df[utils.header_runtime_parent].astype('float') + diff2 = 1 - df[utils.header_runtime_incr_posts_child].astype('float') / df[utils.header_runtime_incr_child].astype('float') + diff3 = 1 - df[utils.header_runtime_incr_posts_rel_child].astype('float') / df[utils.header_runtime_incr_posts_child].astype('float') + diff4 = 1 - df[utils.header_runtime_incr_posts_rel_child].astype('float') / 
def paper_precision_graph(results_precision, filename, outdir):
    """Plot the share of less-precise program points after 1, 2, 5, 10, 15 commits
    and at the end of each analyzed commit sequence.

    Reads the merged precision results (JSON) from `results_precision/filename`
    and writes the scatter plot to `outdir/precision_figure.pgf`.
    """
    df = utils.get_data_from_json(os.path.join(results_precision, filename))

    # Column names for precision loss after x commits, where x is in {1, 2, 5, 10, 15},
    # plus the values at the end of the sequence.
    checkpoints = (1, 2, 5, 10, 15)
    lessprec_cols = ['intermediate precision.' + str(k) + '.precision.lessprec' for k in checkpoints] \
        + ['final precision.lessprec']
    total_cols = ['intermediate precision.' + str(k) + '.precision.total' for k in checkpoints] \
        + ['final precision.total']

    data = []
    for i in range(len(df.index)):
        row = df.iloc[i]
        xs = [1, 2, 5, 10, 15, row['length']]
        vals = row[lessprec_cols].values
        total = row[total_cols].values
        # Keep only checkpoints where BOTH counts are present (v == v is the
        # NaN test for pandas missing values).
        # Bug fix: the original filtered x on vals only but y on vals AND total,
        # so a NaN in total produced x/y lists of different lengths.
        keep = [j for j in range(len(xs)) if vals[j] == vals[j] and total[j] == total[j]]
        x = [xs[j] for j in keep]
        y = [vals[j] / total[j] for j in keep]
        data.append((x, y))
    # Output half of the LaTeX textwidth (inches); height is 2/3 of the width.
    halftextwidth = 3.3
    size = (halftextwidth, halftextwidth * 2 / 3)
    utils.scatter_plot(data, "\# Commits", "Share of less precise program points", os.path.join(outdir, "precision_figure.pgf"), size)
+from pydriller import Repository, Git +import psutil +import os +import sys +from datetime import datetime +import json +import shutil +import pytz +import multiprocessing as mp + + +################################################################################ +# Usage: python3 incremental_smallcommits.py +# Executing the script will overwrite the directory 'result_precision' in the cwd. +# The script for building the compilation database is assumed to be found in the analyzers script directory and the +# config file is assumed to be found in the conf directory of the analyzers repository. +if len(sys.argv) != 3: + print("Wrong number of parameters.\nUse script like this: python3 parallel_benchmarking.py ") + exit() +res_dir = os.path.abspath('result_precision') +maxCLOC = None +url = "https://github.com/facebook/zstd" +repo_name = "zstd" +build_compdb = "build_compdb_zstd.sh" +conf = "zstd-race-incrpostsolver" +begin = datetime(2021,8,1) +to = datetime(2022,2,1) +diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"] +analyzer_dir = sys.argv[1] +try: + numcores = int(sys.argv[2]) +except ValueError: + print("Parameter should be a number.\nUse script like this: python3 parallel_benchmarking.py ") + exit() +only_collect_results = False # can be turned on to collect results, if data collection was aborted before the creation of result tables +################################################################################ + +utc = pytz.UTC +compare_commits = [1,2,5,10,15] +skipSeqShorterEq = 5 # minimum number of incremental commits in chain + +def start_commit_for_sequence_search(): + current_commit = "" + for commit in Repository(url, to=to, only_in_branch='dev', order='reverse', clone_repo_to=res_dir).traverse_commits(): + current_commit = commit + break + gr = Git(os.path.join(res_dir, repo_name)) + return current_commit, gr + +def find_sequences_rec(gr, commit, seq, seq_list, starting_points): + commit_date = 
commit.committer_date.replace(tzinfo=None) + if commit_date < begin: + if len(seq) > skipSeqShorterEq: + print("found seq of length: " + str(len(seq))) + seq_list.insert(0,seq) + elif commit.merge: + seq.insert(0,commit.hash) + if len(seq) > skipSeqShorterEq: + print("found seq of length: " + str(len(seq))) + seq_list.insert(0,seq) + for ph in commit.parents: + parent_commit = gr.get_commit(ph) + if ph not in starting_points: + starting_points.insert(0,ph) + find_sequences_rec(gr, parent_commit, [], seq_list, starting_points) + else: + seq.insert(0,commit.hash) + for p in commit.parents: + parent_commit = gr.get_commit(p) + find_sequences_rec(gr, parent_commit, seq, seq_list, starting_points) + +def find_sequences(): + seq_list = [] + starting_points=[] + start_commit, gr = start_commit_for_sequence_search() + starting_points.insert(0,start_commit.hash) + find_sequences_rec(gr, start_commit, [], seq_list, starting_points) + seq_list.sort(key=len, reverse=True) + print("summary") + total = 0 + maxlen = max(map(lambda x : len(x), seq_list)) + for i in range(0,maxlen + 1): + c = sum(map(lambda x : len(x) == i, seq_list)) + total += c + print("length " + str(i) + ": " + str(c)) + print("total: " + str(len(seq_list))) + assert(total == len(seq_list)) + print("avg len: " + str(sum(map(lambda x : len(x), seq_list))/len(list(map(lambda x : len(x), seq_list))))) + with open('sequences.json', 'w') as outfile: + json.dump(seq_list, outfile, indent=4) + return seq_list + +def analyze_series_in_repo(series): + prev_commit = "" + commit_num = 0 + repo_path = os.path.abspath(repo_name) + out_dir = os.path.abspath('out') + with open('sequence.json', 'w') as outfile: + json.dump(series, outfile, indent=4) + dummy_c_file = "file.c" + with open(dummy_c_file, 'w') as file: + file.write("int main() { return 0; }") + file.close() + + for commit in Repository(url, since=begin, only_commits=series, clone_repo_to=os.getcwd()).traverse_commits(): + gr = Git(repo_path) + + # print("\n" + 
commit.hash) + # print('changed LOC: ', commit.lines) + # print('merge commit: ', commit.merge) + + # check that given series is a path of sequential commits in the repository + msg = "Commit " + prev_commit[:7] + "is not a parent commit of " + commit.hash[:7] + " (parents: " + ','.join(commit.parents) + ")" + assert (prev_commit == "" or prev_commit in commit.parents), msg + + relCLOC = utils.calculateRelCLOC(repo_path, commit, diff_exclude) + + # analyze + out_commit = os.path.join(out_dir, str(commit_num)) + os.makedirs(out_commit) + with open(os.path.join(out_commit,'commit_properties.log'), "w+") as file: + json.dump({"hash": commit.hash, "parent_hash": prev_commit, "CLOC": commit.lines, "relCLOC": relCLOC}, file) + + if commit_num == 0: + # analyze initial commit non-incrementally + try: + # print('Analyze ', str(commit.hash), ' as initial commit.') + add_options = ['--disable', 'incremental.load', '--enable', 'incremental.save'] + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, out_commit, conf, add_options) + prev_commit = commit.hash + except utils.subprocess.CalledProcessError as e: + print('Aborted initial because command ', e.cmd, 'failed.') + print('Fix the problem or choose a different commit to start the accumulative analysis from.') + exit() + else: + # analyze every following commit based on the latest previous commit for which the analysis succeeded + try: + if os.path.isdir("backup_incremental_data"): + shutil.rmtree("backup_incremental_data") + shutil.copytree("incremental_data", "backup_incremental_data") + + # compare only for 10th and last run + if commit_num in compare_commits or commit_num == len(series) - 1: + # analyze commit non-incrementally and save run for comparison + # print('Analyze', str(commit.hash), 'non-incrementally (#', commit_num, ').') + out_nonincr = os.path.join(out_commit, 'non-incr') + os.makedirs(out_nonincr) + file_original_run = os.path.join(out_nonincr, "compare-data-nonincr") + 
add_options = ['--enable', 'incremental.only-rename', '--set', 'save_run', file_original_run] + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, out_nonincr, conf, add_options) + + # analyze commit incrementally based on the previous commit and save run for comparison + # print('Analyze', str(commit.hash), 'incrementally (#', commit_num, ').') + out_incr = os.path.join(out_commit, 'incr') + os.makedirs(out_incr) + file_incremental_run = os.path.join(out_incr, "compare-data-incr") + add_options = ['--enable', 'incremental.load', '--enable', 'incremental.save', '--enable', 'incremental.reluctant.enabled', '--set', 'save_run', file_incremental_run] + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, out_incr, conf, add_options) + + if commit_num in compare_commits or commit_num == len(series) - 1: + # compare stored data of original and incremental run + # print('Compare both runs.') + out_compare = os.path.join(out_commit, 'compare') + os.makedirs(out_compare) + utils.compare_runs(analyzer_dir, dummy_c_file, out_compare, conf, file_incremental_run, file_original_run) + + except utils.subprocess.CalledProcessError as e: + print('Aborted because command ', e.cmd, 'failed.') + shutil.rmtree("incremental_data") + shutil.copytree("backup_incremental_data", "incremental_data") + + prev_commit = commit.hash + commit_num += 1 + +def runperprocess(core, seq_list, q): + psutil.Process().cpu_affinity([core]) + while not q.empty(): + i = q.get() + serie = seq_list[i] + dir = "series" + str(i) + os.mkdir(dir) + os.chdir(dir) + analyze_series_in_repo(serie) + os.chdir(res_dir) + +def analyze_seq_in_parallel(seq_list): + avail_phys_cores = psutil.cpu_count(logical=False) + allowedcores = avail_phys_cores - 1 + if numcores > allowedcores: + print("Not enough physical cores on this maching (exist: ", avail_phys_cores, " allowed: ", allowedcores, ")") + exit() + # For equal load distribution, choose a processes to core mapping, + 
def merge_results(outfilename):
    """Merge the comparison results of all 'series*' directories in the cwd.

    For every sequence directory this reads the commit sequence, accumulates the
    relevant changed LOC, and collects the precision-comparison results for the
    selected checkpoints (`compare_commits`) and for the final commit. Averages
    of the per-checkpoint precision shares are computed over all sequences.

    Writes one JSON document {"seq_summary": ..., "prec_avgs": ...} to
    `outfilename` and returns the same dict.
    """
    wd = os.getcwd()
    seq_summaries = []
    # Per checkpoint: summed precision shares, number of contributing commits,
    # and summed relevant changed LOC.
    result_sums = {str(i): {"precpertotal": {"equal": 0, "moreprec": 0, "lessprec": 0, "incomp": 0, "total": 0},
                            "number_of_commits": 0, "relCLOC": 0} for i in compare_commits}
    for s in map(lambda x: os.path.abspath(x), os.listdir(wd)):
        if not os.path.isdir(s) or os.path.basename(s)[:6] != "series":
            continue
        os.chdir(s)
        with open('sequence.json', 'r') as file:
            seq = json.load(file)
        # look up comparison results
        outdir = os.path.join(s, "out")
        commits = os.listdir(outdir)
        commits.sort(key=lambda x: int(x))
        int_prec = {str(i): {"precision": None, "relCLOC": None} for i in compare_commits}
        final_prec = None
        relCLOC = 0
        # Commit "0" is the non-incremental baseline and has no comparison data.
        for i in filter(lambda x: x != "0", commits):
            ith_dir = os.path.join(outdir, i)
            compare_log_path = os.path.join(ith_dir, "compare", utils.comparelog)
            with open(os.path.join(outdir, i, "commit_properties.log"), "r") as f:
                relCLOC += json.load(f)["relCLOC"]
            if int(i) in compare_commits:
                if os.path.isdir(ith_dir) and os.path.exists(compare_log_path):
                    int_prec[i]["precision"] = utils.extract_precision_from_compare_log(compare_log_path)
                    int_prec[i]["relCLOC"] = relCLOC
                    if int_prec[i]["precision"]:
                        # Accumulate each precision category as a share of the
                        # total number of program points of this comparison.
                        result_sums[i]["precpertotal"] = {
                            k: result_sums[i]["precpertotal"].get(k, 0)
                               + (int_prec[i]["precision"].get(k, 0) / int_prec[i]["precision"]["total"])
                            for k in set(result_sums[i]["precpertotal"])}
                        result_sums[i]["number_of_commits"] += 1
                        result_sums[i]["relCLOC"] += relCLOC
            if int(i) != 0 and int(i) == len(commits) - 1:
                # Last commit of the sequence ("0" is filtered out above, so
                # int(i) != 0 always holds here).
                if os.path.exists(compare_log_path):
                    final_prec = utils.extract_precision_from_compare_log(compare_log_path)
        summary = {"name": os.path.basename(s), "sequence": seq, "length": len(seq),
                   "intermediate precision": int_prec, "final precision": final_prec, "finalRelCLOC": relCLOC}
        seq_summaries.append(summary)
        os.chdir(wd)
    result_avgs = {i: None for i in result_sums.keys()}
    for i, ps in result_sums.items():
        if ps["number_of_commits"] != 0:
            avg_prec = {k: ps["precpertotal"].get(k, 0) / ps["number_of_commits"] for k in set(ps["precpertotal"])}
            result_avgs[i] = {"precpertotal_avg": avg_prec, "relCLOC_avg": ps["relCLOC"] / ps["number_of_commits"]}
    res = {"seq_summary": seq_summaries, "prec_avgs": result_avgs}
    with open(outfilename, "w") as f:
        json.dump(res, f, indent=4)
    # Bug fix: the original ended with a bare no-op expression `res`; return the
    # merged results so callers can use them programmatically.
    return res
/dev/null +++ b/scripts/incremental/benchmarking/stats.py @@ -0,0 +1,65 @@ +import utils +from pydriller import Repository +from datetime import datetime +import os +import sys + +if __name__ == '__main__': + if len(sys.argv) != 2: + print("Wrong number of parameters.\nUse script like this: python3 incremental_stats.py ") + exit() + +analyzer_dir = sys.argv[1] +url = 'https://github.com/facebook/zstd' +repo_name = 'zstd' +begin = datetime(2021,8,1) +to = datetime(2022,2,1) +maxCLOC = 50 +dirs_to_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"] + +cwd = os.getcwd() +outdir = os.path.join(cwd, 'out') +repo_path = os.path.normpath(os.path.join(cwd, repo_name)) +paths_to_exclude = list(map(lambda x: os.path.join(repo_path, x), dirs_to_exclude)) + +analyzed_commits = {} +total_commits = 0 +count_nochanges = 0 +count_merge = 0 +count_big = 0 +count_small = 0 + +def iter_repo(): + global analyzed_commits + global total_commits + global count_merge + global count_nochanges + global count_big + global count_small + + for commit in Repository(url, since=begin, to=to, clone_repo_to=cwd).traverse_commits(): + total_commits += 1 + + # count merge commits + if commit.merge: + count_merge += 1 + continue + + # count commits that have less than maxCLOC of relevant code changes + relCLOC = utils.calculateRelCLOC(repo_path, commit, paths_to_exclude) # use this to filter commits by actually relevant changes + if relCLOC == 0: + count_nochanges += 1 + continue + + if maxCLOC is not None and relCLOC > maxCLOC: + count_big += 1 + continue + + count_small += 1 + +iter_repo() +print("\nCommits traversed in total: ", total_commits) +print("Merge commits: ", count_merge) +print("Commits without any relevant changes: ", count_nochanges) +print("Big commits: ", count_big) +print("Small commits with relevant changes: ", count_small) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py new file mode 100644 index 
000000000..c45bf36ae --- /dev/null +++ b/scripts/incremental/benchmarking/utils.py @@ -0,0 +1,246 @@ +import os +import shutil +from pathlib import Path +import subprocess +from pydriller import Git +import re +import pandas +import json +import numpy as np +import brokenaxes +import matplotlib as mpl +mpl.use("pgf") +mpl.rcParams.update({ + "pgf.texsystem": "pdflatex", + 'pgf.rcfonts': False, + 'text.usetex': True, + 'font.family': 'serif', + 'font.size': 9, + 'axes.titlesize': 9, + 'legend.fontsize': 9, + 'figure.titlesize': 9, + 'figure.dpi': 300, + 'xtick.labelsize': 9, + 'ytick.labelsize': 9, + +}) +import matplotlib.pyplot as plt +from matplotlib.ticker import ScalarFormatter + +header_runtime_parent = "Runtime for parent commit (non-incremental)" +header_runtime_incr_child = "Runtime for commit (incremental)" +header_runtime_incr_posts_child = "Runtime for commit (incremental + incr postsolver)" +header_runtime_incr_posts_rel_child = "Runtime for commit (incremental + incr postsolver + reluctant)" + +preparelog = "prepare.log" +analyzerlog = "analyzer.log" +comparelog = "compare.log" + +def reset_incremental_data(incr_data_dir): + if os.path.exists(incr_data_dir) and os.path.isdir(incr_data_dir): + shutil.rmtree(incr_data_dir) + +def analyze_commit(analyzer_dir, gr : Git, repo_path, build_compdb, commit_hash, outdir, conf, extra_options): + gr.checkout(commit_hash) + conf_path = os.path.join(analyzer_dir, 'conf', conf + '.json') + + # print configuration + with open(outdir+'/config.out', "a+") as file: + with open(conf_path, "r") as c: + file.write("config: " + c.read()) + file.write("\n") + file.write("added options:\n") + for o in extra_options: + file.write(o + " ") + file.close() + + prepare_command = ['sh', os.path.join(analyzer_dir, 'scripts', build_compdb)] + with open(os.path.join(outdir, preparelog), "w+") as outfile: + subprocess.run(prepare_command, cwd = gr.path, check=True, stdout=outfile, stderr=subprocess.STDOUT) + outfile.close() + + 
analyze_command = [os.path.join(analyzer_dir, 'goblint'), '--conf', os.path.join(analyzer_dir, 'conf', conf + '.json'), *extra_options, repo_path] + with open(os.path.join(outdir, analyzerlog), "w+") as outfile: + subprocess.run(analyze_command, check=True, stdout=outfile, stderr=subprocess.STDOUT) + outfile.close() + +def compare_runs(analyzer_dir, dummy_c_file, outdir, conf, compare_data_1, compare_data_2): + options = ['--conf', os.path.join(analyzer_dir, 'conf', conf + '.json'), '--disable', 'printstats', '--disable', 'warn.warning', '--disable', 'warn.race', '--disable', 'dbg.compare_runs.diff', '--disable', 'dbg.compare_runs.eqsys', '--enable', 'dbg.compare_runs.node', '--compare_runs', compare_data_1, compare_data_2] + analyze_command = [os.path.join(analyzer_dir, 'goblint'), *options, dummy_c_file] + with open(os.path.join(outdir, comparelog), "w+") as outfile: + subprocess.run(analyze_command, check=True, stdout=outfile, stderr=subprocess.STDOUT) + outfile.close() + +def calculateRelCLOC(repo_path, commit, diff_exclude): + diff_exclude = list(map(lambda x: os.path.join(repo_path, x), diff_exclude)) + relcloc = 0 + for f in commit.modified_files: + _, extension = os.path.splitext(f.filename) + if not (extension == ".h" or extension == ".c"): + continue + filepath = f.new_path + if filepath is None: + filepath = f.old_path + parents = Path(filepath).parents + parents = list(map(lambda x: os.path.join(repo_path, x), parents)) + if any(dir in parents for dir in diff_exclude): + continue + relcloc = relcloc + f.added_lines + f.deleted_lines + return relcloc + +def find_line(pattern, log): + with open (log, 'r') as file: + for line in file: + m = re.search(pattern, line) + if m: + file.close() + return m.groupdict() + return None + +def extract_from_analyzer_log(log): + runtime_pattern = 'TOTAL[ ]+(?P[0-9\.]+) s' + change_info_pattern = 'change_info = { unchanged = (?P[0-9]*); changed = (?P[0-9]*); added = (?P[0-9]*); removed = (?P[0-9]*) }' + r = 
find_line(runtime_pattern, log) + ch = find_line(change_info_pattern, log) or {"unchanged": 0, "changed": 0, "added": 0, "removed": 0} + d = dict(list(r.items()) + list(ch.items())) + with open(log, "r") as file: + num_racewarnings = file.read().count('[Warning][Race]') + d["race_warnings"] = num_racewarnings + file.close() + return d + +def extract_precision_from_compare_log(log): + pattern = "equal: (?P[0-9]+), more precise: (?P[0-9]+), less precise: (?P[0-9]+), incomparable: (?P[0-9]+), total: (?P[0-9]+)" + precision = find_line(pattern, log) + return {k: int(v) for k,v in precision.items()} if precision else None + +def barplot(data_set): + df = pandas.DataFrame(data_set["data"], index=data_set["index"]) # TODO: index=analyzed_commits + df.sort_index(inplace=True, key=lambda idx: idx.map(lambda x: int(x.split(":")[0]))) + print(df) + df.to_csv('results.csv') + + df.plot.bar(rot=0, width=0.7, figsize=(25,10)) + plt.xticks(rotation=45, ha='right', rotation_mode='anchor') + plt.xlabel('Commit') + plt.tight_layout() + plt.savefig("figure.pdf") + +def get_cleaned_filtered_data(result_csv_file, filterRelCLOC=False, filterDetectedChanges=False): + df=pandas.read_csv(result_csv_file, index_col='Commit', sep=";") + df = df.loc[:, ~df.columns.str.contains('^Unnamed')] + + # clean dataset (remove all rows for which any of the runtime entries is 0 which means that the respective analysis + # run failed) + df = df[(df[header_runtime_parent] != 0)] + if filterRelCLOC: + df = df[df["Relevant changed LOC"] > 0] + if filterDetectedChanges: + df = df[df["Changed/Added/Removed functions"] > 0] + return df + +def get_data_from_json(result_file): + with open(result_file) as f: + d = json.load(f) + df=pandas.json_normalize(d['seq_summary']) + return df + +def create_cum_data(dataFrame, num_bins, relColumns): + min = dataFrame[relColumns].min().min() + max = dataFrame[relColumns].max().max() + bins = np.linspace(min,max,num=num_bins+1) + data = [] + base = [] + for c in relColumns: + 
valuesc, basec = np.histogram(dataFrame.loc[:,c], bins=bins) + base = basec + cum = np.cumsum(valuesc, dtype=np.float) + cum[cum==0] = np.nan + data = data + [cum] + return data, base[:-1] + +def cummulative_distr_plot(data_sets, base, outfile, figsize=None, title=None, logscale=False): + if figsize: + plt.figure(figsize=figsize) + else: + plt.figure() + for d in data_sets: + plt.plot(d["values"], base, label=d["label"]) + plt.xlabel('Number of Commits') + if logscale: + plt.ylabel('Runtime in s ($log_{2}$ scale)') + plt.yscale('log', base=2) + plt.gca().yaxis.set_major_formatter(ScalarFormatter()) + plt.xlim(left=0) + plt.ylim(bottom=95) + #plt.yticks(np.arange(100,1500,100)) + else: + plt.ylabel('Runtime in s') + plt.tight_layout() + plt.legend() + plt.title(title) + plt.savefig(outfile) + +def hist_plot(data, step, title, xlabel, ylabel, outfile, size, xlim_left=None, xlim_right=None, cutoffs=None): + min = data.min() + max = data.max() + min = min//step + max = max//step + 1 + bins = np.arange(min*step,(max+1)*step,step) + + if cutoffs: + plt.figure() + bax = brokenaxes.brokenaxes(ylims=cutoffs, hspace=0.05, left = 0.18, bottom = 0.16) + bax.hist(data, bins, histtype='bar') + plt.xlabel(xlabel, labelpad=0) + plt.ylabel(ylabel, labelpad=0) + if title: plt.title(title) + plt.savefig(outfile, bbox_inches='tight') + else: + fig = plt.figure() + width, height = size + fig.set_size_inches(w=width, h=height) + plt.hist(data, bins) + if xlim_left: + plt.xlim(left=xlim_left, right=xlim_right) + else: + plt.xlim(right=xlim_right) + if xlabel: plt.xlabel(xlabel) + if ylabel: plt.ylabel(ylabel) + if title: plt.title(title) + plt.tight_layout(pad=0.4) + plt.savefig(outfile) + +def hist_subplots(ax, data, step): + min = data.min() + max = data.max() + min = min//step + max = max//step + 1 + bins = np.arange(min*step,(max+1)*step,step) + ax.hist(data, bins) + +def four_hist_subplots(data, title, xlabel, ylabel, outfile): + step = 0.01 + fig, ((ax1,ax2),(ax3,ax4)) = 
plt.subplots(2,2,tight_layout=True) + for i, ax in enumerate([ax1,ax2,ax3,ax4]): + hist_subplots(ax, data, step) + ax.title.set_text(title[i]) + plt.xlabel(xlabel) + plt.ylabel(ylabel) + plt.tight_layout() + fig.savefig(outfile) + +def scatter_plot(data, xlabel, ylabel, outfile, size): + fig = plt.figure() + width, height = size + fig.set_size_inches(w=width, h=height) + colors=['red','azure','blue','brown','chartreuse','chocolate','darkblue','darkgreen','seagreen','green','indigo','orangered','orange','coral','olive','mediumseagreen','grey','teal'] + markers = ['x','+','o','s','p','*','D','d','v','^','<','>','1','2','3','4','H','P'] + linestyles = ['dashed'] + for i, (x, y) in enumerate(data): + plt.plot(x,y, marker='x', linewidth=0.4, markersize=1, alpha=0.85, color=colors[i % len(colors)], linestyle=linestyles[i % len(linestyles)]) + plt.xlabel(xlabel) + plt.ylabel(ylabel) + plt.ylim(bottom=-0.005, top=0.19) + plt.tight_layout(pad=0.4) + plt.savefig(outfile) diff --git a/scripts/incremental/build/build_compdb_zstd.sh b/scripts/incremental/build/build_compdb_zstd.sh new file mode 100755 index 000000000..baedce33c --- /dev/null +++ b/scripts/incremental/build/build_compdb_zstd.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +sed -i 's/zstd : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT).*/zstd : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) -DZSTD_NO_INTRINSICS/' programs/Makefile +LC_ALL=C.UTF-8 compiledb make -j 1 zstd From 73cebe1c873f9667f749b8e15a0fea09f1e89716 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 6 Oct 2022 11:33:11 +0200 Subject: [PATCH 02/84] Change incremental benchmarking scripts for running with sqlite --- .../incremental/benchmarking/efficiency.py | 36 ++++++++++++------- scripts/incremental/benchmarking/utils.py | 10 ++++-- 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/scripts/incremental/benchmarking/efficiency.py b/scripts/incremental/benchmarking/efficiency.py index c6abb95a0..4023c84eb 100644 --- 
a/scripts/incremental/benchmarking/efficiency.py +++ b/scripts/incremental/benchmarking/efficiency.py @@ -23,13 +23,13 @@ exit() result_dir = os.path.join(os.getcwd(), 'result_efficiency') maxCLOC = 50 # can be deactivated with None -url = "https://github.com/facebook/zstd" -repo_name = "zstd" -build_compdb = "build_compdb_zstd.sh" -conf_base = "zstd-race-baseline" # very minimal: "zstd-minimal" -conf_incrpost = "zstd-race-incrpostsolver" +url = "https://github.com/sqlite/sqlite" +repo_name = "sqlite" +build_compdb = "../build/build_compdb_sqlite.sh" +conf_base = "large-program" # very minimal: "zstd-minimal" +conf_incrpost = "large-program" #TODO: Use incremental postprocessing begin = datetime(2021,8,1) -to = datetime(2022,2,1) # minimal subset: datetime(2021,8,4) +to = datetime(2021,8,10) # minimal subset: datetime(2021,8,4) diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"] analyzer_dir = sys.argv[1] only_collect_results = False # can be turned on to collect results, if data collection was aborted before the creation of result tables @@ -93,26 +93,36 @@ def analyze_small_commits_in_repo(cwd, outdir, from_c, to_c): #print('Starting from parent', str(parent.hash), ".") outparent = os.path.join(outtry, 'parent') os.makedirs(outparent) - add_options = ['--disable', 'incremental.load', '--enable', 'incremental.save'] - utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, parent.hash, outparent, conf_base, add_options) + + def append_to_repo_path(file): + return os.path.join(repo_path, file) + + sqlite_files = ['sqlite3.c', 'sqlite3.h', 'sqlite3ext.h', 'shell.c'] + sqlite_files = list(map(append_to_repo_path, sqlite_files)) + + pseudo_repo_path = "" + + options = sqlite_files + ['-v', '--set', 'pre.cppflags[+]', '-DSQLITE_DEBUG', '--disable', 'ana.base.context.non-ptr', '--disable', 'ana.int.def_exc', '--disable', 'sem.unknown_function.spawn', '--set', 'ana.thread.domain', 'plain', '--enable', 'exp.earlyglobs', '--set', 
'ana.base.privatization', 'none', '--set', 'pre.cppflags[+]', '-DGOBLINT_NO_BSEARCH', '--set', 'pre.cppflags[+]', '-DGOBLINT_NO_ASSERT', '--set', 'result', 'json-messages', '--set', 'ana.activated', '[\"base\",\"mallocWrapper\"]', '--set', 'ana.ctx_insens[+]', 'base', '--set', 'ana.ctx_insens[+]', 'mallocWrapper'] + add_options = options + ['--disable', 'incremental.load', '--enable', 'incremental.save'] + utils.analyze_commit(analyzer_dir, gr, pseudo_repo_path, build_compdb, parent.hash, outparent, conf_base, add_options) #print('And now analyze', str(commit.hash), 'incrementally.') outchild = os.path.join(outtry, 'child') os.makedirs(outchild) - add_options = ['--enable', 'incremental.load', '--disable', 'incremental.save'] - utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchild, conf_base, add_options) + add_options = options + ['--enable', 'incremental.load', '--disable', 'incremental.save'] + utils.analyze_commit(analyzer_dir, gr, pseudo_repo_path, build_compdb, commit.hash, outchild, conf_base, add_options) #print('And again incremental, this time with incremental postsolver') outchildincrpost = os.path.join(outtry, 'child-incr-post') os.makedirs(outchildincrpost) - add_options = ['--enable', 'incremental.load', '--disable', 'incremental.save'] - utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchildincrpost, conf_incrpost, add_options) + add_options = options + ['--enable', 'incremental.load', '--disable', 'incremental.save'] + utils.analyze_commit(analyzer_dir, gr, pseudo_repo_path, build_compdb, commit.hash, outchildincrpost, conf_incrpost, add_options) #print('And again incremental, this time with incremental postsolver and reluctant') outchildrel = os.path.join(outtry, 'child-rel') os.makedirs(outchildrel) add_options = ['--enable', 'incremental.load', '--disable', 'incremental.save', '--enable', 'incremental.reluctant.enabled'] - utils.analyze_commit(analyzer_dir, gr, repo_path, 
build_compdb, commit.hash, outchildrel, conf_incrpost, add_options) + utils.analyze_commit(analyzer_dir, gr, pseudo_repo_path, build_compdb, commit.hash, outchildrel, conf_incrpost, add_options) count_analyzed+=1 failed = False diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index c45bf36ae..1ba99411c 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -54,12 +54,18 @@ def analyze_commit(analyzer_dir, gr : Git, repo_path, build_compdb, commit_hash, file.write(o + " ") file.close() - prepare_command = ['sh', os.path.join(analyzer_dir, 'scripts', build_compdb)] + script_path = os.path.abspath(os.path.dirname(__file__)) + + prepare_command = ['sh', os.path.join(script_path, build_compdb)] with open(os.path.join(outdir, preparelog), "w+") as outfile: subprocess.run(prepare_command, cwd = gr.path, check=True, stdout=outfile, stderr=subprocess.STDOUT) outfile.close() - analyze_command = [os.path.join(analyzer_dir, 'goblint'), '--conf', os.path.join(analyzer_dir, 'conf', conf + '.json'), *extra_options, repo_path] + analyze_command = [os.path.join(analyzer_dir, 'goblint'), '--conf', os.path.join(analyzer_dir, 'conf', conf + '.json'), *extra_options] + + if (repo_path != ""): + analyze_command.append(repo_path) + with open(os.path.join(outdir, analyzerlog), "w+") as outfile: subprocess.run(analyze_command, check=True, stdout=outfile, stderr=subprocess.STDOUT) outfile.close() From c9e5566c33e5885efb906fb36396eedd0477487c Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 6 Oct 2022 12:41:08 +0200 Subject: [PATCH 03/84] Update path for conf --- scripts/incremental/benchmarking/efficiency.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/incremental/benchmarking/efficiency.py b/scripts/incremental/benchmarking/efficiency.py index 4023c84eb..8ac91072f 100644 --- a/scripts/incremental/benchmarking/efficiency.py +++ 
b/scripts/incremental/benchmarking/efficiency.py @@ -26,8 +26,8 @@ url = "https://github.com/sqlite/sqlite" repo_name = "sqlite" build_compdb = "../build/build_compdb_sqlite.sh" -conf_base = "large-program" # very minimal: "zstd-minimal" -conf_incrpost = "large-program" #TODO: Use incremental postprocessing +conf_base = os.path.join("examples", "large-program") # very minimal: "zstd-minimal" +conf_incrpost = os.path.join("examples", "large-program") #TODO: Use incremental postprocessing begin = datetime(2021,8,1) to = datetime(2021,8,10) # minimal subset: datetime(2021,8,4) diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"] From e1388676014a19581ed710b276ff738bccec5337 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 6 Oct 2022 12:41:31 +0200 Subject: [PATCH 04/84] Add build_compdb_sqlite.sh --- scripts/incremental/build/build_compdb_sqlite.sh | 4 ++++ 1 file changed, 4 insertions(+) create mode 100755 scripts/incremental/build/build_compdb_sqlite.sh diff --git a/scripts/incremental/build/build_compdb_sqlite.sh b/scripts/incremental/build/build_compdb_sqlite.sh new file mode 100755 index 000000000..65ef7ce04 --- /dev/null +++ b/scripts/incremental/build/build_compdb_sqlite.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +# sed -i 's/zstd : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT).*/zstd : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) -DZSTD_NO_INTRINSICS/' programs/Makefile +sh configure +LC_ALL=C.UTF-8 compiledb make -j 1 sqlite3.lo From 7bd5d926c3f074cd0f9a6861e156432d71af7860 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 6 Oct 2022 14:03:11 +0200 Subject: [PATCH 05/84] Update configuration for sqlite --- scripts/incremental/benchmarking/efficiency.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/incremental/benchmarking/efficiency.py b/scripts/incremental/benchmarking/efficiency.py index 8ac91072f..220987eef 100644 --- 
a/scripts/incremental/benchmarking/efficiency.py +++ b/scripts/incremental/benchmarking/efficiency.py @@ -26,8 +26,8 @@ url = "https://github.com/sqlite/sqlite" repo_name = "sqlite" build_compdb = "../build/build_compdb_sqlite.sh" -conf_base = os.path.join("examples", "large-program") # very minimal: "zstd-minimal" -conf_incrpost = os.path.join("examples", "large-program") #TODO: Use incremental postprocessing +conf_base = os.path.join("custom", "sqlite-minimal") # very minimal: "zstd-minimal" +conf_incrpost = os.path.join("custom", "sqlite-minimal") #TODO: Use incremental postprocessing begin = datetime(2021,8,1) to = datetime(2021,8,10) # minimal subset: datetime(2021,8,4) diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"] @@ -102,7 +102,7 @@ def append_to_repo_path(file): pseudo_repo_path = "" - options = sqlite_files + ['-v', '--set', 'pre.cppflags[+]', '-DSQLITE_DEBUG', '--disable', 'ana.base.context.non-ptr', '--disable', 'ana.int.def_exc', '--disable', 'sem.unknown_function.spawn', '--set', 'ana.thread.domain', 'plain', '--enable', 'exp.earlyglobs', '--set', 'ana.base.privatization', 'none', '--set', 'pre.cppflags[+]', '-DGOBLINT_NO_BSEARCH', '--set', 'pre.cppflags[+]', '-DGOBLINT_NO_ASSERT', '--set', 'result', 'json-messages', '--set', 'ana.activated', '[\"base\",\"mallocWrapper\"]', '--set', 'ana.ctx_insens[+]', 'base', '--set', 'ana.ctx_insens[+]', 'mallocWrapper'] + options = sqlite_files + ['-v'] add_options = options + ['--disable', 'incremental.load', '--enable', 'incremental.save'] utils.analyze_commit(analyzer_dir, gr, pseudo_repo_path, build_compdb, parent.hash, outparent, conf_base, add_options) From dffaeac626aefce01fa4da16aa7a3e61d46dc50a Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 6 Oct 2022 15:54:30 +0200 Subject: [PATCH 06/84] efficiency.py: Make it configurable which project to benchmark incremental goblint on. 
--- .../incremental/benchmarking/efficiency.py | 70 ++++++++++++------- scripts/incremental/benchmarking/projects.py | 43 ++++++++++++ 2 files changed, 86 insertions(+), 27 deletions(-) create mode 100644 scripts/incremental/benchmarking/projects.py diff --git a/scripts/incremental/benchmarking/efficiency.py b/scripts/incremental/benchmarking/efficiency.py index 220987eef..6c7f53292 100644 --- a/scripts/incremental/benchmarking/efficiency.py +++ b/scripts/incremental/benchmarking/efficiency.py @@ -11,6 +11,9 @@ import sys import pandas as pd +# Some basic settings for the different projects (currently zstd, sqlite) +import projects + ################################################################################ # Usage: python3 incremental_smallcommits.py # Executing the script will overwrite the directory 'result_efficiency' in the cwd. @@ -18,26 +21,37 @@ # config file is assumed to be found in the conf directory of the analyzers repository. # The single test runs are mapped to processors according to the coremapping. The one specified in the section below # should work for Intel machines, otherwise you might need to adapt it according to the description. -if len(sys.argv) != 3: - print("Wrong number of parameters.\nUse script like this: python3 parallel_benchmarking.py ") - exit() +usage = "Use script like this: python3 parallel_benchmarking.py " +if len(sys.argv) != 4: + print("Wrong number of parameters.\n" + usage) + exit() + +# Load some project dependent settings: +project = projects.projects.get(sys.argv[2]) +if project == None: + print("Given Project " + project + " is not one of the supported projects. 
Add a new project by modifying projects.py.") + exit() + +url = project.url +repo_name = project.repo_name +build_compdb = project.build_compdb +conf_base = project.conf_base +conf_incrpost = project.conf_incrpost +begin = project.begin +to = project.to +files = project.files + +# Project independent settings +diff_exclude = project.diff_exclude result_dir = os.path.join(os.getcwd(), 'result_efficiency') maxCLOC = 50 # can be deactivated with None -url = "https://github.com/sqlite/sqlite" -repo_name = "sqlite" -build_compdb = "../build/build_compdb_sqlite.sh" -conf_base = os.path.join("custom", "sqlite-minimal") # very minimal: "zstd-minimal" -conf_incrpost = os.path.join("custom", "sqlite-minimal") #TODO: Use incremental postprocessing -begin = datetime(2021,8,1) -to = datetime(2021,8,10) # minimal subset: datetime(2021,8,4) -diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"] analyzer_dir = sys.argv[1] only_collect_results = False # can be turned on to collect results, if data collection was aborted before the creation of result tables ################################################################################ try: - numcores = int(sys.argv[2]) + numcores = int(sys.argv[3]) except ValueError: - print("Parameter should be a number.\nUse script like this: python3 parallel_benchmarking.py ") + print("Parameter should be a number.\nUse script like this:" + usage) exit() avail_phys_cores = psutil.cpu_count(logical=False) allowedcores = avail_phys_cores - 1 @@ -66,6 +80,17 @@ def analyze_small_commits_in_repo(cwd, outdir, from_c, to_c): analyzed_commits = {} repo_path = os.path.join(cwd, repo_name) + options = [] + if files == [] or files == None: + # If no list of files is given for the project, we analyze the repo using compiledb. For that, we pass the repo_path to goblint. + repo_path_goblint = repo_path + else: + def append_to_repo_path(file): + return os.path.join(repo_path, file) + # A list of files is given for the project. 
Pass these to goblint, but not the repo_path. + repo_path_goblint = "" + options = list(map(append_to_repo_path, files)) + for commit in itertools.islice(itertools.filterfalse(filter_commits_false_pred(repo_path), Repository(url, since=begin, to=to, only_no_merge=True, clone_repo_to=cwd).traverse_commits()), from_c, to_c): gr = Git(repo_path) @@ -94,35 +119,26 @@ def analyze_small_commits_in_repo(cwd, outdir, from_c, to_c): outparent = os.path.join(outtry, 'parent') os.makedirs(outparent) - def append_to_repo_path(file): - return os.path.join(repo_path, file) - - sqlite_files = ['sqlite3.c', 'sqlite3.h', 'sqlite3ext.h', 'shell.c'] - sqlite_files = list(map(append_to_repo_path, sqlite_files)) - - pseudo_repo_path = "" - - options = sqlite_files + ['-v'] add_options = options + ['--disable', 'incremental.load', '--enable', 'incremental.save'] - utils.analyze_commit(analyzer_dir, gr, pseudo_repo_path, build_compdb, parent.hash, outparent, conf_base, add_options) + utils.analyze_commit(analyzer_dir, gr, repo_path_goblint, build_compdb, parent.hash, outparent, conf_base, add_options) #print('And now analyze', str(commit.hash), 'incrementally.') outchild = os.path.join(outtry, 'child') os.makedirs(outchild) add_options = options + ['--enable', 'incremental.load', '--disable', 'incremental.save'] - utils.analyze_commit(analyzer_dir, gr, pseudo_repo_path, build_compdb, commit.hash, outchild, conf_base, add_options) + utils.analyze_commit(analyzer_dir, gr, repo_path_goblint, build_compdb, commit.hash, outchild, conf_base, add_options) #print('And again incremental, this time with incremental postsolver') outchildincrpost = os.path.join(outtry, 'child-incr-post') os.makedirs(outchildincrpost) add_options = options + ['--enable', 'incremental.load', '--disable', 'incremental.save'] - utils.analyze_commit(analyzer_dir, gr, pseudo_repo_path, build_compdb, commit.hash, outchildincrpost, conf_incrpost, add_options) + utils.analyze_commit(analyzer_dir, gr, repo_path_goblint, 
build_compdb, commit.hash, outchildincrpost, conf_incrpost, add_options) #print('And again incremental, this time with incremental postsolver and reluctant') outchildrel = os.path.join(outtry, 'child-rel') os.makedirs(outchildrel) - add_options = ['--enable', 'incremental.load', '--disable', 'incremental.save', '--enable', 'incremental.reluctant.enabled'] - utils.analyze_commit(analyzer_dir, gr, pseudo_repo_path, build_compdb, commit.hash, outchildrel, conf_incrpost, add_options) + add_options = options + ['--enable', 'incremental.load', '--disable', 'incremental.save', '--enable', 'incremental.reluctant.enabled'] + utils.analyze_commit(analyzer_dir, gr, repo_path_goblint, build_compdb, commit.hash, outchildrel, conf_incrpost, add_options) count_analyzed+=1 failed = False diff --git a/scripts/incremental/benchmarking/projects.py b/scripts/incremental/benchmarking/projects.py new file mode 100644 index 000000000..04df7fe7b --- /dev/null +++ b/scripts/incremental/benchmarking/projects.py @@ -0,0 +1,43 @@ +import os +import datetime + +from dataclasses import dataclass + +@dataclass +class ProjectConfig: + url: str + repo_name: str + build_compdb: str + conf_base: str + conf_incrpost: str + begin: datetime.datetime + to: datetime.datetime + diff_exclude: list[str] + '''Files to analyze. 
If this list is not empty, the given files will be analyzed (not those in the compiledb)''' + files: list[str] + +sqlite = ProjectConfig( + url = "https://github.com/sqlite/sqlite", + repo_name = "sqlite", + build_compdb = "../build/build_compdb_sqlite.sh", + conf_base = os.path.join("custom", "sqlite-minimal"), # very minimal: "zstd-minimal" + conf_incrpost = os.path.join("custom", "sqlite-minimal"), #TODO: Use incremental postprocessing, + begin = datetime.datetime(2021,8,1), + to = datetime.datetime(2021,8,10), # minimal subset: datetime(2021,8,4) + diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"], + files = ['sqlite3.c', 'sqlite3.h', 'sqlite3ext.h', 'shell.c'] +) + +zstd = ProjectConfig( + url = "https://github.com/facebook/zstd", + repo_name = "zstd", + build_compdb = "../build/build_compdb_zstd.sh", + conf_base = os.path.join("custom", "zstd-race-baseline"), # very minimal: "zstd-minimal" + conf_incrpost = os.path.join("custom", "zstd-race-incrpostsolver"), + begin = datetime.datetime(2021,8,1), # very minimal: "zstd-minimal" + to = datetime.datetime(2021,8,10), # minimal subset: datetime(2021,8,4) + diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"], + files = None +) + +projects = {"sqlite": sqlite, "zstd": zstd} From 764372e2191e6bc3138f340b93676048bdbc3791 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 6 Oct 2022 16:48:34 +0200 Subject: [PATCH 07/84] Make precision take paramters for which project to run on --- .../incremental/benchmarking/efficiency.py | 31 ++++-------- scripts/incremental/benchmarking/precision.py | 48 ++++++++++++------- scripts/incremental/benchmarking/projects.py | 2 +- scripts/incremental/benchmarking/utils.py | 14 ++++-- 4 files changed, 53 insertions(+), 42 deletions(-) diff --git a/scripts/incremental/benchmarking/efficiency.py b/scripts/incremental/benchmarking/efficiency.py index 6c7f53292..f9aa42d94 100644 --- a/scripts/incremental/benchmarking/efficiency.py 
+++ b/scripts/incremental/benchmarking/efficiency.py @@ -21,7 +21,7 @@ # config file is assumed to be found in the conf directory of the analyzers repository. # The single test runs are mapped to processors according to the coremapping. The one specified in the section below # should work for Intel machines, otherwise you might need to adapt it according to the description. -usage = "Use script like this: python3 parallel_benchmarking.py " +usage = "Use script like this: python3 efficiency.py " if len(sys.argv) != 4: print("Wrong number of parameters.\n" + usage) exit() @@ -40,9 +40,9 @@ begin = project.begin to = project.to files = project.files +diff_exclude = project.diff_exclude # Project independent settings -diff_exclude = project.diff_exclude result_dir = os.path.join(os.getcwd(), 'result_efficiency') maxCLOC = 50 # can be deactivated with None analyzer_dir = sys.argv[1] @@ -80,17 +80,6 @@ def analyze_small_commits_in_repo(cwd, outdir, from_c, to_c): analyzed_commits = {} repo_path = os.path.join(cwd, repo_name) - options = [] - if files == [] or files == None: - # If no list of files is given for the project, we analyze the repo using compiledb. For that, we pass the repo_path to goblint. - repo_path_goblint = repo_path - else: - def append_to_repo_path(file): - return os.path.join(repo_path, file) - # A list of files is given for the project. Pass these to goblint, but not the repo_path. 
- repo_path_goblint = "" - options = list(map(append_to_repo_path, files)) - for commit in itertools.islice(itertools.filterfalse(filter_commits_false_pred(repo_path), Repository(url, since=begin, to=to, only_no_merge=True, clone_repo_to=cwd).traverse_commits()), from_c, to_c): gr = Git(repo_path) @@ -119,26 +108,26 @@ def append_to_repo_path(file): outparent = os.path.join(outtry, 'parent') os.makedirs(outparent) - add_options = options + ['--disable', 'incremental.load', '--enable', 'incremental.save'] - utils.analyze_commit(analyzer_dir, gr, repo_path_goblint, build_compdb, parent.hash, outparent, conf_base, add_options) + add_options = ['--disable', 'incremental.load', '--enable', 'incremental.save'] + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, parent.hash, outparent, conf_base, add_options, files) #print('And now analyze', str(commit.hash), 'incrementally.') outchild = os.path.join(outtry, 'child') os.makedirs(outchild) - add_options = options + ['--enable', 'incremental.load', '--disable', 'incremental.save'] - utils.analyze_commit(analyzer_dir, gr, repo_path_goblint, build_compdb, commit.hash, outchild, conf_base, add_options) + add_options = ['--enable', 'incremental.load', '--disable', 'incremental.save'] + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchild, conf_base, add_options, files) #print('And again incremental, this time with incremental postsolver') outchildincrpost = os.path.join(outtry, 'child-incr-post') os.makedirs(outchildincrpost) - add_options = options + ['--enable', 'incremental.load', '--disable', 'incremental.save'] - utils.analyze_commit(analyzer_dir, gr, repo_path_goblint, build_compdb, commit.hash, outchildincrpost, conf_incrpost, add_options) + add_options = ['--enable', 'incremental.load', '--disable', 'incremental.save'] + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchildincrpost, conf_incrpost, add_options, files) #print('And again 
incremental, this time with incremental postsolver and reluctant') outchildrel = os.path.join(outtry, 'child-rel') os.makedirs(outchildrel) - add_options = options + ['--enable', 'incremental.load', '--disable', 'incremental.save', '--enable', 'incremental.reluctant.enabled'] - utils.analyze_commit(analyzer_dir, gr, repo_path_goblint, build_compdb, commit.hash, outchildrel, conf_incrpost, add_options) + add_options = ['--enable', 'incremental.load', '--disable', 'incremental.save', '--enable', 'incremental.reluctant.enabled'] + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchildrel, conf_incrpost, add_options, files) count_analyzed+=1 failed = False diff --git a/scripts/incremental/benchmarking/precision.py b/scripts/incremental/benchmarking/precision.py index a9c848114..510731026 100644 --- a/scripts/incremental/benchmarking/precision.py +++ b/scripts/incremental/benchmarking/precision.py @@ -9,30 +9,44 @@ import pytz import multiprocessing as mp +# Some basic settings for the different projects (currently zstd, sqlite) +import projects ################################################################################ # Usage: python3 incremental_smallcommits.py # Executing the script will overwrite the directory 'result_precision' in the cwd. # The script for building the compilation database is assumed to be found in the analyzers script directory and the # config file is assumed to be found in the conf directory of the analyzers repository. 
-if len(sys.argv) != 3: - print("Wrong number of parameters.\nUse script like this: python3 parallel_benchmarking.py ") +usage = "Use script like this: python3 precision.py " +if len(sys.argv) != 4: + print("Wrong number of parameters.\n" + usage) exit() -res_dir = os.path.abspath('result_precision') -maxCLOC = None -url = "https://github.com/facebook/zstd" -repo_name = "zstd" -build_compdb = "build_compdb_zstd.sh" -conf = "zstd-race-incrpostsolver" -begin = datetime(2021,8,1) -to = datetime(2022,2,1) -diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"] -analyzer_dir = sys.argv[1] + +# Load some project dependent settings: +project = projects.projects.get(sys.argv[2]) +if project == None: + print("Given Project " + project + " is not one of the supported projects. Add a new project by modifying projects.py.") + exit() + +url = project.url +repo_name = project.repo_name +build_compdb = project.build_compdb +conf = project.conf_base +begin = project.begin +to = project.to +diff_exclude = project.diff_exclude +files = project.files + try: - numcores = int(sys.argv[2]) + numcores = int(sys.argv[3]) except ValueError: - print("Parameter should be a number.\nUse script like this: python3 parallel_benchmarking.py ") + print("Parameter should be a number.\n" + usage) exit() + +# Project independent settings +analyzer_dir = sys.argv[1] +res_dir = os.path.abspath('result_precision') +maxCLOC = None only_collect_results = False # can be turned on to collect results, if data collection was aborted before the creation of result tables ################################################################################ @@ -127,7 +141,7 @@ def analyze_series_in_repo(series): try: # print('Analyze ', str(commit.hash), ' as initial commit.') add_options = ['--disable', 'incremental.load', '--enable', 'incremental.save'] - utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, out_commit, conf, add_options) + 
utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, out_commit, conf, add_options, files) prev_commit = commit.hash except utils.subprocess.CalledProcessError as e: print('Aborted initial because command ', e.cmd, 'failed.') @@ -148,7 +162,7 @@ def analyze_series_in_repo(series): os.makedirs(out_nonincr) file_original_run = os.path.join(out_nonincr, "compare-data-nonincr") add_options = ['--enable', 'incremental.only-rename', '--set', 'save_run', file_original_run] - utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, out_nonincr, conf, add_options) + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, out_nonincr, conf, add_options, files) # analyze commit incrementally based on the previous commit and save run for comparison # print('Analyze', str(commit.hash), 'incrementally (#', commit_num, ').') @@ -156,7 +170,7 @@ def analyze_series_in_repo(series): os.makedirs(out_incr) file_incremental_run = os.path.join(out_incr, "compare-data-incr") add_options = ['--enable', 'incremental.load', '--enable', 'incremental.save', '--enable', 'incremental.reluctant.enabled', '--set', 'save_run', file_incremental_run] - utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, out_incr, conf, add_options) + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, out_incr, conf, add_options, files) if commit_num in compare_commits or commit_num == len(series) - 1: # compare stored data of original and incremental run diff --git a/scripts/incremental/benchmarking/projects.py b/scripts/incremental/benchmarking/projects.py index 04df7fe7b..732bd54a7 100644 --- a/scripts/incremental/benchmarking/projects.py +++ b/scripts/incremental/benchmarking/projects.py @@ -35,7 +35,7 @@ class ProjectConfig: conf_base = os.path.join("custom", "zstd-race-baseline"), # very minimal: "zstd-minimal" conf_incrpost = os.path.join("custom", "zstd-race-incrpostsolver"), begin = 
datetime.datetime(2021,8,1), # very minimal: "zstd-minimal" - to = datetime.datetime(2021,8,10), # minimal subset: datetime(2021,8,4) + to = datetime.datetime(2021,10,10), # minimal subset: datetime(2021,8,4) diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"], files = None ) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index 1ba99411c..b0c310fa8 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -40,7 +40,8 @@ def reset_incremental_data(incr_data_dir): if os.path.exists(incr_data_dir) and os.path.isdir(incr_data_dir): shutil.rmtree(incr_data_dir) -def analyze_commit(analyzer_dir, gr : Git, repo_path, build_compdb, commit_hash, outdir, conf, extra_options): +def analyze_commit(analyzer_dir, gr : Git, repo_path, build_compdb, commit_hash, outdir, conf, extra_options, files): + gr.checkout(commit_hash) conf_path = os.path.join(analyzer_dir, 'conf', conf + '.json') @@ -61,9 +62,16 @@ def analyze_commit(analyzer_dir, gr : Git, repo_path, build_compdb, commit_hash, subprocess.run(prepare_command, cwd = gr.path, check=True, stdout=outfile, stderr=subprocess.STDOUT) outfile.close() - analyze_command = [os.path.join(analyzer_dir, 'goblint'), '--conf', os.path.join(analyzer_dir, 'conf', conf + '.json'), *extra_options] + files = [] + if files != [] and files != None: + def append_to_repo_path(file): + return os.path.join(repo_path, file) + files = list(map(append_to_repo_path, files)) + + analyze_command = [os.path.join(analyzer_dir, 'goblint'), '--conf', os.path.join(analyzer_dir, 'conf', conf + '.json'), *files, *extra_options] - if (repo_path != ""): + # If the list of files was empty, we pass the repo_path to goblint + if not files: analyze_command.append(repo_path) with open(os.path.join(outdir, analyzerlog), "w+") as outfile: From c773156fb22092c8c1f7cadaa25dc5b6c2f5d1e0 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 6 Oct 
2022 17:00:02 +0200 Subject: [PATCH 08/84] Do not use dataclasses for compatibility with python 3.6 --- scripts/incremental/benchmarking/projects.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/scripts/incremental/benchmarking/projects.py b/scripts/incremental/benchmarking/projects.py index 732bd54a7..1dd5030d7 100644 --- a/scripts/incremental/benchmarking/projects.py +++ b/scripts/incremental/benchmarking/projects.py @@ -1,9 +1,6 @@ import os import datetime -from dataclasses import dataclass - -@dataclass class ProjectConfig: url: str repo_name: str @@ -16,6 +13,17 @@ class ProjectConfig: '''Files to analyze. If this list is not empty, the given files will be analyzed (not those in the compiledb)''' files: list[str] + def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin, to, diff_exclude, files): + self.url = url + self.repo_name = repo_name + self.build_compdb = build_compdb + self.conf_base = conf_base + self.conf_incrpost = conf_incrpost + self.begin = begin + self.to = to + self.diff_exclude = diff_exclude + self.files = files + sqlite = ProjectConfig( url = "https://github.com/sqlite/sqlite", repo_name = "sqlite", From 7009d93abfd7042a1975984fd51d4ce1dae5e16c Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 6 Oct 2022 17:00:31 +0200 Subject: [PATCH 09/84] Improve error message --- scripts/incremental/benchmarking/efficiency.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/incremental/benchmarking/efficiency.py b/scripts/incremental/benchmarking/efficiency.py index f9aa42d94..7c63aa839 100644 --- a/scripts/incremental/benchmarking/efficiency.py +++ b/scripts/incremental/benchmarking/efficiency.py @@ -29,7 +29,7 @@ # Load some project dependent settings: project = projects.projects.get(sys.argv[2]) if project == None: - print("Given Project " + project + " is not one of the supported projects. 
Add a new project by modifying projects.py.") + print("Given Project \"" + sys.argv[2] + "\" is not one of the supported projects. Add a new project by modifying projects.py.\n" + usage) exit() url = project.url From 11e92bfd480d8c77571382fda7c836f3cda533a6 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 6 Oct 2022 17:03:01 +0200 Subject: [PATCH 10/84] Change annotation of type for compatibility with Python 3.6 --- scripts/incremental/benchmarking/projects.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/incremental/benchmarking/projects.py b/scripts/incremental/benchmarking/projects.py index 1dd5030d7..503b63f3d 100644 --- a/scripts/incremental/benchmarking/projects.py +++ b/scripts/incremental/benchmarking/projects.py @@ -9,9 +9,9 @@ class ProjectConfig: conf_incrpost: str begin: datetime.datetime to: datetime.datetime - diff_exclude: list[str] + diff_exclude: list # list[str] '''Files to analyze. If this list is not empty, the given files will be analyzed (not those in the compiledb)''' - files: list[str] + files: list # list[str] def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin, to, diff_exclude, files): self.url = url From 2b7eafd4e504d5fd4a8e43253f00c476ab9b2469 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 6 Oct 2022 17:04:32 +0200 Subject: [PATCH 11/84] Add configuration for incremental postsolver to sqlite benchmarking config. 
--- scripts/incremental/benchmarking/projects.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/incremental/benchmarking/projects.py b/scripts/incremental/benchmarking/projects.py index 503b63f3d..d2efffefe 100644 --- a/scripts/incremental/benchmarking/projects.py +++ b/scripts/incremental/benchmarking/projects.py @@ -29,7 +29,7 @@ def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin repo_name = "sqlite", build_compdb = "../build/build_compdb_sqlite.sh", conf_base = os.path.join("custom", "sqlite-minimal"), # very minimal: "zstd-minimal" - conf_incrpost = os.path.join("custom", "sqlite-minimal"), #TODO: Use incremental postprocessing, + conf_incrpost = os.path.join("custom", "sqlite-minimal-incrpostsolver"), begin = datetime.datetime(2021,8,1), to = datetime.datetime(2021,8,10), # minimal subset: datetime(2021,8,4) diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"], From 150075221b64ccd4116e9b349e5f45297a60bb53 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 6 Oct 2022 17:54:28 +0200 Subject: [PATCH 12/84] Utils.analyze_commit: Fix handling of case that file list is passed. 
--- scripts/incremental/benchmarking/utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index b0c310fa8..c53b30b4a 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -62,13 +62,13 @@ def analyze_commit(analyzer_dir, gr : Git, repo_path, build_compdb, commit_hash, subprocess.run(prepare_command, cwd = gr.path, check=True, stdout=outfile, stderr=subprocess.STDOUT) outfile.close() - files = [] - if files != [] and files != None: + file_list = [] + if files: def append_to_repo_path(file): return os.path.join(repo_path, file) - files = list(map(append_to_repo_path, files)) + file_list = list(map(append_to_repo_path, files)) - analyze_command = [os.path.join(analyzer_dir, 'goblint'), '--conf', os.path.join(analyzer_dir, 'conf', conf + '.json'), *files, *extra_options] + analyze_command = [os.path.join(analyzer_dir, 'goblint'), '--conf', os.path.join(analyzer_dir, 'conf', conf + '.json'), *file_list, *extra_options] # If the list of files was empty, we pass the repo_path to goblint if not files: From 79c940f8f39e02e249687e890d0ccc71bef017a1 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Fri, 7 Oct 2022 09:50:13 +0200 Subject: [PATCH 13/84] Print analyze command and commit hash in config.out created by incremental benchmarking script. 
--- scripts/incremental/benchmarking/utils.py | 36 +++++++++++++++-------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index c53b30b4a..539abc301 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -45,7 +45,19 @@ def analyze_commit(analyzer_dir, gr : Git, repo_path, build_compdb, commit_hash, gr.checkout(commit_hash) conf_path = os.path.join(analyzer_dir, 'conf', conf + '.json') - # print configuration + # Creat the analyze command + file_list = [] + if files: + def append_to_repo_path(file): + return os.path.join(repo_path, file) + file_list = list(map(append_to_repo_path, files)) + + analyze_command = [os.path.join(analyzer_dir, 'goblint'), '--conf', os.path.join(analyzer_dir, 'conf', conf + '.json'), *file_list, *extra_options] + # If the list of files was empty, we pass the repo_path to goblint + if not files: + analyze_command.append(repo_path) + + # print configuration and analyze command with open(outdir+'/config.out', "a+") as file: with open(conf_path, "r") as c: file.write("config: " + c.read()) @@ -53,27 +65,25 @@ def analyze_commit(analyzer_dir, gr : Git, repo_path, build_compdb, commit_hash, file.write("added options:\n") for o in extra_options: file.write(o + " ") + file.write("\n\n") + + file.write("analyze command:\n") + for c in analyze_command: + file.write(c + " ") + file.write("\n\n") + + file.write("Commit hash:\n" + commit_hash + "\n") file.close() script_path = os.path.abspath(os.path.dirname(__file__)) + # Prepare the repo prepare_command = ['sh', os.path.join(script_path, build_compdb)] with open(os.path.join(outdir, preparelog), "w+") as outfile: subprocess.run(prepare_command, cwd = gr.path, check=True, stdout=outfile, stderr=subprocess.STDOUT) outfile.close() - file_list = [] - if files: - def append_to_repo_path(file): - return os.path.join(repo_path, file) - file_list = 
list(map(append_to_repo_path, files)) - - analyze_command = [os.path.join(analyzer_dir, 'goblint'), '--conf', os.path.join(analyzer_dir, 'conf', conf + '.json'), *file_list, *extra_options] - - # If the list of files was empty, we pass the repo_path to goblint - if not files: - analyze_command.append(repo_path) - + # Run the analysis with open(os.path.join(outdir, analyzerlog), "w+") as outfile: subprocess.run(analyze_command, check=True, stdout=outfile, stderr=subprocess.STDOUT) outfile.close() From a248b6bd2f29c521c1037321befdf25ffbd3ed99 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Fri, 7 Oct 2022 10:01:41 +0200 Subject: [PATCH 14/84] Add -v option to analyze commands in efficiency script. --- scripts/incremental/benchmarking/efficiency.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/incremental/benchmarking/efficiency.py b/scripts/incremental/benchmarking/efficiency.py index 7c63aa839..8c18e92c6 100644 --- a/scripts/incremental/benchmarking/efficiency.py +++ b/scripts/incremental/benchmarking/efficiency.py @@ -108,25 +108,27 @@ def analyze_small_commits_in_repo(cwd, outdir, from_c, to_c): outparent = os.path.join(outtry, 'parent') os.makedirs(outparent) - add_options = ['--disable', 'incremental.load', '--enable', 'incremental.save'] + default_options = ['-v'] + + add_options = default_options + ['--disable', 'incremental.load', '--enable', 'incremental.save'] utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, parent.hash, outparent, conf_base, add_options, files) #print('And now analyze', str(commit.hash), 'incrementally.') outchild = os.path.join(outtry, 'child') os.makedirs(outchild) - add_options = ['--enable', 'incremental.load', '--disable', 'incremental.save'] + add_options = default_options + ['--enable', 'incremental.load', '--disable', 'incremental.save'] utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchild, conf_base, add_options, files) #print('And again 
incremental, this time with incremental postsolver') outchildincrpost = os.path.join(outtry, 'child-incr-post') os.makedirs(outchildincrpost) - add_options = ['--enable', 'incremental.load', '--disable', 'incremental.save'] + add_options = default_options + ['--enable', 'incremental.load', '--disable', 'incremental.save'] utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchildincrpost, conf_incrpost, add_options, files) #print('And again incremental, this time with incremental postsolver and reluctant') outchildrel = os.path.join(outtry, 'child-rel') os.makedirs(outchildrel) - add_options = ['--enable', 'incremental.load', '--disable', 'incremental.save', '--enable', 'incremental.reluctant.enabled'] + add_options = default_options + ['--enable', 'incremental.load', '--disable', 'incremental.save', '--enable', 'incremental.reluctant.enabled'] utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchildrel, conf_incrpost, add_options, files) count_analyzed+=1 From 6d1a67d8356192355ead24ec3d2c47cc7d89be77 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Mon, 28 Nov 2022 22:06:15 +0100 Subject: [PATCH 15/84] Add figlet as project. Not completely working yet, as conflicting flags are collected in the compile_commands file for figlet. 
--- scripts/incremental/benchmarking/projects.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/scripts/incremental/benchmarking/projects.py b/scripts/incremental/benchmarking/projects.py index d2efffefe..9c79dd5af 100644 --- a/scripts/incremental/benchmarking/projects.py +++ b/scripts/incremental/benchmarking/projects.py @@ -40,7 +40,7 @@ def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin url = "https://github.com/facebook/zstd", repo_name = "zstd", build_compdb = "../build/build_compdb_zstd.sh", - conf_base = os.path.join("custom", "zstd-race-baseline"), # very minimal: "zstd-minimal" + conf_base = os.path.join("custom", "zstd-race"), # very minimal: "zstd-minimal" conf_incrpost = os.path.join("custom", "zstd-race-incrpostsolver"), begin = datetime.datetime(2021,8,1), # very minimal: "zstd-minimal" to = datetime.datetime(2021,10,10), # minimal subset: datetime(2021,8,4) @@ -48,4 +48,16 @@ def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin files = None ) -projects = {"sqlite": sqlite, "zstd": zstd} +figlet = ProjectConfig( + url = "https://github.com/cmatsuoka/figlet", + repo_name = "figlet", + build_compdb = "../build/build_compdb_figlet.sh", + conf_base = os.path.join("custom", "figlet"), + conf_incrpost = os.path.join("custom", "figlet-incrpostsolver"), + begin = datetime.datetime(2010,1,1), + to = datetime.datetime(2022,10,10), + diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"], + files = None +) + +projects = {"sqlite": sqlite, "zstd": zstd, "figlet": figlet} From 595de4cd7b28dbc01b0c0a300e3eedaf12ee5b02 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Tue, 29 Nov 2022 14:23:05 +0100 Subject: [PATCH 16/84] Adapt regular expression to extract runtime from output. 
--- scripts/incremental/benchmarking/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index 539abc301..f8d2908f4 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -122,7 +122,7 @@ def find_line(pattern, log): return None def extract_from_analyzer_log(log): - runtime_pattern = 'TOTAL[ ]+(?P[0-9\.]+) s' + runtime_pattern = 'Default[ ]+(?P[0-9\.]+)s' change_info_pattern = 'change_info = { unchanged = (?P[0-9]*); changed = (?P[0-9]*); added = (?P[0-9]*); removed = (?P[0-9]*) }' r = find_line(runtime_pattern, log) ch = find_line(change_info_pattern, log) or {"unchanged": 0, "changed": 0, "added": 0, "removed": 0} From 8e5888f57919afadfd01c551fc152f546142b044 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Tue, 29 Nov 2022 14:24:32 +0100 Subject: [PATCH 17/84] incremental benchmarking: Use Makefile instead of compiledb for figlet. 
--- scripts/incremental/benchmarking/projects.py | 6 +++--- scripts/incremental/benchmarking/utils.py | 9 +++++---- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/scripts/incremental/benchmarking/projects.py b/scripts/incremental/benchmarking/projects.py index 9c79dd5af..3cc8b180a 100644 --- a/scripts/incremental/benchmarking/projects.py +++ b/scripts/incremental/benchmarking/projects.py @@ -51,13 +51,13 @@ def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin figlet = ProjectConfig( url = "https://github.com/cmatsuoka/figlet", repo_name = "figlet", - build_compdb = "../build/build_compdb_figlet.sh", + build_compdb = None, conf_base = os.path.join("custom", "figlet"), conf_incrpost = os.path.join("custom", "figlet-incrpostsolver"), begin = datetime.datetime(2010,1,1), to = datetime.datetime(2022,10,10), - diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"], - files = None + diff_exclude = [], + files = ['Makefile'] ) projects = {"sqlite": sqlite, "zstd": zstd, "figlet": figlet} diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index f8d2908f4..a9d320d34 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -78,10 +78,11 @@ def append_to_repo_path(file): script_path = os.path.abspath(os.path.dirname(__file__)) # Prepare the repo - prepare_command = ['sh', os.path.join(script_path, build_compdb)] - with open(os.path.join(outdir, preparelog), "w+") as outfile: - subprocess.run(prepare_command, cwd = gr.path, check=True, stdout=outfile, stderr=subprocess.STDOUT) - outfile.close() + if build_compdb != None: + prepare_command = ['sh', os.path.join(script_path, build_compdb)] + with open(os.path.join(outdir, preparelog), "w+") as outfile: + subprocess.run(prepare_command, cwd = gr.path, check=True, stdout=outfile, stderr=subprocess.STDOUT) + outfile.close() # Run the analysis with 
open(os.path.join(outdir, analyzerlog), "w+") as outfile: From 8bcc304225e13ebc0f77f8d833ba9338f5e4427d Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Tue, 29 Nov 2022 18:31:20 +0100 Subject: [PATCH 18/84] Add build_compdb_figlet.sh --- scripts/incremental/build/build_compdb_figlet.sh | 2 ++ 1 file changed, 2 insertions(+) create mode 100755 scripts/incremental/build/build_compdb_figlet.sh diff --git a/scripts/incremental/build/build_compdb_figlet.sh b/scripts/incremental/build/build_compdb_figlet.sh new file mode 100755 index 000000000..7dc7672cc --- /dev/null +++ b/scripts/incremental/build/build_compdb_figlet.sh @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +# do nothing From 97ea54ef65b4ae4b8ec3ed4f406cc15796ca246b Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Tue, 29 Nov 2022 18:52:36 +0100 Subject: [PATCH 19/84] Add preliminary setup for chrony incremental benchmarks. Make does not succeed yet, so this requires further adaptation. --- scripts/incremental/benchmarking/projects.py | 14 +++++++++++++- scripts/incremental/build/build_compdb_chrony.sh | 6 ++++++ 2 files changed, 19 insertions(+), 1 deletion(-) create mode 100755 scripts/incremental/build/build_compdb_chrony.sh diff --git a/scripts/incremental/benchmarking/projects.py b/scripts/incremental/benchmarking/projects.py index 3cc8b180a..448260fa1 100644 --- a/scripts/incremental/benchmarking/projects.py +++ b/scripts/incremental/benchmarking/projects.py @@ -60,4 +60,16 @@ def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin files = ['Makefile'] ) -projects = {"sqlite": sqlite, "zstd": zstd, "figlet": figlet} +chrony = ProjectConfig( + url = "https://git.tuxfamily.org/chrony/chrony.git", + repo_name = "chrony", + build_compdb = "../build/build_compdb_chrony.sh", + conf_base = os.path.join("custom", "figlet"), + conf_incrpost = os.path.join("custom", "figlet-incrpostsolver"), + begin = datetime.datetime(2010,1,1), + to = datetime.datetime(2022,10,10), + diff_exclude = [], + files 
= ['Makefile'] +) + +projects = {"sqlite": sqlite, "chrony": chrony, "figlet": figlet, "zstd": zstd} diff --git a/scripts/incremental/build/build_compdb_chrony.sh b/scripts/incremental/build/build_compdb_chrony.sh new file mode 100755 index 000000000..9edaf9ab8 --- /dev/null +++ b/scripts/incremental/build/build_compdb_chrony.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash +# git clean -fdx +# ./configure +# make -j 1 chronyd | tee build.log +# compiledb --parse build.log +./configure && bear -- make chronyd From 587a2dd47f2291ebfc52a7d1d17168be856f67df Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Tue, 29 Nov 2022 18:56:51 +0100 Subject: [PATCH 20/84] Reset time range of considered commits for zstd. Resets the range of considered commits to be the same as originally in analyzer/#778. --- scripts/incremental/benchmarking/projects.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/incremental/benchmarking/projects.py b/scripts/incremental/benchmarking/projects.py index 3cc8b180a..48953eeaf 100644 --- a/scripts/incremental/benchmarking/projects.py +++ b/scripts/incremental/benchmarking/projects.py @@ -43,7 +43,7 @@ def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin conf_base = os.path.join("custom", "zstd-race"), # very minimal: "zstd-minimal" conf_incrpost = os.path.join("custom", "zstd-race-incrpostsolver"), begin = datetime.datetime(2021,8,1), # very minimal: "zstd-minimal" - to = datetime.datetime(2021,10,10), # minimal subset: datetime(2021,8,4) + to = datetime.datetime(2022,2,1), # minimal subset: datetime(2021,8,4) diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"], files = None ) From 0345ce7123ce5aa890b60280721ff99dc275343b Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Tue, 29 Nov 2022 22:26:02 +0100 Subject: [PATCH 21/84] Update build script for chrony. 
--- scripts/incremental/build/build_compdb_chrony.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/incremental/build/build_compdb_chrony.sh b/scripts/incremental/build/build_compdb_chrony.sh index 9edaf9ab8..0ba4a563c 100755 --- a/scripts/incremental/build/build_compdb_chrony.sh +++ b/scripts/incremental/build/build_compdb_chrony.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -# git clean -fdx -# ./configure -# make -j 1 chronyd | tee build.log -# compiledb --parse build.log -./configure && bear -- make chronyd +git clean -fdx +./configure +make -j 1 chronyd | tee build.log +compiledb --parse build.log +# ./configure && bear -- make chronyd From c943f9f8e65f7bfa08187e2512507787ac40fc69 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Tue, 29 Nov 2022 22:28:06 +0100 Subject: [PATCH 22/84] Fix project configuration for chrony to not provide files (but use compiledb). --- scripts/incremental/benchmarking/projects.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/scripts/incremental/benchmarking/projects.py b/scripts/incremental/benchmarking/projects.py index 448260fa1..d2e80b8ee 100644 --- a/scripts/incremental/benchmarking/projects.py +++ b/scripts/incremental/benchmarking/projects.py @@ -61,15 +61,15 @@ def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin ) chrony = ProjectConfig( - url = "https://git.tuxfamily.org/chrony/chrony.git", - repo_name = "chrony", - build_compdb = "../build/build_compdb_chrony.sh", - conf_base = os.path.join("custom", "figlet"), - conf_incrpost = os.path.join("custom", "figlet-incrpostsolver"), - begin = datetime.datetime(2010,1,1), - to = datetime.datetime(2022,10,10), - diff_exclude = [], - files = ['Makefile'] + url="https://git.tuxfamily.org/chrony/chrony.git", + repo_name="chrony", + build_compdb="../build/build_compdb_chrony.sh", + conf_base=os.path.join("custom", "figlet"), + conf_incrpost=os.path.join("custom", 
"figlet-incrpostsolver"), + begin=datetime.datetime(2020, 1, 1), + to=datetime.datetime(2022, 10, 10), + diff_exclude=[], + files=None ) projects = {"sqlite": sqlite, "chrony": chrony, "figlet": figlet, "zstd": zstd} From 2639efb0e808b9a2417d291e8a7c1f0ad209d668 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Wed, 30 Nov 2022 11:27:51 +0100 Subject: [PATCH 23/84] Use github mirror for chrony.. The repo at tuxfamily does not allow for multiple connections/parallel clones. --- scripts/incremental/benchmarking/projects.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/incremental/benchmarking/projects.py b/scripts/incremental/benchmarking/projects.py index ac0495adf..fdcf082d6 100644 --- a/scripts/incremental/benchmarking/projects.py +++ b/scripts/incremental/benchmarking/projects.py @@ -61,7 +61,9 @@ def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin ) chrony = ProjectConfig( - url="https://git.tuxfamily.org/chrony/chrony.git", + # Official repo is at https://git.tuxfamily.org/chrony/chrony.git, + # but does not allow multiple parallel clones. So use mirror on GitHub. + url="https://github.com/mlichvar/chrony.git", repo_name="chrony", build_compdb="../build/build_compdb_chrony.sh", conf_base=os.path.join("custom", "figlet"), From 1eb1a8288b50aaef20aeab7bfc891f62451abfdb Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Fri, 9 Dec 2022 11:05:52 +0100 Subject: [PATCH 24/84] Plot.py: Create cummulative graphs again, combining results from 3 configs in one graph. Comments out the creation using "paper_efficiency_graphs" and "paper_precision_graph". . 
--- scripts/incremental/benchmarking/plot.py | 30 ++++++++++++++--------- scripts/incremental/benchmarking/utils.py | 4 ++- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index 4720fd56a..d062314d0 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -2,32 +2,35 @@ import os import shutil -def cummulative_distr_compare2(outdir, result_csv_filename): +def cummulative_distr_compare2(results_dir, result_csv_filename, figure_dir): num_bins = 2000 outfile_nonincr_vs_incr = "figure_cum_distr_incr.pdf" outfile_incr_vs_incrrel = "figure_cum_distr_rel.pdf" - df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) + df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), filterDetectedChanges=True) - data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_parent, utils.header_runtime_incr_child]) + data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_parent, utils.header_runtime_incr_child, utils.header_runtime_incr_posts_rel_child]) datanonincr = {"values": data[0], "label": "Non-incremental analysis of parent commit"} dataincr = {"values": data[1], "label": "Incremental analysis of commit"} - utils.cummulative_distr_plot([datanonincr, dataincr], base, outfile_nonincr_vs_incr) + datarelincr = {"values": data[2], "label": "Reluctant incremental analysis of commit"} + + utils.cummulative_distr_plot([datanonincr, dataincr, datarelincr], base, figure_dir, outfile_nonincr_vs_incr) data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_incr_child, utils.header_runtime_incr_posts_rel_child]) dataincr = {"values": data[0], "label": "Incremental analysis of commit"} datarelincr = {"values": data[1], "label": "Reluctant incremental analysis of commit"} - utils.cummulative_distr_plot([dataincr, datarelincr], 
base, outfile_incr_vs_incrrel, logscale=True) -def cummulative_distr_all3(outdir, result_csv_filename): + utils.cummulative_distr_plot([dataincr, datarelincr], base, figure_dir, outfile_incr_vs_incrrel, logscale=True) + +def cummulative_distr_all3(results_dir, result_csv_filename, figure_dir): num_bins = 2000 outfile_nonincr_vs_incr = "figure_cum_distr_all3.pdf" - df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) + df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), filterDetectedChanges=True) data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_parent, utils.header_runtime_incr_child, utils.header_runtime_incr_posts_rel_child]) datanonincr = {"values": data[0], "label": "Non-incremental analysis of parent commit"} dataincr = {"values": data[1], "label": "Incremental analysis of commit"} datarelincr = {"values": data[2], "label": "Reluctant incremental analysis of commit"} - utils.cummulative_distr_plot([datanonincr, dataincr, datarelincr], base, outfile_nonincr_vs_incr, figsize=(6,4), logscale=True) + utils.cummulative_distr_plot([datanonincr, dataincr, datarelincr], base, figure_dir, outfile_nonincr_vs_incr, figsize=(6,4), logscale=True) def distribution_absdiff_plot(title, result_csv_filename, outdir, cutoffs_incr=None, cutoffs_rel=None): df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) @@ -135,10 +138,13 @@ def paper_precision_graph(results_precision, filename, outdir): shutil.rmtree(outdir) os.mkdir(outdir) filename = "total_results.csv" -paper_efficiency_graphs(results_efficiency, filename, outdir, filterRelCLOC=True, filterDetectedChanges=False) +# paper_efficiency_graphs(results_efficiency, filename, outdir, filterRelCLOC=True, filterDetectedChanges=False) + +cummulative_distr_compare2(results_efficiency, filename, outdir) +cummulative_distr_all3(results_efficiency, filename, outdir) # precision 
plot -results_precision = "result_precision" -filename = "results.json" -paper_precision_graph(results_precision, filename, outdir) +# results_precision = "result_precision" +# filename = "results.json" +# paper_precision_graph(results_precision, filename, outdir) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index a9d320d34..c417fe1ef 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -184,7 +184,7 @@ def create_cum_data(dataFrame, num_bins, relColumns): data = data + [cum] return data, base[:-1] -def cummulative_distr_plot(data_sets, base, outfile, figsize=None, title=None, logscale=False): +def cummulative_distr_plot(data_sets, base, figure_dir, outfile, figsize=None, title=None, logscale=False): if figsize: plt.figure(figsize=figsize) else: @@ -204,6 +204,8 @@ def cummulative_distr_plot(data_sets, base, outfile, figsize=None, title=None, l plt.tight_layout() plt.legend() plt.title(title) + + outfile = os.path.join(figure_dir, outfile) plt.savefig(outfile) def hist_plot(data, step, title, xlabel, ylabel, outfile, size, xlim_left=None, xlim_right=None, cutoffs=None): From 3d7620a9ab20feb2a89f16622b4249f4b4ec55be Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Fri, 9 Dec 2022 11:22:25 +0100 Subject: [PATCH 25/84] Plot.py: Iterate over folders of form result_efficiency_[project], for given projects. 
--- scripts/incremental/benchmarking/plot.py | 50 ++++++++++++++++-------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index d062314d0..797653d50 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -30,7 +30,7 @@ def cummulative_distr_all3(results_dir, result_csv_filename, figure_dir): datanonincr = {"values": data[0], "label": "Non-incremental analysis of parent commit"} dataincr = {"values": data[1], "label": "Incremental analysis of commit"} datarelincr = {"values": data[2], "label": "Reluctant incremental analysis of commit"} - utils.cummulative_distr_plot([datanonincr, dataincr, datarelincr], base, figure_dir, outfile_nonincr_vs_incr, figsize=(6,4), logscale=True) + utils.cummulative_distr_plot([datanonincr, dataincr, datarelincr], base, figure_dir, outfile_nonincr_vs_incr, figsize=(6,4), logscale=False) def distribution_absdiff_plot(title, result_csv_filename, outdir, cutoffs_incr=None, cutoffs_rel=None): df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) @@ -132,19 +132,35 @@ def paper_precision_graph(results_precision, filename, outdir): # efficiency plots -results_efficiency = "result_efficiency" -outdir = "figures" -if os.path.exists(outdir): - shutil.rmtree(outdir) -os.mkdir(outdir) -filename = "total_results.csv" - -# paper_efficiency_graphs(results_efficiency, filename, outdir, filterRelCLOC=True, filterDetectedChanges=False) - -cummulative_distr_compare2(results_efficiency, filename, outdir) -cummulative_distr_all3(results_efficiency, filename, outdir) - -# precision plot -# results_precision = "result_precision" -# filename = "results.json" -# paper_precision_graph(results_precision, filename, outdir) + +def main(): + projects = ["figlet", "chrony", "zstd"] + results_efficiency = "result_efficiency_" + + for project in projects: + 
project_efficiency_results = results_efficiency + project + + if not os.path.exists(project_efficiency_results): + print("Results for project " + project + " do not exist. Skipping.") + continue + else: + print("Creating plots for project " + project + ".") + + outdir = os.path.join("figures", project) + if os.path.exists(outdir): + shutil.rmtree(outdir) + os.makedirs(outdir) + filename = "total_results.csv" + + cummulative_distr_compare2(project_efficiency_results, filename, outdir) + cummulative_distr_all3(project_efficiency_results, filename, outdir) + + # paper_efficiency_graphs(results_efficiency, filename, outdir, filterRelCLOC=True, filterDetectedChanges=False) + + + # precision plot + # results_precision = "result_precision" + # filename = "results.json" + # paper_precision_graph(results_precision, filename, outdir) + +main() \ No newline at end of file From 33b687fbdb4809fc2a055e02b39230f8544713c6 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Fri, 9 Dec 2022 11:26:45 +0100 Subject: [PATCH 26/84] Change cummulative_distr_compare2 to again only create comparisons between 2 configs. 
--- scripts/incremental/benchmarking/plot.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index 797653d50..0b905a0bd 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -11,9 +11,8 @@ def cummulative_distr_compare2(results_dir, result_csv_filename, figure_dir): data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_parent, utils.header_runtime_incr_child, utils.header_runtime_incr_posts_rel_child]) datanonincr = {"values": data[0], "label": "Non-incremental analysis of parent commit"} dataincr = {"values": data[1], "label": "Incremental analysis of commit"} - datarelincr = {"values": data[2], "label": "Reluctant incremental analysis of commit"} - utils.cummulative_distr_plot([datanonincr, dataincr, datarelincr], base, figure_dir, outfile_nonincr_vs_incr) + utils.cummulative_distr_plot([datanonincr, dataincr], base, figure_dir, outfile_nonincr_vs_incr) data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_incr_child, utils.header_runtime_incr_posts_rel_child]) dataincr = {"values": data[0], "label": "Incremental analysis of commit"} @@ -157,7 +156,6 @@ def main(): # paper_efficiency_graphs(results_efficiency, filename, outdir, filterRelCLOC=True, filterDetectedChanges=False) - # precision plot # results_precision = "result_precision" # filename = "results.json" From 1d1fc2a1db7c947d7db1b1eaca5c71d570d06449 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Fri, 9 Dec 2022 12:15:58 +0100 Subject: [PATCH 27/84] Logarithmic plots: choose y-min depending on minimum of plotted data (rather than a constant). 
--- scripts/incremental/benchmarking/utils.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index c417fe1ef..0db9dd001 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -1,4 +1,6 @@ import os +import sys +import math import shutil from pathlib import Path import subprocess @@ -184,12 +186,21 @@ def create_cum_data(dataFrame, num_bins, relColumns): data = data + [cum] return data, base[:-1] +def largest_power_of_two_smaller(x): + p = math.floor(math.log2(x)) - 1 + p = max(1, p) + return 2 ** p + def cummulative_distr_plot(data_sets, base, figure_dir, outfile, figsize=None, title=None, logscale=False): if figsize: plt.figure(figsize=figsize) else: plt.figure() + min = sys.maxsize for d in data_sets: + min_d = d["values"].min() + if min_d < min: + min = min_d plt.plot(d["values"], base, label=d["label"]) plt.xlabel('Number of Commits') if logscale: @@ -197,7 +208,7 @@ def cummulative_distr_plot(data_sets, base, figure_dir, outfile, figsize=None, t plt.yscale('log', base=2) plt.gca().yaxis.set_major_formatter(ScalarFormatter()) plt.xlim(left=0) - plt.ylim(bottom=95) + plt.ylim(bottom=largest_power_of_two_smaller(min)) #plt.yticks(np.arange(100,1500,100)) else: plt.ylabel('Runtime in s') From 00944abc0133159b302f04ffe89c035ffb2f40ce Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Fri, 9 Dec 2022 12:16:17 +0100 Subject: [PATCH 28/84] Plot 3-way comparison again logarithmically. 
--- scripts/incremental/benchmarking/plot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index 0b905a0bd..eabc2e768 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -29,7 +29,7 @@ def cummulative_distr_all3(results_dir, result_csv_filename, figure_dir): datanonincr = {"values": data[0], "label": "Non-incremental analysis of parent commit"} dataincr = {"values": data[1], "label": "Incremental analysis of commit"} datarelincr = {"values": data[2], "label": "Reluctant incremental analysis of commit"} - utils.cummulative_distr_plot([datanonincr, dataincr, datarelincr], base, figure_dir, outfile_nonincr_vs_incr, figsize=(6,4), logscale=False) + utils.cummulative_distr_plot([datanonincr, dataincr, datarelincr], base, figure_dir, outfile_nonincr_vs_incr, figsize=(6,4), logscale=True) def distribution_absdiff_plot(title, result_csv_filename, outdir, cutoffs_incr=None, cutoffs_rel=None): df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) From 527398497e3999cfc75498f1414ab39e85b8a4db Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Mon, 12 Dec 2022 10:51:17 +0100 Subject: [PATCH 29/84] Avoid artefact in cummulative graph. --- scripts/incremental/benchmarking/utils.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index 0db9dd001..1b42d80de 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -183,6 +183,17 @@ def create_cum_data(dataFrame, num_bins, relColumns): base = basec cum = np.cumsum(valuesc, dtype=np.float) cum[cum==0] = np.nan + + # If there is a tail of values that are the same, set the ones after its first occurrence to NaN. 
+ # In the resulting graph, this avoids the artefact that all the lines go up to the largest y-value of any line. + last = len(cum) - 1 + last_value = cum[last] + for i in range(last - 1 , 0, -1): + if cum[i] == last_value: + cum[i + 1] = np.nan + else: + break + data = data + [cum] return data, base[:-1] From 533608520bbe6ac6e5ca7f54cdd507fa4b143fa1 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Mon, 12 Dec 2022 11:22:30 +0100 Subject: [PATCH 30/84] Plot incremental analysis with incremental postsolver separately. --- scripts/incremental/benchmarking/plot.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index eabc2e768..a41d0ee67 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -2,6 +2,11 @@ import os import shutil +description_non_incr = "Non-incremental analysis" +description_incr = "Incremental analysis" +description_incr_post = "Incremental analysis with incremental postsolver" +description_incr_rel ="Reluctant incremental analysis with incremental postsolver" + def cummulative_distr_compare2(results_dir, result_csv_filename, figure_dir): num_bins = 2000 outfile_nonincr_vs_incr = "figure_cum_distr_incr.pdf" @@ -9,14 +14,14 @@ def cummulative_distr_compare2(results_dir, result_csv_filename, figure_dir): df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), filterDetectedChanges=True) data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_parent, utils.header_runtime_incr_child, utils.header_runtime_incr_posts_rel_child]) - datanonincr = {"values": data[0], "label": "Non-incremental analysis of parent commit"} - dataincr = {"values": data[1], "label": "Incremental analysis of commit"} + datanonincr = {"values": data[0], "label": description_non_incr} + dataincr = {"values": data[1], "label": description_incr} 
utils.cummulative_distr_plot([datanonincr, dataincr], base, figure_dir, outfile_nonincr_vs_incr) data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_incr_child, utils.header_runtime_incr_posts_rel_child]) - dataincr = {"values": data[0], "label": "Incremental analysis of commit"} - datarelincr = {"values": data[1], "label": "Reluctant incremental analysis of commit"} + dataincr = {"values": data[0], "label": description_incr} + datarelincr = {"values": data[1], "label": description_incr_rel} utils.cummulative_distr_plot([dataincr, datarelincr], base, figure_dir, outfile_incr_vs_incrrel, logscale=True) @@ -25,11 +30,12 @@ def cummulative_distr_all3(results_dir, result_csv_filename, figure_dir): outfile_nonincr_vs_incr = "figure_cum_distr_all3.pdf" df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), filterDetectedChanges=True) - data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_parent, utils.header_runtime_incr_child, utils.header_runtime_incr_posts_rel_child]) - datanonincr = {"values": data[0], "label": "Non-incremental analysis of parent commit"} - dataincr = {"values": data[1], "label": "Incremental analysis of commit"} - datarelincr = {"values": data[2], "label": "Reluctant incremental analysis of commit"} - utils.cummulative_distr_plot([datanonincr, dataincr, datarelincr], base, figure_dir, outfile_nonincr_vs_incr, figsize=(6,4), logscale=True) + data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_parent, utils.header_runtime_incr_child, utils.header_runtime_incr_posts_child, utils.header_runtime_incr_posts_rel_child]) + data_non_incr = {"values": data[0], "label": description_non_incr} + data_incr = {"values": data[1], "label": description_incr} + data_incr_post = {"values": data[2], "label": description_incr_post} + data_incr_rel = {"values": data[3], "label": description_incr_rel} + utils.cummulative_distr_plot([data_non_incr, data_incr, data_incr_post, data_incr_rel], 
base, figure_dir, outfile_nonincr_vs_incr, figsize=(6,4), logscale=True) def distribution_absdiff_plot(title, result_csv_filename, outdir, cutoffs_incr=None, cutoffs_rel=None): df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) From 36bf6b33171ee66374b5da4fde1a7f1c01caaa7d Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Mon, 12 Dec 2022 16:07:01 +0100 Subject: [PATCH 31/84] Rename function cummulative_distr_all3 -> cummulative_distr_all4 --- scripts/incremental/benchmarking/plot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index a41d0ee67..ab53a7bd2 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -25,7 +25,7 @@ def cummulative_distr_compare2(results_dir, result_csv_filename, figure_dir): utils.cummulative_distr_plot([dataincr, datarelincr], base, figure_dir, outfile_incr_vs_incrrel, logscale=True) -def cummulative_distr_all3(results_dir, result_csv_filename, figure_dir): +def cummulative_distr_all4(results_dir, result_csv_filename, figure_dir): num_bins = 2000 outfile_nonincr_vs_incr = "figure_cum_distr_all3.pdf" df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), filterDetectedChanges=True) @@ -158,7 +158,7 @@ def main(): filename = "total_results.csv" cummulative_distr_compare2(project_efficiency_results, filename, outdir) - cummulative_distr_all3(project_efficiency_results, filename, outdir) + cummulative_distr_all4(project_efficiency_results, filename, outdir) # paper_efficiency_graphs(results_efficiency, filename, outdir, filterRelCLOC=True, filterDetectedChanges=False) From 800db88a11cc2ae52bdad3714a1ce0dc6dc525e0 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Mon, 12 Dec 2022 16:21:47 +0100 Subject: [PATCH 32/84] Change output for .pdf files to .pgf. 
--- scripts/incremental/benchmarking/plot.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index ab53a7bd2..a50bac0ed 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -9,8 +9,8 @@ def cummulative_distr_compare2(results_dir, result_csv_filename, figure_dir): num_bins = 2000 - outfile_nonincr_vs_incr = "figure_cum_distr_incr.pdf" - outfile_incr_vs_incrrel = "figure_cum_distr_rel.pdf" + outfile_nonincr_vs_incr = "figure_cum_distr_incr.pgf" + outfile_incr_vs_incrrel = "figure_cum_distr_rel.pgf" df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), filterDetectedChanges=True) data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_parent, utils.header_runtime_incr_child, utils.header_runtime_incr_posts_rel_child]) @@ -27,7 +27,7 @@ def cummulative_distr_compare2(results_dir, result_csv_filename, figure_dir): def cummulative_distr_all4(results_dir, result_csv_filename, figure_dir): num_bins = 2000 - outfile_nonincr_vs_incr = "figure_cum_distr_all3.pdf" + outfile_nonincr_vs_incr = "figure_cum_distr_all3.pgf" df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), filterDetectedChanges=True) data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_parent, utils.header_runtime_incr_child, utils.header_runtime_incr_posts_child, utils.header_runtime_incr_posts_rel_child]) @@ -42,11 +42,11 @@ def distribution_absdiff_plot(title, result_csv_filename, outdir, cutoffs_incr=N # plot incremental vs non-incremental diff = df.loc[:,utils.header_runtime_parent] - df.loc[:,utils.header_runtime_incr_child] - utils.hist_plot(diff, 20, title, 'Improvement in s (incremental compared to non-incremental)', 'Number of Commits', os.path.join(outdir, "figure_absdiff_distr_incr.pdf"), cutoffs_incr) + utils.hist_plot(diff, 20, title, 
'Improvement in s (incremental compared to non-incremental)', 'Number of Commits', os.path.join(outdir, "figure_absdiff_distr_incr.pgf"), cutoffs_incr) # plot reluctant vs. basic incremental diff = df.loc[:,utils.header_runtime_incr_child] - df.loc[:,utils.header_runtime_incr_posts_rel_child] - utils.hist_plot(diff, 2, title, 'Improvement in s (reluctant compared to incremental)', 'Number of Commits', os.path.join(outdir, "figure_absdiff_distr_rel.pdf"), cutoffs_rel) + utils.hist_plot(diff, 2, title, 'Improvement in s (reluctant compared to incremental)', 'Number of Commits', os.path.join(outdir, "figure_absdiff_distr_rel.pgf"), cutoffs_rel) def distribution_reldiff_plot(title, result_csv_filename, outdir, cutoffs_incr=None, cutoffs_rel=None): df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) @@ -54,11 +54,11 @@ def distribution_reldiff_plot(title, result_csv_filename, outdir, cutoffs_incr=N # plot incremental vs non-incremental print(df[utils.header_runtime_incr_child].astype('float')) diff = 1 - df[utils.header_runtime_incr_child].astype('float') / df[utils.header_runtime_parent].astype('float') - utils.hist_plot(diff, 0.01, title, "Relative Improvement in s (incremental compared to non-incremental)", 'Number of Commits', os.path.join(outdir, "figure_reldiff_distr_incr.pdf"), cutoffs_incr) + utils.hist_plot(diff, 0.01, title, "Relative Improvement in s (incremental compared to non-incremental)", 'Number of Commits', os.path.join(outdir, "figure_reldiff_distr_incr.pgf"), cutoffs_incr) # plot reluctant vs. 
basic incremental diff = 1 - df.loc[:,utils.header_runtime_incr_posts_rel_child] / df.loc[:,utils.header_runtime_incr_child] - utils.hist_plot(diff, 0.005, title, 'Relative Improvement (reluctant compared to incremental)', 'Number of Commits', os.path.join(outdir, "figure_reldiff_distr_rel.pdf"), cutoffs_rel) + utils.hist_plot(diff, 0.005, title, 'Relative Improvement (reluctant compared to incremental)', 'Number of Commits', os.path.join(outdir, "figure_reldiff_distr_rel.pgf"), cutoffs_rel) def paper_efficiency_graphs(dir_results, csv_filename, outdir, filterRelCLOC=False, filterDetectedChanges=False): df = utils.get_cleaned_filtered_data(os.path.join(dir_results,csv_filename), filterRelCLOC=filterRelCLOC, filterDetectedChanges=filterDetectedChanges) From 3beedd8026dbc1fae199be676003800d12b7b732 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Mon, 12 Dec 2022 16:54:11 +0100 Subject: [PATCH 33/84] Change figsize for cummulative_distr functions. --- scripts/incremental/benchmarking/plot.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index a50bac0ed..baf597b2f 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -2,10 +2,14 @@ import os import shutil -description_non_incr = "Non-incremental analysis" -description_incr = "Incremental analysis" -description_incr_post = "Incremental analysis with incremental postsolver" -description_incr_rel ="Reluctant incremental analysis with incremental postsolver" +description_non_incr = "Non-Inc" +description_incr = "Inc" +description_incr_post = "Inc-Post" +description_incr_rel ="Rel" + +# measures in inches +textwidth = 7 +figsize = (textwidth / 2.5, textwidth / 3) def cummulative_distr_compare2(results_dir, result_csv_filename, figure_dir): num_bins = 2000 @@ -17,13 +21,13 @@ def cummulative_distr_compare2(results_dir, result_csv_filename, 
figure_dir): datanonincr = {"values": data[0], "label": description_non_incr} dataincr = {"values": data[1], "label": description_incr} - utils.cummulative_distr_plot([datanonincr, dataincr], base, figure_dir, outfile_nonincr_vs_incr) + utils.cummulative_distr_plot([datanonincr, dataincr], base, figure_dir, outfile_nonincr_vs_incr, figsize = figsize) data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_incr_child, utils.header_runtime_incr_posts_rel_child]) dataincr = {"values": data[0], "label": description_incr} datarelincr = {"values": data[1], "label": description_incr_rel} - utils.cummulative_distr_plot([dataincr, datarelincr], base, figure_dir, outfile_incr_vs_incrrel, logscale=True) + utils.cummulative_distr_plot([dataincr, datarelincr], base, figure_dir, outfile_incr_vs_incrrel, logscale=True, figsize = figsize) def cummulative_distr_all4(results_dir, result_csv_filename, figure_dir): num_bins = 2000 @@ -35,7 +39,7 @@ def cummulative_distr_all4(results_dir, result_csv_filename, figure_dir): data_incr = {"values": data[1], "label": description_incr} data_incr_post = {"values": data[2], "label": description_incr_post} data_incr_rel = {"values": data[3], "label": description_incr_rel} - utils.cummulative_distr_plot([data_non_incr, data_incr, data_incr_post, data_incr_rel], base, figure_dir, outfile_nonincr_vs_incr, figsize=(6,4), logscale=True) + utils.cummulative_distr_plot([data_non_incr, data_incr, data_incr_post, data_incr_rel], base, figure_dir, outfile_nonincr_vs_incr, figsize = figsize, logscale=True) def distribution_absdiff_plot(title, result_csv_filename, outdir, cutoffs_incr=None, cutoffs_rel=None): df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) From 3c67b91b8ec43e737ccbba23b51f6789507b5c29 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Wed, 14 Dec 2022 13:50:37 +0100 Subject: [PATCH 34/84] Revert "Change figsize for cummulative_distr functions." 
This reverts commit 3beedd8026dbc1fae199be676003800d12b7b732. --- scripts/incremental/benchmarking/plot.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index baf597b2f..a50bac0ed 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -2,14 +2,10 @@ import os import shutil -description_non_incr = "Non-Inc" -description_incr = "Inc" -description_incr_post = "Inc-Post" -description_incr_rel ="Rel" - -# measures in inches -textwidth = 7 -figsize = (textwidth / 2.5, textwidth / 3) +description_non_incr = "Non-incremental analysis" +description_incr = "Incremental analysis" +description_incr_post = "Incremental analysis with incremental postsolver" +description_incr_rel ="Reluctant incremental analysis with incremental postsolver" def cummulative_distr_compare2(results_dir, result_csv_filename, figure_dir): num_bins = 2000 @@ -21,13 +17,13 @@ def cummulative_distr_compare2(results_dir, result_csv_filename, figure_dir): datanonincr = {"values": data[0], "label": description_non_incr} dataincr = {"values": data[1], "label": description_incr} - utils.cummulative_distr_plot([datanonincr, dataincr], base, figure_dir, outfile_nonincr_vs_incr, figsize = figsize) + utils.cummulative_distr_plot([datanonincr, dataincr], base, figure_dir, outfile_nonincr_vs_incr) data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_incr_child, utils.header_runtime_incr_posts_rel_child]) dataincr = {"values": data[0], "label": description_incr} datarelincr = {"values": data[1], "label": description_incr_rel} - utils.cummulative_distr_plot([dataincr, datarelincr], base, figure_dir, outfile_incr_vs_incrrel, logscale=True, figsize = figsize) + utils.cummulative_distr_plot([dataincr, datarelincr], base, figure_dir, outfile_incr_vs_incrrel, logscale=True) def cummulative_distr_all4(results_dir, result_csv_filename, 
figure_dir): num_bins = 2000 @@ -39,7 +35,7 @@ def cummulative_distr_all4(results_dir, result_csv_filename, figure_dir): data_incr = {"values": data[1], "label": description_incr} data_incr_post = {"values": data[2], "label": description_incr_post} data_incr_rel = {"values": data[3], "label": description_incr_rel} - utils.cummulative_distr_plot([data_non_incr, data_incr, data_incr_post, data_incr_rel], base, figure_dir, outfile_nonincr_vs_incr, figsize = figsize, logscale=True) + utils.cummulative_distr_plot([data_non_incr, data_incr, data_incr_post, data_incr_rel], base, figure_dir, outfile_nonincr_vs_incr, figsize=(6,4), logscale=True) def distribution_absdiff_plot(title, result_csv_filename, outdir, cutoffs_incr=None, cutoffs_rel=None): df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) From 162e7436c738df45c8f850f4c5c7863840534830 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Wed, 14 Dec 2022 13:50:56 +0100 Subject: [PATCH 35/84] Revert "Change output for .pdf files to .pgf." This reverts commit 800db88a11cc2ae52bdad3714a1ce0dc6dc525e0. 
--- scripts/incremental/benchmarking/plot.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index a50bac0ed..ab53a7bd2 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -9,8 +9,8 @@ def cummulative_distr_compare2(results_dir, result_csv_filename, figure_dir): num_bins = 2000 - outfile_nonincr_vs_incr = "figure_cum_distr_incr.pgf" - outfile_incr_vs_incrrel = "figure_cum_distr_rel.pgf" + outfile_nonincr_vs_incr = "figure_cum_distr_incr.pdf" + outfile_incr_vs_incrrel = "figure_cum_distr_rel.pdf" df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), filterDetectedChanges=True) data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_parent, utils.header_runtime_incr_child, utils.header_runtime_incr_posts_rel_child]) @@ -27,7 +27,7 @@ def cummulative_distr_compare2(results_dir, result_csv_filename, figure_dir): def cummulative_distr_all4(results_dir, result_csv_filename, figure_dir): num_bins = 2000 - outfile_nonincr_vs_incr = "figure_cum_distr_all3.pgf" + outfile_nonincr_vs_incr = "figure_cum_distr_all3.pdf" df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), filterDetectedChanges=True) data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_parent, utils.header_runtime_incr_child, utils.header_runtime_incr_posts_child, utils.header_runtime_incr_posts_rel_child]) @@ -42,11 +42,11 @@ def distribution_absdiff_plot(title, result_csv_filename, outdir, cutoffs_incr=N # plot incremental vs non-incremental diff = df.loc[:,utils.header_runtime_parent] - df.loc[:,utils.header_runtime_incr_child] - utils.hist_plot(diff, 20, title, 'Improvement in s (incremental compared to non-incremental)', 'Number of Commits', os.path.join(outdir, "figure_absdiff_distr_incr.pgf"), cutoffs_incr) + utils.hist_plot(diff, 20, title, 
'Improvement in s (incremental compared to non-incremental)', 'Number of Commits', os.path.join(outdir, "figure_absdiff_distr_incr.pdf"), cutoffs_incr) # plot reluctant vs. basic incremental diff = df.loc[:,utils.header_runtime_incr_child] - df.loc[:,utils.header_runtime_incr_posts_rel_child] - utils.hist_plot(diff, 2, title, 'Improvement in s (reluctant compared to incremental)', 'Number of Commits', os.path.join(outdir, "figure_absdiff_distr_rel.pgf"), cutoffs_rel) + utils.hist_plot(diff, 2, title, 'Improvement in s (reluctant compared to incremental)', 'Number of Commits', os.path.join(outdir, "figure_absdiff_distr_rel.pdf"), cutoffs_rel) def distribution_reldiff_plot(title, result_csv_filename, outdir, cutoffs_incr=None, cutoffs_rel=None): df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) @@ -54,11 +54,11 @@ def distribution_reldiff_plot(title, result_csv_filename, outdir, cutoffs_incr=N # plot incremental vs non-incremental print(df[utils.header_runtime_incr_child].astype('float')) diff = 1 - df[utils.header_runtime_incr_child].astype('float') / df[utils.header_runtime_parent].astype('float') - utils.hist_plot(diff, 0.01, title, "Relative Improvement in s (incremental compared to non-incremental)", 'Number of Commits', os.path.join(outdir, "figure_reldiff_distr_incr.pgf"), cutoffs_incr) + utils.hist_plot(diff, 0.01, title, "Relative Improvement in s (incremental compared to non-incremental)", 'Number of Commits', os.path.join(outdir, "figure_reldiff_distr_incr.pdf"), cutoffs_incr) # plot reluctant vs. 
basic incremental diff = 1 - df.loc[:,utils.header_runtime_incr_posts_rel_child] / df.loc[:,utils.header_runtime_incr_child] - utils.hist_plot(diff, 0.005, title, 'Relative Improvement (reluctant compared to incremental)', 'Number of Commits', os.path.join(outdir, "figure_reldiff_distr_rel.pgf"), cutoffs_rel) + utils.hist_plot(diff, 0.005, title, 'Relative Improvement (reluctant compared to incremental)', 'Number of Commits', os.path.join(outdir, "figure_reldiff_distr_rel.pdf"), cutoffs_rel) def paper_efficiency_graphs(dir_results, csv_filename, outdir, filterRelCLOC=False, filterDetectedChanges=False): df = utils.get_cleaned_filtered_data(os.path.join(dir_results,csv_filename), filterRelCLOC=filterRelCLOC, filterDetectedChanges=filterDetectedChanges) From 57025e346c434cfdd7e94829e1c7fa6b7243b3f3 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 15 Dec 2022 11:24:21 +0100 Subject: [PATCH 36/84] Plot: Create precision plots again. --- scripts/incremental/benchmarking/plot.py | 39 +++++++++++++++--------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index ab53a7bd2..3d3a88797 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -141,30 +141,39 @@ def paper_precision_graph(results_precision, filename, outdir): def main(): projects = ["figlet", "chrony", "zstd"] results_efficiency = "result_efficiency_" + results_precision = "result_precision_" for project in projects: - project_efficiency_results = results_efficiency + project + efficiency_results = results_efficiency + project + precision_results = results_precision + project - if not os.path.exists(project_efficiency_results): + + if not (os.path.exists(efficiency_results) or os.path.exists(precision_results)): print("Results for project " + project + " do not exist. 
Skipping.") continue else: print("Creating plots for project " + project + ".") - outdir = os.path.join("figures", project) - if os.path.exists(outdir): - shutil.rmtree(outdir) - os.makedirs(outdir) - filename = "total_results.csv" - - cummulative_distr_compare2(project_efficiency_results, filename, outdir) - cummulative_distr_all4(project_efficiency_results, filename, outdir) - - # paper_efficiency_graphs(results_efficiency, filename, outdir, filterRelCLOC=True, filterDetectedChanges=False) + figures_dir = os.path.join("figures", project) + if os.path.exists(figures_dir): + shutil.rmtree(figures_dir) + os.makedirs(figures_dir) + + if os.path.exists(efficiency_results): + efficieny_filename = "total_results.csv" + print("Creating efficiency plots.") + cummulative_distr_compare2(efficiency_results, efficieny_filename, figures_dir) + cummulative_distr_all4(efficiency_results, efficieny_filename, figures_dir) + # paper_efficiency_graphs(results_efficiency, filename, outdir, filterRelCLOC=True, filterDetectedChanges=False) + else: + print("No efficiency results available.") # precision plot - # results_precision = "result_precision" - # filename = "results.json" - # paper_precision_graph(results_precision, filename, outdir) + if os.path.exists(precision_results): + precision_filename = "results.json" + print("Creating precision plots.") + paper_precision_graph(precision_results, precision_filename, figures_dir) + else: + print("No precision results available.") main() \ No newline at end of file From fdb7ffb1402b8020d9922026902290e17f1722b4 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 15 Dec 2022 13:24:43 +0100 Subject: [PATCH 37/84] Remove printstats from argumeents for compare_runs. 
--- scripts/incremental/benchmarking/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index 1b42d80de..e62b1c411 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -92,7 +92,7 @@ def append_to_repo_path(file): outfile.close() def compare_runs(analyzer_dir, dummy_c_file, outdir, conf, compare_data_1, compare_data_2): - options = ['--conf', os.path.join(analyzer_dir, 'conf', conf + '.json'), '--disable', 'printstats', '--disable', 'warn.warning', '--disable', 'warn.race', '--disable', 'dbg.compare_runs.diff', '--disable', 'dbg.compare_runs.eqsys', '--enable', 'dbg.compare_runs.node', '--compare_runs', compare_data_1, compare_data_2] + options = ['--conf', os.path.join(analyzer_dir, 'conf', conf + '.json'), '--disable', 'warn.warning', '--disable', 'warn.race', '--disable', 'dbg.compare_runs.diff', '--disable', 'dbg.compare_runs.eqsys', '--enable', 'dbg.compare_runs.node', '--compare_runs', compare_data_1, compare_data_2] analyze_command = [os.path.join(analyzer_dir, 'goblint'), *options, dummy_c_file] with open(os.path.join(outdir, comparelog), "w+") as outfile: subprocess.run(analyze_command, check=True, stdout=outfile, stderr=subprocess.STDOUT) From e946610d115956063c89977182a63fb041478efd Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 15 Dec 2022 13:33:25 +0100 Subject: [PATCH 38/84] Define analyzed branch per repository; needed for precision.py. 
--- scripts/incremental/benchmarking/precision.py | 3 ++- scripts/incremental/benchmarking/projects.py | 16 +++++++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/scripts/incremental/benchmarking/precision.py b/scripts/incremental/benchmarking/precision.py index 510731026..df92a6106 100644 --- a/scripts/incremental/benchmarking/precision.py +++ b/scripts/incremental/benchmarking/precision.py @@ -36,6 +36,7 @@ to = project.to diff_exclude = project.diff_exclude files = project.files +branch = project.branch try: numcores = int(sys.argv[3]) @@ -56,7 +57,7 @@ def start_commit_for_sequence_search(): current_commit = "" - for commit in Repository(url, to=to, only_in_branch='dev', order='reverse', clone_repo_to=res_dir).traverse_commits(): + for commit in Repository(url, to=to, only_in_branch=branch, order='reverse', clone_repo_to=res_dir).traverse_commits(): current_commit = commit break gr = Git(os.path.join(res_dir, repo_name)) diff --git a/scripts/incremental/benchmarking/projects.py b/scripts/incremental/benchmarking/projects.py index fdcf082d6..f04b63aec 100644 --- a/scripts/incremental/benchmarking/projects.py +++ b/scripts/incremental/benchmarking/projects.py @@ -12,8 +12,9 @@ class ProjectConfig: diff_exclude: list # list[str] '''Files to analyze. 
If this list is not empty, the given files will be analyzed (not those in the compiledb)''' files: list # list[str] + branch: str - def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin, to, diff_exclude, files): + def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin, to, diff_exclude, files, branch): self.url = url self.repo_name = repo_name self.build_compdb = build_compdb @@ -23,6 +24,7 @@ def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin self.to = to self.diff_exclude = diff_exclude self.files = files + self.branch = branch sqlite = ProjectConfig( url = "https://github.com/sqlite/sqlite", @@ -33,7 +35,8 @@ def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin begin = datetime.datetime(2021,8,1), to = datetime.datetime(2021,8,10), # minimal subset: datetime(2021,8,4) diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"], - files = ['sqlite3.c', 'sqlite3.h', 'sqlite3ext.h', 'shell.c'] + files = ['sqlite3.c', 'sqlite3.h', 'sqlite3ext.h', 'shell.c'], + branch = "master" ) zstd = ProjectConfig( @@ -45,7 +48,8 @@ def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin begin = datetime.datetime(2021,8,1), # very minimal: "zstd-minimal" to = datetime.datetime(2022,2,1), # minimal subset: datetime(2021,8,4) diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"], - files = None + files = None, + branch = "dev" ) figlet = ProjectConfig( @@ -57,7 +61,8 @@ def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin begin = datetime.datetime(2010,1,1), to = datetime.datetime(2022,10,10), diff_exclude = [], - files = ['Makefile'] + files = ['Makefile'], + branch = "master" ) chrony = ProjectConfig( @@ -71,7 +76,8 @@ def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin begin=datetime.datetime(2020, 1, 1), to=datetime.datetime(2022, 10, 10), 
diff_exclude=[], - files=None + files=None, + branch = "master" ) projects = {"sqlite": sqlite, "chrony": chrony, "figlet": figlet, "zstd": zstd} From 9d03382ec984001bd4788196b3f043396053b327 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 15 Dec 2022 18:07:05 +0100 Subject: [PATCH 39/84] Extract analysis and solving time in efficiency script. --- .../incremental/benchmarking/efficiency.py | 45 ++++++++++++++----- scripts/incremental/benchmarking/plot.py | 30 ++++++------- scripts/incremental/benchmarking/utils.py | 37 ++++++++++++--- 3 files changed, 80 insertions(+), 32 deletions(-) diff --git a/scripts/incremental/benchmarking/efficiency.py b/scripts/incremental/benchmarking/efficiency.py index 8c18e92c6..5672f9062 100644 --- a/scripts/incremental/benchmarking/efficiency.py +++ b/scripts/incremental/benchmarking/efficiency.py @@ -149,8 +149,12 @@ def analyze_small_commits_in_repo(cwd, outdir, from_c, to_c): def collect_data(outdir): data = {"Commit": [], "Failed?": [], "Changed LOC": [], "Relevant changed LOC": [], "Changed/Added/Removed functions": [], - utils.header_runtime_parent: [], utils.header_runtime_incr_child: [], - utils.header_runtime_incr_posts_child: [], utils.header_runtime_incr_posts_rel_child: [], + utils.runtime_header_parent: [], utils.runtime_header_incr_child: [], + utils.runtime_header_incr_posts_child: [], utils.runtime_header_incr_posts_rel_child: [], + utils.analysis_header_parent: [], utils.analysis_header_incr_child: [], + utils.analysis_header_incr_posts_child: [], utils.analysis_header_incr_posts_rel_child: [], + utils.solving_header_parent: [], utils.solving_header_incr_child: [], + utils.solving_header_incr_posts_child: [], utils.solving_header_incr_posts_rel_child: [], "Change in number of race warnings": []} for t in os.listdir(outdir): parentlog = os.path.join(outdir, t, 'parent', utils.analyzerlog) @@ -165,10 +169,19 @@ def collect_data(outdir): data["Failed?"].append(commit_prop["failed"]) 
data["Commit"].append(commit_prop["hash"][:7]) if commit_prop["failed"] == True: - data[utils.header_runtime_parent].append(0) - data[utils.header_runtime_incr_child].append(0) - data[utils.header_runtime_incr_posts_child].append(0) - data[utils.header_runtime_incr_posts_rel_child].append(0) + data[utils.runtime_header_parent].append(0) + data[utils.runtime_header_incr_child].append(0) + data[utils.runtime_header_incr_posts_child].append(0) + data[utils.runtime_header_incr_posts_rel_child].append(0) + data[utils.analysis_header_parent].append(0) + data[utils.analysis_header_incr_child].append(0) + data[utils.analysis_header_incr_posts_child].append(0) + data[utils.analysis_header_incr_posts_rel_child].append(0) + data[utils.solving_header_parent].append(0) + data[utils.solving_header_incr_child].append(0) + data[utils.solving_header_incr_posts_child].append(0) + data[utils.solving_header_incr_posts_rel_child].append(0) + data["Changed/Added/Removed functions"].append(0) data["Change in number of race warnings"].append(0) continue @@ -177,10 +190,22 @@ def collect_data(outdir): child_posts_info = utils.extract_from_analyzer_log(childpostslog) child_posts_rel_info = utils.extract_from_analyzer_log(childpostsrellog) data["Changed/Added/Removed functions"].append(int(child_info["changed"]) + int(child_info["added"]) + int(child_info["removed"])) - data[utils.header_runtime_parent].append(float(parent_info["runtime"])) - data[utils.header_runtime_incr_child].append(float(child_info["runtime"])) - data[utils.header_runtime_incr_posts_child].append(float(child_posts_info["runtime"])) - data[utils.header_runtime_incr_posts_rel_child].append(float(child_posts_rel_info["runtime"])) + data[utils.runtime_header_parent].append(float(parent_info["runtime"])) + data[utils.runtime_header_incr_child].append(float(child_info["runtime"])) + data[utils.runtime_header_incr_posts_child].append(float(child_posts_info["runtime"])) + 
data[utils.runtime_header_incr_posts_rel_child].append(float(child_posts_rel_info["runtime"])) + + + data[utils.analysis_header_parent].append(float(parent_info["analysis_time"])) + data[utils.analysis_header_incr_child].append(float(child_info["analysis_time"])) + data[utils.analysis_header_incr_posts_child].append(float(child_posts_info["analysis_time"])) + data[utils.analysis_header_incr_posts_rel_child].append(float(child_posts_rel_info["analysis_time"])) + + data[utils.solving_header_parent].append(float(parent_info["solving_time"])) + data[utils.solving_header_incr_child].append(float(child_info["solving_time"])) + data[utils.solving_header_incr_posts_child].append(float(child_posts_info["solving_time"])) + data[utils.solving_header_incr_posts_rel_child].append(float(child_posts_rel_info["solving_time"])) + data["Change in number of race warnings"].append(int(child_info["race_warnings"] - int(parent_info["race_warnings"]))) return data diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index 3d3a88797..98474d84d 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -13,13 +13,13 @@ def cummulative_distr_compare2(results_dir, result_csv_filename, figure_dir): outfile_incr_vs_incrrel = "figure_cum_distr_rel.pdf" df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), filterDetectedChanges=True) - data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_parent, utils.header_runtime_incr_child, utils.header_runtime_incr_posts_rel_child]) + data, base = utils.create_cum_data(df, num_bins, [utils.runtime_header_parent, utils.runtime_header_incr_child, utils.runtime_header_incr_posts_rel_child]) datanonincr = {"values": data[0], "label": description_non_incr} dataincr = {"values": data[1], "label": description_incr} utils.cummulative_distr_plot([datanonincr, dataincr], base, figure_dir, outfile_nonincr_vs_incr) - data, base = 
utils.create_cum_data(df, num_bins, [utils.header_runtime_incr_child, utils.header_runtime_incr_posts_rel_child]) + data, base = utils.create_cum_data(df, num_bins, [utils.runtime_header_incr_child, utils.runtime_header_incr_posts_rel_child]) dataincr = {"values": data[0], "label": description_incr} datarelincr = {"values": data[1], "label": description_incr_rel} @@ -30,7 +30,7 @@ def cummulative_distr_all4(results_dir, result_csv_filename, figure_dir): outfile_nonincr_vs_incr = "figure_cum_distr_all3.pdf" df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), filterDetectedChanges=True) - data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_parent, utils.header_runtime_incr_child, utils.header_runtime_incr_posts_child, utils.header_runtime_incr_posts_rel_child]) + data, base = utils.create_cum_data(df, num_bins, [utils.runtime_header_parent, utils.runtime_header_incr_child, utils.runtime_header_incr_posts_child, utils.runtime_header_incr_posts_rel_child]) data_non_incr = {"values": data[0], "label": description_non_incr} data_incr = {"values": data[1], "label": description_incr} data_incr_post = {"values": data[2], "label": description_incr_post} @@ -41,31 +41,31 @@ def distribution_absdiff_plot(title, result_csv_filename, outdir, cutoffs_incr=N df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) # plot incremental vs non-incremental - diff = df.loc[:,utils.header_runtime_parent] - df.loc[:,utils.header_runtime_incr_child] + diff = df.loc[:,utils.runtime_header_parent] - df.loc[:,utils.runtime_header_incr_child] utils.hist_plot(diff, 20, title, 'Improvement in s (incremental compared to non-incremental)', 'Number of Commits', os.path.join(outdir, "figure_absdiff_distr_incr.pdf"), cutoffs_incr) # plot reluctant vs. 
basic incremental - diff = df.loc[:,utils.header_runtime_incr_child] - df.loc[:,utils.header_runtime_incr_posts_rel_child] + diff = df.loc[:,utils.runtime_header_incr_child] - df.loc[:,utils.runtime_header_incr_posts_rel_child] utils.hist_plot(diff, 2, title, 'Improvement in s (reluctant compared to incremental)', 'Number of Commits', os.path.join(outdir, "figure_absdiff_distr_rel.pdf"), cutoffs_rel) def distribution_reldiff_plot(title, result_csv_filename, outdir, cutoffs_incr=None, cutoffs_rel=None): df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) # plot incremental vs non-incremental - print(df[utils.header_runtime_incr_child].astype('float')) - diff = 1 - df[utils.header_runtime_incr_child].astype('float') / df[utils.header_runtime_parent].astype('float') + print(df[utils.runtime_header_incr_child].astype('float')) + diff = 1 - df[utils.runtime_header_incr_child].astype('float') / df[utils.runtime_header_parent].astype('float') utils.hist_plot(diff, 0.01, title, "Relative Improvement in s (incremental compared to non-incremental)", 'Number of Commits', os.path.join(outdir, "figure_reldiff_distr_incr.pdf"), cutoffs_incr) # plot reluctant vs. 
basic incremental - diff = 1 - df.loc[:,utils.header_runtime_incr_posts_rel_child] / df.loc[:,utils.header_runtime_incr_child] + diff = 1 - df.loc[:,utils.runtime_header_incr_posts_rel_child] / df.loc[:,utils.runtime_header_incr_child] utils.hist_plot(diff, 0.005, title, 'Relative Improvement (reluctant compared to incremental)', 'Number of Commits', os.path.join(outdir, "figure_reldiff_distr_rel.pdf"), cutoffs_rel) def paper_efficiency_graphs(dir_results, csv_filename, outdir, filterRelCLOC=False, filterDetectedChanges=False): df = utils.get_cleaned_filtered_data(os.path.join(dir_results,csv_filename), filterRelCLOC=filterRelCLOC, filterDetectedChanges=filterDetectedChanges) - diff1 = 1 - df[utils.header_runtime_incr_child].astype('float') / df[utils.header_runtime_parent].astype('float') - diff2 = 1 - df[utils.header_runtime_incr_posts_child].astype('float') / df[utils.header_runtime_incr_child].astype('float') - diff3 = 1 - df[utils.header_runtime_incr_posts_rel_child].astype('float') / df[utils.header_runtime_incr_posts_child].astype('float') - diff4 = 1 - df[utils.header_runtime_incr_posts_rel_child].astype('float') / df[utils.header_runtime_parent].astype('float') + diff1 = 1 - df[utils.runtime_header_incr_child].astype('float') / df[utils.runtime_header_parent].astype('float') + diff2 = 1 - df[utils.runtime_header_incr_posts_child].astype('float') / df[utils.runtime_header_incr_child].astype('float') + diff3 = 1 - df[utils.runtime_header_incr_posts_rel_child].astype('float') / df[utils.runtime_header_incr_posts_child].astype('float') + diff4 = 1 - df[utils.runtime_header_incr_posts_rel_child].astype('float') / df[utils.runtime_header_parent].astype('float') step = 0.01 for i, diff in enumerate([diff1,diff2,diff3,diff4]): # output textwidth in latex with @@ -97,9 +97,9 @@ def paper_efficiency_graphs(dir_results, csv_filename, outdir, filterRelCLOC=Fal for e in diff: if (xlimleft and e < xlimleft) or (xlimright and e > xlimright): print("excluded", e, "from 
efficiency figure", i) - diff1 = df[utils.header_runtime_incr_child].astype('float') / df[utils.header_runtime_parent].astype('float') - diff2 = df[utils.header_runtime_incr_posts_child].astype('float') / df[utils.header_runtime_parent].astype('float') - diff3 = df[utils.header_runtime_incr_posts_rel_child].astype('float') / df[utils.header_runtime_parent].astype('float') + diff1 = df[utils.runtime_header_incr_child].astype('float') / df[utils.runtime_header_parent].astype('float') + diff2 = df[utils.runtime_header_incr_posts_child].astype('float') / df[utils.runtime_header_parent].astype('float') + diff3 = df[utils.runtime_header_incr_posts_rel_child].astype('float') / df[utils.runtime_header_parent].astype('float') for n, diff in [("incr", diff1), ("+ incr postsolver", diff2), ("+ reluctant", diff3)]: print("80% quantile for", n, "compared to from-scratch analysis:", diff.quantile(q=0.8) * 100, "%") print("75% quantile for", n, "compared to from-scratch analysis:", diff.quantile(q=0.75) * 100, "%") diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index e62b1c411..c8cc214e0 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -29,10 +29,29 @@ import matplotlib.pyplot as plt from matplotlib.ticker import ScalarFormatter -header_runtime_parent = "Runtime for parent commit (non-incremental)" -header_runtime_incr_child = "Runtime for commit (incremental)" -header_runtime_incr_posts_child = "Runtime for commit (incremental + incr postsolver)" -header_runtime_incr_posts_rel_child = "Runtime for commit (incremental + incr postsolver + reluctant)" +runtime_prefix = "Runtime" +analysis_prefix = "Analysis" +solving_prefix = "Solving" + +header_parent = " for parent commit (non-incremental)" +header_incr_child = " for commit (incremental)" +header_incr_posts_child = " for commit (incremental + incr postsolver)" +header_incr_posts_rel_child = " for commit (incremental + incr 
postsolver + reluctant)" + +runtime_header_parent = runtime_prefix + header_parent +runtime_header_incr_child = runtime_prefix + header_incr_child +runtime_header_incr_posts_child = runtime_prefix + header_incr_posts_child +runtime_header_incr_posts_rel_child = runtime_prefix + header_incr_posts_rel_child + +analysis_header_parent = analysis_prefix + header_parent +analysis_header_incr_child = analysis_prefix + header_incr_child +analysis_header_incr_posts_child = analysis_prefix + header_incr_posts_child +analysis_header_incr_posts_rel_child = analysis_prefix + header_incr_posts_rel_child + +solving_header_parent = solving_prefix + header_parent +solving_header_incr_child = solving_prefix + header_incr_child +solving_header_incr_posts_child = solving_prefix + header_incr_posts_child +solving_header_incr_posts_rel_child = solving_prefix + header_incr_posts_rel_child preparelog = "prepare.log" analyzerlog = "analyzer.log" @@ -126,10 +145,14 @@ def find_line(pattern, log): def extract_from_analyzer_log(log): runtime_pattern = 'Default[ ]+(?P[0-9\.]+)s' + analysis_time_pattern = 'analysis[ ]+(?P[0-9\.]+)s' + solving_time_pattern = 'solving[ ]+(?P[0-9\.]+)s' change_info_pattern = 'change_info = { unchanged = (?P[0-9]*); changed = (?P[0-9]*); added = (?P[0-9]*); removed = (?P[0-9]*) }' - r = find_line(runtime_pattern, log) + runtime = find_line(runtime_pattern, log) + analysis_time = find_line(analysis_time_pattern, log) + solving_time = find_line(solving_time_pattern, log) ch = find_line(change_info_pattern, log) or {"unchanged": 0, "changed": 0, "added": 0, "removed": 0} - d = dict(list(r.items()) + list(ch.items())) + d = dict(list(runtime.items()) + list(analysis_time.items()) + list(solving_time.items()) + list(ch.items())) with open(log, "r") as file: num_racewarnings = file.read().count('[Warning][Race]') d["race_warnings"] = num_racewarnings @@ -159,7 +182,7 @@ def get_cleaned_filtered_data(result_csv_file, filterRelCLOC=False, filterDetect # clean dataset 
(remove all rows for which any of the runtime entries is 0 which means that the respective analysis # run failed) - df = df[(df[header_runtime_parent] != 0)] + df = df[(df[header_parent] != 0)] if filterRelCLOC: df = df[df["Relevant changed LOC"] > 0] if filterDetectedChanges: From 203d9dc696c2f338e19cccfcf9afce348349141d Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Fri, 16 Dec 2022 10:00:51 +0100 Subject: [PATCH 40/84] Fix efficiency plot. --- scripts/incremental/benchmarking/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index c8cc214e0..9247c61c7 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -182,7 +182,7 @@ def get_cleaned_filtered_data(result_csv_file, filterRelCLOC=False, filterDetect # clean dataset (remove all rows for which any of the runtime entries is 0 which means that the respective analysis # run failed) - df = df[(df[header_parent] != 0)] + df = df[(df[runtime_header_parent] != 0)] if filterRelCLOC: df = df[df["Relevant changed LOC"] > 0] if filterDetectedChanges: From 9f9a4e2453dbcce77e445dc5997cee71c082fc03 Mon Sep 17 00:00:00 2001 From: stilscher <66023521+stilscher@users.noreply.github.com> Date: Fri, 16 Dec 2022 15:29:17 +0100 Subject: [PATCH 41/84] add boxplot generation as alternative for precision graph --- scripts/incremental/benchmarking/plot.py | 27 +++++++++++++++++++++++ scripts/incremental/benchmarking/utils.py | 14 +++++++++++- 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index 98474d84d..82004f600 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -104,6 +104,33 @@ def paper_efficiency_graphs(dir_results, csv_filename, outdir, filterRelCLOC=Fal print("80% quantile for", n, "compared to from-scratch 
analysis:", diff.quantile(q=0.8) * 100, "%") print("75% quantile for", n, "compared to from-scratch analysis:", diff.quantile(q=0.75) * 100, "%") +def paper_precision_graph_box(results_precision, filename, outdir): + df = utils.get_data_from_json(os.path.join(results_precision, filename)) + + # Plot precision loss after x commits, where x is in {1, 2, 5, 10, 15} + lessprec1 = 'intermediate precision.1.precision.lessprec' + lessprec2 = 'intermediate precision.2.precision.lessprec' + lessprec5 = 'intermediate precision.5.precision.lessprec' + lessprec10 = 'intermediate precision.10.precision.lessprec' + lessprec15 = 'intermediate precision.15.precision.lessprec' + total1 = 'intermediate precision.1.precision.total' + total2 = 'intermediate precision.2.precision.total' + total5 = 'intermediate precision.5.precision.total' + total10 = 'intermediate precision.10.precision.total' + total15 = 'intermediate precision.15.precision.total' + + x = [1,2,5,10,15] + data = [] + lessprec = [lessprec1, lessprec2, lessprec5, lessprec10, lessprec15] + total = [total1, total2, total5, total10, total15] + for l, t in zip(lessprec, total): + ratio = df[l] / df[t] + data.append(ratio.dropna()) + + halftextwidth = 3.3 + size=(halftextwidth,halftextwidth*2/3) + utils.quantile_plot(data, x, "\# Commits", "Share of less precise program points", os.path.join(outdir, "precision_figure.pgf"), size) + def paper_precision_graph(results_precision, filename, outdir): df = utils.get_data_from_json(os.path.join(results_precision, filename)) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index 9247c61c7..b74bc2806 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -307,7 +307,7 @@ def scatter_plot(data, xlabel, ylabel, outfile, size): width, height = size fig.set_size_inches(w=width, h=height) 
colors=['red','azure','blue','brown','chartreuse','chocolate','darkblue','darkgreen','seagreen','green','indigo','orangered','orange','coral','olive','mediumseagreen','grey','teal'] - markers = ['x','+','o','s','p','*','D','d','v','^','<','>','1','2','3','4','H','P'] + #markers = ['x','+','o','s','p','*','D','d','v','^','<','>','1','2','3','4','H','P'] linestyles = ['dashed'] for i, (x, y) in enumerate(data): plt.plot(x,y, marker='x', linewidth=0.4, markersize=1, alpha=0.85, color=colors[i % len(colors)], linestyle=linestyles[i % len(linestyles)]) @@ -316,3 +316,15 @@ def scatter_plot(data, xlabel, ylabel, outfile, size): plt.ylim(bottom=-0.005, top=0.19) plt.tight_layout(pad=0.4) plt.savefig(outfile) + +def quantile_plot(data, x, xlabel, ylabel, outfile, size): + fig = plt.figure() + width, height = size + fig.set_size_inches(w=width, h=height) + plt.boxplot(data, flierprops=dict(markersize=3), positions=x) + plt.xlabel(xlabel) + plt.ylabel(ylabel) + plt.ylim(bottom=-0.005, top=0.19) + plt.tight_layout(pad=0.4) + plt.savefig(outfile) + print(outfile) From 67ff095e6123b91b9511a5d3c67ab7b280ad3bb1 Mon Sep 17 00:00:00 2001 From: stilscher <66023521+stilscher@users.noreply.github.com> Date: Fri, 16 Dec 2022 15:30:39 +0100 Subject: [PATCH 42/84] fix naming --- scripts/incremental/benchmarking/plot.py | 2 +- scripts/incremental/benchmarking/utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index 82004f600..63bcd69cd 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -129,7 +129,7 @@ def paper_precision_graph_box(results_precision, filename, outdir): halftextwidth = 3.3 size=(halftextwidth,halftextwidth*2/3) - utils.quantile_plot(data, x, "\# Commits", "Share of less precise program points", os.path.join(outdir, "precision_figure.pgf"), size) + utils.box_plot(data, x, "\# Commits", "Share of less precise 
program points", os.path.join(outdir, "precision_figure.pgf"), size) def paper_precision_graph(results_precision, filename, outdir): diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index b74bc2806..0491ee450 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -317,7 +317,7 @@ def scatter_plot(data, xlabel, ylabel, outfile, size): plt.tight_layout(pad=0.4) plt.savefig(outfile) -def quantile_plot(data, x, xlabel, ylabel, outfile, size): +def box_plot(data, x, xlabel, ylabel, outfile, size): fig = plt.figure() width, height = size fig.set_size_inches(w=width, h=height) From f358d136d193f67192a0594a5383d1010f7563df Mon Sep 17 00:00:00 2001 From: stilscher <66023521+stilscher@users.noreply.github.com> Date: Fri, 16 Dec 2022 15:34:41 +0100 Subject: [PATCH 43/84] fix ylim --- scripts/incremental/benchmarking/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index 0491ee450..798fbbcc1 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -324,7 +324,6 @@ def box_plot(data, x, xlabel, ylabel, outfile, size): plt.boxplot(data, flierprops=dict(markersize=3), positions=x) plt.xlabel(xlabel) plt.ylabel(ylabel) - plt.ylim(bottom=-0.005, top=0.19) plt.tight_layout(pad=0.4) plt.savefig(outfile) print(outfile) From 1c6f0182fe8a129c1ad874aee3db2793a520fb51 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Sat, 17 Dec 2022 11:19:14 +0100 Subject: [PATCH 44/84] Extract walltime instead of CPU time. This adapts to the changed output format of Goblints runtime stats. 
--- scripts/incremental/benchmarking/utils.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index 798fbbcc1..4a98d8f7e 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -144,9 +144,10 @@ def find_line(pattern, log): return None def extract_from_analyzer_log(log): - runtime_pattern = 'Default[ ]+(?P[0-9\.]+)s' - analysis_time_pattern = 'analysis[ ]+(?P[0-9\.]+)s' - solving_time_pattern = 'solving[ ]+(?P[0-9\.]+)s' + # First comes the cpu time (which is ignored); we look at the walltime. + runtime_pattern = 'Default[ ]+[0-9\.]+s[ ]+(?P[0-9\.]+)s' + analysis_time_pattern = 'analysis[ ]+[0-9\.]+s[ ]+(?P[0-9\.]+)s' + solving_time_pattern = 'solving[ ]+[0-9\.]+s[ ]+(?P[0-9\.]+)s' change_info_pattern = 'change_info = { unchanged = (?P[0-9]*); changed = (?P[0-9]*); added = (?P[0-9]*); removed = (?P[0-9]*) }' runtime = find_line(runtime_pattern, log) analysis_time = find_line(analysis_time_pattern, log) From 621e84dd6990d6e9c34fe4988de91ab9903cfd46 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Sat, 17 Dec 2022 14:02:15 +0100 Subject: [PATCH 45/84] Perform non-incremental run on child commit as well. 
--- .../incremental/benchmarking/efficiency.py | 51 ++++++++++++------- scripts/incremental/benchmarking/plot.py | 18 +++---- scripts/incremental/benchmarking/utils.py | 4 ++ 3 files changed, 46 insertions(+), 27 deletions(-) diff --git a/scripts/incremental/benchmarking/efficiency.py b/scripts/incremental/benchmarking/efficiency.py index 5672f9062..fea267198 100644 --- a/scripts/incremental/benchmarking/efficiency.py +++ b/scripts/incremental/benchmarking/efficiency.py @@ -113,6 +113,13 @@ def analyze_small_commits_in_repo(cwd, outdir, from_c, to_c): add_options = default_options + ['--disable', 'incremental.load', '--enable', 'incremental.save'] utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, parent.hash, outparent, conf_base, add_options, files) + #print('And now analyze', str(commit.hash), 'from scratch.') + outchild_non_incr = os.path.join(outtry, 'child-non-incr') + os.makedirs(outchild_non_incr) + # Do not save in this run to not pollute results + add_options = default_options + ['--disable', 'incremental.load', '--disable', 'incremental.save'] + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchild_non_incr, conf_base, add_options, files) + #print('And now analyze', str(commit.hash), 'incrementally.') outchild = os.path.join(outtry, 'child') os.makedirs(outchild) @@ -120,16 +127,16 @@ def analyze_small_commits_in_repo(cwd, outdir, from_c, to_c): utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchild, conf_base, add_options, files) #print('And again incremental, this time with incremental postsolver') - outchildincrpost = os.path.join(outtry, 'child-incr-post') - os.makedirs(outchildincrpost) + outchild_incr_post = os.path.join(outtry, 'child-incr-post') + os.makedirs(outchild_incr_post) add_options = default_options + ['--enable', 'incremental.load', '--disable', 'incremental.save'] - utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchildincrpost, 
conf_incrpost, add_options, files) + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchild_incr_post, conf_incrpost, add_options, files) #print('And again incremental, this time with incremental postsolver and reluctant') - outchildrel = os.path.join(outtry, 'child-rel') - os.makedirs(outchildrel) + outchild_rel = os.path.join(outtry, 'child-rel') + os.makedirs(outchild_rel) add_options = default_options + ['--enable', 'incremental.load', '--disable', 'incremental.save', '--enable', 'incremental.reluctant.enabled'] - utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchildrel, conf_incrpost, add_options, files) + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchild_rel, conf_incrpost, add_options, files) count_analyzed+=1 failed = False @@ -149,18 +156,19 @@ def analyze_small_commits_in_repo(cwd, outdir, from_c, to_c): def collect_data(outdir): data = {"Commit": [], "Failed?": [], "Changed LOC": [], "Relevant changed LOC": [], "Changed/Added/Removed functions": [], - utils.runtime_header_parent: [], utils.runtime_header_incr_child: [], + utils.runtime_header_parent: [], utils.runtime_header_non_incr_child: [], utils.runtime_header_incr_child: [], utils.runtime_header_incr_posts_child: [], utils.runtime_header_incr_posts_rel_child: [], - utils.analysis_header_parent: [], utils.analysis_header_incr_child: [], + utils.analysis_header_parent: [], utils.analysis_header_non_incr_child: [], utils.analysis_header_incr_child: [], utils.analysis_header_incr_posts_child: [], utils.analysis_header_incr_posts_rel_child: [], - utils.solving_header_parent: [], utils.solving_header_incr_child: [], + utils.solving_header_parent: [], utils.solving_header_non_incr_child: [], utils.solving_header_incr_child: [], utils.solving_header_incr_posts_child: [], utils.solving_header_incr_posts_rel_child: [], "Change in number of race warnings": []} for t in os.listdir(outdir): - parentlog = 
os.path.join(outdir, t, 'parent', utils.analyzerlog) - childlog = os.path.join(outdir, t, 'child', utils.analyzerlog) - childpostslog = os.path.join(outdir, t, 'child-incr-post', utils.analyzerlog) - childpostsrellog = os.path.join(outdir, t, 'child-rel', utils.analyzerlog) + parent_log = os.path.join(outdir, t, 'parent', utils.analyzerlog) + child_non_incr_log = os.path.join(outdir, t, 'child-non-incr', utils.analyzerlog) + child_log = os.path.join(outdir, t, 'child', utils.analyzerlog) + child_posts_log = os.path.join(outdir, t, 'child-incr-post', utils.analyzerlog) + child_posts_rel_log = os.path.join(outdir, t, 'child-rel', utils.analyzerlog) commit_prop_log = os.path.join(outdir, t, 'commit_properties.log') t = int(t) commit_prop = json.load(open(commit_prop_log, "r")) @@ -170,14 +178,17 @@ def collect_data(outdir): data["Commit"].append(commit_prop["hash"][:7]) if commit_prop["failed"] == True: data[utils.runtime_header_parent].append(0) + data[utils.runtime_header_non_incr_child].append(0) data[utils.runtime_header_incr_child].append(0) data[utils.runtime_header_incr_posts_child].append(0) data[utils.runtime_header_incr_posts_rel_child].append(0) data[utils.analysis_header_parent].append(0) + data[utils.analysis_header_non_incr_child].append(0) data[utils.analysis_header_incr_child].append(0) data[utils.analysis_header_incr_posts_child].append(0) data[utils.analysis_header_incr_posts_rel_child].append(0) data[utils.solving_header_parent].append(0) + data[utils.solving_header_non_incr_child].append(0) data[utils.solving_header_incr_child].append(0) data[utils.solving_header_incr_posts_child].append(0) data[utils.solving_header_incr_posts_rel_child].append(0) @@ -185,23 +196,27 @@ def collect_data(outdir): data["Changed/Added/Removed functions"].append(0) data["Change in number of race warnings"].append(0) continue - parent_info = utils.extract_from_analyzer_log(parentlog) - child_info = utils.extract_from_analyzer_log(childlog) - child_posts_info = 
utils.extract_from_analyzer_log(childpostslog) - child_posts_rel_info = utils.extract_from_analyzer_log(childpostsrellog) + + parent_info = utils.extract_from_analyzer_log(parent_log) + child_non_incr_info = utils.extract_from_analyzer_log(child_non_incr_log) + child_info = utils.extract_from_analyzer_log(child_log) + child_posts_info = utils.extract_from_analyzer_log(child_posts_log) + child_posts_rel_info = utils.extract_from_analyzer_log(child_posts_rel_log) data["Changed/Added/Removed functions"].append(int(child_info["changed"]) + int(child_info["added"]) + int(child_info["removed"])) data[utils.runtime_header_parent].append(float(parent_info["runtime"])) + data[utils.runtime_header_non_incr_child].append(float(child_non_incr_info["runtime"])) data[utils.runtime_header_incr_child].append(float(child_info["runtime"])) data[utils.runtime_header_incr_posts_child].append(float(child_posts_info["runtime"])) data[utils.runtime_header_incr_posts_rel_child].append(float(child_posts_rel_info["runtime"])) - data[utils.analysis_header_parent].append(float(parent_info["analysis_time"])) + data[utils.analysis_header_non_incr_child].append(float(child_non_incr_info["analysis_time"])) data[utils.analysis_header_incr_child].append(float(child_info["analysis_time"])) data[utils.analysis_header_incr_posts_child].append(float(child_posts_info["analysis_time"])) data[utils.analysis_header_incr_posts_rel_child].append(float(child_posts_rel_info["analysis_time"])) data[utils.solving_header_parent].append(float(parent_info["solving_time"])) + data[utils.solving_header_non_incr_child].append(float(child_non_incr_info["solving_time"])) data[utils.solving_header_incr_child].append(float(child_info["solving_time"])) data[utils.solving_header_incr_posts_child].append(float(child_posts_info["solving_time"])) data[utils.solving_header_incr_posts_rel_child].append(float(child_posts_rel_info["solving_time"])) diff --git a/scripts/incremental/benchmarking/plot.py 
b/scripts/incremental/benchmarking/plot.py index 63bcd69cd..0df4057ae 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -13,7 +13,7 @@ def cummulative_distr_compare2(results_dir, result_csv_filename, figure_dir): outfile_incr_vs_incrrel = "figure_cum_distr_rel.pdf" df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), filterDetectedChanges=True) - data, base = utils.create_cum_data(df, num_bins, [utils.runtime_header_parent, utils.runtime_header_incr_child, utils.runtime_header_incr_posts_rel_child]) + data, base = utils.create_cum_data(df, num_bins, [utils.runtime_header_non_incr_child, utils.runtime_header_incr_child, utils.runtime_header_incr_posts_rel_child]) datanonincr = {"values": data[0], "label": description_non_incr} dataincr = {"values": data[1], "label": description_incr} @@ -30,7 +30,7 @@ def cummulative_distr_all4(results_dir, result_csv_filename, figure_dir): outfile_nonincr_vs_incr = "figure_cum_distr_all3.pdf" df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), filterDetectedChanges=True) - data, base = utils.create_cum_data(df, num_bins, [utils.runtime_header_parent, utils.runtime_header_incr_child, utils.runtime_header_incr_posts_child, utils.runtime_header_incr_posts_rel_child]) + data, base = utils.create_cum_data(df, num_bins, [utils.runtime_header_non_incr_child, utils.runtime_header_incr_child, utils.runtime_header_incr_posts_child, utils.runtime_header_incr_posts_rel_child]) data_non_incr = {"values": data[0], "label": description_non_incr} data_incr = {"values": data[1], "label": description_incr} data_incr_post = {"values": data[2], "label": description_incr_post} @@ -41,7 +41,7 @@ def distribution_absdiff_plot(title, result_csv_filename, outdir, cutoffs_incr=N df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) # plot incremental vs non-incremental - diff = 
df.loc[:,utils.runtime_header_parent] - df.loc[:,utils.runtime_header_incr_child] + diff = df.loc[:,utils.runtime_header_non_incr_child] - df.loc[:,utils.runtime_header_incr_child] utils.hist_plot(diff, 20, title, 'Improvement in s (incremental compared to non-incremental)', 'Number of Commits', os.path.join(outdir, "figure_absdiff_distr_incr.pdf"), cutoffs_incr) # plot reluctant vs. basic incremental @@ -53,7 +53,7 @@ def distribution_reldiff_plot(title, result_csv_filename, outdir, cutoffs_incr=N # plot incremental vs non-incremental print(df[utils.runtime_header_incr_child].astype('float')) - diff = 1 - df[utils.runtime_header_incr_child].astype('float') / df[utils.runtime_header_parent].astype('float') + diff = 1 - df[utils.runtime_header_incr_child].astype('float') / df[utils.runtime_header_non_incr_child].astype('float') utils.hist_plot(diff, 0.01, title, "Relative Improvement in s (incremental compared to non-incremental)", 'Number of Commits', os.path.join(outdir, "figure_reldiff_distr_incr.pdf"), cutoffs_incr) # plot reluctant vs. 
basic incremental @@ -62,10 +62,10 @@ def distribution_reldiff_plot(title, result_csv_filename, outdir, cutoffs_incr=N def paper_efficiency_graphs(dir_results, csv_filename, outdir, filterRelCLOC=False, filterDetectedChanges=False): df = utils.get_cleaned_filtered_data(os.path.join(dir_results,csv_filename), filterRelCLOC=filterRelCLOC, filterDetectedChanges=filterDetectedChanges) - diff1 = 1 - df[utils.runtime_header_incr_child].astype('float') / df[utils.runtime_header_parent].astype('float') + diff1 = 1 - df[utils.runtime_header_incr_child].astype('float') / df[utils.runtime_header_non_incr_child].astype('float') diff2 = 1 - df[utils.runtime_header_incr_posts_child].astype('float') / df[utils.runtime_header_incr_child].astype('float') diff3 = 1 - df[utils.runtime_header_incr_posts_rel_child].astype('float') / df[utils.runtime_header_incr_posts_child].astype('float') - diff4 = 1 - df[utils.runtime_header_incr_posts_rel_child].astype('float') / df[utils.runtime_header_parent].astype('float') + diff4 = 1 - df[utils.runtime_header_incr_posts_rel_child].astype('float') / df[utils.runtime_header_non_incr_child].astype('float') step = 0.01 for i, diff in enumerate([diff1,diff2,diff3,diff4]): # output textwidth in latex with @@ -97,9 +97,9 @@ def paper_efficiency_graphs(dir_results, csv_filename, outdir, filterRelCLOC=Fal for e in diff: if (xlimleft and e < xlimleft) or (xlimright and e > xlimright): print("excluded", e, "from efficiency figure", i) - diff1 = df[utils.runtime_header_incr_child].astype('float') / df[utils.runtime_header_parent].astype('float') - diff2 = df[utils.runtime_header_incr_posts_child].astype('float') / df[utils.runtime_header_parent].astype('float') - diff3 = df[utils.runtime_header_incr_posts_rel_child].astype('float') / df[utils.runtime_header_parent].astype('float') + diff1 = df[utils.runtime_header_incr_child].astype('float') / df[utils.runtime_header_non_incr_child].astype('float') + diff2 = 
df[utils.runtime_header_incr_posts_child].astype('float') / df[utils.runtime_header_non_incr_child].astype('float') + diff3 = df[utils.runtime_header_incr_posts_rel_child].astype('float') / df[utils.runtime_header_non_incr_child].astype('float') for n, diff in [("incr", diff1), ("+ incr postsolver", diff2), ("+ reluctant", diff3)]: print("80% quantile for", n, "compared to from-scratch analysis:", diff.quantile(q=0.8) * 100, "%") print("75% quantile for", n, "compared to from-scratch analysis:", diff.quantile(q=0.75) * 100, "%") diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index 4a98d8f7e..79d5e9027 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -34,21 +34,25 @@ solving_prefix = "Solving" header_parent = " for parent commit (non-incremental)" +header_non_incr_child = " for commit (non-incremental)" header_incr_child = " for commit (incremental)" header_incr_posts_child = " for commit (incremental + incr postsolver)" header_incr_posts_rel_child = " for commit (incremental + incr postsolver + reluctant)" runtime_header_parent = runtime_prefix + header_parent +runtime_header_non_incr_child = runtime_prefix + header_non_incr_child runtime_header_incr_child = runtime_prefix + header_incr_child runtime_header_incr_posts_child = runtime_prefix + header_incr_posts_child runtime_header_incr_posts_rel_child = runtime_prefix + header_incr_posts_rel_child analysis_header_parent = analysis_prefix + header_parent +analysis_header_non_incr_child = analysis_prefix + header_non_incr_child analysis_header_incr_child = analysis_prefix + header_incr_child analysis_header_incr_posts_child = analysis_prefix + header_incr_posts_child analysis_header_incr_posts_rel_child = analysis_prefix + header_incr_posts_rel_child solving_header_parent = solving_prefix + header_parent +solving_header_non_incr_child = solving_prefix + header_non_incr_child solving_header_incr_child = 
solving_prefix + header_incr_child solving_header_incr_posts_child = solving_prefix + header_incr_posts_child solving_header_incr_posts_rel_child = solving_prefix + header_incr_posts_rel_child From 7eba011e8904dc59cbfd755c370cf69c6586473c Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Sun, 18 Dec 2022 10:09:38 +0100 Subject: [PATCH 46/84] Add script to run all efficiency scripts. --- .../incremental/benchmarking/run_efficiency.sh | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100755 scripts/incremental/benchmarking/run_efficiency.sh diff --git a/scripts/incremental/benchmarking/run_efficiency.sh b/scripts/incremental/benchmarking/run_efficiency.sh new file mode 100755 index 000000000..e6a284312 --- /dev/null +++ b/scripts/incremental/benchmarking/run_efficiency.sh @@ -0,0 +1,17 @@ +#!/bin/bash +ANALYZER_DIR=$1 + +#Number of cores to be used +NCORES=$2 + +echo "Starting run on figlet" +python3 efficiency.py $ANALYZER_DIR figlet $NCORES +mv result_efficiency result_efficiency_figlet + +echo "Starting run on chrony" +python3 efficiency.py $ANALYZER_DIR chrony $NCORES +mv result_efficiency result_efficiency_chrony + +echo "Starting run on zstd" +python3 efficiency.py $ANALYZER_DIR zstd $NCORES +mv result_efficiency result_efficiency_zstd From d19d7cb4608d8532c3b674eadfc0bce69129a5a9 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Mon, 19 Dec 2022 15:46:12 +0100 Subject: [PATCH 47/84] Change line style for efficiency plots, text for configuration description. 
--- scripts/incremental/benchmarking/utils.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index 79d5e9027..e97cd8846 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -236,11 +236,18 @@ def cummulative_distr_plot(data_sets, base, figure_dir, outfile, figsize=None, t else: plt.figure() min = sys.maxsize + + linestyle_tuple = [ + "solid", + "--", + (0, (10, 1)), # long dash + (0, (3, 1, 1, 1)) # dash dots + ] for d in data_sets: min_d = d["values"].min() if min_d < min: min = min_d - plt.plot(d["values"], base, label=d["label"]) + plt.plot(d["values"], base, linestyle=linestyle_tuple.pop(0), label=d["label"]) plt.xlabel('Number of Commits') if logscale: plt.ylabel('Runtime in s ($log_{2}$ scale)') From fa658ffea912a7e880419fb7d37b2aa53e9c2355 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Tue, 20 Dec 2022 12:15:04 +0100 Subject: [PATCH 48/84] Add run_precision script. 
--- .../incremental/benchmarking/run_precision.sh | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100755 scripts/incremental/benchmarking/run_precision.sh diff --git a/scripts/incremental/benchmarking/run_precision.sh b/scripts/incremental/benchmarking/run_precision.sh new file mode 100755 index 000000000..9e86c8682 --- /dev/null +++ b/scripts/incremental/benchmarking/run_precision.sh @@ -0,0 +1,17 @@ +#!/bin/bash +ANALYZER_DIR=$1 + +#Number of cores to be used +NCORES=$2 + +echo "Starting run on figlet" +python3 precision.py $ANALYZER_DIR figlet $NCORES +mv result_precision result_precision_figlet + +echo "Starting run on zstd" +python3 precision.py $ANALYZER_DIR zstd $NCORES +mv result_precision result_precision_zstd + +echo "Starting run on chrony" +python3 precision.py $ANALYZER_DIR chrony $NCORES +mv result_precision result_precision_chrony From 24d7a9b2f6d5a21efb668bbaab78b22c0630b167 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Wed, 21 Dec 2022 15:20:04 +0100 Subject: [PATCH 49/84] Use chrony configuration for chrony. 
--- scripts/incremental/benchmarking/projects.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/incremental/benchmarking/projects.py b/scripts/incremental/benchmarking/projects.py index f04b63aec..80315ebbe 100644 --- a/scripts/incremental/benchmarking/projects.py +++ b/scripts/incremental/benchmarking/projects.py @@ -71,8 +71,8 @@ def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin url="https://github.com/mlichvar/chrony.git", repo_name="chrony", build_compdb="../build/build_compdb_chrony.sh", - conf_base=os.path.join("custom", "figlet"), - conf_incrpost=os.path.join("custom", "figlet-incrpostsolver"), + conf_base=os.path.join("custom", "chrony"), + conf_incrpost=os.path.join("custom", "chrony-incrpostsolver"), begin=datetime.datetime(2020, 1, 1), to=datetime.datetime(2022, 10, 10), diff_exclude=[], From bfc219a31a99474716f063becf76bb96590e2a04 Mon Sep 17 00:00:00 2001 From: stilscher <66023521+stilscher@users.noreply.github.com> Date: Fri, 23 Dec 2022 16:26:52 +0100 Subject: [PATCH 50/84] reset current time to static value for chrony --- scripts/incremental/build/build_compdb_chrony.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/incremental/build/build_compdb_chrony.sh b/scripts/incremental/build/build_compdb_chrony.sh index 0ba4a563c..f222e093a 100755 --- a/scripts/incremental/build/build_compdb_chrony.sh +++ b/scripts/incremental/build/build_compdb_chrony.sh @@ -4,3 +4,4 @@ git clean -fdx make -j 1 chronyd | tee build.log compiledb --parse build.log # ./configure && bear -- make chronyd +sed -i -E 's/#define NTP_ERA_SPLIT \([0-9]+LL/#define NTP_ERA_SPLIT \(1671796396LL/' config.h From 294cf9a1dc062347d92df0cfc8eece0320420513 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Sun, 25 Dec 2022 17:25:24 +0100 Subject: [PATCH 51/84] Precision: Run figlet and zstd. 
--- scripts/incremental/benchmarking/run_precision.sh | 4 ---- 1 file changed, 4 deletions(-) diff --git a/scripts/incremental/benchmarking/run_precision.sh b/scripts/incremental/benchmarking/run_precision.sh index 9e86c8682..e94a3d580 100755 --- a/scripts/incremental/benchmarking/run_precision.sh +++ b/scripts/incremental/benchmarking/run_precision.sh @@ -11,7 +11,3 @@ mv result_precision result_precision_figlet echo "Starting run on zstd" python3 precision.py $ANALYZER_DIR zstd $NCORES mv result_precision result_precision_zstd - -echo "Starting run on chrony" -python3 precision.py $ANALYZER_DIR chrony $NCORES -mv result_precision result_precision_chrony From 6ad3b2013865e796fb47f4e35500a3f350c4f9ac Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Wed, 28 Dec 2022 15:13:05 +0100 Subject: [PATCH 52/84] Change ymax for precision graphs: 0.3 --- scripts/incremental/benchmarking/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index e97cd8846..a527f57bf 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -325,7 +325,7 @@ def scatter_plot(data, xlabel, ylabel, outfile, size): plt.plot(x,y, marker='x', linewidth=0.4, markersize=1, alpha=0.85, color=colors[i % len(colors)], linestyle=linestyles[i % len(linestyles)]) plt.xlabel(xlabel) plt.ylabel(ylabel) - plt.ylim(bottom=-0.005, top=0.19) + plt.ylim(bottom=-0.005, top=0.3) plt.tight_layout(pad=0.4) plt.savefig(outfile) From 696506f5450a0463b4312367a0dda3de30c99955 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Fri, 30 Dec 2022 12:26:04 +0100 Subject: [PATCH 53/84] Change text for different configurations to enumeration. 
--- scripts/incremental/benchmarking/plot.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index 0df4057ae..da4ef09fb 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -2,10 +2,10 @@ import os import shutil -description_non_incr = "Non-incremental analysis" -description_incr = "Incremental analysis" -description_incr_post = "Incremental analysis with incremental postsolver" -description_incr_rel ="Reluctant incremental analysis with incremental postsolver" +description_non_incr = "(1)" +description_incr = "(2)" +description_incr_post = "(3)" +description_incr_rel ="(4)" def cummulative_distr_compare2(results_dir, result_csv_filename, figure_dir): num_bins = 2000 From e671d9cbcb6c0819dde2f0f03432d511d672816d Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Fri, 30 Dec 2022 17:00:06 +0100 Subject: [PATCH 54/84] Update size of fonts for precision graph. --- scripts/incremental/benchmarking/utils.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index a527f57bf..da2e7642a 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -17,13 +17,13 @@ 'pgf.rcfonts': False, 'text.usetex': True, 'font.family': 'serif', - 'font.size': 9, - 'axes.titlesize': 9, - 'legend.fontsize': 9, - 'figure.titlesize': 9, + 'font.size': 6, + 'axes.titlesize': 6, + 'legend.fontsize': 6, + 'figure.titlesize': 7, 'figure.dpi': 300, - 'xtick.labelsize': 9, - 'ytick.labelsize': 9, + 'xtick.labelsize': 6, + 'ytick.labelsize': 6, }) import matplotlib.pyplot as plt From 6333415e9b5b5157b3b462b673726faa93c97488 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Sat, 31 Dec 2022 18:40:10 +0100 Subject: [PATCH 55/84] Precision plot: Add ticks at every commit. 
--- scripts/incremental/benchmarking/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index da2e7642a..4dddc6c43 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -11,6 +11,8 @@ import numpy as np import brokenaxes import matplotlib as mpl +import matplotlib.ticker as mticker + mpl.use("pgf") mpl.rcParams.update({ "pgf.texsystem": "pdflatex", @@ -325,6 +327,8 @@ def scatter_plot(data, xlabel, ylabel, outfile, size): plt.plot(x,y, marker='x', linewidth=0.4, markersize=1, alpha=0.85, color=colors[i % len(colors)], linestyle=linestyles[i % len(linestyles)]) plt.xlabel(xlabel) plt.ylabel(ylabel) + plt.gca().xaxis.set_major_locator(mticker.MultipleLocator(1)) + # plt.xticks([1,2,5,10,15]) plt.ylim(bottom=-0.005, top=0.3) plt.tight_layout(pad=0.4) plt.savefig(outfile) From b3838d378b0d0919452860597e65b7c01ebca962 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 15 Jun 2023 10:54:43 +0200 Subject: [PATCH 56/84] Move confs for figlet, chrony, zstd into bench repo, adapt scripts for it. 
--- .../conf/chrony-incrpostsolver.json | 114 ++++++++++++++++ .../incremental/benchmarking/conf/chrony.json | 114 ++++++++++++++++ .../conf/figlet-incrpostsolver.json | 103 +++++++++++++++ .../incremental/benchmarking/conf/figlet.json | 103 +++++++++++++++ .../conf/zstd-race-incrpostsolver.json | 122 ++++++++++++++++++ .../benchmarking/conf/zstd-race.json | 122 ++++++++++++++++++ .../incremental/benchmarking/efficiency.py | 2 + scripts/incremental/benchmarking/projects.py | 16 +-- scripts/incremental/benchmarking/utils.py | 6 +- 9 files changed, 691 insertions(+), 11 deletions(-) create mode 100644 scripts/incremental/benchmarking/conf/chrony-incrpostsolver.json create mode 100644 scripts/incremental/benchmarking/conf/chrony.json create mode 100644 scripts/incremental/benchmarking/conf/figlet-incrpostsolver.json create mode 100644 scripts/incremental/benchmarking/conf/figlet.json create mode 100644 scripts/incremental/benchmarking/conf/zstd-race-incrpostsolver.json create mode 100644 scripts/incremental/benchmarking/conf/zstd-race.json diff --git a/scripts/incremental/benchmarking/conf/chrony-incrpostsolver.json b/scripts/incremental/benchmarking/conf/chrony-incrpostsolver.json new file mode 100644 index 000000000..8a97510dd --- /dev/null +++ b/scripts/incremental/benchmarking/conf/chrony-incrpostsolver.json @@ -0,0 +1,114 @@ +{ + "ana": { + "activated": [ + "expRelation", + "base", + "threadid", + "threadflag", + "threadreturn", + "escape", + "mutexEvents", + "mutex", + "access", + "mallocWrapper", + "mhp", + "symb_locks", + "var_eq", + "mallocFresh", + "race" + ], + "ctx_insens": [ + "var_eq" + ], + "base": { + "privatization": "none", + "context": { + "non-ptr": false + } + }, + "thread": { + "domain": "plain", + "include-node": false + }, + "race": { + "free": false + }, + "dead-code": { + "lines": true + }, + "int": { + "interval": true, + "def_exc": true + }, + "malloc": { + "wrappers": [ + "Malloc", + "Realloc", + "Malloc2", + "Realloc2", + 
"ARR_CreateInstance", + "realloc_array", + "ARR_GetNewElement" + ] + } + }, + "sem": { + "unknown_function": { + "spawn": false, + "invalidate": { + "globals": false, + "args": false + } + } + }, + "solvers": { + "td3": { + "restart": { + "wpoint": { + "enabled": false + } + } + } + }, + "exp": { + "earlyglobs": true + }, + "cil": { + "merge": { + "inlines": false + } + }, + "dbg": { + "timing": { + "enabled": true + } + }, + "warn": { + "assert": false, + "behavior": false, + "integer": false, + "cast": false, + "race": true, + "deadcode": true, + "analyzer": false, + "unsound": true, + "imprecise": false, + "unknown": false, + "error": false, + "warning": true, + "info": false, + "debug": false, + "success": true + }, + "incremental": { + "postsolver": { + "enabled": true + }, + "restart": { + "sided": { + "enabled": false + }, + "write-only": true + } + } +} \ No newline at end of file diff --git a/scripts/incremental/benchmarking/conf/chrony.json b/scripts/incremental/benchmarking/conf/chrony.json new file mode 100644 index 000000000..a2fe392e4 --- /dev/null +++ b/scripts/incremental/benchmarking/conf/chrony.json @@ -0,0 +1,114 @@ +{ + "ana": { + "activated": [ + "expRelation", + "base", + "threadid", + "threadflag", + "threadreturn", + "escape", + "mutexEvents", + "mutex", + "access", + "mallocWrapper", + "mhp", + "symb_locks", + "var_eq", + "mallocFresh", + "race" + ], + "ctx_insens": [ + "var_eq" + ], + "base": { + "privatization": "none", + "context": { + "non-ptr": false + } + }, + "thread": { + "domain": "plain", + "include-node": false + }, + "race": { + "free": false + }, + "dead-code": { + "lines": true + }, + "int": { + "interval": true, + "def_exc": true + }, + "malloc": { + "wrappers": [ + "Malloc", + "Realloc", + "Malloc2", + "Realloc2", + "ARR_CreateInstance", + "realloc_array", + "ARR_GetNewElement" + ] + } + }, + "sem": { + "unknown_function": { + "spawn": false, + "invalidate": { + "globals": false, + "args": false + } + } + }, + "solvers": { + 
"td3": { + "restart": { + "wpoint": { + "enabled": false + } + } + } + }, + "exp": { + "earlyglobs": true + }, + "cil": { + "merge": { + "inlines": false + } + }, + "dbg": { + "timing": { + "enabled": true + } + }, + "warn": { + "assert": false, + "behavior": false, + "integer": false, + "cast": false, + "race": true, + "deadcode": true, + "analyzer": false, + "unsound": true, + "imprecise": false, + "unknown": false, + "error": false, + "warning": true, + "info": false, + "debug": false, + "success": true + }, + "incremental": { + "postsolver": { + "enabled": false + }, + "restart": { + "sided": { + "enabled": false + }, + "write-only": true + } + } +} \ No newline at end of file diff --git a/scripts/incremental/benchmarking/conf/figlet-incrpostsolver.json b/scripts/incremental/benchmarking/conf/figlet-incrpostsolver.json new file mode 100644 index 000000000..46ad26fce --- /dev/null +++ b/scripts/incremental/benchmarking/conf/figlet-incrpostsolver.json @@ -0,0 +1,103 @@ +{ + "ana": { + "activated": [ + "expRelation", + "base", + "threadid", + "threadflag", + "threadreturn", + "escape", + "mutexEvents", + "mutex", + "access", + "mallocWrapper", + "mhp", + "symb_locks", + "var_eq", + "mallocFresh", + "race" + ], + "ctx_insens": [ + "var_eq" + ], + "base": { + "privatization": "none", + "context": { + "non-ptr": false + } + }, + "thread": { + "domain": "plain", + "include-node": false + }, + "race": { + "free": false + }, + "dead-code": { + "lines": true + }, + "int": { + "interval": true, + "def_exc": true + } + }, + "sem": { + "unknown_function": { + "spawn": false, + "invalidate": { + "globals": false, + "args": false + } + } + }, + "solvers": { + "td3": { + "restart": { + "wpoint": { + "enabled": false + } + } + } + }, + "exp": { + "earlyglobs": true + }, + "cil": { + "merge": { + "inlines": false + } + }, + "dbg": { + "timing": { + "enabled": true + } + }, + "warn": { + "assert": false, + "behavior": false, + "integer": false, + "cast": false, + "race": true, + 
"deadcode": true, + "analyzer": false, + "unsound": true, + "imprecise": false, + "unknown": false, + "error": false, + "warning": true, + "info": false, + "debug": false, + "success": true + }, + "incremental": { + "postsolver": { + "enabled": true + }, + "restart": { + "sided": { + "enabled": false + }, + "write-only": true + } + } +} \ No newline at end of file diff --git a/scripts/incremental/benchmarking/conf/figlet.json b/scripts/incremental/benchmarking/conf/figlet.json new file mode 100644 index 000000000..3e80b8ffe --- /dev/null +++ b/scripts/incremental/benchmarking/conf/figlet.json @@ -0,0 +1,103 @@ +{ + "ana": { + "activated": [ + "expRelation", + "base", + "threadid", + "threadflag", + "threadreturn", + "escape", + "mutexEvents", + "mutex", + "access", + "mallocWrapper", + "mhp", + "symb_locks", + "var_eq", + "mallocFresh", + "race" + ], + "ctx_insens": [ + "var_eq" + ], + "base": { + "privatization": "none", + "context": { + "non-ptr": false + } + }, + "thread": { + "domain": "plain", + "include-node": false + }, + "race": { + "free": false + }, + "dead-code": { + "lines": true + }, + "int": { + "interval": true, + "def_exc": true + } + }, + "sem": { + "unknown_function": { + "spawn": false, + "invalidate": { + "globals": false, + "args": false + } + } + }, + "solvers": { + "td3": { + "restart": { + "wpoint": { + "enabled": false + } + } + } + }, + "exp": { + "earlyglobs": true + }, + "cil": { + "merge": { + "inlines": false + } + }, + "dbg": { + "timing": { + "enabled": true + } + }, + "warn": { + "assert": false, + "behavior": false, + "integer": false, + "cast": false, + "race": true, + "deadcode": true, + "analyzer": false, + "unsound": true, + "imprecise": false, + "unknown": false, + "error": false, + "warning": true, + "info": false, + "debug": false, + "success": true + }, + "incremental": { + "postsolver": { + "enabled": false + }, + "restart": { + "sided": { + "enabled": false + }, + "write-only": true + } + } +} \ No newline at end of file 
diff --git a/scripts/incremental/benchmarking/conf/zstd-race-incrpostsolver.json b/scripts/incremental/benchmarking/conf/zstd-race-incrpostsolver.json new file mode 100644 index 000000000..dbe858b98 --- /dev/null +++ b/scripts/incremental/benchmarking/conf/zstd-race-incrpostsolver.json @@ -0,0 +1,122 @@ +{ + "ana": { + "activated": [ + "expRelation", + "base", + "threadid", + "threadflag", + "threadreturn", + "escape", + "mutexEvents", + "mutex", + "access", + "mallocWrapper", + "mhp", + "symb_locks", + "var_eq", + "mallocFresh", + "race" + ], + "ctx_insens": [ + "var_eq" + ], + "base": { + "privatization": "none", + "context": { + "non-ptr": false + } + }, + "thread": { + "domain": "plain", + "include-node": false + }, + "malloc": { + "wrappers": [ + "ZSTD_customMalloc", + "ZSTD_customCalloc" + ] + }, + "race": { + "free": false + }, + "dead-code": { + "lines": true + }, + "int": { + "interval": true, + "def_exc": true + } + }, + "sem": { + "unknown_function": { + "spawn": false, + "invalidate": { + "globals": false, + "args": false + } + } + }, + "solvers": { + "td3": { + "restart": { + "wpoint": { + "enabled": false + } + } + } + }, + "exp": { + "earlyglobs": true, + "extraspecials": [ + "ZSTD_customMalloc", + "ZSTD_customCalloc", + "ZSTD_customFree" + ] + }, + "pre": { + "cppflags": [ + "-DZSTD_NO_INTRINSICS", + "-D_FORTIFY_SOURCE=0", + "-DGOBLINT_NO_ASSERT", + "-DGOBLINT_NO_BSEARCH" + ] + }, + "cil": { + "merge": { + "inlines": false + } + }, + "dbg": { + "timing": { + "enabled": true + } + }, + "warn": { + "assert": false, + "behavior": false, + "integer": false, + "cast": false, + "race": true, + "deadcode": true, + "analyzer": false, + "unsound": true, + "imprecise": false, + "unknown": false, + "error": false, + "warning": true, + "info": false, + "debug": false, + "success": true + }, + "incremental": { + "postsolver": { + "enabled": true + }, + "restart": { + "sided": { + "enabled": false + }, + "write-only": true + } + } +} \ No newline at end of file 
diff --git a/scripts/incremental/benchmarking/conf/zstd-race.json b/scripts/incremental/benchmarking/conf/zstd-race.json new file mode 100644 index 000000000..b3c4a49ac --- /dev/null +++ b/scripts/incremental/benchmarking/conf/zstd-race.json @@ -0,0 +1,122 @@ +{ + "ana": { + "activated": [ + "expRelation", + "base", + "threadid", + "threadflag", + "threadreturn", + "escape", + "mutexEvents", + "mutex", + "access", + "mallocWrapper", + "mhp", + "symb_locks", + "var_eq", + "mallocFresh", + "race" + ], + "ctx_insens": [ + "var_eq" + ], + "base": { + "privatization": "none", + "context": { + "non-ptr": false + } + }, + "thread": { + "domain": "plain", + "include-node": false + }, + "malloc": { + "wrappers": [ + "ZSTD_customMalloc", + "ZSTD_customCalloc" + ] + }, + "race": { + "free": false + }, + "dead-code": { + "lines": true + }, + "int": { + "interval": true, + "def_exc": true + } + }, + "sem": { + "unknown_function": { + "spawn": false, + "invalidate": { + "globals": false, + "args": false + } + } + }, + "solvers": { + "td3": { + "restart": { + "wpoint": { + "enabled": false + } + } + } + }, + "exp": { + "earlyglobs": true, + "extraspecials": [ + "ZSTD_customMalloc", + "ZSTD_customCalloc", + "ZSTD_customFree" + ] + }, + "pre": { + "cppflags": [ + "-DZSTD_NO_INTRINSICS", + "-D_FORTIFY_SOURCE=0", + "-DGOBLINT_NO_ASSERT", + "-DGOBLINT_NO_BSEARCH" + ] + }, + "cil": { + "merge": { + "inlines": false + } + }, + "dbg": { + "timing": { + "enabled": true + } + }, + "warn": { + "assert": false, + "behavior": false, + "integer": false, + "cast": false, + "race": true, + "deadcode": true, + "analyzer": false, + "unsound": true, + "imprecise": false, + "unknown": false, + "error": false, + "warning": true, + "info": false, + "debug": false, + "success": true + }, + "incremental": { + "postsolver": { + "enabled": false + }, + "restart": { + "sided": { + "enabled": false + }, + "write-only": true + } + } +} \ No newline at end of file diff --git 
a/scripts/incremental/benchmarking/efficiency.py b/scripts/incremental/benchmarking/efficiency.py index fea267198..16e3b3bcc 100644 --- a/scripts/incremental/benchmarking/efficiency.py +++ b/scripts/incremental/benchmarking/efficiency.py @@ -36,7 +36,9 @@ repo_name = project.repo_name build_compdb = project.build_compdb conf_base = project.conf_base +conf_base = os.path.join(os.getcwd(), conf_base + ".json") conf_incrpost = project.conf_incrpost +conf_incrpost = os.path.join(os.getcwd(), conf_incrpost + ".json") begin = project.begin to = project.to files = project.files diff --git a/scripts/incremental/benchmarking/projects.py b/scripts/incremental/benchmarking/projects.py index 80315ebbe..ba2125cd5 100644 --- a/scripts/incremental/benchmarking/projects.py +++ b/scripts/incremental/benchmarking/projects.py @@ -30,8 +30,8 @@ def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin url = "https://github.com/sqlite/sqlite", repo_name = "sqlite", build_compdb = "../build/build_compdb_sqlite.sh", - conf_base = os.path.join("custom", "sqlite-minimal"), # very minimal: "zstd-minimal" - conf_incrpost = os.path.join("custom", "sqlite-minimal-incrpostsolver"), + conf_base = os.path.join("conf", "sqlite-minimal"), # very minimal: "zstd-minimal" + conf_incrpost = os.path.join("conf", "sqlite-minimal-incrpostsolver"), begin = datetime.datetime(2021,8,1), to = datetime.datetime(2021,8,10), # minimal subset: datetime(2021,8,4) diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"], @@ -43,8 +43,8 @@ def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin url = "https://github.com/facebook/zstd", repo_name = "zstd", build_compdb = "../build/build_compdb_zstd.sh", - conf_base = os.path.join("custom", "zstd-race"), # very minimal: "zstd-minimal" - conf_incrpost = os.path.join("custom", "zstd-race-incrpostsolver"), + conf_base = os.path.join("conf", "zstd-race"), # very minimal: "zstd-minimal" + conf_incrpost = 
os.path.join("conf", "zstd-race-incrpostsolver"), begin = datetime.datetime(2021,8,1), # very minimal: "zstd-minimal" to = datetime.datetime(2022,2,1), # minimal subset: datetime(2021,8,4) diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"], @@ -56,8 +56,8 @@ def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin url = "https://github.com/cmatsuoka/figlet", repo_name = "figlet", build_compdb = None, - conf_base = os.path.join("custom", "figlet"), - conf_incrpost = os.path.join("custom", "figlet-incrpostsolver"), + conf_base = os.path.join("conf", "figlet"), + conf_incrpost = os.path.join("conf", "figlet-incrpostsolver"), begin = datetime.datetime(2010,1,1), to = datetime.datetime(2022,10,10), diff_exclude = [], @@ -71,8 +71,8 @@ def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin url="https://github.com/mlichvar/chrony.git", repo_name="chrony", build_compdb="../build/build_compdb_chrony.sh", - conf_base=os.path.join("custom", "chrony"), - conf_incrpost=os.path.join("custom", "chrony-incrpostsolver"), + conf_base=os.path.join("conf", "chrony"), + conf_incrpost=os.path.join("conf", "chrony-incrpostsolver"), begin=datetime.datetime(2020, 1, 1), to=datetime.datetime(2022, 10, 10), diff_exclude=[], diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index 4dddc6c43..b2c8a441a 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -70,7 +70,7 @@ def reset_incremental_data(incr_data_dir): def analyze_commit(analyzer_dir, gr : Git, repo_path, build_compdb, commit_hash, outdir, conf, extra_options, files): gr.checkout(commit_hash) - conf_path = os.path.join(analyzer_dir, 'conf', conf + '.json') + conf_path = conf # Creat the analyze command file_list = [] @@ -79,7 +79,7 @@ def append_to_repo_path(file): return os.path.join(repo_path, file) file_list = list(map(append_to_repo_path, files)) - 
analyze_command = [os.path.join(analyzer_dir, 'goblint'), '--conf', os.path.join(analyzer_dir, 'conf', conf + '.json'), *file_list, *extra_options] + analyze_command = [os.path.join(analyzer_dir, 'goblint'), '--conf', conf_path, *file_list, *extra_options] # If the list of files was empty, we pass the repo_path to goblint if not files: analyze_command.append(repo_path) @@ -117,7 +117,7 @@ def append_to_repo_path(file): outfile.close() def compare_runs(analyzer_dir, dummy_c_file, outdir, conf, compare_data_1, compare_data_2): - options = ['--conf', os.path.join(analyzer_dir, 'conf', conf + '.json'), '--disable', 'warn.warning', '--disable', 'warn.race', '--disable', 'dbg.compare_runs.diff', '--disable', 'dbg.compare_runs.eqsys', '--enable', 'dbg.compare_runs.node', '--compare_runs', compare_data_1, compare_data_2] + options = ['--conf', conf, '--disable', 'warn.warning', '--disable', 'warn.race', '--disable', 'dbg.compare_runs.diff', '--disable', 'dbg.compare_runs.eqsys', '--enable', 'dbg.compare_runs.node', '--compare_runs', compare_data_1, compare_data_2] analyze_command = [os.path.join(analyzer_dir, 'goblint'), *options, dummy_c_file] with open(os.path.join(outdir, comparelog), "w+") as outfile: subprocess.run(analyze_command, check=True, stdout=outfile, stderr=subprocess.STDOUT) From e3db195806c34790de9884f9ee83e26056165b53 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 15 Jun 2023 10:57:36 +0200 Subject: [PATCH 57/84] Relax constraint on numpy, to make constraints satisfiable, add compiledb as requirement. 
--- scripts/incremental/benchmarking/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/incremental/benchmarking/requirements.txt b/scripts/incremental/benchmarking/requirements.txt index 7d705bd6a..29e5c8733 100644 --- a/scripts/incremental/benchmarking/requirements.txt +++ b/scripts/incremental/benchmarking/requirements.txt @@ -1,7 +1,8 @@ brokenaxes==0.5.0 matplotlib==3.5.1 -numpy==1.19.5 +numpy>=1.19.5 pandas==1.4.1 psutil==5.9.0 PyDriller==2.1 pytz==2021.1 +compiledb>=0.10.1 From a6d27d9b59029c6f20e1d39b309e6b01d74cf0f0 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 15 Jun 2023 18:05:35 +0200 Subject: [PATCH 58/84] Refactor collect_data to reduce redundancy. --- .../incremental/benchmarking/efficiency.py | 49 +++++++++---------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/scripts/incremental/benchmarking/efficiency.py b/scripts/incremental/benchmarking/efficiency.py index 16e3b3bcc..1cf0f824e 100644 --- a/scripts/incremental/benchmarking/efficiency.py +++ b/scripts/incremental/benchmarking/efficiency.py @@ -199,31 +199,30 @@ def collect_data(outdir): data["Change in number of race warnings"].append(0) continue - parent_info = utils.extract_from_analyzer_log(parent_log) - child_non_incr_info = utils.extract_from_analyzer_log(child_non_incr_log) - child_info = utils.extract_from_analyzer_log(child_log) - child_posts_info = utils.extract_from_analyzer_log(child_posts_log) - child_posts_rel_info = utils.extract_from_analyzer_log(child_posts_rel_log) - data["Changed/Added/Removed functions"].append(int(child_info["changed"]) + int(child_info["added"]) + int(child_info["removed"])) - data[utils.runtime_header_parent].append(float(parent_info["runtime"])) - data[utils.runtime_header_non_incr_child].append(float(child_non_incr_info["runtime"])) - data[utils.runtime_header_incr_child].append(float(child_info["runtime"])) - 
data[utils.runtime_header_incr_posts_child].append(float(child_posts_info["runtime"])) - data[utils.runtime_header_incr_posts_rel_child].append(float(child_posts_rel_info["runtime"])) - - data[utils.analysis_header_parent].append(float(parent_info["analysis_time"])) - data[utils.analysis_header_non_incr_child].append(float(child_non_incr_info["analysis_time"])) - data[utils.analysis_header_incr_child].append(float(child_info["analysis_time"])) - data[utils.analysis_header_incr_posts_child].append(float(child_posts_info["analysis_time"])) - data[utils.analysis_header_incr_posts_rel_child].append(float(child_posts_rel_info["analysis_time"])) - - data[utils.solving_header_parent].append(float(parent_info["solving_time"])) - data[utils.solving_header_non_incr_child].append(float(child_non_incr_info["solving_time"])) - data[utils.solving_header_incr_child].append(float(child_info["solving_time"])) - data[utils.solving_header_incr_posts_child].append(float(child_posts_info["solving_time"])) - data[utils.solving_header_incr_posts_rel_child].append(float(child_posts_rel_info["solving_time"])) - - data["Change in number of race warnings"].append(int(child_info["race_warnings"] - int(parent_info["race_warnings"]))) + logs = [parent_log, child_non_incr_log, child_log, child_posts_log, child_posts_rel_log] + headers = [utils.header_parent, utils.header_non_incr_child, utils.header_incr_child, utils.header_incr_posts_child, utils.header_incr_posts_rel_child] + infos = list(map(utils.extract_from_analyzer_log, logs)) + + data["Changed/Added/Removed functions"].append(int(infos[1]["changed"]) + int(infos[1]["added"]) + int(infos[1]["removed"])) + + field_prefixes = [utils.runtime_prefix, utils.analysis_prefix, utils.solving_prefix] + field_indexes = ["runtime", "analysis_time", "solving_time"] + + for field in range(field_indexes.__len__()): + header_prefix = field_prefixes[field] + field_index = field_indexes[field] + for config in range(logs.__len__()): + header = header_prefix 
+ headers[config] + info = infos[config] + data[header].append(float(info[field_index])) + + parent_index = 0 + parent_info = infos[parent_index] + + child_non_incr_index = 2 + child_non_incr_info = infos[child_non_incr_index] + + data["Change in number of race warnings"].append(int(child_non_incr_info["race_warnings"] - int(parent_info["race_warnings"]))) return data def runperprocess(core, from_c, to_c): From 670cf3e13f29cdd39b561d0d37ec6b258c6a25dc Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 15 Jun 2023 18:18:54 +0200 Subject: [PATCH 59/84] Refactor collect_data further to reduce redundancy. --- .../incremental/benchmarking/efficiency.py | 35 +++++++------------ 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/scripts/incremental/benchmarking/efficiency.py b/scripts/incremental/benchmarking/efficiency.py index 1cf0f824e..2663cf2b7 100644 --- a/scripts/incremental/benchmarking/efficiency.py +++ b/scripts/incremental/benchmarking/efficiency.py @@ -178,41 +178,32 @@ def collect_data(outdir): data["Relevant changed LOC"].append(commit_prop["relCLOC"]) data["Failed?"].append(commit_prop["failed"]) data["Commit"].append(commit_prop["hash"][:7]) + + config_headers = [utils.header_parent, utils.header_non_incr_child, utils.header_incr_child, utils.header_incr_posts_child, utils.header_incr_posts_rel_child] + field_prefixes = [utils.runtime_prefix, utils.analysis_prefix, utils.solving_prefix] + field_indexes = ["runtime", "analysis_time", "solving_time"] + if commit_prop["failed"] == True: - data[utils.runtime_header_parent].append(0) - data[utils.runtime_header_non_incr_child].append(0) - data[utils.runtime_header_incr_child].append(0) - data[utils.runtime_header_incr_posts_child].append(0) - data[utils.runtime_header_incr_posts_rel_child].append(0) - data[utils.analysis_header_parent].append(0) - data[utils.analysis_header_non_incr_child].append(0) - data[utils.analysis_header_incr_child].append(0) - 
data[utils.analysis_header_incr_posts_child].append(0) - data[utils.analysis_header_incr_posts_rel_child].append(0) - data[utils.solving_header_parent].append(0) - data[utils.solving_header_non_incr_child].append(0) - data[utils.solving_header_incr_child].append(0) - data[utils.solving_header_incr_posts_child].append(0) - data[utils.solving_header_incr_posts_rel_child].append(0) + for field in range(field_indexes.__len__()): + header_prefix = field_prefixes[field] + field_index = field_indexes[field] + for config in range(config_headers.__len__()): + header = header_prefix + config_headers[config] + data[header].append(float(0)) data["Changed/Added/Removed functions"].append(0) data["Change in number of race warnings"].append(0) continue logs = [parent_log, child_non_incr_log, child_log, child_posts_log, child_posts_rel_log] - headers = [utils.header_parent, utils.header_non_incr_child, utils.header_incr_child, utils.header_incr_posts_child, utils.header_incr_posts_rel_child] infos = list(map(utils.extract_from_analyzer_log, logs)) - data["Changed/Added/Removed functions"].append(int(infos[1]["changed"]) + int(infos[1]["added"]) + int(infos[1]["removed"])) - field_prefixes = [utils.runtime_prefix, utils.analysis_prefix, utils.solving_prefix] - field_indexes = ["runtime", "analysis_time", "solving_time"] - for field in range(field_indexes.__len__()): header_prefix = field_prefixes[field] field_index = field_indexes[field] - for config in range(logs.__len__()): - header = header_prefix + headers[config] + for config in range(config_headers.__len__()): + header = header_prefix + config_headers[config] info = infos[config] data[header].append(float(info[field_index])) From 376a17603db3d274458f63c502badf485b6cdfd3 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Fri, 16 Jun 2023 13:38:00 +0200 Subject: [PATCH 60/84] Collec changed/added/removed functions from child (incremental) config. 
--- .../incremental/benchmarking/efficiency.py | 27 ++++++++++--------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/scripts/incremental/benchmarking/efficiency.py b/scripts/incremental/benchmarking/efficiency.py index 2663cf2b7..25581ab26 100644 --- a/scripts/incremental/benchmarking/efficiency.py +++ b/scripts/incremental/benchmarking/efficiency.py @@ -157,14 +157,15 @@ def analyze_small_commits_in_repo(cwd, outdir, from_c, to_c): print("Skipped: ", count_skipped) def collect_data(outdir): - data = {"Commit": [], "Failed?": [], "Changed LOC": [], "Relevant changed LOC": [], "Changed/Added/Removed functions": [], - utils.runtime_header_parent: [], utils.runtime_header_non_incr_child: [], utils.runtime_header_incr_child: [], - utils.runtime_header_incr_posts_child: [], utils.runtime_header_incr_posts_rel_child: [], - utils.analysis_header_parent: [], utils.analysis_header_non_incr_child: [], utils.analysis_header_incr_child: [], - utils.analysis_header_incr_posts_child: [], utils.analysis_header_incr_posts_rel_child: [], - utils.solving_header_parent: [], utils.solving_header_non_incr_child: [], utils.solving_header_incr_child: [], - utils.solving_header_incr_posts_child: [], utils.solving_header_incr_posts_rel_child: [], - "Change in number of race warnings": []} + data = {"Commit": [], "Failed?": [], "Changed LOC": [], "Relevant changed LOC": [], "Changed/Added/Removed functions": [], "Change in number of race warnings": []} + + config_headers = [utils.header_parent, utils.header_non_incr_child, utils.header_incr_child, utils.header_incr_posts_child, utils.header_incr_posts_rel_child] + field_prefixes = [utils.runtime_prefix, utils.analysis_prefix, utils.solving_prefix] + + for prefix in field_prefixes: + for config in config_headers: + data[prefix + config] = [] + for t in os.listdir(outdir): parent_log = os.path.join(outdir, t, 'parent', utils.analyzerlog) child_non_incr_log = os.path.join(outdir, t, 'child-non-incr', utils.analyzerlog) @@ 
-179,8 +180,6 @@ def collect_data(outdir): data["Failed?"].append(commit_prop["failed"]) data["Commit"].append(commit_prop["hash"][:7]) - config_headers = [utils.header_parent, utils.header_non_incr_child, utils.header_incr_child, utils.header_incr_posts_child, utils.header_incr_posts_rel_child] - field_prefixes = [utils.runtime_prefix, utils.analysis_prefix, utils.solving_prefix] field_indexes = ["runtime", "analysis_time", "solving_time"] if commit_prop["failed"] == True: @@ -197,7 +196,11 @@ def collect_data(outdir): logs = [parent_log, child_non_incr_log, child_log, child_posts_log, child_posts_rel_log] infos = list(map(utils.extract_from_analyzer_log, logs)) - data["Changed/Added/Removed functions"].append(int(infos[1]["changed"]) + int(infos[1]["added"]) + int(infos[1]["removed"])) + + + child_incr_index = 2 + child_incr_info = infos[child_incr_index] + data["Changed/Added/Removed functions"].append(int(child_incr_info["changed"]) + int(child_incr_info["added"]) + int(child_incr_info["removed"])) for field in range(field_indexes.__len__()): header_prefix = field_prefixes[field] @@ -210,7 +213,7 @@ def collect_data(outdir): parent_index = 0 parent_info = infos[parent_index] - child_non_incr_index = 2 + child_non_incr_index = 1 child_non_incr_info = infos[child_non_incr_index] data["Change in number of race warnings"].append(int(child_non_incr_info["race_warnings"] - int(parent_info["race_warnings"]))) From 9b67c0774c17eb690c8f7c9f8b77701b067a17b7 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Fri, 16 Jun 2023 13:38:27 +0200 Subject: [PATCH 61/84] Adapt match for changed/added/removed functions for changed goblint output. 
--- scripts/incremental/benchmarking/utils.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index b2c8a441a..4a4bce3f3 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -151,11 +151,13 @@ def find_line(pattern, log): def extract_from_analyzer_log(log): # First comes the cpu time (which is ignored); we look at the walltime. - runtime_pattern = 'Default[ ]+[0-9\.]+s[ ]+(?P[0-9\.]+)s' - analysis_time_pattern = 'analysis[ ]+[0-9\.]+s[ ]+(?P[0-9\.]+)s' - solving_time_pattern = 'solving[ ]+[0-9\.]+s[ ]+(?P[0-9\.]+)s' - change_info_pattern = 'change_info = { unchanged = (?P[0-9]*); changed = (?P[0-9]*); added = (?P[0-9]*); removed = (?P[0-9]*) }' + runtime_pattern = 'Default[ ]+(?P[0-9\.]+)s[ ]+(?P[0-9\.]+)s' + analysis_time_pattern = 'analysis[ ]+(?P[0-9\.]+)s[ ]+(?P[0-9\.]+)s' + solving_time_pattern = 'solving[ ]+(?P[0-9\.]+)s[ ]+(?P[0-9\.]+)s' + change_info_pattern = 'change_info = { unchanged = (?P[0-9]*); changed = (?P[0-9]*) \\(with unchangedHeader = (?P[0-9]*)\\); added = (?P[0-9]*); removed = (?P[0-9]*) }' + runtime = find_line(runtime_pattern, log) + analysis_time = find_line(analysis_time_pattern, log) solving_time = find_line(solving_time_pattern, log) ch = find_line(change_info_pattern, log) or {"unchanged": 0, "changed": 0, "added": 0, "removed": 0} From 4f24c8993f4d6d3f8afcda5b755c747302f4a6dd Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Fri, 16 Jun 2023 16:07:12 +0200 Subject: [PATCH 62/84] Extract CPU times for total runtimes, analysis and solving times. 
--- scripts/incremental/benchmarking/efficiency.py | 9 ++++++++- scripts/incremental/benchmarking/utils.py | 6 +++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/scripts/incremental/benchmarking/efficiency.py b/scripts/incremental/benchmarking/efficiency.py index 25581ab26..5060c2cc5 100644 --- a/scripts/incremental/benchmarking/efficiency.py +++ b/scripts/incremental/benchmarking/efficiency.py @@ -156,11 +156,19 @@ def analyze_small_commits_in_repo(cwd, outdir, from_c, to_c): print("Failed: ", count_failed) print("Skipped: ", count_skipped) +def add_version_with_cpu_suffix(strings): + string_with_cpu_suffix = list(map (lambda prefix : "CPU_" + prefix , strings)) + return strings + string_with_cpu_suffix + def collect_data(outdir): data = {"Commit": [], "Failed?": [], "Changed LOC": [], "Relevant changed LOC": [], "Changed/Added/Removed functions": [], "Change in number of race warnings": []} config_headers = [utils.header_parent, utils.header_non_incr_child, utils.header_incr_child, utils.header_incr_posts_child, utils.header_incr_posts_rel_child] field_prefixes = [utils.runtime_prefix, utils.analysis_prefix, utils.solving_prefix] + field_indexes = ["runtime", "analysis_time", "solving_time"] + + field_prefixes = add_version_with_cpu_suffix(field_prefixes) + field_indexes = add_version_with_cpu_suffix(field_indexes) for prefix in field_prefixes: for config in config_headers: @@ -180,7 +188,6 @@ def collect_data(outdir): data["Failed?"].append(commit_prop["failed"]) data["Commit"].append(commit_prop["hash"][:7]) - field_indexes = ["runtime", "analysis_time", "solving_time"] if commit_prop["failed"] == True: for field in range(field_indexes.__len__()): diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index 4a4bce3f3..da5898922 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -151,9 +151,9 @@ def find_line(pattern, log): def 
extract_from_analyzer_log(log): # First comes the cpu time (which is ignored); we look at the walltime. - runtime_pattern = 'Default[ ]+(?P[0-9\.]+)s[ ]+(?P[0-9\.]+)s' - analysis_time_pattern = 'analysis[ ]+(?P[0-9\.]+)s[ ]+(?P[0-9\.]+)s' - solving_time_pattern = 'solving[ ]+(?P[0-9\.]+)s[ ]+(?P[0-9\.]+)s' + runtime_pattern = 'Default[ ]+(?P[0-9\.]+)s[ ]+(?P[0-9\.]+)s' + analysis_time_pattern = 'analysis[ ]+(?P[0-9\.]+)s[ ]+(?P[0-9\.]+)s' + solving_time_pattern = 'solving[ ]+(?P[0-9\.]+)s[ ]+(?P[0-9\.]+)s' change_info_pattern = 'change_info = { unchanged = (?P[0-9]*); changed = (?P[0-9]*) \\(with unchangedHeader = (?P[0-9]*)\\); added = (?P[0-9]*); removed = (?P[0-9]*) }' runtime = find_line(runtime_pattern, log) From e4dc7feda4558397ef121d783f873db7f44e9f7e Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Fri, 16 Jun 2023 17:05:21 +0200 Subject: [PATCH 63/84] Collect_data: Check whether result directory exists before iterating over it. This avoids related errors in the command prompt. 
--- scripts/incremental/benchmarking/efficiency.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/incremental/benchmarking/efficiency.py b/scripts/incremental/benchmarking/efficiency.py index 5060c2cc5..71424d853 100644 --- a/scripts/incremental/benchmarking/efficiency.py +++ b/scripts/incremental/benchmarking/efficiency.py @@ -174,6 +174,9 @@ def collect_data(outdir): for config in config_headers: data[prefix + config] = [] + if not os.path.exists(outdir): + return + for t in os.listdir(outdir): parent_log = os.path.join(outdir, t, 'parent', utils.analyzerlog) child_non_incr_log = os.path.join(outdir, t, 'child-non-incr', utils.analyzerlog) @@ -236,6 +239,7 @@ def runperprocess(core, from_c, to_c): shutil.rmtree(outdir) analyze_small_commits_in_repo(cwd, outdir, from_c, to_c) data_set = collect_data(outdir) + df = pd.DataFrame(data_set) #df.sort_index(inplace=True, key=lambda idx: idx.map(lambda x: int(x.split(":")[0]))) print(df) From 7e6e8a72cee251b085b91631f7a714028cb2926f Mon Sep 17 00:00:00 2001 From: stilscher <66023521+stilscher@users.noreply.github.com> Date: Wed, 28 Jun 2023 17:42:54 +0200 Subject: [PATCH 64/84] additionally plot efficiency bar plots --- scripts/incremental/benchmarking/plot.py | 21 ++++++++++++++++----- scripts/incremental/benchmarking/utils.py | 13 +++++-------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index da4ef09fb..51cbf54cd 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -7,6 +7,19 @@ description_incr_post = "(3)" description_incr_rel ="(4)" +def efficiency_bar_plot_all4(results_dir, result_csv_filename, figure_dir): + outfile_nonincr_vs_incr = "figure_bar.pgf" + df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), filterDetectedChanges=True) + + data_set = df[["Relevant changed LOC", utils.runtime_header_non_incr_child, 
utils.runtime_header_incr_child, utils.runtime_header_incr_posts_child, utils.runtime_header_incr_posts_rel_child]] + data_set = data_set.rename(columns={utils.runtime_header_non_incr_child: description_non_incr, utils.runtime_header_incr_child: description_incr, utils.runtime_header_incr_posts_child: description_incr_post, utils.runtime_header_incr_posts_rel_child: description_incr_rel}) + + colors = ["tab:olive", "tab:blue", "tab:orange", "tab:green", "tab:red"] + textwidth = 7 + size = (textwidth,textwidth/3) + + utils.barplot(data_set, figure_dir, outfile_nonincr_vs_incr, size, colors) + def cummulative_distr_compare2(results_dir, result_csv_filename, figure_dir): num_bins = 2000 outfile_nonincr_vs_incr = "figure_cum_distr_incr.pdf" @@ -162,9 +175,6 @@ def paper_precision_graph(results_precision, filename, outdir): utils.scatter_plot(data, "\# Commits", "Share of less precise program points", os.path.join(outdir, "precision_figure.pgf"), size) - -# efficiency plots - def main(): projects = ["figlet", "chrony", "zstd"] results_efficiency = "result_efficiency_" @@ -191,7 +201,8 @@ def main(): print("Creating efficiency plots.") cummulative_distr_compare2(efficiency_results, efficieny_filename, figures_dir) cummulative_distr_all4(efficiency_results, efficieny_filename, figures_dir) - # paper_efficiency_graphs(results_efficiency, filename, outdir, filterRelCLOC=True, filterDetectedChanges=False) + efficiency_bar_plot_all4(efficiency_results, efficieny_filename, figures_dir) + # paper_efficiency_graphs(efficiency_results, efficieny_filename, figures_dir, filterRelCLOC=True, filterDetectedChanges=False) else: print("No efficiency results available.") @@ -203,4 +214,4 @@ def main(): else: print("No precision results available.") -main() \ No newline at end of file +main() diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index da5898922..c80282b19 100644 --- a/scripts/incremental/benchmarking/utils.py +++ 
b/scripts/incremental/benchmarking/utils.py @@ -173,17 +173,14 @@ def extract_precision_from_compare_log(log): precision = find_line(pattern, log) return {k: int(v) for k,v in precision.items()} if precision else None -def barplot(data_set): - df = pandas.DataFrame(data_set["data"], index=data_set["index"]) # TODO: index=analyzed_commits - df.sort_index(inplace=True, key=lambda idx: idx.map(lambda x: int(x.split(":")[0]))) - print(df) - df.to_csv('results.csv') - - df.plot.bar(rot=0, width=0.7, figsize=(25,10)) +def barplot(df, figure_dir, outfile, figsize=None, colors=None): + df.plot.bar(rot=0, width=0.7, figsize=figsize, color=colors) plt.xticks(rotation=45, ha='right', rotation_mode='anchor') plt.xlabel('Commit') plt.tight_layout() - plt.savefig("figure.pdf") + + outfile = os.path.join(figure_dir, outfile) + plt.savefig(outfile) def get_cleaned_filtered_data(result_csv_file, filterRelCLOC=False, filterDetectedChanges=False): df=pandas.read_csv(result_csv_file, index_col='Commit', sep=";") From ddaabd72786d3ab57769421da0f52f2f82e2c5a3 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Tue, 11 Jul 2023 16:17:36 +0200 Subject: [PATCH 65/84] Interactive confs: set incremental.detect-renames to false. 
--- scripts/incremental/benchmarking/conf/chrony-incrpostsolver.json | 1 + scripts/incremental/benchmarking/conf/chrony.json | 1 + scripts/incremental/benchmarking/conf/figlet-incrpostsolver.json | 1 + scripts/incremental/benchmarking/conf/figlet.json | 1 + .../incremental/benchmarking/conf/zstd-race-incrpostsolver.json | 1 + scripts/incremental/benchmarking/conf/zstd-race.json | 1 + 6 files changed, 6 insertions(+) diff --git a/scripts/incremental/benchmarking/conf/chrony-incrpostsolver.json b/scripts/incremental/benchmarking/conf/chrony-incrpostsolver.json index 8a97510dd..b87872714 100644 --- a/scripts/incremental/benchmarking/conf/chrony-incrpostsolver.json +++ b/scripts/incremental/benchmarking/conf/chrony-incrpostsolver.json @@ -104,6 +104,7 @@ "postsolver": { "enabled": true }, + "detect-renames": false, "restart": { "sided": { "enabled": false diff --git a/scripts/incremental/benchmarking/conf/chrony.json b/scripts/incremental/benchmarking/conf/chrony.json index a2fe392e4..8cfa70f2f 100644 --- a/scripts/incremental/benchmarking/conf/chrony.json +++ b/scripts/incremental/benchmarking/conf/chrony.json @@ -104,6 +104,7 @@ "postsolver": { "enabled": false }, + "detect-renames": false, "restart": { "sided": { "enabled": false diff --git a/scripts/incremental/benchmarking/conf/figlet-incrpostsolver.json b/scripts/incremental/benchmarking/conf/figlet-incrpostsolver.json index 46ad26fce..68d3fee50 100644 --- a/scripts/incremental/benchmarking/conf/figlet-incrpostsolver.json +++ b/scripts/incremental/benchmarking/conf/figlet-incrpostsolver.json @@ -93,6 +93,7 @@ "postsolver": { "enabled": true }, + "detect-renames": false, "restart": { "sided": { "enabled": false diff --git a/scripts/incremental/benchmarking/conf/figlet.json b/scripts/incremental/benchmarking/conf/figlet.json index 3e80b8ffe..0e93dc207 100644 --- a/scripts/incremental/benchmarking/conf/figlet.json +++ b/scripts/incremental/benchmarking/conf/figlet.json @@ -93,6 +93,7 @@ "postsolver": { "enabled": 
false }, + "detect-renames": false, "restart": { "sided": { "enabled": false diff --git a/scripts/incremental/benchmarking/conf/zstd-race-incrpostsolver.json b/scripts/incremental/benchmarking/conf/zstd-race-incrpostsolver.json index dbe858b98..4df9e9a2c 100644 --- a/scripts/incremental/benchmarking/conf/zstd-race-incrpostsolver.json +++ b/scripts/incremental/benchmarking/conf/zstd-race-incrpostsolver.json @@ -112,6 +112,7 @@ "postsolver": { "enabled": true }, + "detect-renames": false, "restart": { "sided": { "enabled": false diff --git a/scripts/incremental/benchmarking/conf/zstd-race.json b/scripts/incremental/benchmarking/conf/zstd-race.json index b3c4a49ac..095596d25 100644 --- a/scripts/incremental/benchmarking/conf/zstd-race.json +++ b/scripts/incremental/benchmarking/conf/zstd-race.json @@ -112,6 +112,7 @@ "postsolver": { "enabled": false }, + "detect-renames": false, "restart": { "sided": { "enabled": false From e169eff9e24d8647f2697313ac0bca003e3eecc0 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Tue, 11 Jul 2023 16:55:52 +0200 Subject: [PATCH 66/84] Extract incremental analysis of child commit in precision script. 
--- scripts/incremental/benchmarking/precision.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/scripts/incremental/benchmarking/precision.py b/scripts/incremental/benchmarking/precision.py index df92a6106..651f9b128 100644 --- a/scripts/incremental/benchmarking/precision.py +++ b/scripts/incremental/benchmarking/precision.py @@ -32,6 +32,7 @@ repo_name = project.repo_name build_compdb = project.build_compdb conf = project.conf_base +conf_incrpost = project.conf_incrpost begin = project.begin to = project.to diff_exclude = project.diff_exclude @@ -106,6 +107,15 @@ def find_sequences(): json.dump(seq_list, outfile, indent=4) return seq_list +def incremental_analyze(commit, out_commit, out_dir_name, compare_data_file, gr, repo_path): + # analyze commit incrementally based on the previous commit and save run for comparison + # print('Analyze', str(commit.hash), 'incrementally (#', commit_num, ').') + out_incr = os.path.join(out_commit, out_dir_name) + os.makedirs(out_incr) + file_incremental_run = os.path.join(out_incr, compare_data_file) + add_options = ['--enable', 'incremental.load', '--enable', 'incremental.save', '--enable', 'incremental.reluctant.enabled', '--set', 'save_run', file_incremental_run] + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, out_incr, conf, add_options, files) + def analyze_series_in_repo(series): prev_commit = "" commit_num = 0 @@ -173,6 +183,8 @@ def analyze_series_in_repo(series): add_options = ['--enable', 'incremental.load', '--enable', 'incremental.save', '--enable', 'incremental.reluctant.enabled', '--set', 'save_run', file_incremental_run] utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, out_incr, conf, add_options, files) + incremental_analyze(commit, out_commit, 'incr', "compare-data-incr", gr, repo_path) + if commit_num in compare_commits or commit_num == len(series) - 1: # compare stored data of original and incremental run # print('Compare both runs.') 
From 249ebf5ad6fdf31ddc784fd47f829186e4e0019a Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Wed, 12 Jul 2023 17:15:32 +0200 Subject: [PATCH 67/84] Exclude figlet from precision run. --- scripts/incremental/benchmarking/run_precision.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/incremental/benchmarking/run_precision.sh b/scripts/incremental/benchmarking/run_precision.sh index e94a3d580..c0f8032ee 100755 --- a/scripts/incremental/benchmarking/run_precision.sh +++ b/scripts/incremental/benchmarking/run_precision.sh @@ -4,9 +4,9 @@ ANALYZER_DIR=$1 #Number of cores to be used NCORES=$2 -echo "Starting run on figlet" -python3 precision.py $ANALYZER_DIR figlet $NCORES -mv result_precision result_precision_figlet +# echo "Starting run on figlet" +# python3 precision.py $ANALYZER_DIR figlet $NCORES +# mv result_precision result_precision_figlet echo "Starting run on zstd" python3 precision.py $ANALYZER_DIR zstd $NCORES From de4208b3698d9ce554b64f2cf8bea6f1d5bc7b18 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Wed, 12 Jul 2023 17:19:01 +0200 Subject: [PATCH 68/84] Precision script: fix config file look-up. 
--- scripts/incremental/benchmarking/precision.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/incremental/benchmarking/precision.py b/scripts/incremental/benchmarking/precision.py index 651f9b128..077944e62 100644 --- a/scripts/incremental/benchmarking/precision.py +++ b/scripts/incremental/benchmarking/precision.py @@ -31,8 +31,9 @@ url = project.url repo_name = project.repo_name build_compdb = project.build_compdb -conf = project.conf_base -conf_incrpost = project.conf_incrpost +cwd = os.getcwd() +conf = os.path.join(cwd, project.conf_base + ".json") +conf_incrpost = os.path.join(cwd, project.conf_incrpost + ".json") begin = project.begin to = project.to diff_exclude = project.diff_exclude From bbb3e7d03dfc2609973bbb3d41e71bec6ca2351e Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 13 Jul 2023 10:57:08 +0200 Subject: [PATCH 69/84] Incremental efficiency script: Do not limit commits to 50 relevant LoC changes. --- scripts/incremental/benchmarking/efficiency.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/incremental/benchmarking/efficiency.py b/scripts/incremental/benchmarking/efficiency.py index 71424d853..873d00e62 100644 --- a/scripts/incremental/benchmarking/efficiency.py +++ b/scripts/incremental/benchmarking/efficiency.py @@ -46,7 +46,7 @@ # Project independent settings result_dir = os.path.join(os.getcwd(), 'result_efficiency') -maxCLOC = 50 # can be deactivated with None +maxCLOC = None # was 50; can be deactivated with None analyzer_dir = sys.argv[1] only_collect_results = False # can be turned on to collect results, if data collection was aborted before the creation of result tables ################################################################################ From 793496c5560a9fc5d73e19bcfd8d00b3b814761b Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 20 Jul 2023 11:05:07 +0200 Subject: [PATCH 70/84] Precision: run all configurations. 
--- scripts/incremental/benchmarking/precision.py | 21 +++++++++++-------- scripts/incremental/benchmarking/utils.py | 8 ++++--- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/scripts/incremental/benchmarking/precision.py b/scripts/incremental/benchmarking/precision.py index 077944e62..addae366c 100644 --- a/scripts/incremental/benchmarking/precision.py +++ b/scripts/incremental/benchmarking/precision.py @@ -108,14 +108,16 @@ def find_sequences(): json.dump(seq_list, outfile, indent=4) return seq_list -def incremental_analyze(commit, out_commit, out_dir_name, compare_data_file, gr, repo_path): +# returns the file where the incremental results are stored for comparison +def incremental_analyze(commit, out_commit, out_dir_name, compare_data_file, gr, repo_path, conf, add_options): # analyze commit incrementally based on the previous commit and save run for comparison # print('Analyze', str(commit.hash), 'incrementally (#', commit_num, ').') out_incr = os.path.join(out_commit, out_dir_name) os.makedirs(out_incr) file_incremental_run = os.path.join(out_incr, compare_data_file) - add_options = ['--enable', 'incremental.load', '--enable', 'incremental.save', '--enable', 'incremental.reluctant.enabled', '--set', 'save_run', file_incremental_run] + add_options = add_options + ['--enable', 'incremental.load', '--enable', 'incremental.save', '--set', 'save_run', file_incremental_run] utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, out_incr, conf, add_options, files) + return file_incremental_run def analyze_series_in_repo(series): prev_commit = "" commit_num = 0 @@ -178,20 +180,21 @@ def analyze_series_in_repo(series): # analyze commit incrementally based on the previous commit and save run for comparison # print('Analyze', str(commit.hash), 'incrementally (#', commit_num, ').') - out_incr = os.path.join(out_commit, 'incr') - os.makedirs(out_incr) - file_incremental_run = os.path.join(out_incr, "compare-data-incr") - add_options = 
['--enable', 'incremental.load', '--enable', 'incremental.save', '--enable', 'incremental.reluctant.enabled', '--set', 'save_run', file_incremental_run] - utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, out_incr, conf, add_options, files) - incremental_analyze(commit, out_commit, 'incr', "compare-data-incr", gr, repo_path) + file_incr_run = incremental_analyze(commit, out_commit, 'incr', "compare-data-incr", gr, repo_path, conf, []) + file_incr_post_run = incremental_analyze(commit, out_commit, 'incr-post', "compare-data-incr-post", gr, repo_path, conf_incrpost, []) + reluctant_option = ['--enable', 'incremental.reluctant.enabled'] + file_incr_rel_post_run = incremental_analyze(commit, out_commit, 'incr-post-rel', "compare-data-incr-post-rel", gr, repo_path, reluctant_option, []) if commit_num in compare_commits or commit_num == len(series) - 1: # compare stored data of original and incremental run # print('Compare both runs.') out_compare = os.path.join(out_commit, 'compare') os.makedirs(out_compare) - utils.compare_runs(analyzer_dir, dummy_c_file, out_compare, conf, file_incremental_run, file_original_run) + utils.compare_runs(analyzer_dir, dummy_c_file, out_compare, "incr", conf, file_incr_run, file_original_run) + utils.compare_runs(analyzer_dir, dummy_c_file, out_compare, "incr_post", conf, file_incr_post_run, file_original_run) + utils.compare_runs(analyzer_dir, dummy_c_file, out_compare, "inr_rel_post", conf, file_incr_rel_post_run, file_original_run) + except utils.subprocess.CalledProcessError as e: print('Aborted because command ', e.cmd, 'failed.') diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index c80282b19..77643be9e 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -61,7 +61,9 @@ preparelog = "prepare.log" analyzerlog = "analyzer.log" -comparelog = "compare.log" + +def comparelog_with_suffix (suffix): + return 
"compare_" +suffix + ".log" def reset_incremental_data(incr_data_dir): if os.path.exists(incr_data_dir) and os.path.isdir(incr_data_dir): @@ -116,10 +118,10 @@ def append_to_repo_path(file): subprocess.run(analyze_command, check=True, stdout=outfile, stderr=subprocess.STDOUT) outfile.close() -def compare_runs(analyzer_dir, dummy_c_file, outdir, conf, compare_data_1, compare_data_2): +def compare_runs(analyzer_dir, dummy_c_file, outdir, log_suffix, conf, compare_data_1, compare_data_2): options = ['--conf', conf, '--disable', 'warn.warning', '--disable', 'warn.race', '--disable', 'dbg.compare_runs.diff', '--disable', 'dbg.compare_runs.eqsys', '--enable', 'dbg.compare_runs.node', '--compare_runs', compare_data_1, compare_data_2] analyze_command = [os.path.join(analyzer_dir, 'goblint'), *options, dummy_c_file] - with open(os.path.join(outdir, comparelog), "w+") as outfile: + with open(os.path.join(outdir, comparelog_with_suffix(log_suffix)), "w+") as outfile: subprocess.run(analyze_command, check=True, stdout=outfile, stderr=subprocess.STDOUT) outfile.close() From 2e50ca1bd8e724b7ec7575308daccf5b761ee253 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 20 Jul 2023 11:11:15 +0200 Subject: [PATCH 71/84] Fix parameters for increm rel with incr. postsolver. 
--- scripts/incremental/benchmarking/precision.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/incremental/benchmarking/precision.py b/scripts/incremental/benchmarking/precision.py index addae366c..5969381d2 100644 --- a/scripts/incremental/benchmarking/precision.py +++ b/scripts/incremental/benchmarking/precision.py @@ -184,7 +184,7 @@ def analyze_series_in_repo(series): file_incr_run = incremental_analyze(commit, out_commit, 'incr', "compare-data-incr", gr, repo_path, conf, []) file_incr_post_run = incremental_analyze(commit, out_commit, 'incr-post', "compare-data-incr-post", gr, repo_path, conf_incrpost, []) reluctant_option = ['--enable', 'incremental.reluctant.enabled'] - file_incr_rel_post_run = incremental_analyze(commit, out_commit, 'incr-post-rel', "compare-data-incr-post-rel", gr, repo_path, reluctant_option, []) + file_incr_rel_post_run = incremental_analyze(commit, out_commit, 'incr-post-rel', "compare-data-incr-post-rel", gr, repo_path, conf_incrpost, reluctant_option) if commit_num in compare_commits or commit_num == len(series) - 1: # compare stored data of original and incremental run From 5c9d7e1e942d5ca38e1b7591f631087ecf3d7e44 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 20 Jul 2023 11:27:57 +0200 Subject: [PATCH 72/84] Incremental precision script: perform merge results for all configs. 
--- scripts/incremental/benchmarking/precision.py | 36 ++++++++++--------- scripts/incremental/benchmarking/utils.py | 1 + 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/scripts/incremental/benchmarking/precision.py b/scripts/incremental/benchmarking/precision.py index 5969381d2..1a3fae852 100644 --- a/scripts/incremental/benchmarking/precision.py +++ b/scripts/incremental/benchmarking/precision.py @@ -191,9 +191,9 @@ def analyze_series_in_repo(series): # print('Compare both runs.') out_compare = os.path.join(out_commit, 'compare') os.makedirs(out_compare) - utils.compare_runs(analyzer_dir, dummy_c_file, out_compare, "incr", conf, file_incr_run, file_original_run) - utils.compare_runs(analyzer_dir, dummy_c_file, out_compare, "incr_post", conf, file_incr_post_run, file_original_run) - utils.compare_runs(analyzer_dir, dummy_c_file, out_compare, "inr_rel_post", conf, file_incr_rel_post_run, file_original_run) + utils.compare_runs(analyzer_dir, dummy_c_file, out_compare, utils.compare_runs_suffixes[0], conf, file_incr_run, file_original_run) + utils.compare_runs(analyzer_dir, dummy_c_file, out_compare, utils.compare_runs_suffixes[1], conf, file_incr_post_run, file_original_run) + utils.compare_runs(analyzer_dir, dummy_c_file, out_compare, utils.compare_runs_suffixes[2], conf, file_incr_rel_post_run, file_original_run) except utils.subprocess.CalledProcessError as e: @@ -264,20 +264,22 @@ def merge_results(outfilename): relCLOC = 0 for i in filter(lambda x: x != "0", commits): ith_dir = os.path.join(outdir, i) - compare_log_path = os.path.join(ith_dir, "compare", utils.comparelog) - with open(os.path.join(outdir, i, "commit_properties.log"), "r") as f: - relCLOC += json.load(f)["relCLOC"] - if int(i) in compare_commits: - if os.path.isdir(ith_dir) and os.path.exists(compare_log_path): - int_prec[i]["precision"] = utils.extract_precision_from_compare_log(compare_log_path) - int_prec[i]["relCLOC"] = relCLOC - if int_prec[i]["precision"]: - 
result_sums[i]["precpertotal"] = {k: result_sums[i]["precpertotal"].get(k, 0) + (int_prec[i]["precision"].get(k, 0) / int_prec[i]["precision"]["total"]) for k in set(result_sums[i]["precpertotal"])} - result_sums[i]["number_of_commits"] += 1 - result_sums[i]["relCLOC"] += relCLOC - if int(i) != 0 and int(i) == len(commits) - 1: - if os.path.exists(compare_log_path): - final_prec = utils.extract_precision_from_compare_log(compare_log_path) + for suffix in utils.compare_runs_suffixes: + comparelog = utils.comparelog_with_suffix(suffix) + compare_log_path = os.path.join(ith_dir, "compare", comparelog) + with open(os.path.join(outdir, i, "commit_properties.log"), "r") as f: + relCLOC += json.load(f)["relCLOC"] + if int(i) in compare_commits: + if os.path.isdir(ith_dir) and os.path.exists(compare_log_path): + int_prec[i]["precision"] = utils.extract_precision_from_compare_log(compare_log_path) + int_prec[i]["relCLOC"] = relCLOC + if int_prec[i]["precision"]: + result_sums[i]["precpertotal"] = {k: result_sums[i]["precpertotal"].get(k, 0) + (int_prec[i]["precision"].get(k, 0) / int_prec[i]["precision"]["total"]) for k in set(result_sums[i]["precpertotal"])} + result_sums[i]["number_of_commits"] += 1 + result_sums[i]["relCLOC"] += relCLOC + if int(i) != 0 and int(i) == len(commits) - 1: + if os.path.exists(compare_log_path): + final_prec = utils.extract_precision_from_compare_log(compare_log_path) summary = {"name": os.path.basename(s), "sequence": seq, "length": len(seq), "intermediate precision": int_prec, "final precision": final_prec, "finalRelCLOC": relCLOC} seq_summaries.append(summary) os.chdir(wd) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index 77643be9e..c8afe2bc4 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -61,6 +61,7 @@ preparelog = "prepare.log" analyzerlog = "analyzer.log" +compare_runs_suffixes = ["incr", "incr_post", "incr_rel_post"] def 
comparelog_with_suffix (suffix): return "compare_" +suffix + ".log" From c7d9e15712bd5cba9b5ae2c77fce903ae5ece960 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 20 Jul 2023 13:24:06 +0200 Subject: [PATCH 73/84] Interactive precision: iterate over configurations in outer instead of inner loop. --- scripts/incremental/benchmarking/precision.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/scripts/incremental/benchmarking/precision.py b/scripts/incremental/benchmarking/precision.py index 1a3fae852..688c0b81d 100644 --- a/scripts/incremental/benchmarking/precision.py +++ b/scripts/incremental/benchmarking/precision.py @@ -262,9 +262,10 @@ def merge_results(outfilename): int_prec = {str(i): {"precision": None, "relCLOC": None} for i in compare_commits} final_prec = None relCLOC = 0 - for i in filter(lambda x: x != "0", commits): - ith_dir = os.path.join(outdir, i) - for suffix in utils.compare_runs_suffixes: + + for suffix in utils.compare_runs_suffixes: + for i in filter(lambda x: x != "0", commits): + ith_dir = os.path.join(outdir, i) comparelog = utils.comparelog_with_suffix(suffix) compare_log_path = os.path.join(ith_dir, "compare", comparelog) with open(os.path.join(outdir, i, "commit_properties.log"), "r") as f: @@ -280,9 +281,9 @@ def merge_results(outfilename): if int(i) != 0 and int(i) == len(commits) - 1: if os.path.exists(compare_log_path): final_prec = utils.extract_precision_from_compare_log(compare_log_path) - summary = {"name": os.path.basename(s), "sequence": seq, "length": len(seq), "intermediate precision": int_prec, "final precision": final_prec, "finalRelCLOC": relCLOC} - seq_summaries.append(summary) - os.chdir(wd) + summary = {"name": os.path.basename(s), "sequence": seq, "length": len(seq), "intermediate precision": int_prec, "final precision": final_prec, "finalRelCLOC": relCLOC} + seq_summaries.append(summary) + os.chdir(wd) result_avgs = {i: None for i in result_sums.keys()} for i, ps in 
result_sums.items(): if ps["number_of_commits"] != 0: From 54e6359961250e9d14f1702803c29650c7f37d07 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 20 Jul 2023 13:25:28 +0200 Subject: [PATCH 74/84] Use float instead of np.float as np.float has been removed. --- scripts/incremental/benchmarking/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index c8afe2bc4..55e5f9040 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -213,7 +213,7 @@ def create_cum_data(dataFrame, num_bins, relColumns): for c in relColumns: valuesc, basec = np.histogram(dataFrame.loc[:,c], bins=bins) base = basec - cum = np.cumsum(valuesc, dtype=np.float) + cum = np.cumsum(valuesc, dtype=float) cum[cum==0] = np.nan # If there is a tail of values that are the same, set the ones after its first occurrence to NaN. From 92af04787758915f32363ee1d39b4a03387001ad Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 20 Jul 2023 13:48:45 +0200 Subject: [PATCH 75/84] Plot separate efficiency graphs for <= 50 loc changes and > 50 loc changes. 
--- scripts/incremental/benchmarking/plot.py | 40 ++++++++++++++--------- scripts/incremental/benchmarking/utils.py | 5 +-- 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index 51cbf54cd..e10e9b54a 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -7,9 +7,9 @@ description_incr_post = "(3)" description_incr_rel ="(4)" -def efficiency_bar_plot_all4(results_dir, result_csv_filename, figure_dir): +def efficiency_bar_plot_all4(results_dir, changed_loc_filter, result_csv_filename, figure_dir): outfile_nonincr_vs_incr = "figure_bar.pgf" - df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), filterDetectedChanges=True) + df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), changed_loc_filter, filterDetectedChanges=True) data_set = df[["Relevant changed LOC", utils.runtime_header_non_incr_child, utils.runtime_header_incr_child, utils.runtime_header_incr_posts_child, utils.runtime_header_incr_posts_rel_child]] data_set = data_set.rename(columns={utils.runtime_header_non_incr_child: description_non_incr, utils.runtime_header_incr_child: description_incr, utils.runtime_header_incr_posts_child: description_incr_post, utils.runtime_header_incr_posts_rel_child: description_incr_rel}) @@ -20,11 +20,11 @@ def efficiency_bar_plot_all4(results_dir, result_csv_filename, figure_dir): utils.barplot(data_set, figure_dir, outfile_nonincr_vs_incr, size, colors) -def cummulative_distr_compare2(results_dir, result_csv_filename, figure_dir): +def cummulative_distr_compare2(results_dir, suffix, changed_loc_filter, result_csv_filename, figure_dir): num_bins = 2000 outfile_nonincr_vs_incr = "figure_cum_distr_incr.pdf" outfile_incr_vs_incrrel = "figure_cum_distr_rel.pdf" - df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), filterDetectedChanges=True) 
+ df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), changed_loc_filter, filterDetectedChanges=True) data, base = utils.create_cum_data(df, num_bins, [utils.runtime_header_non_incr_child, utils.runtime_header_incr_child, utils.runtime_header_incr_posts_rel_child]) datanonincr = {"values": data[0], "label": description_non_incr} @@ -38,10 +38,12 @@ def cummulative_distr_compare2(results_dir, result_csv_filename, figure_dir): utils.cummulative_distr_plot([dataincr, datarelincr], base, figure_dir, outfile_incr_vs_incrrel, logscale=True) -def cummulative_distr_all4(results_dir, result_csv_filename, figure_dir): + + +def cummulative_distr_all4_filter(results_dir, suffix, changed_loc_filter, result_csv_filename, figure_dir): num_bins = 2000 - outfile_nonincr_vs_incr = "figure_cum_distr_all3.pdf" - df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), filterDetectedChanges=True) + outfile_nonincr_vs_incr = "figure_cum_distr_all3"+ suffix + ".pdf" + df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), changed_loc_filter, filterDetectedChanges=True) data, base = utils.create_cum_data(df, num_bins, [utils.runtime_header_non_incr_child, utils.runtime_header_incr_child, utils.runtime_header_incr_posts_child, utils.runtime_header_incr_posts_rel_child]) data_non_incr = {"values": data[0], "label": description_non_incr} @@ -50,8 +52,16 @@ def cummulative_distr_all4(results_dir, result_csv_filename, figure_dir): data_incr_rel = {"values": data[3], "label": description_incr_rel} utils.cummulative_distr_plot([data_non_incr, data_incr, data_incr_post, data_incr_rel], base, figure_dir, outfile_nonincr_vs_incr, figsize=(6,4), logscale=True) -def distribution_absdiff_plot(title, result_csv_filename, outdir, cutoffs_incr=None, cutoffs_rel=None): - df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) +def cummulative_distr_all4(results_dir, 
results_csv_filenmane, figure_dir): + greater_50 = lambda x : x > 50 + cummulative_distr_all4_filter(results_dir, "_greater_50_loc_changed", greater_50, results_csv_filenmane, figure_dir) + + leq_50 = lambda x : x <= 50 + cummulative_distr_all4_filter(results_dir, "_leq_50_loc_changed", leq_50, results_csv_filenmane, figure_dir) + + +def distribution_absdiff_plot(title, changed_loc_filter, result_csv_filename, outdir, cutoffs_incr=None, cutoffs_rel=None): + df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), changed_loc_filter, filterDetectedChanges=True) # plot incremental vs non-incremental diff = df.loc[:,utils.runtime_header_non_incr_child] - df.loc[:,utils.runtime_header_incr_child] @@ -61,8 +71,8 @@ def distribution_absdiff_plot(title, result_csv_filename, outdir, cutoffs_incr=N diff = df.loc[:,utils.runtime_header_incr_child] - df.loc[:,utils.runtime_header_incr_posts_rel_child] utils.hist_plot(diff, 2, title, 'Improvement in s (reluctant compared to incremental)', 'Number of Commits', os.path.join(outdir, "figure_absdiff_distr_rel.pdf"), cutoffs_rel) -def distribution_reldiff_plot(title, result_csv_filename, outdir, cutoffs_incr=None, cutoffs_rel=None): - df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) +def distribution_reldiff_plot(title, changed_loc_filter, result_csv_filename, outdir, cutoffs_incr=None, cutoffs_rel=None): + df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), changed_loc_filter, filterDetectedChanges=True) # plot incremental vs non-incremental print(df[utils.runtime_header_incr_child].astype('float')) @@ -73,8 +83,8 @@ def distribution_reldiff_plot(title, result_csv_filename, outdir, cutoffs_incr=N diff = 1 - df.loc[:,utils.runtime_header_incr_posts_rel_child] / df.loc[:,utils.runtime_header_incr_child] utils.hist_plot(diff, 0.005, title, 'Relative Improvement (reluctant compared to incremental)', 'Number of Commits', 
os.path.join(outdir, "figure_reldiff_distr_rel.pdf"), cutoffs_rel) -def paper_efficiency_graphs(dir_results, csv_filename, outdir, filterRelCLOC=False, filterDetectedChanges=False): - df = utils.get_cleaned_filtered_data(os.path.join(dir_results,csv_filename), filterRelCLOC=filterRelCLOC, filterDetectedChanges=filterDetectedChanges) +def paper_efficiency_graphs(dir_results, changed_loc_filter, csv_filename, outdir, filterRelCLOC=False, filterDetectedChanges=False): + df = utils.get_cleaned_filtered_data(os.path.join(dir_results,csv_filename), changed_loc_filter, filterRelCLOC=filterRelCLOC, filterDetectedChanges=filterDetectedChanges) diff1 = 1 - df[utils.runtime_header_incr_child].astype('float') / df[utils.runtime_header_non_incr_child].astype('float') diff2 = 1 - df[utils.runtime_header_incr_posts_child].astype('float') / df[utils.runtime_header_incr_child].astype('float') diff3 = 1 - df[utils.runtime_header_incr_posts_rel_child].astype('float') / df[utils.runtime_header_incr_posts_child].astype('float') @@ -199,9 +209,9 @@ def main(): if os.path.exists(efficiency_results): efficieny_filename = "total_results.csv" print("Creating efficiency plots.") - cummulative_distr_compare2(efficiency_results, efficieny_filename, figures_dir) + # cummulative_distr_compare2(efficiency_results, efficieny_filename, figures_dir) cummulative_distr_all4(efficiency_results, efficieny_filename, figures_dir) - efficiency_bar_plot_all4(efficiency_results, efficieny_filename, figures_dir) + # efficiency_bar_plot_all4(efficiency_results, efficieny_filename, figures_dir) # paper_efficiency_graphs(efficiency_results, efficieny_filename, figures_dir, filterRelCLOC=True, filterDetectedChanges=False) else: print("No efficiency results available.") diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index 55e5f9040..68d1ede9e 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -185,13 +185,14 
@@ def barplot(df, figure_dir, outfile, figsize=None, colors=None): outfile = os.path.join(figure_dir, outfile) plt.savefig(outfile) -def get_cleaned_filtered_data(result_csv_file, filterRelCLOC=False, filterDetectedChanges=False): - df=pandas.read_csv(result_csv_file, index_col='Commit', sep=";") +def get_cleaned_filtered_data(result_csv_file, changed_loc_filter, filterRelCLOC=False, filterDetectedChanges=False): + df = pandas.read_csv(result_csv_file, index_col='Commit', sep=";") df = df.loc[:, ~df.columns.str.contains('^Unnamed')] # clean dataset (remove all rows for which any of the runtime entries is 0 which means that the respective analysis # run failed) df = df[(df[runtime_header_parent] != 0)] + df = df[changed_loc_filter(df["Relevant changed LOC"])] if filterRelCLOC: df = df[df["Relevant changed LOC"] > 0] if filterDetectedChanges: From c020267c1b6b2ab67d11b1e5631e3ac6bf9b2a20 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 20 Jul 2023 14:06:30 +0200 Subject: [PATCH 76/84] Efficiency plots: add efficiency_bar_plots again. 
--- scripts/incremental/benchmarking/plot.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index e10e9b54a..3ad39dbc5 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -7,7 +7,8 @@ description_incr_post = "(3)" description_incr_rel ="(4)" -def efficiency_bar_plot_all4(results_dir, changed_loc_filter, result_csv_filename, figure_dir): +def efficiency_bar_plot_all4(results_dir, result_csv_filename, figure_dir): + changed_loc_filter = lambda x : x >= 0 # no filtering outfile_nonincr_vs_incr = "figure_bar.pgf" df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), changed_loc_filter, filterDetectedChanges=True) @@ -211,7 +212,7 @@ def main(): print("Creating efficiency plots.") # cummulative_distr_compare2(efficiency_results, efficieny_filename, figures_dir) cummulative_distr_all4(efficiency_results, efficieny_filename, figures_dir) - # efficiency_bar_plot_all4(efficiency_results, efficieny_filename, figures_dir) + efficiency_bar_plot_all4(efficiency_results, efficieny_filename, figures_dir) # paper_efficiency_graphs(efficiency_results, efficieny_filename, figures_dir, filterRelCLOC=True, filterDetectedChanges=False) else: print("No efficiency results available.") From ca79b69e4f536fd5e585615983bf0b1394af68b9 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 20 Jul 2023 14:23:35 +0200 Subject: [PATCH 77/84] Interactive efficiency: plot cpu_time instead of wall time. 
--- scripts/incremental/benchmarking/plot.py | 6 +++--- scripts/incremental/benchmarking/utils.py | 8 ++++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index 3ad39dbc5..51261fbd2 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -12,8 +12,8 @@ def efficiency_bar_plot_all4(results_dir, result_csv_filename, figure_dir): outfile_nonincr_vs_incr = "figure_bar.pgf" df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), changed_loc_filter, filterDetectedChanges=True) - data_set = df[["Relevant changed LOC", utils.runtime_header_non_incr_child, utils.runtime_header_incr_child, utils.runtime_header_incr_posts_child, utils.runtime_header_incr_posts_rel_child]] - data_set = data_set.rename(columns={utils.runtime_header_non_incr_child: description_non_incr, utils.runtime_header_incr_child: description_incr, utils.runtime_header_incr_posts_child: description_incr_post, utils.runtime_header_incr_posts_rel_child: description_incr_rel}) + data_set = df[["Relevant changed LOC", utils.cpu_runtime_header_non_incr_child, utils.cpu_runtime_header_incr_child, utils.cpu_runtime_header_incr_posts_child, utils.cpu_runtime_header_incr_posts_rel_child]] + data_set = data_set.rename(columns={utils.cpu_runtime_header_non_incr_child: description_non_incr, utils.cpu_runtime_header_incr_child: description_incr, utils.cpu_runtime_header_incr_posts_child: description_incr_post, utils.cpu_runtime_header_incr_posts_rel_child: description_incr_rel}) colors = ["tab:olive", "tab:blue", "tab:orange", "tab:green", "tab:red"] textwidth = 7 @@ -46,7 +46,7 @@ def cummulative_distr_all4_filter(results_dir, suffix, changed_loc_filter, resul outfile_nonincr_vs_incr = "figure_cum_distr_all3"+ suffix + ".pdf" df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), changed_loc_filter, 
filterDetectedChanges=True) - data, base = utils.create_cum_data(df, num_bins, [utils.runtime_header_non_incr_child, utils.runtime_header_incr_child, utils.runtime_header_incr_posts_child, utils.runtime_header_incr_posts_rel_child]) + data, base = utils.create_cum_data(df, num_bins, [utils.cpu_runtime_header_non_incr_child, utils.cpu_runtime_header_incr_child, utils.cpu_runtime_header_incr_posts_child, utils.cpu_runtime_header_incr_posts_rel_child]) data_non_incr = {"values": data[0], "label": description_non_incr} data_incr = {"values": data[1], "label": description_incr} data_incr_post = {"values": data[2], "label": description_incr_post} diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index 68d1ede9e..284346e72 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -47,6 +47,14 @@ runtime_header_incr_posts_child = runtime_prefix + header_incr_posts_child runtime_header_incr_posts_rel_child = runtime_prefix + header_incr_posts_rel_child +cpu_prefix = "CPU_" + +cpu_runtime_header_parent = cpu_prefix + runtime_header_parent +cpu_runtime_header_non_incr_child = cpu_prefix + runtime_header_non_incr_child +cpu_runtime_header_incr_child = cpu_prefix + runtime_header_incr_child +cpu_runtime_header_incr_posts_child = cpu_prefix + runtime_header_incr_posts_child +cpu_runtime_header_incr_posts_rel_child = cpu_prefix + runtime_header_incr_posts_rel_child + analysis_header_parent = analysis_prefix + header_parent analysis_header_non_incr_child = analysis_prefix + header_non_incr_child analysis_header_incr_child = analysis_prefix + header_incr_child From 086955ba0e5865aca6f60b1d57696b4c429244af Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Sun, 23 Jul 2023 17:28:33 +0200 Subject: [PATCH 78/84] Add printouts for debugging. 
--- scripts/incremental/benchmarking/precision.py | 1 + scripts/incremental/benchmarking/utils.py | 1 + 2 files changed, 2 insertions(+) diff --git a/scripts/incremental/benchmarking/precision.py b/scripts/incremental/benchmarking/precision.py index 688c0b81d..8b189d32e 100644 --- a/scripts/incremental/benchmarking/precision.py +++ b/scripts/incremental/benchmarking/precision.py @@ -114,6 +114,7 @@ def incremental_analyze(commit, out_commit, out_dir_name, compare_data_file, gr, # print('Analyze', str(commit.hash), 'incrementally (#', commit_num, ').') out_incr = os.path.join(out_commit, out_dir_name) os.makedirs(out_incr) + print("Creating directory" + out_incr) file_incremental_run = os.path.join(out_incr, compare_data_file) add_options = add_options + ['--enable', 'incremental.load', '--enable', 'incremental.save', '--enable', '--set', 'save_run', file_incremental_run] utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, out_incr, conf, add_options, files) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index 284346e72..02b179bbd 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -125,6 +125,7 @@ def append_to_repo_path(file): # Run the analysis with open(os.path.join(outdir, analyzerlog), "w+") as outfile: subprocess.run(analyze_command, check=True, stdout=outfile, stderr=subprocess.STDOUT) + print("Started run:\n" + analyze_command) outfile.close() def compare_runs(analyzer_dir, dummy_c_file, outdir, log_suffix, conf, compare_data_1, compare_data_2): From 02d9efc30a4b87ea622c94afdc9caa9eb34e8885 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Sun, 23 Jul 2023 17:47:56 +0200 Subject: [PATCH 79/84] Remove spurious --enable from parameters passed in precision script. 
--- scripts/incremental/benchmarking/precision.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/incremental/benchmarking/precision.py b/scripts/incremental/benchmarking/precision.py index 8b189d32e..2c9e84710 100644 --- a/scripts/incremental/benchmarking/precision.py +++ b/scripts/incremental/benchmarking/precision.py @@ -116,7 +116,7 @@ def incremental_analyze(commit, out_commit, out_dir_name, compare_data_file, gr, os.makedirs(out_incr) print("Creating directory" + out_incr) file_incremental_run = os.path.join(out_incr, compare_data_file) - add_options = add_options + ['--enable', 'incremental.load', '--enable', 'incremental.save', '--enable', '--set', 'save_run', file_incremental_run] + add_options = add_options + ['--enable', 'incremental.load', '--enable', 'incremental.save', '--set', 'save_run', file_incremental_run] utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, out_incr, conf, add_options, files) return file_incremental_run From 8657739a9b722ef060a102b41099f4e5ee087cb4 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Sun, 23 Jul 2023 18:33:29 +0200 Subject: [PATCH 80/84] Output precision results into different files, extend plot script to produce plots for each of them. 
--- scripts/incremental/benchmarking/plot.py | 12 +++-- scripts/incremental/benchmarking/precision.py | 53 ++++++++++--------- scripts/incremental/benchmarking/utils.py | 5 +- 3 files changed, 38 insertions(+), 32 deletions(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index 51261fbd2..8b07c6341 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -156,7 +156,7 @@ def paper_precision_graph_box(results_precision, filename, outdir): utils.box_plot(data, x, "\# Commits", "Share of less precise program points", os.path.join(outdir, "precision_figure.pgf"), size) -def paper_precision_graph(results_precision, filename, outdir): +def paper_precision_graph(results_precision, filename, outdir, suffix): df = utils.get_data_from_json(os.path.join(results_precision, filename)) # Plot precision loss after x commits, where x is in {1, 2, 5, 10, 15} @@ -183,7 +183,8 @@ def paper_precision_graph(results_precision, filename, outdir): data.append((x,y)) halftextwidth = 3.3 size=(halftextwidth,halftextwidth*2/3) - utils.scatter_plot(data, "\# Commits", "Share of less precise program points", os.path.join(outdir, "precision_figure.pgf"), size) + outfile = os.path.join(outdir, "precision_figure" + suffix + ".pgf") + utils.scatter_plot(data, "\# Commits", "Share of less precise program points", outfile, size) def main(): @@ -219,9 +220,10 @@ def main(): # precision plot if os.path.exists(precision_results): - precision_filename = "results.json" - print("Creating precision plots.") - paper_precision_graph(precision_results, precision_filename, figures_dir) + for suffix in utils.compare_runs_suffixes: + precision_filename = utils.precision_result_file_name_with_suffix(suffix) + print("Creating precision plots for configuration:" + suffix) + paper_precision_graph(precision_results, precision_filename, figures_dir, suffix) else: print("No precision results available.") diff --git 
a/scripts/incremental/benchmarking/precision.py b/scripts/incremental/benchmarking/precision.py index 2c9e84710..41c224bf5 100644 --- a/scripts/incremental/benchmarking/precision.py +++ b/scripts/incremental/benchmarking/precision.py @@ -114,7 +114,6 @@ def incremental_analyze(commit, out_commit, out_dir_name, compare_data_file, gr, # print('Analyze', str(commit.hash), 'incrementally (#', commit_num, ').') out_incr = os.path.join(out_commit, out_dir_name) os.makedirs(out_incr) - print("Creating directory" + out_incr) file_incremental_run = os.path.join(out_incr, compare_data_file) add_options = add_options + ['--enable', 'incremental.load', '--enable', 'incremental.save', '--set', 'save_run', file_incremental_run] utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, out_incr, conf, add_options, files) @@ -244,7 +243,7 @@ def analyze_seq_in_parallel(seq_list): p.join() -def merge_results(outfilename): +def merge_results(outfilename, suffix): wd = os.getcwd() seq_summaries = [] result_sums = {str(i): {"precpertotal": {"equal": 0, "moreprec": 0, "lessprec": 0, "incomp": 0, "total": 0}, "number_of_commits": 0, "relCLOC": 0} for i in compare_commits} @@ -264,27 +263,26 @@ def merge_results(outfilename): final_prec = None relCLOC = 0 - for suffix in utils.compare_runs_suffixes: - for i in filter(lambda x: x != "0", commits): - ith_dir = os.path.join(outdir, i) - comparelog = utils.comparelog_with_suffix(suffix) - compare_log_path = os.path.join(ith_dir, "compare", comparelog) - with open(os.path.join(outdir, i, "commit_properties.log"), "r") as f: - relCLOC += json.load(f)["relCLOC"] - if int(i) in compare_commits: - if os.path.isdir(ith_dir) and os.path.exists(compare_log_path): - int_prec[i]["precision"] = utils.extract_precision_from_compare_log(compare_log_path) - int_prec[i]["relCLOC"] = relCLOC - if int_prec[i]["precision"]: - result_sums[i]["precpertotal"] = {k: result_sums[i]["precpertotal"].get(k, 0) + (int_prec[i]["precision"].get(k, 0) 
/ int_prec[i]["precision"]["total"]) for k in set(result_sums[i]["precpertotal"])} - result_sums[i]["number_of_commits"] += 1 - result_sums[i]["relCLOC"] += relCLOC - if int(i) != 0 and int(i) == len(commits) - 1: - if os.path.exists(compare_log_path): - final_prec = utils.extract_precision_from_compare_log(compare_log_path) - summary = {"name": os.path.basename(s), "sequence": seq, "length": len(seq), "intermediate precision": int_prec, "final precision": final_prec, "finalRelCLOC": relCLOC} - seq_summaries.append(summary) - os.chdir(wd) + comparelog = utils.comparelog_with_suffix(suffix) + for i in filter(lambda x: x != "0", commits): + ith_dir = os.path.join(outdir, i) + compare_log_path = os.path.join(ith_dir, "compare", comparelog) + with open(os.path.join(outdir, i, "commit_properties.log"), "r") as f: + relCLOC += json.load(f)["relCLOC"] + if int(i) in compare_commits: + if os.path.isdir(ith_dir) and os.path.exists(compare_log_path): + int_prec[i]["precision"] = utils.extract_precision_from_compare_log(compare_log_path) + int_prec[i]["relCLOC"] = relCLOC + if int_prec[i]["precision"]: + result_sums[i]["precpertotal"] = {k: result_sums[i]["precpertotal"].get(k, 0) + (int_prec[i]["precision"].get(k, 0) / int_prec[i]["precision"]["total"]) for k in set(result_sums[i]["precpertotal"])} + result_sums[i]["number_of_commits"] += 1 + result_sums[i]["relCLOC"] += relCLOC + if int(i) != 0 and int(i) == len(commits) - 1: + if os.path.exists(compare_log_path): + final_prec = utils.extract_precision_from_compare_log(compare_log_path) + summary = {"name": os.path.basename(s), "sequence": seq, "length": len(seq), "intermediate precision": int_prec, "final precision": final_prec, "finalRelCLOC": relCLOC} + seq_summaries.append(summary) + os.chdir(wd) result_avgs = {i: None for i in result_sums.keys()} for i, ps in result_sums.items(): if ps["number_of_commits"] != 0: @@ -295,6 +293,11 @@ def merge_results(outfilename): json.dump(res, f, indent=4) res +def 
merge_all_results(): + print("\nmerge results") + for suffix in utils.compare_runs_suffixes: + results_filename = utils.precision_result_file_name_with_suffix(suffix) + merge_results(results_filename, suffix) if not only_collect_results: os.mkdir(res_dir) @@ -307,6 +310,4 @@ def merge_results(outfilename): print("\nanalyze sequences in parallel") analyze_seq_in_parallel(seq_list) -print("\nmerge results") -results_filename = "results.json" -merge_results(results_filename) +merge_all_results() diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index 02b179bbd..ca2033ce8 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -125,7 +125,6 @@ def append_to_repo_path(file): # Run the analysis with open(os.path.join(outdir, analyzerlog), "w+") as outfile: subprocess.run(analyze_command, check=True, stdout=outfile, stderr=subprocess.STDOUT) - print("Started run:\n" + analyze_command) outfile.close() def compare_runs(analyzer_dir, dummy_c_file, outdir, log_suffix, conf, compare_data_1, compare_data_2): @@ -185,6 +184,10 @@ def extract_precision_from_compare_log(log): precision = find_line(pattern, log) return {k: int(v) for k,v in precision.items()} if precision else None +def precision_result_file_name_with_suffix(suffix): + result_file_name = "results" + suffix + ".json" + return result_file_name + def barplot(df, figure_dir, outfile, figsize=None, colors=None): df.plot.bar(rot=0, width=0.7, figsize=figsize, color=colors) plt.xticks(rotation=45, ha='right', rotation_mode='anchor') From 26fc662ac398f14c039404dddd9529fb86ed774a Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Sun, 23 Jul 2023 18:44:45 +0200 Subject: [PATCH 81/84] Add underscore in file names of results produced by precision script and the precision plots. 
--- scripts/incremental/benchmarking/plot.py | 2 +- scripts/incremental/benchmarking/utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index 8b07c6341..ede93d8b7 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -183,7 +183,7 @@ def paper_precision_graph(results_precision, filename, outdir, suffix): data.append((x,y)) halftextwidth = 3.3 size=(halftextwidth,halftextwidth*2/3) - outfile = os.path.join(outdir, "precision_figure" + suffix + ".pgf") + outfile = os.path.join(outdir, "precision_figure_" + suffix + ".pgf") utils.scatter_plot(data, "\# Commits", "Share of less precise program points", outfile, size) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index ca2033ce8..235dd484b 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -185,7 +185,7 @@ def extract_precision_from_compare_log(log): return {k: int(v) for k,v in precision.items()} if precision else None def precision_result_file_name_with_suffix(suffix): - result_file_name = "results" + suffix + ".json" + result_file_name = "results_" + suffix + ".json" return result_file_name def barplot(df, figure_dir, outfile, figsize=None, colors=None): df.plot.bar(rot=0, width=0.7, figsize=figsize, color=colors) plt.xticks(rotation=45, ha='right', rotation_mode='anchor') From 20571d34f44240fa5e39e5a026b67c587a770731 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Tue, 25 Jul 2023 10:25:09 +0200 Subject: [PATCH 82/84] Precision script: explicitly pass load-dir/save-dir for incremental data. 
--- scripts/incremental/benchmarking/precision.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/scripts/incremental/benchmarking/precision.py b/scripts/incremental/benchmarking/precision.py index 41c224bf5..cce567ebb 100644 --- a/scripts/incremental/benchmarking/precision.py +++ b/scripts/incremental/benchmarking/precision.py @@ -109,13 +109,13 @@ def find_sequences(): return seq_list # returns the file where the incremental results are stored for comparison -def incremental_analyze(commit, out_commit, out_dir_name, compare_data_file, gr, repo_path, conf, add_options): +def incremental_analyze(commit, out_commit, out_dir_name, incremental_dir, compare_data_file, gr, repo_path, conf, add_options): # analyze commit incrementally based on the previous commit and save run for comparison # print('Analyze', str(commit.hash), 'incrementally (#', commit_num, ').') out_incr = os.path.join(out_commit, out_dir_name) os.makedirs(out_incr) file_incremental_run = os.path.join(out_incr, compare_data_file) - add_options = add_options + ['--enable', 'incremental.load', '--enable', 'incremental.save', '--set', 'save_run', file_incremental_run] + add_options = add_options + ['--set','incremental.load-dir', incremental_dir, '--set','incremental.save-dir', incremental_dir, '--enable', 'incremental.load', '--enable', 'incremental.save', '--set', 'save_run', file_incremental_run] utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, out_incr, conf, add_options, files) return file_incremental_run @@ -124,6 +124,13 @@ def analyze_series_in_repo(series): commit_num = 0 repo_path = os.path.abspath(repo_name) out_dir = os.path.abspath('out') + + incremental_data = "incremental_data_" + incr_data_dir = os.path.abspath(incremental_data + "incr") + incr_post_data_dir = os.path.abspath(incremental_data + "incr_post") + incr_post_rel_data_dir = os.path.abspath(incremental_data + "incr_post_rel") + + with open('sequence.json', 'w') as 
outfile: json.dump(series, outfile, indent=4) dummy_c_file = "file.c" @@ -181,10 +188,10 @@ def analyze_series_in_repo(series): # analyze commit incrementally based on the previous commit and save run for comparison # print('Analyze', str(commit.hash), 'incrementally (#', commit_num, ').') - file_incr_run = incremental_analyze(commit, out_commit, 'incr', "compare-data-incr", gr, repo_path, conf, []) - file_incr_post_run = incremental_analyze(commit, out_commit, 'incr-post', "compare-data-incr-post", gr, repo_path, conf_incrpost, []) + file_incr_run = incremental_analyze(commit, out_commit, 'incr', incr_data_dir, "compare-data-incr", gr, repo_path, conf, []) + file_incr_post_run = incremental_analyze(commit, out_commit, 'incr-post', incr_post_data_dir, "compare-data-incr-post", gr, repo_path, conf_incrpost, []) reluctant_option = ['--enable', 'incremental.reluctant.enabled'] - file_incr_rel_post_run = incremental_analyze(commit, out_commit, 'incr-post-rel', "compare-data-incr-post-rel", gr, repo_path, conf_incrpost, reluctant_option) + file_incr_rel_post_run = incremental_analyze(commit, out_commit, 'incr-post-rel', incr_post_rel_data_dir, "compare-data-incr-post-rel", gr, repo_path, conf_incrpost, reluctant_option) if commit_num in compare_commits or commit_num == len(series) - 1: # compare stored data of original and incremental run From 41d5558c1b5134f65a574583018e16aa7283c146 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Fri, 28 Jul 2023 10:00:58 +0200 Subject: [PATCH 83/84] Remove detect-renames field for compatibility with sttt-2022 goblint. 
--- scripts/incremental/benchmarking/conf/chrony-incrpostsolver.json | 1 - scripts/incremental/benchmarking/conf/chrony.json | 1 - scripts/incremental/benchmarking/conf/figlet-incrpostsolver.json | 1 - scripts/incremental/benchmarking/conf/figlet.json | 1 - .../incremental/benchmarking/conf/zstd-race-incrpostsolver.json | 1 - scripts/incremental/benchmarking/conf/zstd-race.json | 1 - 6 files changed, 6 deletions(-) diff --git a/scripts/incremental/benchmarking/conf/chrony-incrpostsolver.json b/scripts/incremental/benchmarking/conf/chrony-incrpostsolver.json index b87872714..8a97510dd 100644 --- a/scripts/incremental/benchmarking/conf/chrony-incrpostsolver.json +++ b/scripts/incremental/benchmarking/conf/chrony-incrpostsolver.json @@ -104,7 +104,6 @@ "postsolver": { "enabled": true }, - "detect-renames": false, "restart": { "sided": { "enabled": false diff --git a/scripts/incremental/benchmarking/conf/chrony.json b/scripts/incremental/benchmarking/conf/chrony.json index 8cfa70f2f..a2fe392e4 100644 --- a/scripts/incremental/benchmarking/conf/chrony.json +++ b/scripts/incremental/benchmarking/conf/chrony.json @@ -104,7 +104,6 @@ "postsolver": { "enabled": false }, - "detect-renames": false, "restart": { "sided": { "enabled": false diff --git a/scripts/incremental/benchmarking/conf/figlet-incrpostsolver.json b/scripts/incremental/benchmarking/conf/figlet-incrpostsolver.json index 68d3fee50..46ad26fce 100644 --- a/scripts/incremental/benchmarking/conf/figlet-incrpostsolver.json +++ b/scripts/incremental/benchmarking/conf/figlet-incrpostsolver.json @@ -93,7 +93,6 @@ "postsolver": { "enabled": true }, - "detect-renames": false, "restart": { "sided": { "enabled": false diff --git a/scripts/incremental/benchmarking/conf/figlet.json b/scripts/incremental/benchmarking/conf/figlet.json index 0e93dc207..3e80b8ffe 100644 --- a/scripts/incremental/benchmarking/conf/figlet.json +++ b/scripts/incremental/benchmarking/conf/figlet.json @@ -93,7 +93,6 @@ "postsolver": { "enabled": 
false }, - "detect-renames": false, "restart": { "sided": { "enabled": false diff --git a/scripts/incremental/benchmarking/conf/zstd-race-incrpostsolver.json b/scripts/incremental/benchmarking/conf/zstd-race-incrpostsolver.json index 4df9e9a2c..dbe858b98 100644 --- a/scripts/incremental/benchmarking/conf/zstd-race-incrpostsolver.json +++ b/scripts/incremental/benchmarking/conf/zstd-race-incrpostsolver.json @@ -112,7 +112,6 @@ "postsolver": { "enabled": true }, - "detect-renames": false, "restart": { "sided": { "enabled": false diff --git a/scripts/incremental/benchmarking/conf/zstd-race.json b/scripts/incremental/benchmarking/conf/zstd-race.json index 095596d25..b3c4a49ac 100644 --- a/scripts/incremental/benchmarking/conf/zstd-race.json +++ b/scripts/incremental/benchmarking/conf/zstd-race.json @@ -112,7 +112,6 @@ "postsolver": { "enabled": false }, - "detect-renames": false, "restart": { "sided": { "enabled": false From 6158b17ed1d5f75ccee2ad0460a88a42f9fb275d Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Sun, 30 Jul 2023 11:14:04 +0200 Subject: [PATCH 84/84] Adapt script to extract changed functions from goblint version sttt-2022. 
--- scripts/incremental/benchmarking/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index 235dd484b..cb29a652d 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -165,7 +165,7 @@ def extract_from_analyzer_log(log): runtime_pattern = 'Default[ ]+(?P[0-9\.]+)s[ ]+(?P[0-9\.]+)s' analysis_time_pattern = 'analysis[ ]+(?P[0-9\.]+)s[ ]+(?P[0-9\.]+)s' solving_time_pattern = 'solving[ ]+(?P[0-9\.]+)s[ ]+(?P[0-9\.]+)s' - change_info_pattern = 'change_info = { unchanged = (?P[0-9]*); changed = (?P[0-9]*) \\(with unchangedHeader = (?P[0-9]*)\\); added = (?P[0-9]*); removed = (?P[0-9]*) }' + change_info_pattern = 'change_info = { unchanged = (?P[0-9]*); changed = (?P[0-9]*); added = (?P[0-9]*); removed = (?P[0-9]*) }' runtime = find_line(runtime_pattern, log)