From ccc2a6a1e08663488257cab06834f879b13e3060 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Wed, 5 Oct 2022 13:20:40 +0200 Subject: [PATCH 01/84] Move scripts for incremental benchmarking from the analyzer to the bench repo. These scripts were written by Sarah Tilscher. Co-authored-by: Sarah Tilscher <66023521+stilscher@users.noreply.github.com> --- .../incremental/benchmarking/efficiency.py | 234 +++++++++++++++ scripts/incremental/benchmarking/plot.py | 144 +++++++++ scripts/incremental/benchmarking/precision.py | 277 ++++++++++++++++++ .../incremental/benchmarking/requirements.txt | 7 + scripts/incremental/benchmarking/stats.py | 65 ++++ scripts/incremental/benchmarking/utils.py | 246 ++++++++++++++++ .../incremental/build/build_compdb_zstd.sh | 3 + 7 files changed, 976 insertions(+) create mode 100644 scripts/incremental/benchmarking/efficiency.py create mode 100644 scripts/incremental/benchmarking/plot.py create mode 100644 scripts/incremental/benchmarking/precision.py create mode 100644 scripts/incremental/benchmarking/requirements.txt create mode 100644 scripts/incremental/benchmarking/stats.py create mode 100644 scripts/incremental/benchmarking/utils.py create mode 100755 scripts/incremental/build/build_compdb_zstd.sh diff --git a/scripts/incremental/benchmarking/efficiency.py b/scripts/incremental/benchmarking/efficiency.py new file mode 100644 index 000000000..c6abb95a0 --- /dev/null +++ b/scripts/incremental/benchmarking/efficiency.py @@ -0,0 +1,234 @@ +from pydriller import Repository, Git +import utils +import psutil +import multiprocessing as mp +import os +import subprocess +import itertools +import shutil +import json +from datetime import datetime +import sys +import pandas as pd + +################################################################################ +# Usage: python3 incremental_smallcommits.py +# Executing the script will overwrite the directory 'result_efficiency' in the cwd. 
+# The script for building the compilation database is assumed to be found in the analyzers script directory and the +# config file is assumed to be found in the conf directory of the analyzers repository. +# The single test runs are mapped to processors according to the coremapping. The one specified in the section below +# should work for Intel machines, otherwise you might need to adapt it according to the description. +if len(sys.argv) != 3: + print("Wrong number of parameters.\nUse script like this: python3 parallel_benchmarking.py ") + exit() +result_dir = os.path.join(os.getcwd(), 'result_efficiency') +maxCLOC = 50 # can be deactivated with None +url = "https://github.com/facebook/zstd" +repo_name = "zstd" +build_compdb = "build_compdb_zstd.sh" +conf_base = "zstd-race-baseline" # very minimal: "zstd-minimal" +conf_incrpost = "zstd-race-incrpostsolver" +begin = datetime(2021,8,1) +to = datetime(2022,2,1) # minimal subset: datetime(2021,8,4) +diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"] +analyzer_dir = sys.argv[1] +only_collect_results = False # can be turned on to collect results, if data collection was aborted before the creation of result tables +################################################################################ +try: + numcores = int(sys.argv[2]) +except ValueError: + print("Parameter should be a number.\nUse script like this: python3 parallel_benchmarking.py ") + exit() +avail_phys_cores = psutil.cpu_count(logical=False) +allowedcores = avail_phys_cores - 1 +if not only_collect_results and numcores > allowedcores: + print("Not enough physical cores on this machine (exist: ", avail_phys_cores, " allowed: ", allowedcores, ")") + exit() +# For equal load distribution, choose a processes to core mapping, +# use only physical cores and have an equal number of processes per cache. +# The layout of physical/logical cores and sharing of caches is machine dependent. To find out use: 'lscpu --all --extended'. 
+# For our test server: +coremapping1 = [i for i in range(numcores - numcores//2)] +coremapping2 = [i for i in range(avail_phys_cores//2, avail_phys_cores//2 + numcores//2)] +coremapping = [coremapping1[i//2] if i%2==0 else coremapping2[i//2] for i in range(len(coremapping1) + len(coremapping2))] +################################################################################ + +def filter_commits_false_pred(repo_path): + def pred(c): + relCLOC = utils.calculateRelCLOC(repo_path, c, diff_exclude) + return relCLOC == 0 or (maxCLOC is not None and relCLOC > maxCLOC) + return pred + +def analyze_small_commits_in_repo(cwd, outdir, from_c, to_c): + count_analyzed = 0 + count_skipped = 0 + count_failed = 0 + analyzed_commits = {} + repo_path = os.path.join(cwd, repo_name) + + for commit in itertools.islice(itertools.filterfalse(filter_commits_false_pred(repo_path), Repository(url, since=begin, to=to, only_no_merge=True, clone_repo_to=cwd).traverse_commits()), from_c, to_c): + gr = Git(repo_path) + + #print("\n" + commit.hash) + #print('changed LOC: ', commit.lines) + #print('merge commit: ', commit.merge) + + # skip merge commits and commits that have no or less than maxCLOC of relevant code changes + relCLOC = utils.calculateRelCLOC(repo_path, commit, diff_exclude) # use this to filter commits by actually relevant changes + #print("relCLOC: ", relCLOC) + if relCLOC == 0 or (maxCLOC is not None and relCLOC > maxCLOC): + #print('Skip this commit: merge commit or too many relevant changed LOC') + count_skipped+=1 + continue + + # analyze + try_num = from_c + count_analyzed + count_failed + 1 + outtry = os.path.join(outdir, str(try_num)) + parent = gr.get_commit(commit.parents[0]) + #print('Analyze this commit incrementally. 
#', try_num) + + utils.reset_incremental_data(os.path.join(cwd, 'incremental_data')) + failed = True + try: + #print('Starting from parent', str(parent.hash), ".") + outparent = os.path.join(outtry, 'parent') + os.makedirs(outparent) + add_options = ['--disable', 'incremental.load', '--enable', 'incremental.save'] + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, parent.hash, outparent, conf_base, add_options) + + #print('And now analyze', str(commit.hash), 'incrementally.') + outchild = os.path.join(outtry, 'child') + os.makedirs(outchild) + add_options = ['--enable', 'incremental.load', '--disable', 'incremental.save'] + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchild, conf_base, add_options) + + #print('And again incremental, this time with incremental postsolver') + outchildincrpost = os.path.join(outtry, 'child-incr-post') + os.makedirs(outchildincrpost) + add_options = ['--enable', 'incremental.load', '--disable', 'incremental.save'] + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchildincrpost, conf_incrpost, add_options) + + #print('And again incremental, this time with incremental postsolver and reluctant') + outchildrel = os.path.join(outtry, 'child-rel') + os.makedirs(outchildrel) + add_options = ['--enable', 'incremental.load', '--disable', 'incremental.save', '--enable', 'incremental.reluctant.enabled'] + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchildrel, conf_incrpost, add_options) + + count_analyzed+=1 + failed = False + except subprocess.CalledProcessError as e: + print('Aborted because command ', e.cmd, 'failed.') + count_failed+=1 + os.makedirs(outtry, exist_ok=True) + with open(os.path.join(outtry,'commit_properties.log'), "w+") as file: + json.dump({"hash": commit.hash, "parent_hash": parent.hash, "CLOC": commit.lines, "relCLOC": relCLOC, "failed": failed}, file) + analyzed_commits[try_num]=(str(commit.hash)[:6], 
def collect_data(outdir):
    """Gather the results of all analysis runs below `outdir` into a dict of columns.

    Each numbered subdirectory of `outdir` corresponds to one analyzed commit and
    contains the analyzer logs of the four runs (parent non-incremental, child
    incremental, child incremental + incr. postsolver, child incremental +
    reluctant) plus a commit_properties.log with commit metadata. Failed commits
    contribute zeros for the runtime/warning columns so all columns stay the
    same length.

    Returns a dict mapping column name -> list of per-commit values, suitable
    for pandas.DataFrame construction.
    """
    data = {"Commit": [], "Failed?": [], "Changed LOC": [], "Relevant changed LOC": [],
            "Changed/Added/Removed functions": [],
            utils.header_runtime_parent: [], utils.header_runtime_incr_child: [],
            utils.header_runtime_incr_posts_child: [], utils.header_runtime_incr_posts_rel_child: [],
            "Change in number of race warnings": []}
    for t in os.listdir(outdir):
        parentlog = os.path.join(outdir, t, 'parent', utils.analyzerlog)
        childlog = os.path.join(outdir, t, 'child', utils.analyzerlog)
        childpostslog = os.path.join(outdir, t, 'child-incr-post', utils.analyzerlog)
        childpostsrellog = os.path.join(outdir, t, 'child-rel', utils.analyzerlog)
        commit_prop_log = os.path.join(outdir, t, 'commit_properties.log')
        # Use a context manager so the metadata file handle is closed
        # deterministically (the original json.load(open(...)) leaked it).
        # The original also converted t with int() here, but never used the result.
        with open(commit_prop_log, "r") as f:
            commit_prop = json.load(f)
        data["Changed LOC"].append(commit_prop["CLOC"])
        data["Relevant changed LOC"].append(commit_prop["relCLOC"])
        data["Failed?"].append(commit_prop["failed"])
        data["Commit"].append(commit_prop["hash"][:7])
        if commit_prop["failed"]:
            # Failed runs produced no analyzer logs; fill with zeros to keep the
            # columns aligned with the metadata columns appended above.
            data[utils.header_runtime_parent].append(0)
            data[utils.header_runtime_incr_child].append(0)
            data[utils.header_runtime_incr_posts_child].append(0)
            data[utils.header_runtime_incr_posts_rel_child].append(0)
            data["Changed/Added/Removed functions"].append(0)
            data["Change in number of race warnings"].append(0)
            continue
        parent_info = utils.extract_from_analyzer_log(parentlog)
        child_info = utils.extract_from_analyzer_log(childlog)
        child_posts_info = utils.extract_from_analyzer_log(childpostslog)
        child_posts_rel_info = utils.extract_from_analyzer_log(childpostsrellog)
        data["Changed/Added/Removed functions"].append(
            int(child_info["changed"]) + int(child_info["added"]) + int(child_info["removed"]))
        data[utils.header_runtime_parent].append(float(parent_info["runtime"]))
        data[utils.header_runtime_incr_child].append(float(child_info["runtime"]))
        data[utils.header_runtime_incr_posts_child].append(float(child_posts_info["runtime"]))
        data[utils.header_runtime_incr_posts_rel_child].append(float(child_posts_rel_info["runtime"]))
        # Bug fix: convert BOTH warning counts to int before subtracting. The
        # original computed int(child_info["race_warnings"] - int(...)), which
        # raises TypeError because the extracted log fields are strings (cf. the
        # int() conversions for changed/added/removed above).
        data["Change in number of race warnings"].append(
            int(child_info["race_warnings"]) - int(parent_info["race_warnings"]))
    return data
processes.append(p) + # time.sleep(random.randint(5,60)) # add random delay between process creation to try to reduce interference + else: + runperprocess(coremapping[i], start, end) + os.chdir(result_dir) + + for p in processes: + p.join() + +def merge_results(): + filename = "results.csv" + frames = [] + for process_dir in os.listdir("."): + path = os.path.join(process_dir, filename) + if os.path.exists(path): + t = pd.read_csv(path, index_col=0, sep=";") + frames.append(t) + if len(frames) > 0: + df = pd.concat(frames) + #df.sort_index(inplace=True, key=lambda idx: idx.map(lambda x: int(x.split(":")[0]))) + df.to_csv('total_results.csv', sep=";") + + +if not only_collect_results: + os.mkdir(result_dir) +os.chdir(result_dir) + +analyze_chunks_of_commits_in_parallel() +merge_results() diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py new file mode 100644 index 000000000..4720fd56a --- /dev/null +++ b/scripts/incremental/benchmarking/plot.py @@ -0,0 +1,144 @@ +import utils +import os +import shutil + +def cummulative_distr_compare2(outdir, result_csv_filename): + num_bins = 2000 + outfile_nonincr_vs_incr = "figure_cum_distr_incr.pdf" + outfile_incr_vs_incrrel = "figure_cum_distr_rel.pdf" + df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) + + data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_parent, utils.header_runtime_incr_child]) + datanonincr = {"values": data[0], "label": "Non-incremental analysis of parent commit"} + dataincr = {"values": data[1], "label": "Incremental analysis of commit"} + utils.cummulative_distr_plot([datanonincr, dataincr], base, outfile_nonincr_vs_incr) + + data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_incr_child, utils.header_runtime_incr_posts_rel_child]) + dataincr = {"values": data[0], "label": "Incremental analysis of commit"} + datarelincr = {"values": data[1], "label": "Reluctant 
incremental analysis of commit"} + utils.cummulative_distr_plot([dataincr, datarelincr], base, outfile_incr_vs_incrrel, logscale=True) + +def cummulative_distr_all3(outdir, result_csv_filename): + num_bins = 2000 + outfile_nonincr_vs_incr = "figure_cum_distr_all3.pdf" + df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) + + data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_parent, utils.header_runtime_incr_child, utils.header_runtime_incr_posts_rel_child]) + datanonincr = {"values": data[0], "label": "Non-incremental analysis of parent commit"} + dataincr = {"values": data[1], "label": "Incremental analysis of commit"} + datarelincr = {"values": data[2], "label": "Reluctant incremental analysis of commit"} + utils.cummulative_distr_plot([datanonincr, dataincr, datarelincr], base, outfile_nonincr_vs_incr, figsize=(6,4), logscale=True) + +def distribution_absdiff_plot(title, result_csv_filename, outdir, cutoffs_incr=None, cutoffs_rel=None): + df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) + + # plot incremental vs non-incremental + diff = df.loc[:,utils.header_runtime_parent] - df.loc[:,utils.header_runtime_incr_child] + utils.hist_plot(diff, 20, title, 'Improvement in s (incremental compared to non-incremental)', 'Number of Commits', os.path.join(outdir, "figure_absdiff_distr_incr.pdf"), cutoffs_incr) + + # plot reluctant vs. 
basic incremental + diff = df.loc[:,utils.header_runtime_incr_child] - df.loc[:,utils.header_runtime_incr_posts_rel_child] + utils.hist_plot(diff, 2, title, 'Improvement in s (reluctant compared to incremental)', 'Number of Commits', os.path.join(outdir, "figure_absdiff_distr_rel.pdf"), cutoffs_rel) + +def distribution_reldiff_plot(title, result_csv_filename, outdir, cutoffs_incr=None, cutoffs_rel=None): + df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) + + # plot incremental vs non-incremental + print(df[utils.header_runtime_incr_child].astype('float')) + diff = 1 - df[utils.header_runtime_incr_child].astype('float') / df[utils.header_runtime_parent].astype('float') + utils.hist_plot(diff, 0.01, title, "Relative Improvement in s (incremental compared to non-incremental)", 'Number of Commits', os.path.join(outdir, "figure_reldiff_distr_incr.pdf"), cutoffs_incr) + + # plot reluctant vs. basic incremental + diff = 1 - df.loc[:,utils.header_runtime_incr_posts_rel_child] / df.loc[:,utils.header_runtime_incr_child] + utils.hist_plot(diff, 0.005, title, 'Relative Improvement (reluctant compared to incremental)', 'Number of Commits', os.path.join(outdir, "figure_reldiff_distr_rel.pdf"), cutoffs_rel) + +def paper_efficiency_graphs(dir_results, csv_filename, outdir, filterRelCLOC=False, filterDetectedChanges=False): + df = utils.get_cleaned_filtered_data(os.path.join(dir_results,csv_filename), filterRelCLOC=filterRelCLOC, filterDetectedChanges=filterDetectedChanges) + diff1 = 1 - df[utils.header_runtime_incr_child].astype('float') / df[utils.header_runtime_parent].astype('float') + diff2 = 1 - df[utils.header_runtime_incr_posts_child].astype('float') / df[utils.header_runtime_incr_child].astype('float') + diff3 = 1 - df[utils.header_runtime_incr_posts_rel_child].astype('float') / df[utils.header_runtime_incr_posts_child].astype('float') + diff4 = 1 - df[utils.header_runtime_incr_posts_rel_child].astype('float') / 
def paper_precision_graph(results_precision, filename, outdir):
    """Plot the share of less-precise program points after 1, 2, 5, 10, 15 commits
    and at the end of each analyzed commit sequence.

    Reads the merged precision results (JSON) from `results_precision/filename`
    and writes the scatter plot to `outdir/precision_figure.pgf`.
    """
    df = utils.get_data_from_json(os.path.join(results_precision, filename))

    # Column names for precision loss after x commits, where x is in {1, 2, 5, 10, 15},
    # plus the values at the end of the sequence.
    checkpoints = (1, 2, 5, 10, 15)
    lessprec_cols = ['intermediate precision.' + str(k) + '.precision.lessprec' for k in checkpoints] \
        + ['final precision.lessprec']
    total_cols = ['intermediate precision.' + str(k) + '.precision.total' for k in checkpoints] \
        + ['final precision.total']

    data = []
    for i in range(len(df.index)):
        row = df.iloc[i]
        xs = [1, 2, 5, 10, 15, row['length']]
        vals = row[lessprec_cols].values
        total = row[total_cols].values
        # Keep only checkpoints where BOTH counts are present (v == v is the
        # NaN test for pandas missing values).
        # Bug fix: the original filtered x on vals only but y on vals AND total,
        # so a NaN in total produced x/y lists of different lengths.
        keep = [j for j in range(len(xs)) if vals[j] == vals[j] and total[j] == total[j]]
        x = [xs[j] for j in keep]
        y = [vals[j] / total[j] for j in keep]
        data.append((x, y))
    # Output half of the LaTeX textwidth (inches); height is 2/3 of the width.
    halftextwidth = 3.3
    size = (halftextwidth, halftextwidth * 2 / 3)
    utils.scatter_plot(data, "\# Commits", "Share of less precise program points", os.path.join(outdir, "precision_figure.pgf"), size)
+from pydriller import Repository, Git +import psutil +import os +import sys +from datetime import datetime +import json +import shutil +import pytz +import multiprocessing as mp + + +################################################################################ +# Usage: python3 incremental_smallcommits.py +# Executing the script will overwrite the directory 'result_precision' in the cwd. +# The script for building the compilation database is assumed to be found in the analyzers script directory and the +# config file is assumed to be found in the conf directory of the analyzers repository. +if len(sys.argv) != 3: + print("Wrong number of parameters.\nUse script like this: python3 parallel_benchmarking.py ") + exit() +res_dir = os.path.abspath('result_precision') +maxCLOC = None +url = "https://github.com/facebook/zstd" +repo_name = "zstd" +build_compdb = "build_compdb_zstd.sh" +conf = "zstd-race-incrpostsolver" +begin = datetime(2021,8,1) +to = datetime(2022,2,1) +diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"] +analyzer_dir = sys.argv[1] +try: + numcores = int(sys.argv[2]) +except ValueError: + print("Parameter should be a number.\nUse script like this: python3 parallel_benchmarking.py ") + exit() +only_collect_results = False # can be turned on to collect results, if data collection was aborted before the creation of result tables +################################################################################ + +utc = pytz.UTC +compare_commits = [1,2,5,10,15] +skipSeqShorterEq = 5 # minimum number of incremental commits in chain + +def start_commit_for_sequence_search(): + current_commit = "" + for commit in Repository(url, to=to, only_in_branch='dev', order='reverse', clone_repo_to=res_dir).traverse_commits(): + current_commit = commit + break + gr = Git(os.path.join(res_dir, repo_name)) + return current_commit, gr + +def find_sequences_rec(gr, commit, seq, seq_list, starting_points): + commit_date = 
commit.committer_date.replace(tzinfo=None) + if commit_date < begin: + if len(seq) > skipSeqShorterEq: + print("found seq of length: " + str(len(seq))) + seq_list.insert(0,seq) + elif commit.merge: + seq.insert(0,commit.hash) + if len(seq) > skipSeqShorterEq: + print("found seq of length: " + str(len(seq))) + seq_list.insert(0,seq) + for ph in commit.parents: + parent_commit = gr.get_commit(ph) + if ph not in starting_points: + starting_points.insert(0,ph) + find_sequences_rec(gr, parent_commit, [], seq_list, starting_points) + else: + seq.insert(0,commit.hash) + for p in commit.parents: + parent_commit = gr.get_commit(p) + find_sequences_rec(gr, parent_commit, seq, seq_list, starting_points) + +def find_sequences(): + seq_list = [] + starting_points=[] + start_commit, gr = start_commit_for_sequence_search() + starting_points.insert(0,start_commit.hash) + find_sequences_rec(gr, start_commit, [], seq_list, starting_points) + seq_list.sort(key=len, reverse=True) + print("summary") + total = 0 + maxlen = max(map(lambda x : len(x), seq_list)) + for i in range(0,maxlen + 1): + c = sum(map(lambda x : len(x) == i, seq_list)) + total += c + print("length " + str(i) + ": " + str(c)) + print("total: " + str(len(seq_list))) + assert(total == len(seq_list)) + print("avg len: " + str(sum(map(lambda x : len(x), seq_list))/len(list(map(lambda x : len(x), seq_list))))) + with open('sequences.json', 'w') as outfile: + json.dump(seq_list, outfile, indent=4) + return seq_list + +def analyze_series_in_repo(series): + prev_commit = "" + commit_num = 0 + repo_path = os.path.abspath(repo_name) + out_dir = os.path.abspath('out') + with open('sequence.json', 'w') as outfile: + json.dump(series, outfile, indent=4) + dummy_c_file = "file.c" + with open(dummy_c_file, 'w') as file: + file.write("int main() { return 0; }") + file.close() + + for commit in Repository(url, since=begin, only_commits=series, clone_repo_to=os.getcwd()).traverse_commits(): + gr = Git(repo_path) + + # print("\n" + 
commit.hash) + # print('changed LOC: ', commit.lines) + # print('merge commit: ', commit.merge) + + # check that given series is a path of sequential commits in the repository + msg = "Commit " + prev_commit[:7] + "is not a parent commit of " + commit.hash[:7] + " (parents: " + ','.join(commit.parents) + ")" + assert (prev_commit == "" or prev_commit in commit.parents), msg + + relCLOC = utils.calculateRelCLOC(repo_path, commit, diff_exclude) + + # analyze + out_commit = os.path.join(out_dir, str(commit_num)) + os.makedirs(out_commit) + with open(os.path.join(out_commit,'commit_properties.log'), "w+") as file: + json.dump({"hash": commit.hash, "parent_hash": prev_commit, "CLOC": commit.lines, "relCLOC": relCLOC}, file) + + if commit_num == 0: + # analyze initial commit non-incrementally + try: + # print('Analyze ', str(commit.hash), ' as initial commit.') + add_options = ['--disable', 'incremental.load', '--enable', 'incremental.save'] + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, out_commit, conf, add_options) + prev_commit = commit.hash + except utils.subprocess.CalledProcessError as e: + print('Aborted initial because command ', e.cmd, 'failed.') + print('Fix the problem or choose a different commit to start the accumulative analysis from.') + exit() + else: + # analyze every following commit based on the latest previous commit for which the analysis succeeded + try: + if os.path.isdir("backup_incremental_data"): + shutil.rmtree("backup_incremental_data") + shutil.copytree("incremental_data", "backup_incremental_data") + + # compare only for 10th and last run + if commit_num in compare_commits or commit_num == len(series) - 1: + # analyze commit non-incrementally and save run for comparison + # print('Analyze', str(commit.hash), 'non-incrementally (#', commit_num, ').') + out_nonincr = os.path.join(out_commit, 'non-incr') + os.makedirs(out_nonincr) + file_original_run = os.path.join(out_nonincr, "compare-data-nonincr") + 
add_options = ['--enable', 'incremental.only-rename', '--set', 'save_run', file_original_run] + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, out_nonincr, conf, add_options) + + # analyze commit incrementally based on the previous commit and save run for comparison + # print('Analyze', str(commit.hash), 'incrementally (#', commit_num, ').') + out_incr = os.path.join(out_commit, 'incr') + os.makedirs(out_incr) + file_incremental_run = os.path.join(out_incr, "compare-data-incr") + add_options = ['--enable', 'incremental.load', '--enable', 'incremental.save', '--enable', 'incremental.reluctant.enabled', '--set', 'save_run', file_incremental_run] + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, out_incr, conf, add_options) + + if commit_num in compare_commits or commit_num == len(series) - 1: + # compare stored data of original and incremental run + # print('Compare both runs.') + out_compare = os.path.join(out_commit, 'compare') + os.makedirs(out_compare) + utils.compare_runs(analyzer_dir, dummy_c_file, out_compare, conf, file_incremental_run, file_original_run) + + except utils.subprocess.CalledProcessError as e: + print('Aborted because command ', e.cmd, 'failed.') + shutil.rmtree("incremental_data") + shutil.copytree("backup_incremental_data", "incremental_data") + + prev_commit = commit.hash + commit_num += 1 + +def runperprocess(core, seq_list, q): + psutil.Process().cpu_affinity([core]) + while not q.empty(): + i = q.get() + serie = seq_list[i] + dir = "series" + str(i) + os.mkdir(dir) + os.chdir(dir) + analyze_series_in_repo(serie) + os.chdir(res_dir) + +def analyze_seq_in_parallel(seq_list): + avail_phys_cores = psutil.cpu_count(logical=False) + allowedcores = avail_phys_cores - 1 + if numcores > allowedcores: + print("Not enough physical cores on this maching (exist: ", avail_phys_cores, " allowed: ", allowedcores, ")") + exit() + # For equal load distribution, choose a processes to core mapping, + 
def merge_results(outfilename):
    """Merge the comparison results of all 'series*' directories in the cwd.

    For every sequence directory this reads the commit sequence, accumulates the
    relevant changed LOC, and collects the precision-comparison results for the
    selected checkpoints (`compare_commits`) and for the final commit. Averages
    of the per-checkpoint precision shares are computed over all sequences.

    Writes one JSON document {"seq_summary": ..., "prec_avgs": ...} to
    `outfilename` and returns the same dict.
    """
    wd = os.getcwd()
    seq_summaries = []
    # Per checkpoint: summed precision shares, number of contributing commits,
    # and summed relevant changed LOC.
    result_sums = {str(i): {"precpertotal": {"equal": 0, "moreprec": 0, "lessprec": 0, "incomp": 0, "total": 0},
                            "number_of_commits": 0, "relCLOC": 0} for i in compare_commits}
    for s in map(lambda x: os.path.abspath(x), os.listdir(wd)):
        if not os.path.isdir(s) or os.path.basename(s)[:6] != "series":
            continue
        os.chdir(s)
        with open('sequence.json', 'r') as file:
            seq = json.load(file)
        # look up comparison results
        outdir = os.path.join(s, "out")
        commits = os.listdir(outdir)
        commits.sort(key=lambda x: int(x))
        int_prec = {str(i): {"precision": None, "relCLOC": None} for i in compare_commits}
        final_prec = None
        relCLOC = 0
        # Commit "0" is the non-incremental baseline and has no comparison data.
        for i in filter(lambda x: x != "0", commits):
            ith_dir = os.path.join(outdir, i)
            compare_log_path = os.path.join(ith_dir, "compare", utils.comparelog)
            with open(os.path.join(outdir, i, "commit_properties.log"), "r") as f:
                relCLOC += json.load(f)["relCLOC"]
            if int(i) in compare_commits:
                if os.path.isdir(ith_dir) and os.path.exists(compare_log_path):
                    int_prec[i]["precision"] = utils.extract_precision_from_compare_log(compare_log_path)
                    int_prec[i]["relCLOC"] = relCLOC
                    if int_prec[i]["precision"]:
                        # Accumulate each precision category as a share of the
                        # total number of program points of this comparison.
                        result_sums[i]["precpertotal"] = {
                            k: result_sums[i]["precpertotal"].get(k, 0)
                               + (int_prec[i]["precision"].get(k, 0) / int_prec[i]["precision"]["total"])
                            for k in set(result_sums[i]["precpertotal"])}
                        result_sums[i]["number_of_commits"] += 1
                        result_sums[i]["relCLOC"] += relCLOC
            if int(i) != 0 and int(i) == len(commits) - 1:
                # Last commit of the sequence ("0" is filtered out above, so
                # int(i) != 0 always holds here).
                if os.path.exists(compare_log_path):
                    final_prec = utils.extract_precision_from_compare_log(compare_log_path)
        summary = {"name": os.path.basename(s), "sequence": seq, "length": len(seq),
                   "intermediate precision": int_prec, "final precision": final_prec, "finalRelCLOC": relCLOC}
        seq_summaries.append(summary)
        os.chdir(wd)
    result_avgs = {i: None for i in result_sums.keys()}
    for i, ps in result_sums.items():
        if ps["number_of_commits"] != 0:
            avg_prec = {k: ps["precpertotal"].get(k, 0) / ps["number_of_commits"] for k in set(ps["precpertotal"])}
            result_avgs[i] = {"precpertotal_avg": avg_prec, "relCLOC_avg": ps["relCLOC"] / ps["number_of_commits"]}
    res = {"seq_summary": seq_summaries, "prec_avgs": result_avgs}
    with open(outfilename, "w") as f:
        json.dump(res, f, indent=4)
    # Bug fix: the original ended with a bare no-op expression `res`; return the
    # merged results so callers can use them programmatically.
    return res
/dev/null +++ b/scripts/incremental/benchmarking/stats.py @@ -0,0 +1,65 @@ +import utils +from pydriller import Repository +from datetime import datetime +import os +import sys + +if __name__ == '__main__': + if len(sys.argv) != 2: + print("Wrong number of parameters.\nUse script like this: python3 incremental_stats.py ") + exit() + +analyzer_dir = sys.argv[1] +url = 'https://github.com/facebook/zstd' +repo_name = 'zstd' +begin = datetime(2021,8,1) +to = datetime(2022,2,1) +maxCLOC = 50 +dirs_to_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"] + +cwd = os.getcwd() +outdir = os.path.join(cwd, 'out') +repo_path = os.path.normpath(os.path.join(cwd, repo_name)) +paths_to_exclude = list(map(lambda x: os.path.join(repo_path, x), dirs_to_exclude)) + +analyzed_commits = {} +total_commits = 0 +count_nochanges = 0 +count_merge = 0 +count_big = 0 +count_small = 0 + +def iter_repo(): + global analyzed_commits + global total_commits + global count_merge + global count_nochanges + global count_big + global count_small + + for commit in Repository(url, since=begin, to=to, clone_repo_to=cwd).traverse_commits(): + total_commits += 1 + + # count merge commits + if commit.merge: + count_merge += 1 + continue + + # count commits that have less than maxCLOC of relevant code changes + relCLOC = utils.calculateRelCLOC(repo_path, commit, paths_to_exclude) # use this to filter commits by actually relevant changes + if relCLOC == 0: + count_nochanges += 1 + continue + + if maxCLOC is not None and relCLOC > maxCLOC: + count_big += 1 + continue + + count_small += 1 + +iter_repo() +print("\nCommits traversed in total: ", total_commits) +print("Merge commits: ", count_merge) +print("Commits without any relevant changes: ", count_nochanges) +print("Big commits: ", count_big) +print("Small commits with relevant changes: ", count_small) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py new file mode 100644 index 
000000000..c45bf36ae --- /dev/null +++ b/scripts/incremental/benchmarking/utils.py @@ -0,0 +1,246 @@ +import os +import shutil +from pathlib import Path +import subprocess +from pydriller import Git +import re +import pandas +import json +import numpy as np +import brokenaxes +import matplotlib as mpl +mpl.use("pgf") +mpl.rcParams.update({ + "pgf.texsystem": "pdflatex", + 'pgf.rcfonts': False, + 'text.usetex': True, + 'font.family': 'serif', + 'font.size': 9, + 'axes.titlesize': 9, + 'legend.fontsize': 9, + 'figure.titlesize': 9, + 'figure.dpi': 300, + 'xtick.labelsize': 9, + 'ytick.labelsize': 9, + +}) +import matplotlib.pyplot as plt +from matplotlib.ticker import ScalarFormatter + +header_runtime_parent = "Runtime for parent commit (non-incremental)" +header_runtime_incr_child = "Runtime for commit (incremental)" +header_runtime_incr_posts_child = "Runtime for commit (incremental + incr postsolver)" +header_runtime_incr_posts_rel_child = "Runtime for commit (incremental + incr postsolver + reluctant)" + +preparelog = "prepare.log" +analyzerlog = "analyzer.log" +comparelog = "compare.log" + +def reset_incremental_data(incr_data_dir): + if os.path.exists(incr_data_dir) and os.path.isdir(incr_data_dir): + shutil.rmtree(incr_data_dir) + +def analyze_commit(analyzer_dir, gr : Git, repo_path, build_compdb, commit_hash, outdir, conf, extra_options): + gr.checkout(commit_hash) + conf_path = os.path.join(analyzer_dir, 'conf', conf + '.json') + + # print configuration + with open(outdir+'/config.out', "a+") as file: + with open(conf_path, "r") as c: + file.write("config: " + c.read()) + file.write("\n") + file.write("added options:\n") + for o in extra_options: + file.write(o + " ") + file.close() + + prepare_command = ['sh', os.path.join(analyzer_dir, 'scripts', build_compdb)] + with open(os.path.join(outdir, preparelog), "w+") as outfile: + subprocess.run(prepare_command, cwd = gr.path, check=True, stdout=outfile, stderr=subprocess.STDOUT) + outfile.close() + + 
analyze_command = [os.path.join(analyzer_dir, 'goblint'), '--conf', os.path.join(analyzer_dir, 'conf', conf + '.json'), *extra_options, repo_path] + with open(os.path.join(outdir, analyzerlog), "w+") as outfile: + subprocess.run(analyze_command, check=True, stdout=outfile, stderr=subprocess.STDOUT) + outfile.close() + +def compare_runs(analyzer_dir, dummy_c_file, outdir, conf, compare_data_1, compare_data_2): + options = ['--conf', os.path.join(analyzer_dir, 'conf', conf + '.json'), '--disable', 'printstats', '--disable', 'warn.warning', '--disable', 'warn.race', '--disable', 'dbg.compare_runs.diff', '--disable', 'dbg.compare_runs.eqsys', '--enable', 'dbg.compare_runs.node', '--compare_runs', compare_data_1, compare_data_2] + analyze_command = [os.path.join(analyzer_dir, 'goblint'), *options, dummy_c_file] + with open(os.path.join(outdir, comparelog), "w+") as outfile: + subprocess.run(analyze_command, check=True, stdout=outfile, stderr=subprocess.STDOUT) + outfile.close() + +def calculateRelCLOC(repo_path, commit, diff_exclude): + diff_exclude = list(map(lambda x: os.path.join(repo_path, x), diff_exclude)) + relcloc = 0 + for f in commit.modified_files: + _, extension = os.path.splitext(f.filename) + if not (extension == ".h" or extension == ".c"): + continue + filepath = f.new_path + if filepath is None: + filepath = f.old_path + parents = Path(filepath).parents + parents = list(map(lambda x: os.path.join(repo_path, x), parents)) + if any(dir in parents for dir in diff_exclude): + continue + relcloc = relcloc + f.added_lines + f.deleted_lines + return relcloc + +def find_line(pattern, log): + with open (log, 'r') as file: + for line in file: + m = re.search(pattern, line) + if m: + file.close() + return m.groupdict() + return None + +def extract_from_analyzer_log(log): + runtime_pattern = 'TOTAL[ ]+(?P[0-9\.]+) s' + change_info_pattern = 'change_info = { unchanged = (?P[0-9]*); changed = (?P[0-9]*); added = (?P[0-9]*); removed = (?P[0-9]*) }' + r = 
find_line(runtime_pattern, log) + ch = find_line(change_info_pattern, log) or {"unchanged": 0, "changed": 0, "added": 0, "removed": 0} + d = dict(list(r.items()) + list(ch.items())) + with open(log, "r") as file: + num_racewarnings = file.read().count('[Warning][Race]') + d["race_warnings"] = num_racewarnings + file.close() + return d + +def extract_precision_from_compare_log(log): + pattern = "equal: (?P[0-9]+), more precise: (?P[0-9]+), less precise: (?P[0-9]+), incomparable: (?P[0-9]+), total: (?P[0-9]+)" + precision = find_line(pattern, log) + return {k: int(v) for k,v in precision.items()} if precision else None + +def barplot(data_set): + df = pandas.DataFrame(data_set["data"], index=data_set["index"]) # TODO: index=analyzed_commits + df.sort_index(inplace=True, key=lambda idx: idx.map(lambda x: int(x.split(":")[0]))) + print(df) + df.to_csv('results.csv') + + df.plot.bar(rot=0, width=0.7, figsize=(25,10)) + plt.xticks(rotation=45, ha='right', rotation_mode='anchor') + plt.xlabel('Commit') + plt.tight_layout() + plt.savefig("figure.pdf") + +def get_cleaned_filtered_data(result_csv_file, filterRelCLOC=False, filterDetectedChanges=False): + df=pandas.read_csv(result_csv_file, index_col='Commit', sep=";") + df = df.loc[:, ~df.columns.str.contains('^Unnamed')] + + # clean dataset (remove all rows for which any of the runtime entries is 0 which means that the respective analysis + # run failed) + df = df[(df[header_runtime_parent] != 0)] + if filterRelCLOC: + df = df[df["Relevant changed LOC"] > 0] + if filterDetectedChanges: + df = df[df["Changed/Added/Removed functions"] > 0] + return df + +def get_data_from_json(result_file): + with open(result_file) as f: + d = json.load(f) + df=pandas.json_normalize(d['seq_summary']) + return df + +def create_cum_data(dataFrame, num_bins, relColumns): + min = dataFrame[relColumns].min().min() + max = dataFrame[relColumns].max().max() + bins = np.linspace(min,max,num=num_bins+1) + data = [] + base = [] + for c in relColumns: + 
valuesc, basec = np.histogram(dataFrame.loc[:,c], bins=bins) + base = basec + cum = np.cumsum(valuesc, dtype=np.float) + cum[cum==0] = np.nan + data = data + [cum] + return data, base[:-1] + +def cummulative_distr_plot(data_sets, base, outfile, figsize=None, title=None, logscale=False): + if figsize: + plt.figure(figsize=figsize) + else: + plt.figure() + for d in data_sets: + plt.plot(d["values"], base, label=d["label"]) + plt.xlabel('Number of Commits') + if logscale: + plt.ylabel('Runtime in s ($log_{2}$ scale)') + plt.yscale('log', base=2) + plt.gca().yaxis.set_major_formatter(ScalarFormatter()) + plt.xlim(left=0) + plt.ylim(bottom=95) + #plt.yticks(np.arange(100,1500,100)) + else: + plt.ylabel('Runtime in s') + plt.tight_layout() + plt.legend() + plt.title(title) + plt.savefig(outfile) + +def hist_plot(data, step, title, xlabel, ylabel, outfile, size, xlim_left=None, xlim_right=None, cutoffs=None): + min = data.min() + max = data.max() + min = min//step + max = max//step + 1 + bins = np.arange(min*step,(max+1)*step,step) + + if cutoffs: + plt.figure() + bax = brokenaxes.brokenaxes(ylims=cutoffs, hspace=0.05, left = 0.18, bottom = 0.16) + bax.hist(data, bins, histtype='bar') + plt.xlabel(xlabel, labelpad=0) + plt.ylabel(ylabel, labelpad=0) + if title: plt.title(title) + plt.savefig(outfile, bbox_inches='tight') + else: + fig = plt.figure() + width, height = size + fig.set_size_inches(w=width, h=height) + plt.hist(data, bins) + if xlim_left: + plt.xlim(left=xlim_left, right=xlim_right) + else: + plt.xlim(right=xlim_right) + if xlabel: plt.xlabel(xlabel) + if ylabel: plt.ylabel(ylabel) + if title: plt.title(title) + plt.tight_layout(pad=0.4) + plt.savefig(outfile) + +def hist_subplots(ax, data, step): + min = data.min() + max = data.max() + min = min//step + max = max//step + 1 + bins = np.arange(min*step,(max+1)*step,step) + ax.hist(data, bins) + +def four_hist_subplots(data, title, xlabel, ylabel, outfile): + step = 0.01 + fig, ((ax1,ax2),(ax3,ax4)) = 
plt.subplots(2,2,tight_layout=True) + for i, ax in enumerate([ax1,ax2,ax3,ax4]): + hist_subplots(ax, data, step) + ax.title.set_text(title[i]) + plt.xlabel(xlabel) + plt.ylabel(ylabel) + plt.tight_layout() + fig.savefig(outfile) + +def scatter_plot(data, xlabel, ylabel, outfile, size): + fig = plt.figure() + width, height = size + fig.set_size_inches(w=width, h=height) + colors=['red','azure','blue','brown','chartreuse','chocolate','darkblue','darkgreen','seagreen','green','indigo','orangered','orange','coral','olive','mediumseagreen','grey','teal'] + markers = ['x','+','o','s','p','*','D','d','v','^','<','>','1','2','3','4','H','P'] + linestyles = ['dashed'] + for i, (x, y) in enumerate(data): + plt.plot(x,y, marker='x', linewidth=0.4, markersize=1, alpha=0.85, color=colors[i % len(colors)], linestyle=linestyles[i % len(linestyles)]) + plt.xlabel(xlabel) + plt.ylabel(ylabel) + plt.ylim(bottom=-0.005, top=0.19) + plt.tight_layout(pad=0.4) + plt.savefig(outfile) diff --git a/scripts/incremental/build/build_compdb_zstd.sh b/scripts/incremental/build/build_compdb_zstd.sh new file mode 100755 index 000000000..baedce33c --- /dev/null +++ b/scripts/incremental/build/build_compdb_zstd.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +sed -i 's/zstd : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT).*/zstd : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) -DZSTD_NO_INTRINSICS/' programs/Makefile +LC_ALL=C.UTF-8 compiledb make -j 1 zstd From 73cebe1c873f9667f749b8e15a0fea09f1e89716 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 6 Oct 2022 11:33:11 +0200 Subject: [PATCH 02/84] Change incremental benchmarking scripts for running with sqlite --- .../incremental/benchmarking/efficiency.py | 36 ++++++++++++------- scripts/incremental/benchmarking/utils.py | 10 ++++-- 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/scripts/incremental/benchmarking/efficiency.py b/scripts/incremental/benchmarking/efficiency.py index c6abb95a0..4023c84eb 100644 --- 
a/scripts/incremental/benchmarking/efficiency.py +++ b/scripts/incremental/benchmarking/efficiency.py @@ -23,13 +23,13 @@ exit() result_dir = os.path.join(os.getcwd(), 'result_efficiency') maxCLOC = 50 # can be deactivated with None -url = "https://github.com/facebook/zstd" -repo_name = "zstd" -build_compdb = "build_compdb_zstd.sh" -conf_base = "zstd-race-baseline" # very minimal: "zstd-minimal" -conf_incrpost = "zstd-race-incrpostsolver" +url = "https://github.com/sqlite/sqlite" +repo_name = "sqlite" +build_compdb = "../build/build_compdb_sqlite.sh" +conf_base = "large-program" # very minimal: "zstd-minimal" +conf_incrpost = "large-program" #TODO: Use incremental postprocessing begin = datetime(2021,8,1) -to = datetime(2022,2,1) # minimal subset: datetime(2021,8,4) +to = datetime(2021,8,10) # minimal subset: datetime(2021,8,4) diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"] analyzer_dir = sys.argv[1] only_collect_results = False # can be turned on to collect results, if data collection was aborted before the creation of result tables @@ -93,26 +93,36 @@ def analyze_small_commits_in_repo(cwd, outdir, from_c, to_c): #print('Starting from parent', str(parent.hash), ".") outparent = os.path.join(outtry, 'parent') os.makedirs(outparent) - add_options = ['--disable', 'incremental.load', '--enable', 'incremental.save'] - utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, parent.hash, outparent, conf_base, add_options) + + def append_to_repo_path(file): + return os.path.join(repo_path, file) + + sqlite_files = ['sqlite3.c', 'sqlite3.h', 'sqlite3ext.h', 'shell.c'] + sqlite_files = list(map(append_to_repo_path, sqlite_files)) + + pseudo_repo_path = "" + + options = sqlite_files + ['-v', '--set', 'pre.cppflags[+]', '-DSQLITE_DEBUG', '--disable', 'ana.base.context.non-ptr', '--disable', 'ana.int.def_exc', '--disable', 'sem.unknown_function.spawn', '--set', 'ana.thread.domain', 'plain', '--enable', 'exp.earlyglobs', '--set', 
'ana.base.privatization', 'none', '--set', 'pre.cppflags[+]', '-DGOBLINT_NO_BSEARCH', '--set', 'pre.cppflags[+]', '-DGOBLINT_NO_ASSERT', '--set', 'result', 'json-messages', '--set', 'ana.activated', '[\"base\",\"mallocWrapper\"]', '--set', 'ana.ctx_insens[+]', 'base', '--set', 'ana.ctx_insens[+]', 'mallocWrapper'] + add_options = options + ['--disable', 'incremental.load', '--enable', 'incremental.save'] + utils.analyze_commit(analyzer_dir, gr, pseudo_repo_path, build_compdb, parent.hash, outparent, conf_base, add_options) #print('And now analyze', str(commit.hash), 'incrementally.') outchild = os.path.join(outtry, 'child') os.makedirs(outchild) - add_options = ['--enable', 'incremental.load', '--disable', 'incremental.save'] - utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchild, conf_base, add_options) + add_options = options + ['--enable', 'incremental.load', '--disable', 'incremental.save'] + utils.analyze_commit(analyzer_dir, gr, pseudo_repo_path, build_compdb, commit.hash, outchild, conf_base, add_options) #print('And again incremental, this time with incremental postsolver') outchildincrpost = os.path.join(outtry, 'child-incr-post') os.makedirs(outchildincrpost) - add_options = ['--enable', 'incremental.load', '--disable', 'incremental.save'] - utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchildincrpost, conf_incrpost, add_options) + add_options = options + ['--enable', 'incremental.load', '--disable', 'incremental.save'] + utils.analyze_commit(analyzer_dir, gr, pseudo_repo_path, build_compdb, commit.hash, outchildincrpost, conf_incrpost, add_options) #print('And again incremental, this time with incremental postsolver and reluctant') outchildrel = os.path.join(outtry, 'child-rel') os.makedirs(outchildrel) add_options = ['--enable', 'incremental.load', '--disable', 'incremental.save', '--enable', 'incremental.reluctant.enabled'] - utils.analyze_commit(analyzer_dir, gr, repo_path, 
build_compdb, commit.hash, outchildrel, conf_incrpost, add_options) + utils.analyze_commit(analyzer_dir, gr, pseudo_repo_path, build_compdb, commit.hash, outchildrel, conf_incrpost, add_options) count_analyzed+=1 failed = False diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index c45bf36ae..1ba99411c 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -54,12 +54,18 @@ def analyze_commit(analyzer_dir, gr : Git, repo_path, build_compdb, commit_hash, file.write(o + " ") file.close() - prepare_command = ['sh', os.path.join(analyzer_dir, 'scripts', build_compdb)] + script_path = os.path.abspath(os.path.dirname(__file__)) + + prepare_command = ['sh', os.path.join(script_path, build_compdb)] with open(os.path.join(outdir, preparelog), "w+") as outfile: subprocess.run(prepare_command, cwd = gr.path, check=True, stdout=outfile, stderr=subprocess.STDOUT) outfile.close() - analyze_command = [os.path.join(analyzer_dir, 'goblint'), '--conf', os.path.join(analyzer_dir, 'conf', conf + '.json'), *extra_options, repo_path] + analyze_command = [os.path.join(analyzer_dir, 'goblint'), '--conf', os.path.join(analyzer_dir, 'conf', conf + '.json'), *extra_options] + + if (repo_path != ""): + analyze_command.append(repo_path) + with open(os.path.join(outdir, analyzerlog), "w+") as outfile: subprocess.run(analyze_command, check=True, stdout=outfile, stderr=subprocess.STDOUT) outfile.close() From c9e5566c33e5885efb906fb36396eedd0477487c Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 6 Oct 2022 12:41:08 +0200 Subject: [PATCH 03/84] Update path for conf --- scripts/incremental/benchmarking/efficiency.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/incremental/benchmarking/efficiency.py b/scripts/incremental/benchmarking/efficiency.py index 4023c84eb..8ac91072f 100644 --- a/scripts/incremental/benchmarking/efficiency.py +++ 
b/scripts/incremental/benchmarking/efficiency.py @@ -26,8 +26,8 @@ url = "https://github.com/sqlite/sqlite" repo_name = "sqlite" build_compdb = "../build/build_compdb_sqlite.sh" -conf_base = "large-program" # very minimal: "zstd-minimal" -conf_incrpost = "large-program" #TODO: Use incremental postprocessing +conf_base = os.path.join("examples", "large-program") # very minimal: "zstd-minimal" +conf_incrpost = os.path.join("examples", "large-program") #TODO: Use incremental postprocessing begin = datetime(2021,8,1) to = datetime(2021,8,10) # minimal subset: datetime(2021,8,4) diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"] From e1388676014a19581ed710b276ff738bccec5337 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 6 Oct 2022 12:41:31 +0200 Subject: [PATCH 04/84] Add build_compdb_sqlite.sh --- scripts/incremental/build/build_compdb_sqlite.sh | 4 ++++ 1 file changed, 4 insertions(+) create mode 100755 scripts/incremental/build/build_compdb_sqlite.sh diff --git a/scripts/incremental/build/build_compdb_sqlite.sh b/scripts/incremental/build/build_compdb_sqlite.sh new file mode 100755 index 000000000..65ef7ce04 --- /dev/null +++ b/scripts/incremental/build/build_compdb_sqlite.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +# sed -i 's/zstd : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT).*/zstd : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) -DZSTD_NO_INTRINSICS/' programs/Makefile +sh configure +LC_ALL=C.UTF-8 compiledb make -j 1 sqlite3.lo From 7bd5d926c3f074cd0f9a6861e156432d71af7860 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 6 Oct 2022 14:03:11 +0200 Subject: [PATCH 05/84] Update configuration for sqlite --- scripts/incremental/benchmarking/efficiency.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/incremental/benchmarking/efficiency.py b/scripts/incremental/benchmarking/efficiency.py index 8ac91072f..220987eef 100644 --- 
a/scripts/incremental/benchmarking/efficiency.py +++ b/scripts/incremental/benchmarking/efficiency.py @@ -26,8 +26,8 @@ url = "https://github.com/sqlite/sqlite" repo_name = "sqlite" build_compdb = "../build/build_compdb_sqlite.sh" -conf_base = os.path.join("examples", "large-program") # very minimal: "zstd-minimal" -conf_incrpost = os.path.join("examples", "large-program") #TODO: Use incremental postprocessing +conf_base = os.path.join("custom", "sqlite-minimal") # very minimal: "zstd-minimal" +conf_incrpost = os.path.join("custom", "sqlite-minimal") #TODO: Use incremental postprocessing begin = datetime(2021,8,1) to = datetime(2021,8,10) # minimal subset: datetime(2021,8,4) diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"] @@ -102,7 +102,7 @@ def append_to_repo_path(file): pseudo_repo_path = "" - options = sqlite_files + ['-v', '--set', 'pre.cppflags[+]', '-DSQLITE_DEBUG', '--disable', 'ana.base.context.non-ptr', '--disable', 'ana.int.def_exc', '--disable', 'sem.unknown_function.spawn', '--set', 'ana.thread.domain', 'plain', '--enable', 'exp.earlyglobs', '--set', 'ana.base.privatization', 'none', '--set', 'pre.cppflags[+]', '-DGOBLINT_NO_BSEARCH', '--set', 'pre.cppflags[+]', '-DGOBLINT_NO_ASSERT', '--set', 'result', 'json-messages', '--set', 'ana.activated', '[\"base\",\"mallocWrapper\"]', '--set', 'ana.ctx_insens[+]', 'base', '--set', 'ana.ctx_insens[+]', 'mallocWrapper'] + options = sqlite_files + ['-v'] add_options = options + ['--disable', 'incremental.load', '--enable', 'incremental.save'] utils.analyze_commit(analyzer_dir, gr, pseudo_repo_path, build_compdb, parent.hash, outparent, conf_base, add_options) From dffaeac626aefce01fa4da16aa7a3e61d46dc50a Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 6 Oct 2022 15:54:30 +0200 Subject: [PATCH 06/84] efficiency.py: Make it configurable which project to benchmark incremental goblint on. 
--- .../incremental/benchmarking/efficiency.py | 70 ++++++++++++------- scripts/incremental/benchmarking/projects.py | 43 ++++++++++++ 2 files changed, 86 insertions(+), 27 deletions(-) create mode 100644 scripts/incremental/benchmarking/projects.py diff --git a/scripts/incremental/benchmarking/efficiency.py b/scripts/incremental/benchmarking/efficiency.py index 220987eef..6c7f53292 100644 --- a/scripts/incremental/benchmarking/efficiency.py +++ b/scripts/incremental/benchmarking/efficiency.py @@ -11,6 +11,9 @@ import sys import pandas as pd +# Some basic settings for the different projects (currently zstd, sqlite) +import projects + ################################################################################ # Usage: python3 incremental_smallcommits.py # Executing the script will overwrite the directory 'result_efficiency' in the cwd. @@ -18,26 +21,37 @@ # config file is assumed to be found in the conf directory of the analyzers repository. # The single test runs are mapped to processors according to the coremapping. The one specified in the section below # should work for Intel machines, otherwise you might need to adapt it according to the description. -if len(sys.argv) != 3: - print("Wrong number of parameters.\nUse script like this: python3 parallel_benchmarking.py ") - exit() +usage = "Use script like this: python3 parallel_benchmarking.py " +if len(sys.argv) != 4: + print("Wrong number of parameters.\n" + usage) + exit() + +# Load some project dependent settings: +project = projects.projects.get(sys.argv[2]) +if project == None: + print("Given Project " + project + " is not one of the supported projects. 
Add a new project by modifying projects.py.") + exit() + +url = project.url +repo_name = project.repo_name +build_compdb = project.build_compdb +conf_base = project.conf_base +conf_incrpost = project.conf_incrpost +begin = project.begin +to = project.to +files = project.files + +# Project independent settings +diff_exclude = project.diff_exclude result_dir = os.path.join(os.getcwd(), 'result_efficiency') maxCLOC = 50 # can be deactivated with None -url = "https://github.com/sqlite/sqlite" -repo_name = "sqlite" -build_compdb = "../build/build_compdb_sqlite.sh" -conf_base = os.path.join("custom", "sqlite-minimal") # very minimal: "zstd-minimal" -conf_incrpost = os.path.join("custom", "sqlite-minimal") #TODO: Use incremental postprocessing -begin = datetime(2021,8,1) -to = datetime(2021,8,10) # minimal subset: datetime(2021,8,4) -diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"] analyzer_dir = sys.argv[1] only_collect_results = False # can be turned on to collect results, if data collection was aborted before the creation of result tables ################################################################################ try: - numcores = int(sys.argv[2]) + numcores = int(sys.argv[3]) except ValueError: - print("Parameter should be a number.\nUse script like this: python3 parallel_benchmarking.py ") + print("Parameter should be a number.\nUse script like this:" + usage) exit() avail_phys_cores = psutil.cpu_count(logical=False) allowedcores = avail_phys_cores - 1 @@ -66,6 +80,17 @@ def analyze_small_commits_in_repo(cwd, outdir, from_c, to_c): analyzed_commits = {} repo_path = os.path.join(cwd, repo_name) + options = [] + if files == [] or files == None: + # If no list of files is given for the project, we analyze the repo using compiledb. For that, we pass the repo_path to goblint. + repo_path_goblint = repo_path + else: + def append_to_repo_path(file): + return os.path.join(repo_path, file) + # A list of files is given for the project. 
Pass these to goblint, but not the repo_path. + repo_path_goblint = "" + options = list(map(append_to_repo_path, files)) + for commit in itertools.islice(itertools.filterfalse(filter_commits_false_pred(repo_path), Repository(url, since=begin, to=to, only_no_merge=True, clone_repo_to=cwd).traverse_commits()), from_c, to_c): gr = Git(repo_path) @@ -94,35 +119,26 @@ def analyze_small_commits_in_repo(cwd, outdir, from_c, to_c): outparent = os.path.join(outtry, 'parent') os.makedirs(outparent) - def append_to_repo_path(file): - return os.path.join(repo_path, file) - - sqlite_files = ['sqlite3.c', 'sqlite3.h', 'sqlite3ext.h', 'shell.c'] - sqlite_files = list(map(append_to_repo_path, sqlite_files)) - - pseudo_repo_path = "" - - options = sqlite_files + ['-v'] add_options = options + ['--disable', 'incremental.load', '--enable', 'incremental.save'] - utils.analyze_commit(analyzer_dir, gr, pseudo_repo_path, build_compdb, parent.hash, outparent, conf_base, add_options) + utils.analyze_commit(analyzer_dir, gr, repo_path_goblint, build_compdb, parent.hash, outparent, conf_base, add_options) #print('And now analyze', str(commit.hash), 'incrementally.') outchild = os.path.join(outtry, 'child') os.makedirs(outchild) add_options = options + ['--enable', 'incremental.load', '--disable', 'incremental.save'] - utils.analyze_commit(analyzer_dir, gr, pseudo_repo_path, build_compdb, commit.hash, outchild, conf_base, add_options) + utils.analyze_commit(analyzer_dir, gr, repo_path_goblint, build_compdb, commit.hash, outchild, conf_base, add_options) #print('And again incremental, this time with incremental postsolver') outchildincrpost = os.path.join(outtry, 'child-incr-post') os.makedirs(outchildincrpost) add_options = options + ['--enable', 'incremental.load', '--disable', 'incremental.save'] - utils.analyze_commit(analyzer_dir, gr, pseudo_repo_path, build_compdb, commit.hash, outchildincrpost, conf_incrpost, add_options) + utils.analyze_commit(analyzer_dir, gr, repo_path_goblint, 
build_compdb, commit.hash, outchildincrpost, conf_incrpost, add_options) #print('And again incremental, this time with incremental postsolver and reluctant') outchildrel = os.path.join(outtry, 'child-rel') os.makedirs(outchildrel) - add_options = ['--enable', 'incremental.load', '--disable', 'incremental.save', '--enable', 'incremental.reluctant.enabled'] - utils.analyze_commit(analyzer_dir, gr, pseudo_repo_path, build_compdb, commit.hash, outchildrel, conf_incrpost, add_options) + add_options = options + ['--enable', 'incremental.load', '--disable', 'incremental.save', '--enable', 'incremental.reluctant.enabled'] + utils.analyze_commit(analyzer_dir, gr, repo_path_goblint, build_compdb, commit.hash, outchildrel, conf_incrpost, add_options) count_analyzed+=1 failed = False diff --git a/scripts/incremental/benchmarking/projects.py b/scripts/incremental/benchmarking/projects.py new file mode 100644 index 000000000..04df7fe7b --- /dev/null +++ b/scripts/incremental/benchmarking/projects.py @@ -0,0 +1,43 @@ +import os +import datetime + +from dataclasses import dataclass + +@dataclass +class ProjectConfig: + url: str + repo_name: str + build_compdb: str + conf_base: str + conf_incrpost: str + begin: datetime.datetime + to: datetime.datetime + diff_exclude: list[str] + '''Files to analyze. 
If this list is not empty, the given files will be analyzed (not those in the compiledb)''' + files: list[str] + +sqlite = ProjectConfig( + url = "https://github.com/sqlite/sqlite", + repo_name = "sqlite", + build_compdb = "../build/build_compdb_sqlite.sh", + conf_base = os.path.join("custom", "sqlite-minimal"), # very minimal: "zstd-minimal" + conf_incrpost = os.path.join("custom", "sqlite-minimal"), #TODO: Use incremental postprocessing, + begin = datetime.datetime(2021,8,1), + to = datetime.datetime(2021,8,10), # minimal subset: datetime(2021,8,4) + diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"], + files = ['sqlite3.c', 'sqlite3.h', 'sqlite3ext.h', 'shell.c'] +) + +zstd = ProjectConfig( + url = "https://github.com/facebook/zstd", + repo_name = "zstd", + build_compdb = "../build/build_compdb_zstd.sh", + conf_base = os.path.join("custom", "zstd-race-baseline"), # very minimal: "zstd-minimal" + conf_incrpost = os.path.join("custom", "zstd-race-incrpostsolver"), + begin = datetime.datetime(2021,8,1), # very minimal: "zstd-minimal" + to = datetime.datetime(2021,8,10), # minimal subset: datetime(2021,8,4) + diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"], + files = None +) + +projects = {"sqlite": sqlite, "zstd": zstd} From 764372e2191e6bc3138f340b93676048bdbc3791 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 6 Oct 2022 16:48:34 +0200 Subject: [PATCH 07/84] Make precision take paramters for which project to run on --- .../incremental/benchmarking/efficiency.py | 31 ++++-------- scripts/incremental/benchmarking/precision.py | 48 ++++++++++++------- scripts/incremental/benchmarking/projects.py | 2 +- scripts/incremental/benchmarking/utils.py | 14 ++++-- 4 files changed, 53 insertions(+), 42 deletions(-) diff --git a/scripts/incremental/benchmarking/efficiency.py b/scripts/incremental/benchmarking/efficiency.py index 6c7f53292..f9aa42d94 100644 --- a/scripts/incremental/benchmarking/efficiency.py 
+++ b/scripts/incremental/benchmarking/efficiency.py @@ -21,7 +21,7 @@ # config file is assumed to be found in the conf directory of the analyzers repository. # The single test runs are mapped to processors according to the coremapping. The one specified in the section below # should work for Intel machines, otherwise you might need to adapt it according to the description. -usage = "Use script like this: python3 parallel_benchmarking.py " +usage = "Use script like this: python3 efficiency.py " if len(sys.argv) != 4: print("Wrong number of parameters.\n" + usage) exit() @@ -40,9 +40,9 @@ begin = project.begin to = project.to files = project.files +diff_exclude = project.diff_exclude # Project independent settings -diff_exclude = project.diff_exclude result_dir = os.path.join(os.getcwd(), 'result_efficiency') maxCLOC = 50 # can be deactivated with None analyzer_dir = sys.argv[1] @@ -80,17 +80,6 @@ def analyze_small_commits_in_repo(cwd, outdir, from_c, to_c): analyzed_commits = {} repo_path = os.path.join(cwd, repo_name) - options = [] - if files == [] or files == None: - # If no list of files is given for the project, we analyze the repo using compiledb. For that, we pass the repo_path to goblint. - repo_path_goblint = repo_path - else: - def append_to_repo_path(file): - return os.path.join(repo_path, file) - # A list of files is given for the project. Pass these to goblint, but not the repo_path. 
- repo_path_goblint = "" - options = list(map(append_to_repo_path, files)) - for commit in itertools.islice(itertools.filterfalse(filter_commits_false_pred(repo_path), Repository(url, since=begin, to=to, only_no_merge=True, clone_repo_to=cwd).traverse_commits()), from_c, to_c): gr = Git(repo_path) @@ -119,26 +108,26 @@ def append_to_repo_path(file): outparent = os.path.join(outtry, 'parent') os.makedirs(outparent) - add_options = options + ['--disable', 'incremental.load', '--enable', 'incremental.save'] - utils.analyze_commit(analyzer_dir, gr, repo_path_goblint, build_compdb, parent.hash, outparent, conf_base, add_options) + add_options = ['--disable', 'incremental.load', '--enable', 'incremental.save'] + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, parent.hash, outparent, conf_base, add_options, files) #print('And now analyze', str(commit.hash), 'incrementally.') outchild = os.path.join(outtry, 'child') os.makedirs(outchild) - add_options = options + ['--enable', 'incremental.load', '--disable', 'incremental.save'] - utils.analyze_commit(analyzer_dir, gr, repo_path_goblint, build_compdb, commit.hash, outchild, conf_base, add_options) + add_options = ['--enable', 'incremental.load', '--disable', 'incremental.save'] + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchild, conf_base, add_options, files) #print('And again incremental, this time with incremental postsolver') outchildincrpost = os.path.join(outtry, 'child-incr-post') os.makedirs(outchildincrpost) - add_options = options + ['--enable', 'incremental.load', '--disable', 'incremental.save'] - utils.analyze_commit(analyzer_dir, gr, repo_path_goblint, build_compdb, commit.hash, outchildincrpost, conf_incrpost, add_options) + add_options = ['--enable', 'incremental.load', '--disable', 'incremental.save'] + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchildincrpost, conf_incrpost, add_options, files) #print('And again 
incremental, this time with incremental postsolver and reluctant') outchildrel = os.path.join(outtry, 'child-rel') os.makedirs(outchildrel) - add_options = options + ['--enable', 'incremental.load', '--disable', 'incremental.save', '--enable', 'incremental.reluctant.enabled'] - utils.analyze_commit(analyzer_dir, gr, repo_path_goblint, build_compdb, commit.hash, outchildrel, conf_incrpost, add_options) + add_options = ['--enable', 'incremental.load', '--disable', 'incremental.save', '--enable', 'incremental.reluctant.enabled'] + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchildrel, conf_incrpost, add_options, files) count_analyzed+=1 failed = False diff --git a/scripts/incremental/benchmarking/precision.py b/scripts/incremental/benchmarking/precision.py index a9c848114..510731026 100644 --- a/scripts/incremental/benchmarking/precision.py +++ b/scripts/incremental/benchmarking/precision.py @@ -9,30 +9,44 @@ import pytz import multiprocessing as mp +# Some basic settings for the different projects (currently zstd, sqlite) +import projects ################################################################################ # Usage: python3 incremental_smallcommits.py # Executing the script will overwrite the directory 'result_precision' in the cwd. # The script for building the compilation database is assumed to be found in the analyzers script directory and the # config file is assumed to be found in the conf directory of the analyzers repository. 
-if len(sys.argv) != 3: - print("Wrong number of parameters.\nUse script like this: python3 parallel_benchmarking.py ") +usage = "Use script like this: python3 precision.py " +if len(sys.argv) != 4: + print("Wrong number of parameters.\n" + usage) exit() -res_dir = os.path.abspath('result_precision') -maxCLOC = None -url = "https://github.com/facebook/zstd" -repo_name = "zstd" -build_compdb = "build_compdb_zstd.sh" -conf = "zstd-race-incrpostsolver" -begin = datetime(2021,8,1) -to = datetime(2022,2,1) -diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"] -analyzer_dir = sys.argv[1] + +# Load some project dependent settings: +project = projects.projects.get(sys.argv[2]) +if project == None: + print("Given Project " + project + " is not one of the supported projects. Add a new project by modifying projects.py.") + exit() + +url = project.url +repo_name = project.repo_name +build_compdb = project.build_compdb +conf = project.conf_base +begin = project.begin +to = project.to +diff_exclude = project.diff_exclude +files = project.files + try: - numcores = int(sys.argv[2]) + numcores = int(sys.argv[3]) except ValueError: - print("Parameter should be a number.\nUse script like this: python3 parallel_benchmarking.py ") + print("Parameter should be a number.\n" + usage) exit() + +# Project independent settings +analyzer_dir = sys.argv[1] +res_dir = os.path.abspath('result_precision') +maxCLOC = None only_collect_results = False # can be turned on to collect results, if data collection was aborted before the creation of result tables ################################################################################ @@ -127,7 +141,7 @@ def analyze_series_in_repo(series): try: # print('Analyze ', str(commit.hash), ' as initial commit.') add_options = ['--disable', 'incremental.load', '--enable', 'incremental.save'] - utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, out_commit, conf, add_options) + 
utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, out_commit, conf, add_options, files) prev_commit = commit.hash except utils.subprocess.CalledProcessError as e: print('Aborted initial because command ', e.cmd, 'failed.') @@ -148,7 +162,7 @@ def analyze_series_in_repo(series): os.makedirs(out_nonincr) file_original_run = os.path.join(out_nonincr, "compare-data-nonincr") add_options = ['--enable', 'incremental.only-rename', '--set', 'save_run', file_original_run] - utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, out_nonincr, conf, add_options) + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, out_nonincr, conf, add_options, files) # analyze commit incrementally based on the previous commit and save run for comparison # print('Analyze', str(commit.hash), 'incrementally (#', commit_num, ').') @@ -156,7 +170,7 @@ def analyze_series_in_repo(series): os.makedirs(out_incr) file_incremental_run = os.path.join(out_incr, "compare-data-incr") add_options = ['--enable', 'incremental.load', '--enable', 'incremental.save', '--enable', 'incremental.reluctant.enabled', '--set', 'save_run', file_incremental_run] - utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, out_incr, conf, add_options) + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, out_incr, conf, add_options, files) if commit_num in compare_commits or commit_num == len(series) - 1: # compare stored data of original and incremental run diff --git a/scripts/incremental/benchmarking/projects.py b/scripts/incremental/benchmarking/projects.py index 04df7fe7b..732bd54a7 100644 --- a/scripts/incremental/benchmarking/projects.py +++ b/scripts/incremental/benchmarking/projects.py @@ -35,7 +35,7 @@ class ProjectConfig: conf_base = os.path.join("custom", "zstd-race-baseline"), # very minimal: "zstd-minimal" conf_incrpost = os.path.join("custom", "zstd-race-incrpostsolver"), begin = 
datetime.datetime(2021,8,1), # very minimal: "zstd-minimal" - to = datetime.datetime(2021,8,10), # minimal subset: datetime(2021,8,4) + to = datetime.datetime(2021,10,10), # minimal subset: datetime(2021,8,4) diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"], files = None ) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index 1ba99411c..b0c310fa8 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -40,7 +40,8 @@ def reset_incremental_data(incr_data_dir): if os.path.exists(incr_data_dir) and os.path.isdir(incr_data_dir): shutil.rmtree(incr_data_dir) -def analyze_commit(analyzer_dir, gr : Git, repo_path, build_compdb, commit_hash, outdir, conf, extra_options): +def analyze_commit(analyzer_dir, gr : Git, repo_path, build_compdb, commit_hash, outdir, conf, extra_options, files): + gr.checkout(commit_hash) conf_path = os.path.join(analyzer_dir, 'conf', conf + '.json') @@ -61,9 +62,16 @@ def analyze_commit(analyzer_dir, gr : Git, repo_path, build_compdb, commit_hash, subprocess.run(prepare_command, cwd = gr.path, check=True, stdout=outfile, stderr=subprocess.STDOUT) outfile.close() - analyze_command = [os.path.join(analyzer_dir, 'goblint'), '--conf', os.path.join(analyzer_dir, 'conf', conf + '.json'), *extra_options] + files = [] + if files != [] and files != None: + def append_to_repo_path(file): + return os.path.join(repo_path, file) + files = list(map(append_to_repo_path, files)) + + analyze_command = [os.path.join(analyzer_dir, 'goblint'), '--conf', os.path.join(analyzer_dir, 'conf', conf + '.json'), *files, *extra_options] - if (repo_path != ""): + # If the list of files was empty, we pass the repo_path to goblint + if not files: analyze_command.append(repo_path) with open(os.path.join(outdir, analyzerlog), "w+") as outfile: From c773156fb22092c8c1f7cadaa25dc5b6c2f5d1e0 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 6 Oct 
2022 17:00:02 +0200 Subject: [PATCH 08/84] Do not use dataclasses for compatibility with python 3.6 --- scripts/incremental/benchmarking/projects.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/scripts/incremental/benchmarking/projects.py b/scripts/incremental/benchmarking/projects.py index 732bd54a7..1dd5030d7 100644 --- a/scripts/incremental/benchmarking/projects.py +++ b/scripts/incremental/benchmarking/projects.py @@ -1,9 +1,6 @@ import os import datetime -from dataclasses import dataclass - -@dataclass class ProjectConfig: url: str repo_name: str @@ -16,6 +13,17 @@ class ProjectConfig: '''Files to analyze. If this list is not empty, the given files will be analyzed (not those in the compiledb)''' files: list[str] + def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin, to, diff_exclude, files): + self.url = url + self.repo_name = repo_name + self.build_compdb = build_compdb + self.conf_base = conf_base + self.conf_incrpost = conf_incrpost + self.begin = begin + self.to = to + self.diff_exclude = diff_exclude + self.files = files + sqlite = ProjectConfig( url = "https://github.com/sqlite/sqlite", repo_name = "sqlite", From 7009d93abfd7042a1975984fd51d4ce1dae5e16c Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 6 Oct 2022 17:00:31 +0200 Subject: [PATCH 09/84] Improve error message --- scripts/incremental/benchmarking/efficiency.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/incremental/benchmarking/efficiency.py b/scripts/incremental/benchmarking/efficiency.py index f9aa42d94..7c63aa839 100644 --- a/scripts/incremental/benchmarking/efficiency.py +++ b/scripts/incremental/benchmarking/efficiency.py @@ -29,7 +29,7 @@ # Load some project dependent settings: project = projects.projects.get(sys.argv[2]) if project == None: - print("Given Project " + project + " is not one of the supported projects. 
Add a new project by modifying projects.py.") + print("Given Project \"" + sys.argv[2] + "\" is not one of the supported projects. Add a new project by modifying projects.py.\n" + usage) exit() url = project.url From 11e92bfd480d8c77571382fda7c836f3cda533a6 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 6 Oct 2022 17:03:01 +0200 Subject: [PATCH 10/84] Change annotation of type for compatibility with Python 3.6 --- scripts/incremental/benchmarking/projects.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/incremental/benchmarking/projects.py b/scripts/incremental/benchmarking/projects.py index 1dd5030d7..503b63f3d 100644 --- a/scripts/incremental/benchmarking/projects.py +++ b/scripts/incremental/benchmarking/projects.py @@ -9,9 +9,9 @@ class ProjectConfig: conf_incrpost: str begin: datetime.datetime to: datetime.datetime - diff_exclude: list[str] + diff_exclude: list # list[str] '''Files to analyze. If this list is not empty, the given files will be analyzed (not those in the compiledb)''' - files: list[str] + files: list # list[str] def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin, to, diff_exclude, files): self.url = url From 2b7eafd4e504d5fd4a8e43253f00c476ab9b2469 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 6 Oct 2022 17:04:32 +0200 Subject: [PATCH 11/84] Add configuration for incremental postsolver to sqlite benchmarking config. 
--- scripts/incremental/benchmarking/projects.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/incremental/benchmarking/projects.py b/scripts/incremental/benchmarking/projects.py index 503b63f3d..d2efffefe 100644 --- a/scripts/incremental/benchmarking/projects.py +++ b/scripts/incremental/benchmarking/projects.py @@ -29,7 +29,7 @@ def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin repo_name = "sqlite", build_compdb = "../build/build_compdb_sqlite.sh", conf_base = os.path.join("custom", "sqlite-minimal"), # very minimal: "zstd-minimal" - conf_incrpost = os.path.join("custom", "sqlite-minimal"), #TODO: Use incremental postprocessing, + conf_incrpost = os.path.join("custom", "sqlite-minimal-incrpostsolver"), begin = datetime.datetime(2021,8,1), to = datetime.datetime(2021,8,10), # minimal subset: datetime(2021,8,4) diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"], From 150075221b64ccd4116e9b349e5f45297a60bb53 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 6 Oct 2022 17:54:28 +0200 Subject: [PATCH 12/84] Utils.analyze_commit: Fix handling of case that file list is passed. 
--- scripts/incremental/benchmarking/utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index b0c310fa8..c53b30b4a 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -62,13 +62,13 @@ def analyze_commit(analyzer_dir, gr : Git, repo_path, build_compdb, commit_hash, subprocess.run(prepare_command, cwd = gr.path, check=True, stdout=outfile, stderr=subprocess.STDOUT) outfile.close() - files = [] - if files != [] and files != None: + file_list = [] + if files: def append_to_repo_path(file): return os.path.join(repo_path, file) - files = list(map(append_to_repo_path, files)) + file_list = list(map(append_to_repo_path, files)) - analyze_command = [os.path.join(analyzer_dir, 'goblint'), '--conf', os.path.join(analyzer_dir, 'conf', conf + '.json'), *files, *extra_options] + analyze_command = [os.path.join(analyzer_dir, 'goblint'), '--conf', os.path.join(analyzer_dir, 'conf', conf + '.json'), *file_list, *extra_options] # If the list of files was empty, we pass the repo_path to goblint if not files: From 79c940f8f39e02e249687e890d0ccc71bef017a1 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Fri, 7 Oct 2022 09:50:13 +0200 Subject: [PATCH 13/84] Print analyze command and commit hash in config.out created by incremental benchmarking script. 
--- scripts/incremental/benchmarking/utils.py | 36 +++++++++++++++-------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index c53b30b4a..539abc301 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -45,7 +45,19 @@ def analyze_commit(analyzer_dir, gr : Git, repo_path, build_compdb, commit_hash, gr.checkout(commit_hash) conf_path = os.path.join(analyzer_dir, 'conf', conf + '.json') - # print configuration + # Creat the analyze command + file_list = [] + if files: + def append_to_repo_path(file): + return os.path.join(repo_path, file) + file_list = list(map(append_to_repo_path, files)) + + analyze_command = [os.path.join(analyzer_dir, 'goblint'), '--conf', os.path.join(analyzer_dir, 'conf', conf + '.json'), *file_list, *extra_options] + # If the list of files was empty, we pass the repo_path to goblint + if not files: + analyze_command.append(repo_path) + + # print configuration and analyze command with open(outdir+'/config.out', "a+") as file: with open(conf_path, "r") as c: file.write("config: " + c.read()) @@ -53,27 +65,25 @@ def analyze_commit(analyzer_dir, gr : Git, repo_path, build_compdb, commit_hash, file.write("added options:\n") for o in extra_options: file.write(o + " ") + file.write("\n\n") + + file.write("analyze command:\n") + for c in analyze_command: + file.write(c + " ") + file.write("\n\n") + + file.write("Commit hash:\n" + commit_hash + "\n") file.close() script_path = os.path.abspath(os.path.dirname(__file__)) + # Prepare the repo prepare_command = ['sh', os.path.join(script_path, build_compdb)] with open(os.path.join(outdir, preparelog), "w+") as outfile: subprocess.run(prepare_command, cwd = gr.path, check=True, stdout=outfile, stderr=subprocess.STDOUT) outfile.close() - file_list = [] - if files: - def append_to_repo_path(file): - return os.path.join(repo_path, file) - file_list = 
list(map(append_to_repo_path, files)) - - analyze_command = [os.path.join(analyzer_dir, 'goblint'), '--conf', os.path.join(analyzer_dir, 'conf', conf + '.json'), *file_list, *extra_options] - - # If the list of files was empty, we pass the repo_path to goblint - if not files: - analyze_command.append(repo_path) - + # Run the analysis with open(os.path.join(outdir, analyzerlog), "w+") as outfile: subprocess.run(analyze_command, check=True, stdout=outfile, stderr=subprocess.STDOUT) outfile.close() From a248b6bd2f29c521c1037321befdf25ffbd3ed99 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Fri, 7 Oct 2022 10:01:41 +0200 Subject: [PATCH 14/84] Add -v option to analyze commands in efficiency script. --- scripts/incremental/benchmarking/efficiency.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/incremental/benchmarking/efficiency.py b/scripts/incremental/benchmarking/efficiency.py index 7c63aa839..8c18e92c6 100644 --- a/scripts/incremental/benchmarking/efficiency.py +++ b/scripts/incremental/benchmarking/efficiency.py @@ -108,25 +108,27 @@ def analyze_small_commits_in_repo(cwd, outdir, from_c, to_c): outparent = os.path.join(outtry, 'parent') os.makedirs(outparent) - add_options = ['--disable', 'incremental.load', '--enable', 'incremental.save'] + default_options = ['-v'] + + add_options = default_options + ['--disable', 'incremental.load', '--enable', 'incremental.save'] utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, parent.hash, outparent, conf_base, add_options, files) #print('And now analyze', str(commit.hash), 'incrementally.') outchild = os.path.join(outtry, 'child') os.makedirs(outchild) - add_options = ['--enable', 'incremental.load', '--disable', 'incremental.save'] + add_options = default_options + ['--enable', 'incremental.load', '--disable', 'incremental.save'] utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchild, conf_base, add_options, files) #print('And again 
incremental, this time with incremental postsolver') outchildincrpost = os.path.join(outtry, 'child-incr-post') os.makedirs(outchildincrpost) - add_options = ['--enable', 'incremental.load', '--disable', 'incremental.save'] + add_options = default_options + ['--enable', 'incremental.load', '--disable', 'incremental.save'] utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchildincrpost, conf_incrpost, add_options, files) #print('And again incremental, this time with incremental postsolver and reluctant') outchildrel = os.path.join(outtry, 'child-rel') os.makedirs(outchildrel) - add_options = ['--enable', 'incremental.load', '--disable', 'incremental.save', '--enable', 'incremental.reluctant.enabled'] + add_options = default_options + ['--enable', 'incremental.load', '--disable', 'incremental.save', '--enable', 'incremental.reluctant.enabled'] utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchildrel, conf_incrpost, add_options, files) count_analyzed+=1 From 6d1a67d8356192355ead24ec3d2c47cc7d89be77 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Mon, 28 Nov 2022 22:06:15 +0100 Subject: [PATCH 15/84] Add figlet as project. Not completely working yet, as conflicting flags are collected in the compile_commands file for figlet. 
--- scripts/incremental/benchmarking/projects.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/scripts/incremental/benchmarking/projects.py b/scripts/incremental/benchmarking/projects.py index d2efffefe..9c79dd5af 100644 --- a/scripts/incremental/benchmarking/projects.py +++ b/scripts/incremental/benchmarking/projects.py @@ -40,7 +40,7 @@ def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin url = "https://github.com/facebook/zstd", repo_name = "zstd", build_compdb = "../build/build_compdb_zstd.sh", - conf_base = os.path.join("custom", "zstd-race-baseline"), # very minimal: "zstd-minimal" + conf_base = os.path.join("custom", "zstd-race"), # very minimal: "zstd-minimal" conf_incrpost = os.path.join("custom", "zstd-race-incrpostsolver"), begin = datetime.datetime(2021,8,1), # very minimal: "zstd-minimal" to = datetime.datetime(2021,10,10), # minimal subset: datetime(2021,8,4) @@ -48,4 +48,16 @@ def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin files = None ) -projects = {"sqlite": sqlite, "zstd": zstd} +figlet = ProjectConfig( + url = "https://github.com/cmatsuoka/figlet", + repo_name = "figlet", + build_compdb = "../build/build_compdb_figlet.sh", + conf_base = os.path.join("custom", "figlet"), + conf_incrpost = os.path.join("custom", "figlet-incrpostsolver"), + begin = datetime.datetime(2010,1,1), + to = datetime.datetime(2022,10,10), + diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"], + files = None +) + +projects = {"sqlite": sqlite, "zstd": zstd, "figlet": figlet} From 595de4cd7b28dbc01b0c0a300e3eedaf12ee5b02 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Tue, 29 Nov 2022 14:23:05 +0100 Subject: [PATCH 16/84] Adapt regular expression to extract runtime from output. 
--- scripts/incremental/benchmarking/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index 539abc301..f8d2908f4 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -122,7 +122,7 @@ def find_line(pattern, log): return None def extract_from_analyzer_log(log): - runtime_pattern = 'TOTAL[ ]+(?P[0-9\.]+) s' + runtime_pattern = 'Default[ ]+(?P[0-9\.]+)s' change_info_pattern = 'change_info = { unchanged = (?P[0-9]*); changed = (?P[0-9]*); added = (?P[0-9]*); removed = (?P[0-9]*) }' r = find_line(runtime_pattern, log) ch = find_line(change_info_pattern, log) or {"unchanged": 0, "changed": 0, "added": 0, "removed": 0} From 8e5888f57919afadfd01c551fc152f546142b044 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Tue, 29 Nov 2022 14:24:32 +0100 Subject: [PATCH 17/84] incremental benchmarking: Use Makefile instead of compiledb for figlet. 
--- scripts/incremental/benchmarking/projects.py | 6 +++--- scripts/incremental/benchmarking/utils.py | 9 +++++---- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/scripts/incremental/benchmarking/projects.py b/scripts/incremental/benchmarking/projects.py index 9c79dd5af..3cc8b180a 100644 --- a/scripts/incremental/benchmarking/projects.py +++ b/scripts/incremental/benchmarking/projects.py @@ -51,13 +51,13 @@ def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin figlet = ProjectConfig( url = "https://github.com/cmatsuoka/figlet", repo_name = "figlet", - build_compdb = "../build/build_compdb_figlet.sh", + build_compdb = None, conf_base = os.path.join("custom", "figlet"), conf_incrpost = os.path.join("custom", "figlet-incrpostsolver"), begin = datetime.datetime(2010,1,1), to = datetime.datetime(2022,10,10), - diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"], - files = None + diff_exclude = [], + files = ['Makefile'] ) projects = {"sqlite": sqlite, "zstd": zstd, "figlet": figlet} diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index f8d2908f4..a9d320d34 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -78,10 +78,11 @@ def append_to_repo_path(file): script_path = os.path.abspath(os.path.dirname(__file__)) # Prepare the repo - prepare_command = ['sh', os.path.join(script_path, build_compdb)] - with open(os.path.join(outdir, preparelog), "w+") as outfile: - subprocess.run(prepare_command, cwd = gr.path, check=True, stdout=outfile, stderr=subprocess.STDOUT) - outfile.close() + if build_compdb != None: + prepare_command = ['sh', os.path.join(script_path, build_compdb)] + with open(os.path.join(outdir, preparelog), "w+") as outfile: + subprocess.run(prepare_command, cwd = gr.path, check=True, stdout=outfile, stderr=subprocess.STDOUT) + outfile.close() # Run the analysis with 
open(os.path.join(outdir, analyzerlog), "w+") as outfile: From 8bcc304225e13ebc0f77f8d833ba9338f5e4427d Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Tue, 29 Nov 2022 18:31:20 +0100 Subject: [PATCH 18/84] Add build_compdb_figlet.sh --- scripts/incremental/build/build_compdb_figlet.sh | 2 ++ 1 file changed, 2 insertions(+) create mode 100755 scripts/incremental/build/build_compdb_figlet.sh diff --git a/scripts/incremental/build/build_compdb_figlet.sh b/scripts/incremental/build/build_compdb_figlet.sh new file mode 100755 index 000000000..7dc7672cc --- /dev/null +++ b/scripts/incremental/build/build_compdb_figlet.sh @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +# do nothing From 97ea54ef65b4ae4b8ec3ed4f406cc15796ca246b Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Tue, 29 Nov 2022 18:52:36 +0100 Subject: [PATCH 19/84] Add preliminary setup for chrony incremental benchmarks. Make does not succeed yet, so this requires further adaptation. --- scripts/incremental/benchmarking/projects.py | 14 +++++++++++++- scripts/incremental/build/build_compdb_chrony.sh | 6 ++++++ 2 files changed, 19 insertions(+), 1 deletion(-) create mode 100755 scripts/incremental/build/build_compdb_chrony.sh diff --git a/scripts/incremental/benchmarking/projects.py b/scripts/incremental/benchmarking/projects.py index 3cc8b180a..448260fa1 100644 --- a/scripts/incremental/benchmarking/projects.py +++ b/scripts/incremental/benchmarking/projects.py @@ -60,4 +60,16 @@ def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin files = ['Makefile'] ) -projects = {"sqlite": sqlite, "zstd": zstd, "figlet": figlet} +chrony = ProjectConfig( + url = "https://git.tuxfamily.org/chrony/chrony.git", + repo_name = "chrony", + build_compdb = "../build/build_compdb_chrony.sh", + conf_base = os.path.join("custom", "figlet"), + conf_incrpost = os.path.join("custom", "figlet-incrpostsolver"), + begin = datetime.datetime(2010,1,1), + to = datetime.datetime(2022,10,10), + diff_exclude = [], + files 
= ['Makefile'] +) + +projects = {"sqlite": sqlite, "chrony": chrony, "figlet": figlet, "zstd": zstd} diff --git a/scripts/incremental/build/build_compdb_chrony.sh b/scripts/incremental/build/build_compdb_chrony.sh new file mode 100755 index 000000000..9edaf9ab8 --- /dev/null +++ b/scripts/incremental/build/build_compdb_chrony.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash +# git clean -fdx +# ./configure +# make -j 1 chronyd | tee build.log +# compiledb --parse build.log +./configure && bear -- make chronyd From 587a2dd47f2291ebfc52a7d1d17168be856f67df Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Tue, 29 Nov 2022 18:56:51 +0100 Subject: [PATCH 20/84] Reset time range of considered commits for zstd. Resets the range of considered commits to be the same as originally in analyzer/#778. --- scripts/incremental/benchmarking/projects.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/incremental/benchmarking/projects.py b/scripts/incremental/benchmarking/projects.py index 3cc8b180a..48953eeaf 100644 --- a/scripts/incremental/benchmarking/projects.py +++ b/scripts/incremental/benchmarking/projects.py @@ -43,7 +43,7 @@ def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin conf_base = os.path.join("custom", "zstd-race"), # very minimal: "zstd-minimal" conf_incrpost = os.path.join("custom", "zstd-race-incrpostsolver"), begin = datetime.datetime(2021,8,1), # very minimal: "zstd-minimal" - to = datetime.datetime(2021,10,10), # minimal subset: datetime(2021,8,4) + to = datetime.datetime(2022,2,1), # minimal subset: datetime(2021,8,4) diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"], files = None ) From 0345ce7123ce5aa890b60280721ff99dc275343b Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Tue, 29 Nov 2022 22:26:02 +0100 Subject: [PATCH 21/84] Update build script for chrony. 
--- scripts/incremental/build/build_compdb_chrony.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/incremental/build/build_compdb_chrony.sh b/scripts/incremental/build/build_compdb_chrony.sh index 9edaf9ab8..0ba4a563c 100755 --- a/scripts/incremental/build/build_compdb_chrony.sh +++ b/scripts/incremental/build/build_compdb_chrony.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -# git clean -fdx -# ./configure -# make -j 1 chronyd | tee build.log -# compiledb --parse build.log -./configure && bear -- make chronyd +git clean -fdx +./configure +make -j 1 chronyd | tee build.log +compiledb --parse build.log +# ./configure && bear -- make chronyd From c943f9f8e65f7bfa08187e2512507787ac40fc69 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Tue, 29 Nov 2022 22:28:06 +0100 Subject: [PATCH 22/84] Fix project configuration for chrony to not provide files (but use compiledb). --- scripts/incremental/benchmarking/projects.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/scripts/incremental/benchmarking/projects.py b/scripts/incremental/benchmarking/projects.py index 448260fa1..d2e80b8ee 100644 --- a/scripts/incremental/benchmarking/projects.py +++ b/scripts/incremental/benchmarking/projects.py @@ -61,15 +61,15 @@ def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin ) chrony = ProjectConfig( - url = "https://git.tuxfamily.org/chrony/chrony.git", - repo_name = "chrony", - build_compdb = "../build/build_compdb_chrony.sh", - conf_base = os.path.join("custom", "figlet"), - conf_incrpost = os.path.join("custom", "figlet-incrpostsolver"), - begin = datetime.datetime(2010,1,1), - to = datetime.datetime(2022,10,10), - diff_exclude = [], - files = ['Makefile'] + url="https://git.tuxfamily.org/chrony/chrony.git", + repo_name="chrony", + build_compdb="../build/build_compdb_chrony.sh", + conf_base=os.path.join("custom", "figlet"), + conf_incrpost=os.path.join("custom", 
"figlet-incrpostsolver"), + begin=datetime.datetime(2020, 1, 1), + to=datetime.datetime(2022, 10, 10), + diff_exclude=[], + files=None ) projects = {"sqlite": sqlite, "chrony": chrony, "figlet": figlet, "zstd": zstd} From 2639efb0e808b9a2417d291e8a7c1f0ad209d668 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Wed, 30 Nov 2022 11:27:51 +0100 Subject: [PATCH 23/84] Use github mirror for chrony.. The repo at tuxfamily does not allow for multiple connections/parallel clones. --- scripts/incremental/benchmarking/projects.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/incremental/benchmarking/projects.py b/scripts/incremental/benchmarking/projects.py index ac0495adf..fdcf082d6 100644 --- a/scripts/incremental/benchmarking/projects.py +++ b/scripts/incremental/benchmarking/projects.py @@ -61,7 +61,9 @@ def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin ) chrony = ProjectConfig( - url="https://git.tuxfamily.org/chrony/chrony.git", + # Official repo is at https://git.tuxfamily.org/chrony/chrony.git, + # but does not allow multiple parallel clones. So use mirror on GitHub. + url="https://github.com/mlichvar/chrony.git", repo_name="chrony", build_compdb="../build/build_compdb_chrony.sh", conf_base=os.path.join("custom", "figlet"), From 1eb1a8288b50aaef20aeab7bfc891f62451abfdb Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Fri, 9 Dec 2022 11:05:52 +0100 Subject: [PATCH 24/84] Plot.py: Create cummulative graphs again, combining results from 3 configs in one graph. Comments out the creation using "paper_efficiency_graphs" and "paper_precision_graph". . 
--- scripts/incremental/benchmarking/plot.py | 30 ++++++++++++++--------- scripts/incremental/benchmarking/utils.py | 4 ++- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index 4720fd56a..d062314d0 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -2,32 +2,35 @@ import os import shutil -def cummulative_distr_compare2(outdir, result_csv_filename): +def cummulative_distr_compare2(results_dir, result_csv_filename, figure_dir): num_bins = 2000 outfile_nonincr_vs_incr = "figure_cum_distr_incr.pdf" outfile_incr_vs_incrrel = "figure_cum_distr_rel.pdf" - df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) + df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), filterDetectedChanges=True) - data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_parent, utils.header_runtime_incr_child]) + data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_parent, utils.header_runtime_incr_child, utils.header_runtime_incr_posts_rel_child]) datanonincr = {"values": data[0], "label": "Non-incremental analysis of parent commit"} dataincr = {"values": data[1], "label": "Incremental analysis of commit"} - utils.cummulative_distr_plot([datanonincr, dataincr], base, outfile_nonincr_vs_incr) + datarelincr = {"values": data[2], "label": "Reluctant incremental analysis of commit"} + + utils.cummulative_distr_plot([datanonincr, dataincr, datarelincr], base, figure_dir, outfile_nonincr_vs_incr) data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_incr_child, utils.header_runtime_incr_posts_rel_child]) dataincr = {"values": data[0], "label": "Incremental analysis of commit"} datarelincr = {"values": data[1], "label": "Reluctant incremental analysis of commit"} - utils.cummulative_distr_plot([dataincr, datarelincr], 
base, outfile_incr_vs_incrrel, logscale=True) -def cummulative_distr_all3(outdir, result_csv_filename): + utils.cummulative_distr_plot([dataincr, datarelincr], base, figure_dir, outfile_incr_vs_incrrel, logscale=True) + +def cummulative_distr_all3(results_dir, result_csv_filename, figure_dir): num_bins = 2000 outfile_nonincr_vs_incr = "figure_cum_distr_all3.pdf" - df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) + df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), filterDetectedChanges=True) data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_parent, utils.header_runtime_incr_child, utils.header_runtime_incr_posts_rel_child]) datanonincr = {"values": data[0], "label": "Non-incremental analysis of parent commit"} dataincr = {"values": data[1], "label": "Incremental analysis of commit"} datarelincr = {"values": data[2], "label": "Reluctant incremental analysis of commit"} - utils.cummulative_distr_plot([datanonincr, dataincr, datarelincr], base, outfile_nonincr_vs_incr, figsize=(6,4), logscale=True) + utils.cummulative_distr_plot([datanonincr, dataincr, datarelincr], base, figure_dir, outfile_nonincr_vs_incr, figsize=(6,4), logscale=True) def distribution_absdiff_plot(title, result_csv_filename, outdir, cutoffs_incr=None, cutoffs_rel=None): df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) @@ -135,10 +138,13 @@ def paper_precision_graph(results_precision, filename, outdir): shutil.rmtree(outdir) os.mkdir(outdir) filename = "total_results.csv" -paper_efficiency_graphs(results_efficiency, filename, outdir, filterRelCLOC=True, filterDetectedChanges=False) +# paper_efficiency_graphs(results_efficiency, filename, outdir, filterRelCLOC=True, filterDetectedChanges=False) + +cummulative_distr_compare2(results_efficiency, filename, outdir) +cummulative_distr_all3(results_efficiency, filename, outdir) # precision 
plot -results_precision = "result_precision" -filename = "results.json" -paper_precision_graph(results_precision, filename, outdir) +# results_precision = "result_precision" +# filename = "results.json" +# paper_precision_graph(results_precision, filename, outdir) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index a9d320d34..c417fe1ef 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -184,7 +184,7 @@ def create_cum_data(dataFrame, num_bins, relColumns): data = data + [cum] return data, base[:-1] -def cummulative_distr_plot(data_sets, base, outfile, figsize=None, title=None, logscale=False): +def cummulative_distr_plot(data_sets, base, figure_dir, outfile, figsize=None, title=None, logscale=False): if figsize: plt.figure(figsize=figsize) else: @@ -204,6 +204,8 @@ def cummulative_distr_plot(data_sets, base, outfile, figsize=None, title=None, l plt.tight_layout() plt.legend() plt.title(title) + + outfile = os.path.join(figure_dir, outfile) plt.savefig(outfile) def hist_plot(data, step, title, xlabel, ylabel, outfile, size, xlim_left=None, xlim_right=None, cutoffs=None): From 3d7620a9ab20feb2a89f16622b4249f4b4ec55be Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Fri, 9 Dec 2022 11:22:25 +0100 Subject: [PATCH 25/84] Plot.py: Iterate over folders of form result_efficiency_[project], for given projects. 
--- scripts/incremental/benchmarking/plot.py | 50 ++++++++++++++++-------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index d062314d0..797653d50 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -30,7 +30,7 @@ def cummulative_distr_all3(results_dir, result_csv_filename, figure_dir): datanonincr = {"values": data[0], "label": "Non-incremental analysis of parent commit"} dataincr = {"values": data[1], "label": "Incremental analysis of commit"} datarelincr = {"values": data[2], "label": "Reluctant incremental analysis of commit"} - utils.cummulative_distr_plot([datanonincr, dataincr, datarelincr], base, figure_dir, outfile_nonincr_vs_incr, figsize=(6,4), logscale=True) + utils.cummulative_distr_plot([datanonincr, dataincr, datarelincr], base, figure_dir, outfile_nonincr_vs_incr, figsize=(6,4), logscale=False) def distribution_absdiff_plot(title, result_csv_filename, outdir, cutoffs_incr=None, cutoffs_rel=None): df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) @@ -132,19 +132,35 @@ def paper_precision_graph(results_precision, filename, outdir): # efficiency plots -results_efficiency = "result_efficiency" -outdir = "figures" -if os.path.exists(outdir): - shutil.rmtree(outdir) -os.mkdir(outdir) -filename = "total_results.csv" - -# paper_efficiency_graphs(results_efficiency, filename, outdir, filterRelCLOC=True, filterDetectedChanges=False) - -cummulative_distr_compare2(results_efficiency, filename, outdir) -cummulative_distr_all3(results_efficiency, filename, outdir) - -# precision plot -# results_precision = "result_precision" -# filename = "results.json" -# paper_precision_graph(results_precision, filename, outdir) + +def main(): + projects = ["figlet", "chrony", "zstd"] + results_efficiency = "result_efficiency_" + + for project in projects: + 
project_efficiency_results = results_efficiency + project + + if not os.path.exists(project_efficiency_results): + print("Results for project " + project + " do not exist. Skipping.") + continue + else: + print("Creating plots for project " + project + ".") + + outdir = os.path.join("figures", project) + if os.path.exists(outdir): + shutil.rmtree(outdir) + os.makedirs(outdir) + filename = "total_results.csv" + + cummulative_distr_compare2(project_efficiency_results, filename, outdir) + cummulative_distr_all3(project_efficiency_results, filename, outdir) + + # paper_efficiency_graphs(results_efficiency, filename, outdir, filterRelCLOC=True, filterDetectedChanges=False) + + + # precision plot + # results_precision = "result_precision" + # filename = "results.json" + # paper_precision_graph(results_precision, filename, outdir) + +main() \ No newline at end of file From 33b687fbdb4809fc2a055e02b39230f8544713c6 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Fri, 9 Dec 2022 11:26:45 +0100 Subject: [PATCH 26/84] Change cummulative_distr_compare2 to again only create comparisons between 2 configs. 
--- scripts/incremental/benchmarking/plot.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index 797653d50..0b905a0bd 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -11,9 +11,8 @@ def cummulative_distr_compare2(results_dir, result_csv_filename, figure_dir): data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_parent, utils.header_runtime_incr_child, utils.header_runtime_incr_posts_rel_child]) datanonincr = {"values": data[0], "label": "Non-incremental analysis of parent commit"} dataincr = {"values": data[1], "label": "Incremental analysis of commit"} - datarelincr = {"values": data[2], "label": "Reluctant incremental analysis of commit"} - utils.cummulative_distr_plot([datanonincr, dataincr, datarelincr], base, figure_dir, outfile_nonincr_vs_incr) + utils.cummulative_distr_plot([datanonincr, dataincr], base, figure_dir, outfile_nonincr_vs_incr) data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_incr_child, utils.header_runtime_incr_posts_rel_child]) dataincr = {"values": data[0], "label": "Incremental analysis of commit"} @@ -157,7 +156,6 @@ def main(): # paper_efficiency_graphs(results_efficiency, filename, outdir, filterRelCLOC=True, filterDetectedChanges=False) - # precision plot # results_precision = "result_precision" # filename = "results.json" From 1d1fc2a1db7c947d7db1b1eaca5c71d570d06449 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Fri, 9 Dec 2022 12:15:58 +0100 Subject: [PATCH 27/84] Logarithmic plots: choose y-min depending on minimum of plotted data (rather than a constant). 
--- scripts/incremental/benchmarking/utils.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index c417fe1ef..0db9dd001 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -1,4 +1,6 @@ import os +import sys +import math import shutil from pathlib import Path import subprocess @@ -184,12 +186,21 @@ def create_cum_data(dataFrame, num_bins, relColumns): data = data + [cum] return data, base[:-1] +def largest_power_of_two_smaller(x): + p = math.floor(math.log2(x)) - 1 + p = max(1, p) + return 2 ** p + def cummulative_distr_plot(data_sets, base, figure_dir, outfile, figsize=None, title=None, logscale=False): if figsize: plt.figure(figsize=figsize) else: plt.figure() + min = sys.maxsize for d in data_sets: + min_d = d["values"].min() + if min_d < min: + min = min_d plt.plot(d["values"], base, label=d["label"]) plt.xlabel('Number of Commits') if logscale: @@ -197,7 +208,7 @@ def cummulative_distr_plot(data_sets, base, figure_dir, outfile, figsize=None, t plt.yscale('log', base=2) plt.gca().yaxis.set_major_formatter(ScalarFormatter()) plt.xlim(left=0) - plt.ylim(bottom=95) + plt.ylim(bottom=largest_power_of_two_smaller(min)) #plt.yticks(np.arange(100,1500,100)) else: plt.ylabel('Runtime in s') From 00944abc0133159b302f04ffe89c035ffb2f40ce Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Fri, 9 Dec 2022 12:16:17 +0100 Subject: [PATCH 28/84] Plot 3-way comparison again logarithmically. 
--- scripts/incremental/benchmarking/plot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index 0b905a0bd..eabc2e768 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -29,7 +29,7 @@ def cummulative_distr_all3(results_dir, result_csv_filename, figure_dir): datanonincr = {"values": data[0], "label": "Non-incremental analysis of parent commit"} dataincr = {"values": data[1], "label": "Incremental analysis of commit"} datarelincr = {"values": data[2], "label": "Reluctant incremental analysis of commit"} - utils.cummulative_distr_plot([datanonincr, dataincr, datarelincr], base, figure_dir, outfile_nonincr_vs_incr, figsize=(6,4), logscale=False) + utils.cummulative_distr_plot([datanonincr, dataincr, datarelincr], base, figure_dir, outfile_nonincr_vs_incr, figsize=(6,4), logscale=True) def distribution_absdiff_plot(title, result_csv_filename, outdir, cutoffs_incr=None, cutoffs_rel=None): df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) From 527398497e3999cfc75498f1414ab39e85b8a4db Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Mon, 12 Dec 2022 10:51:17 +0100 Subject: [PATCH 29/84] Avoid artefact in cummulative graph. --- scripts/incremental/benchmarking/utils.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index 0db9dd001..1b42d80de 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -183,6 +183,17 @@ def create_cum_data(dataFrame, num_bins, relColumns): base = basec cum = np.cumsum(valuesc, dtype=np.float) cum[cum==0] = np.nan + + # If there is a tail of values that are the same, set the ones after its first occurrence to NaN. 
+ # In the resulting graph, this avoids the artefact that all the lines go up to the largest y-value of any line. + last = len(cum) - 1 + last_value = cum[last] + for i in range(last - 1 , 0, -1): + if cum[i] == last_value: + cum[i + 1] = np.nan + else: + break + data = data + [cum] return data, base[:-1] From 533608520bbe6ac6e5ca7f54cdd507fa4b143fa1 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Mon, 12 Dec 2022 11:22:30 +0100 Subject: [PATCH 30/84] Plot incremental analysis with incremental postsolver separately. --- scripts/incremental/benchmarking/plot.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index eabc2e768..a41d0ee67 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -2,6 +2,11 @@ import os import shutil +description_non_incr = "Non-incremental analysis" +description_incr = "Incremental analysis" +description_incr_post = "Incremental analysis with incremental postsolver" +description_incr_rel ="Reluctant incremental analysis with incremental postsolver" + def cummulative_distr_compare2(results_dir, result_csv_filename, figure_dir): num_bins = 2000 outfile_nonincr_vs_incr = "figure_cum_distr_incr.pdf" @@ -9,14 +14,14 @@ def cummulative_distr_compare2(results_dir, result_csv_filename, figure_dir): df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), filterDetectedChanges=True) data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_parent, utils.header_runtime_incr_child, utils.header_runtime_incr_posts_rel_child]) - datanonincr = {"values": data[0], "label": "Non-incremental analysis of parent commit"} - dataincr = {"values": data[1], "label": "Incremental analysis of commit"} + datanonincr = {"values": data[0], "label": description_non_incr} + dataincr = {"values": data[1], "label": description_incr} 
utils.cummulative_distr_plot([datanonincr, dataincr], base, figure_dir, outfile_nonincr_vs_incr) data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_incr_child, utils.header_runtime_incr_posts_rel_child]) - dataincr = {"values": data[0], "label": "Incremental analysis of commit"} - datarelincr = {"values": data[1], "label": "Reluctant incremental analysis of commit"} + dataincr = {"values": data[0], "label": description_incr} + datarelincr = {"values": data[1], "label": description_incr_rel} utils.cummulative_distr_plot([dataincr, datarelincr], base, figure_dir, outfile_incr_vs_incrrel, logscale=True) @@ -25,11 +30,12 @@ def cummulative_distr_all3(results_dir, result_csv_filename, figure_dir): outfile_nonincr_vs_incr = "figure_cum_distr_all3.pdf" df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), filterDetectedChanges=True) - data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_parent, utils.header_runtime_incr_child, utils.header_runtime_incr_posts_rel_child]) - datanonincr = {"values": data[0], "label": "Non-incremental analysis of parent commit"} - dataincr = {"values": data[1], "label": "Incremental analysis of commit"} - datarelincr = {"values": data[2], "label": "Reluctant incremental analysis of commit"} - utils.cummulative_distr_plot([datanonincr, dataincr, datarelincr], base, figure_dir, outfile_nonincr_vs_incr, figsize=(6,4), logscale=True) + data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_parent, utils.header_runtime_incr_child, utils.header_runtime_incr_posts_child, utils.header_runtime_incr_posts_rel_child]) + data_non_incr = {"values": data[0], "label": description_non_incr} + data_incr = {"values": data[1], "label": description_incr} + data_incr_post = {"values": data[2], "label": description_incr_post} + data_incr_rel = {"values": data[3], "label": description_incr_rel} + utils.cummulative_distr_plot([data_non_incr, data_incr, data_incr_post, data_incr_rel], 
base, figure_dir, outfile_nonincr_vs_incr, figsize=(6,4), logscale=True) def distribution_absdiff_plot(title, result_csv_filename, outdir, cutoffs_incr=None, cutoffs_rel=None): df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) From 36bf6b33171ee66374b5da4fde1a7f1c01caaa7d Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Mon, 12 Dec 2022 16:07:01 +0100 Subject: [PATCH 31/84] Rename function cummulative_distr_all3 -> cummulative_distr_all4 --- scripts/incremental/benchmarking/plot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index a41d0ee67..ab53a7bd2 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -25,7 +25,7 @@ def cummulative_distr_compare2(results_dir, result_csv_filename, figure_dir): utils.cummulative_distr_plot([dataincr, datarelincr], base, figure_dir, outfile_incr_vs_incrrel, logscale=True) -def cummulative_distr_all3(results_dir, result_csv_filename, figure_dir): +def cummulative_distr_all4(results_dir, result_csv_filename, figure_dir): num_bins = 2000 outfile_nonincr_vs_incr = "figure_cum_distr_all3.pdf" df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), filterDetectedChanges=True) @@ -158,7 +158,7 @@ def main(): filename = "total_results.csv" cummulative_distr_compare2(project_efficiency_results, filename, outdir) - cummulative_distr_all3(project_efficiency_results, filename, outdir) + cummulative_distr_all4(project_efficiency_results, filename, outdir) # paper_efficiency_graphs(results_efficiency, filename, outdir, filterRelCLOC=True, filterDetectedChanges=False) From 800db88a11cc2ae52bdad3714a1ce0dc6dc525e0 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Mon, 12 Dec 2022 16:21:47 +0100 Subject: [PATCH 32/84] Change output for .pdf files to .pgf. 
--- scripts/incremental/benchmarking/plot.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index ab53a7bd2..a50bac0ed 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -9,8 +9,8 @@ def cummulative_distr_compare2(results_dir, result_csv_filename, figure_dir): num_bins = 2000 - outfile_nonincr_vs_incr = "figure_cum_distr_incr.pdf" - outfile_incr_vs_incrrel = "figure_cum_distr_rel.pdf" + outfile_nonincr_vs_incr = "figure_cum_distr_incr.pgf" + outfile_incr_vs_incrrel = "figure_cum_distr_rel.pgf" df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), filterDetectedChanges=True) data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_parent, utils.header_runtime_incr_child, utils.header_runtime_incr_posts_rel_child]) @@ -27,7 +27,7 @@ def cummulative_distr_compare2(results_dir, result_csv_filename, figure_dir): def cummulative_distr_all4(results_dir, result_csv_filename, figure_dir): num_bins = 2000 - outfile_nonincr_vs_incr = "figure_cum_distr_all3.pdf" + outfile_nonincr_vs_incr = "figure_cum_distr_all3.pgf" df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), filterDetectedChanges=True) data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_parent, utils.header_runtime_incr_child, utils.header_runtime_incr_posts_child, utils.header_runtime_incr_posts_rel_child]) @@ -42,11 +42,11 @@ def distribution_absdiff_plot(title, result_csv_filename, outdir, cutoffs_incr=N # plot incremental vs non-incremental diff = df.loc[:,utils.header_runtime_parent] - df.loc[:,utils.header_runtime_incr_child] - utils.hist_plot(diff, 20, title, 'Improvement in s (incremental compared to non-incremental)', 'Number of Commits', os.path.join(outdir, "figure_absdiff_distr_incr.pdf"), cutoffs_incr) + utils.hist_plot(diff, 20, title, 
'Improvement in s (incremental compared to non-incremental)', 'Number of Commits', os.path.join(outdir, "figure_absdiff_distr_incr.pgf"), cutoffs_incr) # plot reluctant vs. basic incremental diff = df.loc[:,utils.header_runtime_incr_child] - df.loc[:,utils.header_runtime_incr_posts_rel_child] - utils.hist_plot(diff, 2, title, 'Improvement in s (reluctant compared to incremental)', 'Number of Commits', os.path.join(outdir, "figure_absdiff_distr_rel.pdf"), cutoffs_rel) + utils.hist_plot(diff, 2, title, 'Improvement in s (reluctant compared to incremental)', 'Number of Commits', os.path.join(outdir, "figure_absdiff_distr_rel.pgf"), cutoffs_rel) def distribution_reldiff_plot(title, result_csv_filename, outdir, cutoffs_incr=None, cutoffs_rel=None): df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) @@ -54,11 +54,11 @@ def distribution_reldiff_plot(title, result_csv_filename, outdir, cutoffs_incr=N # plot incremental vs non-incremental print(df[utils.header_runtime_incr_child].astype('float')) diff = 1 - df[utils.header_runtime_incr_child].astype('float') / df[utils.header_runtime_parent].astype('float') - utils.hist_plot(diff, 0.01, title, "Relative Improvement in s (incremental compared to non-incremental)", 'Number of Commits', os.path.join(outdir, "figure_reldiff_distr_incr.pdf"), cutoffs_incr) + utils.hist_plot(diff, 0.01, title, "Relative Improvement in s (incremental compared to non-incremental)", 'Number of Commits', os.path.join(outdir, "figure_reldiff_distr_incr.pgf"), cutoffs_incr) # plot reluctant vs. 
basic incremental diff = 1 - df.loc[:,utils.header_runtime_incr_posts_rel_child] / df.loc[:,utils.header_runtime_incr_child] - utils.hist_plot(diff, 0.005, title, 'Relative Improvement (reluctant compared to incremental)', 'Number of Commits', os.path.join(outdir, "figure_reldiff_distr_rel.pdf"), cutoffs_rel) + utils.hist_plot(diff, 0.005, title, 'Relative Improvement (reluctant compared to incremental)', 'Number of Commits', os.path.join(outdir, "figure_reldiff_distr_rel.pgf"), cutoffs_rel) def paper_efficiency_graphs(dir_results, csv_filename, outdir, filterRelCLOC=False, filterDetectedChanges=False): df = utils.get_cleaned_filtered_data(os.path.join(dir_results,csv_filename), filterRelCLOC=filterRelCLOC, filterDetectedChanges=filterDetectedChanges) From 3beedd8026dbc1fae199be676003800d12b7b732 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Mon, 12 Dec 2022 16:54:11 +0100 Subject: [PATCH 33/84] Change figsize for cummulative_distr functions. --- scripts/incremental/benchmarking/plot.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index a50bac0ed..baf597b2f 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -2,10 +2,14 @@ import os import shutil -description_non_incr = "Non-incremental analysis" -description_incr = "Incremental analysis" -description_incr_post = "Incremental analysis with incremental postsolver" -description_incr_rel ="Reluctant incremental analysis with incremental postsolver" +description_non_incr = "Non-Inc" +description_incr = "Inc" +description_incr_post = "Inc-Post" +description_incr_rel ="Rel" + +# measures in inches +textwidth = 7 +figsize = (textwidth / 2.5, textwidth / 3) def cummulative_distr_compare2(results_dir, result_csv_filename, figure_dir): num_bins = 2000 @@ -17,13 +21,13 @@ def cummulative_distr_compare2(results_dir, result_csv_filename, 
figure_dir): datanonincr = {"values": data[0], "label": description_non_incr} dataincr = {"values": data[1], "label": description_incr} - utils.cummulative_distr_plot([datanonincr, dataincr], base, figure_dir, outfile_nonincr_vs_incr) + utils.cummulative_distr_plot([datanonincr, dataincr], base, figure_dir, outfile_nonincr_vs_incr, figsize = figsize) data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_incr_child, utils.header_runtime_incr_posts_rel_child]) dataincr = {"values": data[0], "label": description_incr} datarelincr = {"values": data[1], "label": description_incr_rel} - utils.cummulative_distr_plot([dataincr, datarelincr], base, figure_dir, outfile_incr_vs_incrrel, logscale=True) + utils.cummulative_distr_plot([dataincr, datarelincr], base, figure_dir, outfile_incr_vs_incrrel, logscale=True, figsize = figsize) def cummulative_distr_all4(results_dir, result_csv_filename, figure_dir): num_bins = 2000 @@ -35,7 +39,7 @@ def cummulative_distr_all4(results_dir, result_csv_filename, figure_dir): data_incr = {"values": data[1], "label": description_incr} data_incr_post = {"values": data[2], "label": description_incr_post} data_incr_rel = {"values": data[3], "label": description_incr_rel} - utils.cummulative_distr_plot([data_non_incr, data_incr, data_incr_post, data_incr_rel], base, figure_dir, outfile_nonincr_vs_incr, figsize=(6,4), logscale=True) + utils.cummulative_distr_plot([data_non_incr, data_incr, data_incr_post, data_incr_rel], base, figure_dir, outfile_nonincr_vs_incr, figsize = figsize, logscale=True) def distribution_absdiff_plot(title, result_csv_filename, outdir, cutoffs_incr=None, cutoffs_rel=None): df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) From 3c67b91b8ec43e737ccbba23b51f6789507b5c29 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Wed, 14 Dec 2022 13:50:37 +0100 Subject: [PATCH 34/84] Revert "Change figsize for cummulative_distr functions." 
This reverts commit 3beedd8026dbc1fae199be676003800d12b7b732. --- scripts/incremental/benchmarking/plot.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index baf597b2f..a50bac0ed 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -2,14 +2,10 @@ import os import shutil -description_non_incr = "Non-Inc" -description_incr = "Inc" -description_incr_post = "Inc-Post" -description_incr_rel ="Rel" - -# measures in inches -textwidth = 7 -figsize = (textwidth / 2.5, textwidth / 3) +description_non_incr = "Non-incremental analysis" +description_incr = "Incremental analysis" +description_incr_post = "Incremental analysis with incremental postsolver" +description_incr_rel ="Reluctant incremental analysis with incremental postsolver" def cummulative_distr_compare2(results_dir, result_csv_filename, figure_dir): num_bins = 2000 @@ -21,13 +17,13 @@ def cummulative_distr_compare2(results_dir, result_csv_filename, figure_dir): datanonincr = {"values": data[0], "label": description_non_incr} dataincr = {"values": data[1], "label": description_incr} - utils.cummulative_distr_plot([datanonincr, dataincr], base, figure_dir, outfile_nonincr_vs_incr, figsize = figsize) + utils.cummulative_distr_plot([datanonincr, dataincr], base, figure_dir, outfile_nonincr_vs_incr) data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_incr_child, utils.header_runtime_incr_posts_rel_child]) dataincr = {"values": data[0], "label": description_incr} datarelincr = {"values": data[1], "label": description_incr_rel} - utils.cummulative_distr_plot([dataincr, datarelincr], base, figure_dir, outfile_incr_vs_incrrel, logscale=True, figsize = figsize) + utils.cummulative_distr_plot([dataincr, datarelincr], base, figure_dir, outfile_incr_vs_incrrel, logscale=True) def cummulative_distr_all4(results_dir, result_csv_filename, 
figure_dir): num_bins = 2000 @@ -39,7 +35,7 @@ def cummulative_distr_all4(results_dir, result_csv_filename, figure_dir): data_incr = {"values": data[1], "label": description_incr} data_incr_post = {"values": data[2], "label": description_incr_post} data_incr_rel = {"values": data[3], "label": description_incr_rel} - utils.cummulative_distr_plot([data_non_incr, data_incr, data_incr_post, data_incr_rel], base, figure_dir, outfile_nonincr_vs_incr, figsize = figsize, logscale=True) + utils.cummulative_distr_plot([data_non_incr, data_incr, data_incr_post, data_incr_rel], base, figure_dir, outfile_nonincr_vs_incr, figsize=(6,4), logscale=True) def distribution_absdiff_plot(title, result_csv_filename, outdir, cutoffs_incr=None, cutoffs_rel=None): df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) From 162e7436c738df45c8f850f4c5c7863840534830 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Wed, 14 Dec 2022 13:50:56 +0100 Subject: [PATCH 35/84] Revert "Change output for .pdf files to .pgf." This reverts commit 800db88a11cc2ae52bdad3714a1ce0dc6dc525e0. 
--- scripts/incremental/benchmarking/plot.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index a50bac0ed..ab53a7bd2 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -9,8 +9,8 @@ def cummulative_distr_compare2(results_dir, result_csv_filename, figure_dir): num_bins = 2000 - outfile_nonincr_vs_incr = "figure_cum_distr_incr.pgf" - outfile_incr_vs_incrrel = "figure_cum_distr_rel.pgf" + outfile_nonincr_vs_incr = "figure_cum_distr_incr.pdf" + outfile_incr_vs_incrrel = "figure_cum_distr_rel.pdf" df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), filterDetectedChanges=True) data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_parent, utils.header_runtime_incr_child, utils.header_runtime_incr_posts_rel_child]) @@ -27,7 +27,7 @@ def cummulative_distr_compare2(results_dir, result_csv_filename, figure_dir): def cummulative_distr_all4(results_dir, result_csv_filename, figure_dir): num_bins = 2000 - outfile_nonincr_vs_incr = "figure_cum_distr_all3.pgf" + outfile_nonincr_vs_incr = "figure_cum_distr_all3.pdf" df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), filterDetectedChanges=True) data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_parent, utils.header_runtime_incr_child, utils.header_runtime_incr_posts_child, utils.header_runtime_incr_posts_rel_child]) @@ -42,11 +42,11 @@ def distribution_absdiff_plot(title, result_csv_filename, outdir, cutoffs_incr=N # plot incremental vs non-incremental diff = df.loc[:,utils.header_runtime_parent] - df.loc[:,utils.header_runtime_incr_child] - utils.hist_plot(diff, 20, title, 'Improvement in s (incremental compared to non-incremental)', 'Number of Commits', os.path.join(outdir, "figure_absdiff_distr_incr.pgf"), cutoffs_incr) + utils.hist_plot(diff, 20, title, 
'Improvement in s (incremental compared to non-incremental)', 'Number of Commits', os.path.join(outdir, "figure_absdiff_distr_incr.pdf"), cutoffs_incr) # plot reluctant vs. basic incremental diff = df.loc[:,utils.header_runtime_incr_child] - df.loc[:,utils.header_runtime_incr_posts_rel_child] - utils.hist_plot(diff, 2, title, 'Improvement in s (reluctant compared to incremental)', 'Number of Commits', os.path.join(outdir, "figure_absdiff_distr_rel.pgf"), cutoffs_rel) + utils.hist_plot(diff, 2, title, 'Improvement in s (reluctant compared to incremental)', 'Number of Commits', os.path.join(outdir, "figure_absdiff_distr_rel.pdf"), cutoffs_rel) def distribution_reldiff_plot(title, result_csv_filename, outdir, cutoffs_incr=None, cutoffs_rel=None): df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) @@ -54,11 +54,11 @@ def distribution_reldiff_plot(title, result_csv_filename, outdir, cutoffs_incr=N # plot incremental vs non-incremental print(df[utils.header_runtime_incr_child].astype('float')) diff = 1 - df[utils.header_runtime_incr_child].astype('float') / df[utils.header_runtime_parent].astype('float') - utils.hist_plot(diff, 0.01, title, "Relative Improvement in s (incremental compared to non-incremental)", 'Number of Commits', os.path.join(outdir, "figure_reldiff_distr_incr.pgf"), cutoffs_incr) + utils.hist_plot(diff, 0.01, title, "Relative Improvement in s (incremental compared to non-incremental)", 'Number of Commits', os.path.join(outdir, "figure_reldiff_distr_incr.pdf"), cutoffs_incr) # plot reluctant vs. 
basic incremental diff = 1 - df.loc[:,utils.header_runtime_incr_posts_rel_child] / df.loc[:,utils.header_runtime_incr_child] - utils.hist_plot(diff, 0.005, title, 'Relative Improvement (reluctant compared to incremental)', 'Number of Commits', os.path.join(outdir, "figure_reldiff_distr_rel.pgf"), cutoffs_rel) + utils.hist_plot(diff, 0.005, title, 'Relative Improvement (reluctant compared to incremental)', 'Number of Commits', os.path.join(outdir, "figure_reldiff_distr_rel.pdf"), cutoffs_rel) def paper_efficiency_graphs(dir_results, csv_filename, outdir, filterRelCLOC=False, filterDetectedChanges=False): df = utils.get_cleaned_filtered_data(os.path.join(dir_results,csv_filename), filterRelCLOC=filterRelCLOC, filterDetectedChanges=filterDetectedChanges) From 57025e346c434cfdd7e94829e1c7fa6b7243b3f3 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 15 Dec 2022 11:24:21 +0100 Subject: [PATCH 36/84] Plot: Create precision plots again. --- scripts/incremental/benchmarking/plot.py | 39 +++++++++++++++--------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index ab53a7bd2..3d3a88797 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -141,30 +141,39 @@ def paper_precision_graph(results_precision, filename, outdir): def main(): projects = ["figlet", "chrony", "zstd"] results_efficiency = "result_efficiency_" + results_precision = "result_precision_" for project in projects: - project_efficiency_results = results_efficiency + project + efficiency_results = results_efficiency + project + precision_results = results_precision + project - if not os.path.exists(project_efficiency_results): + + if not (os.path.exists(efficiency_results) or os.path.exists(precision_results)): print("Results for project " + project + " do not exist. 
Skipping.") continue else: print("Creating plots for project " + project + ".") - outdir = os.path.join("figures", project) - if os.path.exists(outdir): - shutil.rmtree(outdir) - os.makedirs(outdir) - filename = "total_results.csv" - - cummulative_distr_compare2(project_efficiency_results, filename, outdir) - cummulative_distr_all4(project_efficiency_results, filename, outdir) - - # paper_efficiency_graphs(results_efficiency, filename, outdir, filterRelCLOC=True, filterDetectedChanges=False) + figures_dir = os.path.join("figures", project) + if os.path.exists(figures_dir): + shutil.rmtree(figures_dir) + os.makedirs(figures_dir) + + if os.path.exists(efficiency_results): + efficieny_filename = "total_results.csv" + print("Creating efficiency plots.") + cummulative_distr_compare2(efficiency_results, efficieny_filename, figures_dir) + cummulative_distr_all4(efficiency_results, efficieny_filename, figures_dir) + # paper_efficiency_graphs(results_efficiency, filename, outdir, filterRelCLOC=True, filterDetectedChanges=False) + else: + print("No efficiency results available.") # precision plot - # results_precision = "result_precision" - # filename = "results.json" - # paper_precision_graph(results_precision, filename, outdir) + if os.path.exists(precision_results): + precision_filename = "results.json" + print("Creating precision plots.") + paper_precision_graph(precision_results, precision_filename, figures_dir) + else: + print("No precision results available.") main() \ No newline at end of file From fdb7ffb1402b8020d9922026902290e17f1722b4 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 15 Dec 2022 13:24:43 +0100 Subject: [PATCH 37/84] Remove printstats from argumeents for compare_runs. 
--- scripts/incremental/benchmarking/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index 1b42d80de..e62b1c411 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -92,7 +92,7 @@ def append_to_repo_path(file): outfile.close() def compare_runs(analyzer_dir, dummy_c_file, outdir, conf, compare_data_1, compare_data_2): - options = ['--conf', os.path.join(analyzer_dir, 'conf', conf + '.json'), '--disable', 'printstats', '--disable', 'warn.warning', '--disable', 'warn.race', '--disable', 'dbg.compare_runs.diff', '--disable', 'dbg.compare_runs.eqsys', '--enable', 'dbg.compare_runs.node', '--compare_runs', compare_data_1, compare_data_2] + options = ['--conf', os.path.join(analyzer_dir, 'conf', conf + '.json'), '--disable', 'warn.warning', '--disable', 'warn.race', '--disable', 'dbg.compare_runs.diff', '--disable', 'dbg.compare_runs.eqsys', '--enable', 'dbg.compare_runs.node', '--compare_runs', compare_data_1, compare_data_2] analyze_command = [os.path.join(analyzer_dir, 'goblint'), *options, dummy_c_file] with open(os.path.join(outdir, comparelog), "w+") as outfile: subprocess.run(analyze_command, check=True, stdout=outfile, stderr=subprocess.STDOUT) From e946610d115956063c89977182a63fb041478efd Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 15 Dec 2022 13:33:25 +0100 Subject: [PATCH 38/84] Define analyzed branch per repository; needed for precision.py. 
--- scripts/incremental/benchmarking/precision.py | 3 ++- scripts/incremental/benchmarking/projects.py | 16 +++++++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/scripts/incremental/benchmarking/precision.py b/scripts/incremental/benchmarking/precision.py index 510731026..df92a6106 100644 --- a/scripts/incremental/benchmarking/precision.py +++ b/scripts/incremental/benchmarking/precision.py @@ -36,6 +36,7 @@ to = project.to diff_exclude = project.diff_exclude files = project.files +branch = project.branch try: numcores = int(sys.argv[3]) @@ -56,7 +57,7 @@ def start_commit_for_sequence_search(): current_commit = "" - for commit in Repository(url, to=to, only_in_branch='dev', order='reverse', clone_repo_to=res_dir).traverse_commits(): + for commit in Repository(url, to=to, only_in_branch=branch, order='reverse', clone_repo_to=res_dir).traverse_commits(): current_commit = commit break gr = Git(os.path.join(res_dir, repo_name)) diff --git a/scripts/incremental/benchmarking/projects.py b/scripts/incremental/benchmarking/projects.py index fdcf082d6..f04b63aec 100644 --- a/scripts/incremental/benchmarking/projects.py +++ b/scripts/incremental/benchmarking/projects.py @@ -12,8 +12,9 @@ class ProjectConfig: diff_exclude: list # list[str] '''Files to analyze. 
If this list is not empty, the given files will be analyzed (not those in the compiledb)''' files: list # list[str] + branch: str - def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin, to, diff_exclude, files): + def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin, to, diff_exclude, files, branch): self.url = url self.repo_name = repo_name self.build_compdb = build_compdb @@ -23,6 +24,7 @@ def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin self.to = to self.diff_exclude = diff_exclude self.files = files + self.branch = branch sqlite = ProjectConfig( url = "https://github.com/sqlite/sqlite", @@ -33,7 +35,8 @@ def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin begin = datetime.datetime(2021,8,1), to = datetime.datetime(2021,8,10), # minimal subset: datetime(2021,8,4) diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"], - files = ['sqlite3.c', 'sqlite3.h', 'sqlite3ext.h', 'shell.c'] + files = ['sqlite3.c', 'sqlite3.h', 'sqlite3ext.h', 'shell.c'], + branch = "master" ) zstd = ProjectConfig( @@ -45,7 +48,8 @@ def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin begin = datetime.datetime(2021,8,1), # very minimal: "zstd-minimal" to = datetime.datetime(2022,2,1), # minimal subset: datetime(2021,8,4) diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"], - files = None + files = None, + branch = "dev" ) figlet = ProjectConfig( @@ -57,7 +61,8 @@ def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin begin = datetime.datetime(2010,1,1), to = datetime.datetime(2022,10,10), diff_exclude = [], - files = ['Makefile'] + files = ['Makefile'], + branch = "master" ) chrony = ProjectConfig( @@ -71,7 +76,8 @@ def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin begin=datetime.datetime(2020, 1, 1), to=datetime.datetime(2022, 10, 10), 
diff_exclude=[], - files=None + files=None, + branch = "master" ) projects = {"sqlite": sqlite, "chrony": chrony, "figlet": figlet, "zstd": zstd} From 9d03382ec984001bd4788196b3f043396053b327 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 15 Dec 2022 18:07:05 +0100 Subject: [PATCH 39/84] Extract analysis and solving time in efficiency script. --- .../incremental/benchmarking/efficiency.py | 45 ++++++++++++++----- scripts/incremental/benchmarking/plot.py | 30 ++++++------- scripts/incremental/benchmarking/utils.py | 37 ++++++++++++--- 3 files changed, 80 insertions(+), 32 deletions(-) diff --git a/scripts/incremental/benchmarking/efficiency.py b/scripts/incremental/benchmarking/efficiency.py index 8c18e92c6..5672f9062 100644 --- a/scripts/incremental/benchmarking/efficiency.py +++ b/scripts/incremental/benchmarking/efficiency.py @@ -149,8 +149,12 @@ def analyze_small_commits_in_repo(cwd, outdir, from_c, to_c): def collect_data(outdir): data = {"Commit": [], "Failed?": [], "Changed LOC": [], "Relevant changed LOC": [], "Changed/Added/Removed functions": [], - utils.header_runtime_parent: [], utils.header_runtime_incr_child: [], - utils.header_runtime_incr_posts_child: [], utils.header_runtime_incr_posts_rel_child: [], + utils.runtime_header_parent: [], utils.runtime_header_incr_child: [], + utils.runtime_header_incr_posts_child: [], utils.runtime_header_incr_posts_rel_child: [], + utils.analysis_header_parent: [], utils.analysis_header_incr_child: [], + utils.analysis_header_incr_posts_child: [], utils.analysis_header_incr_posts_rel_child: [], + utils.solving_header_parent: [], utils.solving_header_incr_child: [], + utils.solving_header_incr_posts_child: [], utils.solving_header_incr_posts_rel_child: [], "Change in number of race warnings": []} for t in os.listdir(outdir): parentlog = os.path.join(outdir, t, 'parent', utils.analyzerlog) @@ -165,10 +169,19 @@ def collect_data(outdir): data["Failed?"].append(commit_prop["failed"]) 
data["Commit"].append(commit_prop["hash"][:7]) if commit_prop["failed"] == True: - data[utils.header_runtime_parent].append(0) - data[utils.header_runtime_incr_child].append(0) - data[utils.header_runtime_incr_posts_child].append(0) - data[utils.header_runtime_incr_posts_rel_child].append(0) + data[utils.runtime_header_parent].append(0) + data[utils.runtime_header_incr_child].append(0) + data[utils.runtime_header_incr_posts_child].append(0) + data[utils.runtime_header_incr_posts_rel_child].append(0) + data[utils.analysis_header_parent].append(0) + data[utils.analysis_header_incr_child].append(0) + data[utils.analysis_header_incr_posts_child].append(0) + data[utils.analysis_header_incr_posts_rel_child].append(0) + data[utils.solving_header_parent].append(0) + data[utils.solving_header_incr_child].append(0) + data[utils.solving_header_incr_posts_child].append(0) + data[utils.solving_header_incr_posts_rel_child].append(0) + data["Changed/Added/Removed functions"].append(0) data["Change in number of race warnings"].append(0) continue @@ -177,10 +190,22 @@ def collect_data(outdir): child_posts_info = utils.extract_from_analyzer_log(childpostslog) child_posts_rel_info = utils.extract_from_analyzer_log(childpostsrellog) data["Changed/Added/Removed functions"].append(int(child_info["changed"]) + int(child_info["added"]) + int(child_info["removed"])) - data[utils.header_runtime_parent].append(float(parent_info["runtime"])) - data[utils.header_runtime_incr_child].append(float(child_info["runtime"])) - data[utils.header_runtime_incr_posts_child].append(float(child_posts_info["runtime"])) - data[utils.header_runtime_incr_posts_rel_child].append(float(child_posts_rel_info["runtime"])) + data[utils.runtime_header_parent].append(float(parent_info["runtime"])) + data[utils.runtime_header_incr_child].append(float(child_info["runtime"])) + data[utils.runtime_header_incr_posts_child].append(float(child_posts_info["runtime"])) + 
data[utils.runtime_header_incr_posts_rel_child].append(float(child_posts_rel_info["runtime"])) + + + data[utils.analysis_header_parent].append(float(parent_info["analysis_time"])) + data[utils.analysis_header_incr_child].append(float(child_info["analysis_time"])) + data[utils.analysis_header_incr_posts_child].append(float(child_posts_info["analysis_time"])) + data[utils.analysis_header_incr_posts_rel_child].append(float(child_posts_rel_info["analysis_time"])) + + data[utils.solving_header_parent].append(float(parent_info["solving_time"])) + data[utils.solving_header_incr_child].append(float(child_info["solving_time"])) + data[utils.solving_header_incr_posts_child].append(float(child_posts_info["solving_time"])) + data[utils.solving_header_incr_posts_rel_child].append(float(child_posts_rel_info["solving_time"])) + data["Change in number of race warnings"].append(int(child_info["race_warnings"] - int(parent_info["race_warnings"]))) return data diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index 3d3a88797..98474d84d 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -13,13 +13,13 @@ def cummulative_distr_compare2(results_dir, result_csv_filename, figure_dir): outfile_incr_vs_incrrel = "figure_cum_distr_rel.pdf" df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), filterDetectedChanges=True) - data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_parent, utils.header_runtime_incr_child, utils.header_runtime_incr_posts_rel_child]) + data, base = utils.create_cum_data(df, num_bins, [utils.runtime_header_parent, utils.runtime_header_incr_child, utils.runtime_header_incr_posts_rel_child]) datanonincr = {"values": data[0], "label": description_non_incr} dataincr = {"values": data[1], "label": description_incr} utils.cummulative_distr_plot([datanonincr, dataincr], base, figure_dir, outfile_nonincr_vs_incr) - data, base = 
utils.create_cum_data(df, num_bins, [utils.header_runtime_incr_child, utils.header_runtime_incr_posts_rel_child]) + data, base = utils.create_cum_data(df, num_bins, [utils.runtime_header_incr_child, utils.runtime_header_incr_posts_rel_child]) dataincr = {"values": data[0], "label": description_incr} datarelincr = {"values": data[1], "label": description_incr_rel} @@ -30,7 +30,7 @@ def cummulative_distr_all4(results_dir, result_csv_filename, figure_dir): outfile_nonincr_vs_incr = "figure_cum_distr_all3.pdf" df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), filterDetectedChanges=True) - data, base = utils.create_cum_data(df, num_bins, [utils.header_runtime_parent, utils.header_runtime_incr_child, utils.header_runtime_incr_posts_child, utils.header_runtime_incr_posts_rel_child]) + data, base = utils.create_cum_data(df, num_bins, [utils.runtime_header_parent, utils.runtime_header_incr_child, utils.runtime_header_incr_posts_child, utils.runtime_header_incr_posts_rel_child]) data_non_incr = {"values": data[0], "label": description_non_incr} data_incr = {"values": data[1], "label": description_incr} data_incr_post = {"values": data[2], "label": description_incr_post} @@ -41,31 +41,31 @@ def distribution_absdiff_plot(title, result_csv_filename, outdir, cutoffs_incr=N df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) # plot incremental vs non-incremental - diff = df.loc[:,utils.header_runtime_parent] - df.loc[:,utils.header_runtime_incr_child] + diff = df.loc[:,utils.runtime_header_parent] - df.loc[:,utils.runtime_header_incr_child] utils.hist_plot(diff, 20, title, 'Improvement in s (incremental compared to non-incremental)', 'Number of Commits', os.path.join(outdir, "figure_absdiff_distr_incr.pdf"), cutoffs_incr) # plot reluctant vs. 
basic incremental - diff = df.loc[:,utils.header_runtime_incr_child] - df.loc[:,utils.header_runtime_incr_posts_rel_child] + diff = df.loc[:,utils.runtime_header_incr_child] - df.loc[:,utils.runtime_header_incr_posts_rel_child] utils.hist_plot(diff, 2, title, 'Improvement in s (reluctant compared to incremental)', 'Number of Commits', os.path.join(outdir, "figure_absdiff_distr_rel.pdf"), cutoffs_rel) def distribution_reldiff_plot(title, result_csv_filename, outdir, cutoffs_incr=None, cutoffs_rel=None): df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) # plot incremental vs non-incremental - print(df[utils.header_runtime_incr_child].astype('float')) - diff = 1 - df[utils.header_runtime_incr_child].astype('float') / df[utils.header_runtime_parent].astype('float') + print(df[utils.runtime_header_incr_child].astype('float')) + diff = 1 - df[utils.runtime_header_incr_child].astype('float') / df[utils.runtime_header_parent].astype('float') utils.hist_plot(diff, 0.01, title, "Relative Improvement in s (incremental compared to non-incremental)", 'Number of Commits', os.path.join(outdir, "figure_reldiff_distr_incr.pdf"), cutoffs_incr) # plot reluctant vs. 
basic incremental - diff = 1 - df.loc[:,utils.header_runtime_incr_posts_rel_child] / df.loc[:,utils.header_runtime_incr_child] + diff = 1 - df.loc[:,utils.runtime_header_incr_posts_rel_child] / df.loc[:,utils.runtime_header_incr_child] utils.hist_plot(diff, 0.005, title, 'Relative Improvement (reluctant compared to incremental)', 'Number of Commits', os.path.join(outdir, "figure_reldiff_distr_rel.pdf"), cutoffs_rel) def paper_efficiency_graphs(dir_results, csv_filename, outdir, filterRelCLOC=False, filterDetectedChanges=False): df = utils.get_cleaned_filtered_data(os.path.join(dir_results,csv_filename), filterRelCLOC=filterRelCLOC, filterDetectedChanges=filterDetectedChanges) - diff1 = 1 - df[utils.header_runtime_incr_child].astype('float') / df[utils.header_runtime_parent].astype('float') - diff2 = 1 - df[utils.header_runtime_incr_posts_child].astype('float') / df[utils.header_runtime_incr_child].astype('float') - diff3 = 1 - df[utils.header_runtime_incr_posts_rel_child].astype('float') / df[utils.header_runtime_incr_posts_child].astype('float') - diff4 = 1 - df[utils.header_runtime_incr_posts_rel_child].astype('float') / df[utils.header_runtime_parent].astype('float') + diff1 = 1 - df[utils.runtime_header_incr_child].astype('float') / df[utils.runtime_header_parent].astype('float') + diff2 = 1 - df[utils.runtime_header_incr_posts_child].astype('float') / df[utils.runtime_header_incr_child].astype('float') + diff3 = 1 - df[utils.runtime_header_incr_posts_rel_child].astype('float') / df[utils.runtime_header_incr_posts_child].astype('float') + diff4 = 1 - df[utils.runtime_header_incr_posts_rel_child].astype('float') / df[utils.runtime_header_parent].astype('float') step = 0.01 for i, diff in enumerate([diff1,diff2,diff3,diff4]): # output textwidth in latex with @@ -97,9 +97,9 @@ def paper_efficiency_graphs(dir_results, csv_filename, outdir, filterRelCLOC=Fal for e in diff: if (xlimleft and e < xlimleft) or (xlimright and e > xlimright): print("excluded", e, "from 
efficiency figure", i) - diff1 = df[utils.header_runtime_incr_child].astype('float') / df[utils.header_runtime_parent].astype('float') - diff2 = df[utils.header_runtime_incr_posts_child].astype('float') / df[utils.header_runtime_parent].astype('float') - diff3 = df[utils.header_runtime_incr_posts_rel_child].astype('float') / df[utils.header_runtime_parent].astype('float') + diff1 = df[utils.runtime_header_incr_child].astype('float') / df[utils.runtime_header_parent].astype('float') + diff2 = df[utils.runtime_header_incr_posts_child].astype('float') / df[utils.runtime_header_parent].astype('float') + diff3 = df[utils.runtime_header_incr_posts_rel_child].astype('float') / df[utils.runtime_header_parent].astype('float') for n, diff in [("incr", diff1), ("+ incr postsolver", diff2), ("+ reluctant", diff3)]: print("80% quantile for", n, "compared to from-scratch analysis:", diff.quantile(q=0.8) * 100, "%") print("75% quantile for", n, "compared to from-scratch analysis:", diff.quantile(q=0.75) * 100, "%") diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index e62b1c411..c8cc214e0 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -29,10 +29,29 @@ import matplotlib.pyplot as plt from matplotlib.ticker import ScalarFormatter -header_runtime_parent = "Runtime for parent commit (non-incremental)" -header_runtime_incr_child = "Runtime for commit (incremental)" -header_runtime_incr_posts_child = "Runtime for commit (incremental + incr postsolver)" -header_runtime_incr_posts_rel_child = "Runtime for commit (incremental + incr postsolver + reluctant)" +runtime_prefix = "Runtime" +analysis_prefix = "Analysis" +solving_prefix = "Solving" + +header_parent = " for parent commit (non-incremental)" +header_incr_child = " for commit (incremental)" +header_incr_posts_child = " for commit (incremental + incr postsolver)" +header_incr_posts_rel_child = " for commit (incremental + incr 
postsolver + reluctant)" + +runtime_header_parent = runtime_prefix + header_parent +runtime_header_incr_child = runtime_prefix + header_incr_child +runtime_header_incr_posts_child = runtime_prefix + header_incr_posts_child +runtime_header_incr_posts_rel_child = runtime_prefix + header_incr_posts_rel_child + +analysis_header_parent = analysis_prefix + header_parent +analysis_header_incr_child = analysis_prefix + header_incr_child +analysis_header_incr_posts_child = analysis_prefix + header_incr_posts_child +analysis_header_incr_posts_rel_child = analysis_prefix + header_incr_posts_rel_child + +solving_header_parent = solving_prefix + header_parent +solving_header_incr_child = solving_prefix + header_incr_child +solving_header_incr_posts_child = solving_prefix + header_incr_posts_child +solving_header_incr_posts_rel_child = solving_prefix + header_incr_posts_rel_child preparelog = "prepare.log" analyzerlog = "analyzer.log" @@ -126,10 +145,14 @@ def find_line(pattern, log): def extract_from_analyzer_log(log): runtime_pattern = 'Default[ ]+(?P[0-9\.]+)s' + analysis_time_pattern = 'analysis[ ]+(?P[0-9\.]+)s' + solving_time_pattern = 'solving[ ]+(?P[0-9\.]+)s' change_info_pattern = 'change_info = { unchanged = (?P[0-9]*); changed = (?P[0-9]*); added = (?P[0-9]*); removed = (?P[0-9]*) }' - r = find_line(runtime_pattern, log) + runtime = find_line(runtime_pattern, log) + analysis_time = find_line(analysis_time_pattern, log) + solving_time = find_line(solving_time_pattern, log) ch = find_line(change_info_pattern, log) or {"unchanged": 0, "changed": 0, "added": 0, "removed": 0} - d = dict(list(r.items()) + list(ch.items())) + d = dict(list(runtime.items()) + list(analysis_time.items()) + list(solving_time.items()) + list(ch.items())) with open(log, "r") as file: num_racewarnings = file.read().count('[Warning][Race]') d["race_warnings"] = num_racewarnings @@ -159,7 +182,7 @@ def get_cleaned_filtered_data(result_csv_file, filterRelCLOC=False, filterDetect # clean dataset 
(remove all rows for which any of the runtime entries is 0 which means that the respective analysis # run failed) - df = df[(df[header_runtime_parent] != 0)] + df = df[(df[header_parent] != 0)] if filterRelCLOC: df = df[df["Relevant changed LOC"] > 0] if filterDetectedChanges: From 203d9dc696c2f338e19cccfcf9afce348349141d Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Fri, 16 Dec 2022 10:00:51 +0100 Subject: [PATCH 40/84] Fix efficiency plot. --- scripts/incremental/benchmarking/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index c8cc214e0..9247c61c7 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -182,7 +182,7 @@ def get_cleaned_filtered_data(result_csv_file, filterRelCLOC=False, filterDetect # clean dataset (remove all rows for which any of the runtime entries is 0 which means that the respective analysis # run failed) - df = df[(df[header_parent] != 0)] + df = df[(df[runtime_header_parent] != 0)] if filterRelCLOC: df = df[df["Relevant changed LOC"] > 0] if filterDetectedChanges: From 9f9a4e2453dbcce77e445dc5997cee71c082fc03 Mon Sep 17 00:00:00 2001 From: stilscher <66023521+stilscher@users.noreply.github.com> Date: Fri, 16 Dec 2022 15:29:17 +0100 Subject: [PATCH 41/84] add boxplot generation as alternative for precision graph --- scripts/incremental/benchmarking/plot.py | 27 +++++++++++++++++++++++ scripts/incremental/benchmarking/utils.py | 14 +++++++++++- 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index 98474d84d..82004f600 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -104,6 +104,33 @@ def paper_efficiency_graphs(dir_results, csv_filename, outdir, filterRelCLOC=Fal print("80% quantile for", n, "compared to from-scratch 
analysis:", diff.quantile(q=0.8) * 100, "%") print("75% quantile for", n, "compared to from-scratch analysis:", diff.quantile(q=0.75) * 100, "%") +def paper_precision_graph_box(results_precision, filename, outdir): + df = utils.get_data_from_json(os.path.join(results_precision, filename)) + + # Plot precision loss after x commits, where x is in {1, 2, 5, 10, 15} + lessprec1 = 'intermediate precision.1.precision.lessprec' + lessprec2 = 'intermediate precision.2.precision.lessprec' + lessprec5 = 'intermediate precision.5.precision.lessprec' + lessprec10 = 'intermediate precision.10.precision.lessprec' + lessprec15 = 'intermediate precision.15.precision.lessprec' + total1 = 'intermediate precision.1.precision.total' + total2 = 'intermediate precision.2.precision.total' + total5 = 'intermediate precision.5.precision.total' + total10 = 'intermediate precision.10.precision.total' + total15 = 'intermediate precision.15.precision.total' + + x = [1,2,5,10,15] + data = [] + lessprec = [lessprec1, lessprec2, lessprec5, lessprec10, lessprec15] + total = [total1, total2, total5, total10, total15] + for l, t in zip(lessprec, total): + ratio = df[l] / df[t] + data.append(ratio.dropna()) + + halftextwidth = 3.3 + size=(halftextwidth,halftextwidth*2/3) + utils.quantile_plot(data, x, "\# Commits", "Share of less precise program points", os.path.join(outdir, "precision_figure.pgf"), size) + def paper_precision_graph(results_precision, filename, outdir): df = utils.get_data_from_json(os.path.join(results_precision, filename)) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index 9247c61c7..b74bc2806 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -307,7 +307,7 @@ def scatter_plot(data, xlabel, ylabel, outfile, size): width, height = size fig.set_size_inches(w=width, h=height) 
colors=['red','azure','blue','brown','chartreuse','chocolate','darkblue','darkgreen','seagreen','green','indigo','orangered','orange','coral','olive','mediumseagreen','grey','teal'] - markers = ['x','+','o','s','p','*','D','d','v','^','<','>','1','2','3','4','H','P'] + #markers = ['x','+','o','s','p','*','D','d','v','^','<','>','1','2','3','4','H','P'] linestyles = ['dashed'] for i, (x, y) in enumerate(data): plt.plot(x,y, marker='x', linewidth=0.4, markersize=1, alpha=0.85, color=colors[i % len(colors)], linestyle=linestyles[i % len(linestyles)]) @@ -316,3 +316,15 @@ def scatter_plot(data, xlabel, ylabel, outfile, size): plt.ylim(bottom=-0.005, top=0.19) plt.tight_layout(pad=0.4) plt.savefig(outfile) + +def quantile_plot(data, x, xlabel, ylabel, outfile, size): + fig = plt.figure() + width, height = size + fig.set_size_inches(w=width, h=height) + plt.boxplot(data, flierprops=dict(markersize=3), positions=x) + plt.xlabel(xlabel) + plt.ylabel(ylabel) + plt.ylim(bottom=-0.005, top=0.19) + plt.tight_layout(pad=0.4) + plt.savefig(outfile) + print(outfile) From 67ff095e6123b91b9511a5d3c67ab7b280ad3bb1 Mon Sep 17 00:00:00 2001 From: stilscher <66023521+stilscher@users.noreply.github.com> Date: Fri, 16 Dec 2022 15:30:39 +0100 Subject: [PATCH 42/84] fix naming --- scripts/incremental/benchmarking/plot.py | 2 +- scripts/incremental/benchmarking/utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index 82004f600..63bcd69cd 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -129,7 +129,7 @@ def paper_precision_graph_box(results_precision, filename, outdir): halftextwidth = 3.3 size=(halftextwidth,halftextwidth*2/3) - utils.quantile_plot(data, x, "\# Commits", "Share of less precise program points", os.path.join(outdir, "precision_figure.pgf"), size) + utils.box_plot(data, x, "\# Commits", "Share of less precise 
program points", os.path.join(outdir, "precision_figure.pgf"), size) def paper_precision_graph(results_precision, filename, outdir): diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index b74bc2806..0491ee450 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -317,7 +317,7 @@ def scatter_plot(data, xlabel, ylabel, outfile, size): plt.tight_layout(pad=0.4) plt.savefig(outfile) -def quantile_plot(data, x, xlabel, ylabel, outfile, size): +def box_plot(data, x, xlabel, ylabel, outfile, size): fig = plt.figure() width, height = size fig.set_size_inches(w=width, h=height) From f358d136d193f67192a0594a5383d1010f7563df Mon Sep 17 00:00:00 2001 From: stilscher <66023521+stilscher@users.noreply.github.com> Date: Fri, 16 Dec 2022 15:34:41 +0100 Subject: [PATCH 43/84] fix ylim --- scripts/incremental/benchmarking/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index 0491ee450..798fbbcc1 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -324,7 +324,6 @@ def box_plot(data, x, xlabel, ylabel, outfile, size): plt.boxplot(data, flierprops=dict(markersize=3), positions=x) plt.xlabel(xlabel) plt.ylabel(ylabel) - plt.ylim(bottom=-0.005, top=0.19) plt.tight_layout(pad=0.4) plt.savefig(outfile) print(outfile) From 1c6f0182fe8a129c1ad874aee3db2793a520fb51 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Sat, 17 Dec 2022 11:19:14 +0100 Subject: [PATCH 44/84] Extract walltime instead of CPU time. This adapts to the changed output format of Goblints runtime stats. 
--- scripts/incremental/benchmarking/utils.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index 798fbbcc1..4a98d8f7e 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -144,9 +144,10 @@ def find_line(pattern, log): return None def extract_from_analyzer_log(log): - runtime_pattern = 'Default[ ]+(?P[0-9\.]+)s' - analysis_time_pattern = 'analysis[ ]+(?P[0-9\.]+)s' - solving_time_pattern = 'solving[ ]+(?P[0-9\.]+)s' + # First comes the cpu time (which is ignored); we look at the walltime. + runtime_pattern = 'Default[ ]+[0-9\.]+s[ ]+(?P[0-9\.]+)s' + analysis_time_pattern = 'analysis[ ]+[0-9\.]+s[ ]+(?P[0-9\.]+)s' + solving_time_pattern = 'solving[ ]+[0-9\.]+s[ ]+(?P[0-9\.]+)s' change_info_pattern = 'change_info = { unchanged = (?P[0-9]*); changed = (?P[0-9]*); added = (?P[0-9]*); removed = (?P[0-9]*) }' runtime = find_line(runtime_pattern, log) analysis_time = find_line(analysis_time_pattern, log) From 621e84dd6990d6e9c34fe4988de91ab9903cfd46 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Sat, 17 Dec 2022 14:02:15 +0100 Subject: [PATCH 45/84] Perform non-incremental run on child commit as well. 
--- .../incremental/benchmarking/efficiency.py | 51 ++++++++++++------- scripts/incremental/benchmarking/plot.py | 18 +++---- scripts/incremental/benchmarking/utils.py | 4 ++ 3 files changed, 46 insertions(+), 27 deletions(-) diff --git a/scripts/incremental/benchmarking/efficiency.py b/scripts/incremental/benchmarking/efficiency.py index 5672f9062..fea267198 100644 --- a/scripts/incremental/benchmarking/efficiency.py +++ b/scripts/incremental/benchmarking/efficiency.py @@ -113,6 +113,13 @@ def analyze_small_commits_in_repo(cwd, outdir, from_c, to_c): add_options = default_options + ['--disable', 'incremental.load', '--enable', 'incremental.save'] utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, parent.hash, outparent, conf_base, add_options, files) + #print('And now analyze', str(commit.hash), 'from scratch.') + outchild_non_incr = os.path.join(outtry, 'child-non-incr') + os.makedirs(outchild_non_incr) + # Do not save in this run to not pollute results + add_options = default_options + ['--disable', 'incremental.load', '--disable', 'incremental.save'] + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchild_non_incr, conf_base, add_options, files) + #print('And now analyze', str(commit.hash), 'incrementally.') outchild = os.path.join(outtry, 'child') os.makedirs(outchild) @@ -120,16 +127,16 @@ def analyze_small_commits_in_repo(cwd, outdir, from_c, to_c): utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchild, conf_base, add_options, files) #print('And again incremental, this time with incremental postsolver') - outchildincrpost = os.path.join(outtry, 'child-incr-post') - os.makedirs(outchildincrpost) + outchild_incr_post = os.path.join(outtry, 'child-incr-post') + os.makedirs(outchild_incr_post) add_options = default_options + ['--enable', 'incremental.load', '--disable', 'incremental.save'] - utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchildincrpost, 
conf_incrpost, add_options, files) + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchild_incr_post, conf_incrpost, add_options, files) #print('And again incremental, this time with incremental postsolver and reluctant') - outchildrel = os.path.join(outtry, 'child-rel') - os.makedirs(outchildrel) + outchild_rel = os.path.join(outtry, 'child-rel') + os.makedirs(outchild_rel) add_options = default_options + ['--enable', 'incremental.load', '--disable', 'incremental.save', '--enable', 'incremental.reluctant.enabled'] - utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchildrel, conf_incrpost, add_options, files) + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, outchild_rel, conf_incrpost, add_options, files) count_analyzed+=1 failed = False @@ -149,18 +156,19 @@ def analyze_small_commits_in_repo(cwd, outdir, from_c, to_c): def collect_data(outdir): data = {"Commit": [], "Failed?": [], "Changed LOC": [], "Relevant changed LOC": [], "Changed/Added/Removed functions": [], - utils.runtime_header_parent: [], utils.runtime_header_incr_child: [], + utils.runtime_header_parent: [], utils.runtime_header_non_incr_child: [], utils.runtime_header_incr_child: [], utils.runtime_header_incr_posts_child: [], utils.runtime_header_incr_posts_rel_child: [], - utils.analysis_header_parent: [], utils.analysis_header_incr_child: [], + utils.analysis_header_parent: [], utils.analysis_header_non_incr_child: [], utils.analysis_header_incr_child: [], utils.analysis_header_incr_posts_child: [], utils.analysis_header_incr_posts_rel_child: [], - utils.solving_header_parent: [], utils.solving_header_incr_child: [], + utils.solving_header_parent: [], utils.solving_header_non_incr_child: [], utils.solving_header_incr_child: [], utils.solving_header_incr_posts_child: [], utils.solving_header_incr_posts_rel_child: [], "Change in number of race warnings": []} for t in os.listdir(outdir): - parentlog = 
os.path.join(outdir, t, 'parent', utils.analyzerlog) - childlog = os.path.join(outdir, t, 'child', utils.analyzerlog) - childpostslog = os.path.join(outdir, t, 'child-incr-post', utils.analyzerlog) - childpostsrellog = os.path.join(outdir, t, 'child-rel', utils.analyzerlog) + parent_log = os.path.join(outdir, t, 'parent', utils.analyzerlog) + child_non_incr_log = os.path.join(outdir, t, 'child-non-incr', utils.analyzerlog) + child_log = os.path.join(outdir, t, 'child', utils.analyzerlog) + child_posts_log = os.path.join(outdir, t, 'child-incr-post', utils.analyzerlog) + child_posts_rel_log = os.path.join(outdir, t, 'child-rel', utils.analyzerlog) commit_prop_log = os.path.join(outdir, t, 'commit_properties.log') t = int(t) commit_prop = json.load(open(commit_prop_log, "r")) @@ -170,14 +178,17 @@ def collect_data(outdir): data["Commit"].append(commit_prop["hash"][:7]) if commit_prop["failed"] == True: data[utils.runtime_header_parent].append(0) + data[utils.runtime_header_non_incr_child].append(0) data[utils.runtime_header_incr_child].append(0) data[utils.runtime_header_incr_posts_child].append(0) data[utils.runtime_header_incr_posts_rel_child].append(0) data[utils.analysis_header_parent].append(0) + data[utils.analysis_header_non_incr_child].append(0) data[utils.analysis_header_incr_child].append(0) data[utils.analysis_header_incr_posts_child].append(0) data[utils.analysis_header_incr_posts_rel_child].append(0) data[utils.solving_header_parent].append(0) + data[utils.solving_header_non_incr_child].append(0) data[utils.solving_header_incr_child].append(0) data[utils.solving_header_incr_posts_child].append(0) data[utils.solving_header_incr_posts_rel_child].append(0) @@ -185,23 +196,27 @@ def collect_data(outdir): data["Changed/Added/Removed functions"].append(0) data["Change in number of race warnings"].append(0) continue - parent_info = utils.extract_from_analyzer_log(parentlog) - child_info = utils.extract_from_analyzer_log(childlog) - child_posts_info = 
utils.extract_from_analyzer_log(childpostslog) - child_posts_rel_info = utils.extract_from_analyzer_log(childpostsrellog) + + parent_info = utils.extract_from_analyzer_log(parent_log) + child_non_incr_info = utils.extract_from_analyzer_log(child_non_incr_log) + child_info = utils.extract_from_analyzer_log(child_log) + child_posts_info = utils.extract_from_analyzer_log(child_posts_log) + child_posts_rel_info = utils.extract_from_analyzer_log(child_posts_rel_log) data["Changed/Added/Removed functions"].append(int(child_info["changed"]) + int(child_info["added"]) + int(child_info["removed"])) data[utils.runtime_header_parent].append(float(parent_info["runtime"])) + data[utils.runtime_header_non_incr_child].append(float(child_non_incr_info["runtime"])) data[utils.runtime_header_incr_child].append(float(child_info["runtime"])) data[utils.runtime_header_incr_posts_child].append(float(child_posts_info["runtime"])) data[utils.runtime_header_incr_posts_rel_child].append(float(child_posts_rel_info["runtime"])) - data[utils.analysis_header_parent].append(float(parent_info["analysis_time"])) + data[utils.analysis_header_non_incr_child].append(float(child_non_incr_info["analysis_time"])) data[utils.analysis_header_incr_child].append(float(child_info["analysis_time"])) data[utils.analysis_header_incr_posts_child].append(float(child_posts_info["analysis_time"])) data[utils.analysis_header_incr_posts_rel_child].append(float(child_posts_rel_info["analysis_time"])) data[utils.solving_header_parent].append(float(parent_info["solving_time"])) + data[utils.solving_header_non_incr_child].append(float(child_non_incr_info["solving_time"])) data[utils.solving_header_incr_child].append(float(child_info["solving_time"])) data[utils.solving_header_incr_posts_child].append(float(child_posts_info["solving_time"])) data[utils.solving_header_incr_posts_rel_child].append(float(child_posts_rel_info["solving_time"])) diff --git a/scripts/incremental/benchmarking/plot.py 
b/scripts/incremental/benchmarking/plot.py index 63bcd69cd..0df4057ae 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -13,7 +13,7 @@ def cummulative_distr_compare2(results_dir, result_csv_filename, figure_dir): outfile_incr_vs_incrrel = "figure_cum_distr_rel.pdf" df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), filterDetectedChanges=True) - data, base = utils.create_cum_data(df, num_bins, [utils.runtime_header_parent, utils.runtime_header_incr_child, utils.runtime_header_incr_posts_rel_child]) + data, base = utils.create_cum_data(df, num_bins, [utils.runtime_header_non_incr_child, utils.runtime_header_incr_child, utils.runtime_header_incr_posts_rel_child]) datanonincr = {"values": data[0], "label": description_non_incr} dataincr = {"values": data[1], "label": description_incr} @@ -30,7 +30,7 @@ def cummulative_distr_all4(results_dir, result_csv_filename, figure_dir): outfile_nonincr_vs_incr = "figure_cum_distr_all3.pdf" df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), filterDetectedChanges=True) - data, base = utils.create_cum_data(df, num_bins, [utils.runtime_header_parent, utils.runtime_header_incr_child, utils.runtime_header_incr_posts_child, utils.runtime_header_incr_posts_rel_child]) + data, base = utils.create_cum_data(df, num_bins, [utils.runtime_header_non_incr_child, utils.runtime_header_incr_child, utils.runtime_header_incr_posts_child, utils.runtime_header_incr_posts_rel_child]) data_non_incr = {"values": data[0], "label": description_non_incr} data_incr = {"values": data[1], "label": description_incr} data_incr_post = {"values": data[2], "label": description_incr_post} @@ -41,7 +41,7 @@ def distribution_absdiff_plot(title, result_csv_filename, outdir, cutoffs_incr=N df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) # plot incremental vs non-incremental - diff = 
df.loc[:,utils.runtime_header_parent] - df.loc[:,utils.runtime_header_incr_child] + diff = df.loc[:,utils.runtime_header_non_incr_child] - df.loc[:,utils.runtime_header_incr_child] utils.hist_plot(diff, 20, title, 'Improvement in s (incremental compared to non-incremental)', 'Number of Commits', os.path.join(outdir, "figure_absdiff_distr_incr.pdf"), cutoffs_incr) # plot reluctant vs. basic incremental @@ -53,7 +53,7 @@ def distribution_reldiff_plot(title, result_csv_filename, outdir, cutoffs_incr=N # plot incremental vs non-incremental print(df[utils.runtime_header_incr_child].astype('float')) - diff = 1 - df[utils.runtime_header_incr_child].astype('float') / df[utils.runtime_header_parent].astype('float') + diff = 1 - df[utils.runtime_header_incr_child].astype('float') / df[utils.runtime_header_non_incr_child].astype('float') utils.hist_plot(diff, 0.01, title, "Relative Improvement in s (incremental compared to non-incremental)", 'Number of Commits', os.path.join(outdir, "figure_reldiff_distr_incr.pdf"), cutoffs_incr) # plot reluctant vs. 
basic incremental @@ -62,10 +62,10 @@ def distribution_reldiff_plot(title, result_csv_filename, outdir, cutoffs_incr=N def paper_efficiency_graphs(dir_results, csv_filename, outdir, filterRelCLOC=False, filterDetectedChanges=False): df = utils.get_cleaned_filtered_data(os.path.join(dir_results,csv_filename), filterRelCLOC=filterRelCLOC, filterDetectedChanges=filterDetectedChanges) - diff1 = 1 - df[utils.runtime_header_incr_child].astype('float') / df[utils.runtime_header_parent].astype('float') + diff1 = 1 - df[utils.runtime_header_incr_child].astype('float') / df[utils.runtime_header_non_incr_child].astype('float') diff2 = 1 - df[utils.runtime_header_incr_posts_child].astype('float') / df[utils.runtime_header_incr_child].astype('float') diff3 = 1 - df[utils.runtime_header_incr_posts_rel_child].astype('float') / df[utils.runtime_header_incr_posts_child].astype('float') - diff4 = 1 - df[utils.runtime_header_incr_posts_rel_child].astype('float') / df[utils.runtime_header_parent].astype('float') + diff4 = 1 - df[utils.runtime_header_incr_posts_rel_child].astype('float') / df[utils.runtime_header_non_incr_child].astype('float') step = 0.01 for i, diff in enumerate([diff1,diff2,diff3,diff4]): # output textwidth in latex with @@ -97,9 +97,9 @@ def paper_efficiency_graphs(dir_results, csv_filename, outdir, filterRelCLOC=Fal for e in diff: if (xlimleft and e < xlimleft) or (xlimright and e > xlimright): print("excluded", e, "from efficiency figure", i) - diff1 = df[utils.runtime_header_incr_child].astype('float') / df[utils.runtime_header_parent].astype('float') - diff2 = df[utils.runtime_header_incr_posts_child].astype('float') / df[utils.runtime_header_parent].astype('float') - diff3 = df[utils.runtime_header_incr_posts_rel_child].astype('float') / df[utils.runtime_header_parent].astype('float') + diff1 = df[utils.runtime_header_incr_child].astype('float') / df[utils.runtime_header_non_incr_child].astype('float') + diff2 = 
df[utils.runtime_header_incr_posts_child].astype('float') / df[utils.runtime_header_non_incr_child].astype('float') + diff3 = df[utils.runtime_header_incr_posts_rel_child].astype('float') / df[utils.runtime_header_non_incr_child].astype('float') for n, diff in [("incr", diff1), ("+ incr postsolver", diff2), ("+ reluctant", diff3)]: print("80% quantile for", n, "compared to from-scratch analysis:", diff.quantile(q=0.8) * 100, "%") print("75% quantile for", n, "compared to from-scratch analysis:", diff.quantile(q=0.75) * 100, "%") diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index 4a98d8f7e..79d5e9027 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -34,21 +34,25 @@ solving_prefix = "Solving" header_parent = " for parent commit (non-incremental)" +header_non_incr_child = " for commit (non-incremental)" header_incr_child = " for commit (incremental)" header_incr_posts_child = " for commit (incremental + incr postsolver)" header_incr_posts_rel_child = " for commit (incremental + incr postsolver + reluctant)" runtime_header_parent = runtime_prefix + header_parent +runtime_header_non_incr_child = runtime_prefix + header_non_incr_child runtime_header_incr_child = runtime_prefix + header_incr_child runtime_header_incr_posts_child = runtime_prefix + header_incr_posts_child runtime_header_incr_posts_rel_child = runtime_prefix + header_incr_posts_rel_child analysis_header_parent = analysis_prefix + header_parent +analysis_header_non_incr_child = analysis_prefix + header_non_incr_child analysis_header_incr_child = analysis_prefix + header_incr_child analysis_header_incr_posts_child = analysis_prefix + header_incr_posts_child analysis_header_incr_posts_rel_child = analysis_prefix + header_incr_posts_rel_child solving_header_parent = solving_prefix + header_parent +solving_header_non_incr_child = solving_prefix + header_non_incr_child solving_header_incr_child = 
solving_prefix + header_incr_child solving_header_incr_posts_child = solving_prefix + header_incr_posts_child solving_header_incr_posts_rel_child = solving_prefix + header_incr_posts_rel_child From 7eba011e8904dc59cbfd755c370cf69c6586473c Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Sun, 18 Dec 2022 10:09:38 +0100 Subject: [PATCH 46/84] Add script to run all efficiency scripts. --- .../incremental/benchmarking/run_efficiency.sh | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100755 scripts/incremental/benchmarking/run_efficiency.sh diff --git a/scripts/incremental/benchmarking/run_efficiency.sh b/scripts/incremental/benchmarking/run_efficiency.sh new file mode 100755 index 000000000..e6a284312 --- /dev/null +++ b/scripts/incremental/benchmarking/run_efficiency.sh @@ -0,0 +1,17 @@ +#!/bin/bash +ANALYZER_DIR=$1 + +#Number of cores to be used +NCORES=$2 + +echo "Starting run on figlet" +python3 efficiency.py $ANALYZER_DIR figlet $NCORES +mv result_efficiency result_efficiency_figlet + +echo "Starting run on chrony" +python3 efficiency.py $ANALYZER_DIR chrony $NCORES +mv result_efficiency result_efficiency_chrony + +echo "Starting run on zstd" +python3 efficiency.py $ANALYZER_DIR zstd $NCORES +mv result_efficiency result_efficiency_zstd From d19d7cb4608d8532c3b674eadfc0bce69129a5a9 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Mon, 19 Dec 2022 15:46:12 +0100 Subject: [PATCH 47/84] Change line style for efficiency plots, text for configuration description. 
--- scripts/incremental/benchmarking/utils.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index 79d5e9027..e97cd8846 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -236,11 +236,18 @@ def cummulative_distr_plot(data_sets, base, figure_dir, outfile, figsize=None, t else: plt.figure() min = sys.maxsize + + linestyle_tuple = [ + "solid", + "--", + (0, (10, 1)), # long dash + (0, (3, 1, 1, 1)) # dash dots + ] for d in data_sets: min_d = d["values"].min() if min_d < min: min = min_d - plt.plot(d["values"], base, label=d["label"]) + plt.plot(d["values"], base, linestyle=linestyle_tuple.pop(0), label=d["label"]) plt.xlabel('Number of Commits') if logscale: plt.ylabel('Runtime in s ($log_{2}$ scale)') From fa658ffea912a7e880419fb7d37b2aa53e9c2355 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Tue, 20 Dec 2022 12:15:04 +0100 Subject: [PATCH 48/84] Add run_precision script. 
--- .../incremental/benchmarking/run_precision.sh | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100755 scripts/incremental/benchmarking/run_precision.sh diff --git a/scripts/incremental/benchmarking/run_precision.sh b/scripts/incremental/benchmarking/run_precision.sh new file mode 100755 index 000000000..9e86c8682 --- /dev/null +++ b/scripts/incremental/benchmarking/run_precision.sh @@ -0,0 +1,17 @@ +#!/bin/bash +ANALYZER_DIR=$1 + +#Number of cores to be used +NCORES=$2 + +echo "Starting run on figlet" +python3 precision.py $ANALYZER_DIR figlet $NCORES +mv result_precision result_precision_figlet + +echo "Starting run on zstd" +python3 precision.py $ANALYZER_DIR zstd $NCORES +mv result_precision result_precision_zstd + +echo "Starting run on chrony" +python3 precision.py $ANALYZER_DIR chrony $NCORES +mv result_precision result_precision_chrony From 24d7a9b2f6d5a21efb668bbaab78b22c0630b167 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Wed, 21 Dec 2022 15:20:04 +0100 Subject: [PATCH 49/84] Use chrony configuration for chrony. 
--- scripts/incremental/benchmarking/projects.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/incremental/benchmarking/projects.py b/scripts/incremental/benchmarking/projects.py index f04b63aec..80315ebbe 100644 --- a/scripts/incremental/benchmarking/projects.py +++ b/scripts/incremental/benchmarking/projects.py @@ -71,8 +71,8 @@ def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin url="https://github.com/mlichvar/chrony.git", repo_name="chrony", build_compdb="../build/build_compdb_chrony.sh", - conf_base=os.path.join("custom", "figlet"), - conf_incrpost=os.path.join("custom", "figlet-incrpostsolver"), + conf_base=os.path.join("custom", "chrony"), + conf_incrpost=os.path.join("custom", "chrony-incrpostsolver"), begin=datetime.datetime(2020, 1, 1), to=datetime.datetime(2022, 10, 10), diff_exclude=[], From bfc219a31a99474716f063becf76bb96590e2a04 Mon Sep 17 00:00:00 2001 From: stilscher <66023521+stilscher@users.noreply.github.com> Date: Fri, 23 Dec 2022 16:26:52 +0100 Subject: [PATCH 50/84] reset current time to static value for chrony --- scripts/incremental/build/build_compdb_chrony.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/incremental/build/build_compdb_chrony.sh b/scripts/incremental/build/build_compdb_chrony.sh index 0ba4a563c..f222e093a 100755 --- a/scripts/incremental/build/build_compdb_chrony.sh +++ b/scripts/incremental/build/build_compdb_chrony.sh @@ -4,3 +4,4 @@ git clean -fdx make -j 1 chronyd | tee build.log compiledb --parse build.log # ./configure && bear -- make chronyd +sed -i -E 's/#define NTP_ERA_SPLIT \([0-9]+LL/#define NTP_ERA_SPLIT \(1671796396LL/' config.h From 294cf9a1dc062347d92df0cfc8eece0320420513 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Sun, 25 Dec 2022 17:25:24 +0100 Subject: [PATCH 51/84] Precision: Run figlet and zstd. 
--- scripts/incremental/benchmarking/run_precision.sh | 4 ---- 1 file changed, 4 deletions(-) diff --git a/scripts/incremental/benchmarking/run_precision.sh b/scripts/incremental/benchmarking/run_precision.sh index 9e86c8682..e94a3d580 100755 --- a/scripts/incremental/benchmarking/run_precision.sh +++ b/scripts/incremental/benchmarking/run_precision.sh @@ -11,7 +11,3 @@ mv result_precision result_precision_figlet echo "Starting run on zstd" python3 precision.py $ANALYZER_DIR zstd $NCORES mv result_precision result_precision_zstd - -echo "Starting run on chrony" -python3 precision.py $ANALYZER_DIR chrony $NCORES -mv result_precision result_precision_chrony From 6ad3b2013865e796fb47f4e35500a3f350c4f9ac Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Wed, 28 Dec 2022 15:13:05 +0100 Subject: [PATCH 52/84] Change ymax for precision graphs: 0.3 --- scripts/incremental/benchmarking/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index e97cd8846..a527f57bf 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -325,7 +325,7 @@ def scatter_plot(data, xlabel, ylabel, outfile, size): plt.plot(x,y, marker='x', linewidth=0.4, markersize=1, alpha=0.85, color=colors[i % len(colors)], linestyle=linestyles[i % len(linestyles)]) plt.xlabel(xlabel) plt.ylabel(ylabel) - plt.ylim(bottom=-0.005, top=0.19) + plt.ylim(bottom=-0.005, top=0.3) plt.tight_layout(pad=0.4) plt.savefig(outfile) From 696506f5450a0463b4312367a0dda3de30c99955 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Fri, 30 Dec 2022 12:26:04 +0100 Subject: [PATCH 53/84] Change text for different configurations to enumeration. 
--- scripts/incremental/benchmarking/plot.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index 0df4057ae..da4ef09fb 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -2,10 +2,10 @@ import os import shutil -description_non_incr = "Non-incremental analysis" -description_incr = "Incremental analysis" -description_incr_post = "Incremental analysis with incremental postsolver" -description_incr_rel ="Reluctant incremental analysis with incremental postsolver" +description_non_incr = "(1)" +description_incr = "(2)" +description_incr_post = "(3)" +description_incr_rel ="(4)" def cummulative_distr_compare2(results_dir, result_csv_filename, figure_dir): num_bins = 2000 From e671d9cbcb6c0819dde2f0f03432d511d672816d Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Fri, 30 Dec 2022 17:00:06 +0100 Subject: [PATCH 54/84] Update size of fonts for precision graph. --- scripts/incremental/benchmarking/utils.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index a527f57bf..da2e7642a 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -17,13 +17,13 @@ 'pgf.rcfonts': False, 'text.usetex': True, 'font.family': 'serif', - 'font.size': 9, - 'axes.titlesize': 9, - 'legend.fontsize': 9, - 'figure.titlesize': 9, + 'font.size': 6, + 'axes.titlesize': 6, + 'legend.fontsize': 6, + 'figure.titlesize': 7, 'figure.dpi': 300, - 'xtick.labelsize': 9, - 'ytick.labelsize': 9, + 'xtick.labelsize': 6, + 'ytick.labelsize': 6, }) import matplotlib.pyplot as plt From 6333415e9b5b5157b3b462b673726faa93c97488 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Sat, 31 Dec 2022 18:40:10 +0100 Subject: [PATCH 55/84] Precision plot: Add ticks at every commit. 
--- scripts/incremental/benchmarking/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index da2e7642a..4dddc6c43 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -11,6 +11,8 @@ import numpy as np import brokenaxes import matplotlib as mpl +import matplotlib.ticker as mticker + mpl.use("pgf") mpl.rcParams.update({ "pgf.texsystem": "pdflatex", @@ -325,6 +327,8 @@ def scatter_plot(data, xlabel, ylabel, outfile, size): plt.plot(x,y, marker='x', linewidth=0.4, markersize=1, alpha=0.85, color=colors[i % len(colors)], linestyle=linestyles[i % len(linestyles)]) plt.xlabel(xlabel) plt.ylabel(ylabel) + plt.gca().xaxis.set_major_locator(mticker.MultipleLocator(1)) + # plt.xticks([1,2,5,10,15]) plt.ylim(bottom=-0.005, top=0.3) plt.tight_layout(pad=0.4) plt.savefig(outfile) From b3838d378b0d0919452860597e65b7c01ebca962 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 15 Jun 2023 10:54:43 +0200 Subject: [PATCH 56/84] Move confs for figlet, chrony, zstd into bench repo, adapt scripts for it. 
--- .../conf/chrony-incrpostsolver.json | 114 ++++++++++++++++ .../incremental/benchmarking/conf/chrony.json | 114 ++++++++++++++++ .../conf/figlet-incrpostsolver.json | 103 +++++++++++++++ .../incremental/benchmarking/conf/figlet.json | 103 +++++++++++++++ .../conf/zstd-race-incrpostsolver.json | 122 ++++++++++++++++++ .../benchmarking/conf/zstd-race.json | 122 ++++++++++++++++++ .../incremental/benchmarking/efficiency.py | 2 + scripts/incremental/benchmarking/projects.py | 16 +-- scripts/incremental/benchmarking/utils.py | 6 +- 9 files changed, 691 insertions(+), 11 deletions(-) create mode 100644 scripts/incremental/benchmarking/conf/chrony-incrpostsolver.json create mode 100644 scripts/incremental/benchmarking/conf/chrony.json create mode 100644 scripts/incremental/benchmarking/conf/figlet-incrpostsolver.json create mode 100644 scripts/incremental/benchmarking/conf/figlet.json create mode 100644 scripts/incremental/benchmarking/conf/zstd-race-incrpostsolver.json create mode 100644 scripts/incremental/benchmarking/conf/zstd-race.json diff --git a/scripts/incremental/benchmarking/conf/chrony-incrpostsolver.json b/scripts/incremental/benchmarking/conf/chrony-incrpostsolver.json new file mode 100644 index 000000000..8a97510dd --- /dev/null +++ b/scripts/incremental/benchmarking/conf/chrony-incrpostsolver.json @@ -0,0 +1,114 @@ +{ + "ana": { + "activated": [ + "expRelation", + "base", + "threadid", + "threadflag", + "threadreturn", + "escape", + "mutexEvents", + "mutex", + "access", + "mallocWrapper", + "mhp", + "symb_locks", + "var_eq", + "mallocFresh", + "race" + ], + "ctx_insens": [ + "var_eq" + ], + "base": { + "privatization": "none", + "context": { + "non-ptr": false + } + }, + "thread": { + "domain": "plain", + "include-node": false + }, + "race": { + "free": false + }, + "dead-code": { + "lines": true + }, + "int": { + "interval": true, + "def_exc": true + }, + "malloc": { + "wrappers": [ + "Malloc", + "Realloc", + "Malloc2", + "Realloc2", + 
"ARR_CreateInstance", + "realloc_array", + "ARR_GetNewElement" + ] + } + }, + "sem": { + "unknown_function": { + "spawn": false, + "invalidate": { + "globals": false, + "args": false + } + } + }, + "solvers": { + "td3": { + "restart": { + "wpoint": { + "enabled": false + } + } + } + }, + "exp": { + "earlyglobs": true + }, + "cil": { + "merge": { + "inlines": false + } + }, + "dbg": { + "timing": { + "enabled": true + } + }, + "warn": { + "assert": false, + "behavior": false, + "integer": false, + "cast": false, + "race": true, + "deadcode": true, + "analyzer": false, + "unsound": true, + "imprecise": false, + "unknown": false, + "error": false, + "warning": true, + "info": false, + "debug": false, + "success": true + }, + "incremental": { + "postsolver": { + "enabled": true + }, + "restart": { + "sided": { + "enabled": false + }, + "write-only": true + } + } +} \ No newline at end of file diff --git a/scripts/incremental/benchmarking/conf/chrony.json b/scripts/incremental/benchmarking/conf/chrony.json new file mode 100644 index 000000000..a2fe392e4 --- /dev/null +++ b/scripts/incremental/benchmarking/conf/chrony.json @@ -0,0 +1,114 @@ +{ + "ana": { + "activated": [ + "expRelation", + "base", + "threadid", + "threadflag", + "threadreturn", + "escape", + "mutexEvents", + "mutex", + "access", + "mallocWrapper", + "mhp", + "symb_locks", + "var_eq", + "mallocFresh", + "race" + ], + "ctx_insens": [ + "var_eq" + ], + "base": { + "privatization": "none", + "context": { + "non-ptr": false + } + }, + "thread": { + "domain": "plain", + "include-node": false + }, + "race": { + "free": false + }, + "dead-code": { + "lines": true + }, + "int": { + "interval": true, + "def_exc": true + }, + "malloc": { + "wrappers": [ + "Malloc", + "Realloc", + "Malloc2", + "Realloc2", + "ARR_CreateInstance", + "realloc_array", + "ARR_GetNewElement" + ] + } + }, + "sem": { + "unknown_function": { + "spawn": false, + "invalidate": { + "globals": false, + "args": false + } + } + }, + "solvers": { + 
"td3": { + "restart": { + "wpoint": { + "enabled": false + } + } + } + }, + "exp": { + "earlyglobs": true + }, + "cil": { + "merge": { + "inlines": false + } + }, + "dbg": { + "timing": { + "enabled": true + } + }, + "warn": { + "assert": false, + "behavior": false, + "integer": false, + "cast": false, + "race": true, + "deadcode": true, + "analyzer": false, + "unsound": true, + "imprecise": false, + "unknown": false, + "error": false, + "warning": true, + "info": false, + "debug": false, + "success": true + }, + "incremental": { + "postsolver": { + "enabled": false + }, + "restart": { + "sided": { + "enabled": false + }, + "write-only": true + } + } +} \ No newline at end of file diff --git a/scripts/incremental/benchmarking/conf/figlet-incrpostsolver.json b/scripts/incremental/benchmarking/conf/figlet-incrpostsolver.json new file mode 100644 index 000000000..46ad26fce --- /dev/null +++ b/scripts/incremental/benchmarking/conf/figlet-incrpostsolver.json @@ -0,0 +1,103 @@ +{ + "ana": { + "activated": [ + "expRelation", + "base", + "threadid", + "threadflag", + "threadreturn", + "escape", + "mutexEvents", + "mutex", + "access", + "mallocWrapper", + "mhp", + "symb_locks", + "var_eq", + "mallocFresh", + "race" + ], + "ctx_insens": [ + "var_eq" + ], + "base": { + "privatization": "none", + "context": { + "non-ptr": false + } + }, + "thread": { + "domain": "plain", + "include-node": false + }, + "race": { + "free": false + }, + "dead-code": { + "lines": true + }, + "int": { + "interval": true, + "def_exc": true + } + }, + "sem": { + "unknown_function": { + "spawn": false, + "invalidate": { + "globals": false, + "args": false + } + } + }, + "solvers": { + "td3": { + "restart": { + "wpoint": { + "enabled": false + } + } + } + }, + "exp": { + "earlyglobs": true + }, + "cil": { + "merge": { + "inlines": false + } + }, + "dbg": { + "timing": { + "enabled": true + } + }, + "warn": { + "assert": false, + "behavior": false, + "integer": false, + "cast": false, + "race": true, + 
"deadcode": true, + "analyzer": false, + "unsound": true, + "imprecise": false, + "unknown": false, + "error": false, + "warning": true, + "info": false, + "debug": false, + "success": true + }, + "incremental": { + "postsolver": { + "enabled": true + }, + "restart": { + "sided": { + "enabled": false + }, + "write-only": true + } + } +} \ No newline at end of file diff --git a/scripts/incremental/benchmarking/conf/figlet.json b/scripts/incremental/benchmarking/conf/figlet.json new file mode 100644 index 000000000..3e80b8ffe --- /dev/null +++ b/scripts/incremental/benchmarking/conf/figlet.json @@ -0,0 +1,103 @@ +{ + "ana": { + "activated": [ + "expRelation", + "base", + "threadid", + "threadflag", + "threadreturn", + "escape", + "mutexEvents", + "mutex", + "access", + "mallocWrapper", + "mhp", + "symb_locks", + "var_eq", + "mallocFresh", + "race" + ], + "ctx_insens": [ + "var_eq" + ], + "base": { + "privatization": "none", + "context": { + "non-ptr": false + } + }, + "thread": { + "domain": "plain", + "include-node": false + }, + "race": { + "free": false + }, + "dead-code": { + "lines": true + }, + "int": { + "interval": true, + "def_exc": true + } + }, + "sem": { + "unknown_function": { + "spawn": false, + "invalidate": { + "globals": false, + "args": false + } + } + }, + "solvers": { + "td3": { + "restart": { + "wpoint": { + "enabled": false + } + } + } + }, + "exp": { + "earlyglobs": true + }, + "cil": { + "merge": { + "inlines": false + } + }, + "dbg": { + "timing": { + "enabled": true + } + }, + "warn": { + "assert": false, + "behavior": false, + "integer": false, + "cast": false, + "race": true, + "deadcode": true, + "analyzer": false, + "unsound": true, + "imprecise": false, + "unknown": false, + "error": false, + "warning": true, + "info": false, + "debug": false, + "success": true + }, + "incremental": { + "postsolver": { + "enabled": false + }, + "restart": { + "sided": { + "enabled": false + }, + "write-only": true + } + } +} \ No newline at end of file 
diff --git a/scripts/incremental/benchmarking/conf/zstd-race-incrpostsolver.json b/scripts/incremental/benchmarking/conf/zstd-race-incrpostsolver.json new file mode 100644 index 000000000..dbe858b98 --- /dev/null +++ b/scripts/incremental/benchmarking/conf/zstd-race-incrpostsolver.json @@ -0,0 +1,122 @@ +{ + "ana": { + "activated": [ + "expRelation", + "base", + "threadid", + "threadflag", + "threadreturn", + "escape", + "mutexEvents", + "mutex", + "access", + "mallocWrapper", + "mhp", + "symb_locks", + "var_eq", + "mallocFresh", + "race" + ], + "ctx_insens": [ + "var_eq" + ], + "base": { + "privatization": "none", + "context": { + "non-ptr": false + } + }, + "thread": { + "domain": "plain", + "include-node": false + }, + "malloc": { + "wrappers": [ + "ZSTD_customMalloc", + "ZSTD_customCalloc" + ] + }, + "race": { + "free": false + }, + "dead-code": { + "lines": true + }, + "int": { + "interval": true, + "def_exc": true + } + }, + "sem": { + "unknown_function": { + "spawn": false, + "invalidate": { + "globals": false, + "args": false + } + } + }, + "solvers": { + "td3": { + "restart": { + "wpoint": { + "enabled": false + } + } + } + }, + "exp": { + "earlyglobs": true, + "extraspecials": [ + "ZSTD_customMalloc", + "ZSTD_customCalloc", + "ZSTD_customFree" + ] + }, + "pre": { + "cppflags": [ + "-DZSTD_NO_INTRINSICS", + "-D_FORTIFY_SOURCE=0", + "-DGOBLINT_NO_ASSERT", + "-DGOBLINT_NO_BSEARCH" + ] + }, + "cil": { + "merge": { + "inlines": false + } + }, + "dbg": { + "timing": { + "enabled": true + } + }, + "warn": { + "assert": false, + "behavior": false, + "integer": false, + "cast": false, + "race": true, + "deadcode": true, + "analyzer": false, + "unsound": true, + "imprecise": false, + "unknown": false, + "error": false, + "warning": true, + "info": false, + "debug": false, + "success": true + }, + "incremental": { + "postsolver": { + "enabled": true + }, + "restart": { + "sided": { + "enabled": false + }, + "write-only": true + } + } +} \ No newline at end of file 
diff --git a/scripts/incremental/benchmarking/conf/zstd-race.json b/scripts/incremental/benchmarking/conf/zstd-race.json new file mode 100644 index 000000000..b3c4a49ac --- /dev/null +++ b/scripts/incremental/benchmarking/conf/zstd-race.json @@ -0,0 +1,122 @@ +{ + "ana": { + "activated": [ + "expRelation", + "base", + "threadid", + "threadflag", + "threadreturn", + "escape", + "mutexEvents", + "mutex", + "access", + "mallocWrapper", + "mhp", + "symb_locks", + "var_eq", + "mallocFresh", + "race" + ], + "ctx_insens": [ + "var_eq" + ], + "base": { + "privatization": "none", + "context": { + "non-ptr": false + } + }, + "thread": { + "domain": "plain", + "include-node": false + }, + "malloc": { + "wrappers": [ + "ZSTD_customMalloc", + "ZSTD_customCalloc" + ] + }, + "race": { + "free": false + }, + "dead-code": { + "lines": true + }, + "int": { + "interval": true, + "def_exc": true + } + }, + "sem": { + "unknown_function": { + "spawn": false, + "invalidate": { + "globals": false, + "args": false + } + } + }, + "solvers": { + "td3": { + "restart": { + "wpoint": { + "enabled": false + } + } + } + }, + "exp": { + "earlyglobs": true, + "extraspecials": [ + "ZSTD_customMalloc", + "ZSTD_customCalloc", + "ZSTD_customFree" + ] + }, + "pre": { + "cppflags": [ + "-DZSTD_NO_INTRINSICS", + "-D_FORTIFY_SOURCE=0", + "-DGOBLINT_NO_ASSERT", + "-DGOBLINT_NO_BSEARCH" + ] + }, + "cil": { + "merge": { + "inlines": false + } + }, + "dbg": { + "timing": { + "enabled": true + } + }, + "warn": { + "assert": false, + "behavior": false, + "integer": false, + "cast": false, + "race": true, + "deadcode": true, + "analyzer": false, + "unsound": true, + "imprecise": false, + "unknown": false, + "error": false, + "warning": true, + "info": false, + "debug": false, + "success": true + }, + "incremental": { + "postsolver": { + "enabled": false + }, + "restart": { + "sided": { + "enabled": false + }, + "write-only": true + } + } +} \ No newline at end of file diff --git 
a/scripts/incremental/benchmarking/efficiency.py b/scripts/incremental/benchmarking/efficiency.py index fea267198..16e3b3bcc 100644 --- a/scripts/incremental/benchmarking/efficiency.py +++ b/scripts/incremental/benchmarking/efficiency.py @@ -36,7 +36,9 @@ repo_name = project.repo_name build_compdb = project.build_compdb conf_base = project.conf_base +conf_base = os.path.join(os.getcwd(), conf_base + ".json") conf_incrpost = project.conf_incrpost +conf_incrpost = os.path.join(os.getcwd(), conf_incrpost + ".json") begin = project.begin to = project.to files = project.files diff --git a/scripts/incremental/benchmarking/projects.py b/scripts/incremental/benchmarking/projects.py index 80315ebbe..ba2125cd5 100644 --- a/scripts/incremental/benchmarking/projects.py +++ b/scripts/incremental/benchmarking/projects.py @@ -30,8 +30,8 @@ def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin url = "https://github.com/sqlite/sqlite", repo_name = "sqlite", build_compdb = "../build/build_compdb_sqlite.sh", - conf_base = os.path.join("custom", "sqlite-minimal"), # very minimal: "zstd-minimal" - conf_incrpost = os.path.join("custom", "sqlite-minimal-incrpostsolver"), + conf_base = os.path.join("conf", "sqlite-minimal"), # very minimal: "zstd-minimal" + conf_incrpost = os.path.join("conf", "sqlite-minimal-incrpostsolver"), begin = datetime.datetime(2021,8,1), to = datetime.datetime(2021,8,10), # minimal subset: datetime(2021,8,4) diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"], @@ -43,8 +43,8 @@ def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin url = "https://github.com/facebook/zstd", repo_name = "zstd", build_compdb = "../build/build_compdb_zstd.sh", - conf_base = os.path.join("custom", "zstd-race"), # very minimal: "zstd-minimal" - conf_incrpost = os.path.join("custom", "zstd-race-incrpostsolver"), + conf_base = os.path.join("conf", "zstd-race"), # very minimal: "zstd-minimal" + conf_incrpost = 
os.path.join("conf", "zstd-race-incrpostsolver"), begin = datetime.datetime(2021,8,1), # very minimal: "zstd-minimal" to = datetime.datetime(2022,2,1), # minimal subset: datetime(2021,8,4) diff_exclude = ["build", "doc", "examples", "tests", "zlibWrapper", "contrib"], @@ -56,8 +56,8 @@ def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin url = "https://github.com/cmatsuoka/figlet", repo_name = "figlet", build_compdb = None, - conf_base = os.path.join("custom", "figlet"), - conf_incrpost = os.path.join("custom", "figlet-incrpostsolver"), + conf_base = os.path.join("conf", "figlet"), + conf_incrpost = os.path.join("conf", "figlet-incrpostsolver"), begin = datetime.datetime(2010,1,1), to = datetime.datetime(2022,10,10), diff_exclude = [], @@ -71,8 +71,8 @@ def __init__(self, url, repo_name, build_compdb, conf_base, conf_incrpost, begin url="https://github.com/mlichvar/chrony.git", repo_name="chrony", build_compdb="../build/build_compdb_chrony.sh", - conf_base=os.path.join("custom", "chrony"), - conf_incrpost=os.path.join("custom", "chrony-incrpostsolver"), + conf_base=os.path.join("conf", "chrony"), + conf_incrpost=os.path.join("conf", "chrony-incrpostsolver"), begin=datetime.datetime(2020, 1, 1), to=datetime.datetime(2022, 10, 10), diff_exclude=[], diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index 4dddc6c43..b2c8a441a 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -70,7 +70,7 @@ def reset_incremental_data(incr_data_dir): def analyze_commit(analyzer_dir, gr : Git, repo_path, build_compdb, commit_hash, outdir, conf, extra_options, files): gr.checkout(commit_hash) - conf_path = os.path.join(analyzer_dir, 'conf', conf + '.json') + conf_path = conf # Creat the analyze command file_list = [] @@ -79,7 +79,7 @@ def append_to_repo_path(file): return os.path.join(repo_path, file) file_list = list(map(append_to_repo_path, files)) - 
analyze_command = [os.path.join(analyzer_dir, 'goblint'), '--conf', os.path.join(analyzer_dir, 'conf', conf + '.json'), *file_list, *extra_options] + analyze_command = [os.path.join(analyzer_dir, 'goblint'), '--conf', conf_path, *file_list, *extra_options] # If the list of files was empty, we pass the repo_path to goblint if not files: analyze_command.append(repo_path) @@ -117,7 +117,7 @@ def append_to_repo_path(file): outfile.close() def compare_runs(analyzer_dir, dummy_c_file, outdir, conf, compare_data_1, compare_data_2): - options = ['--conf', os.path.join(analyzer_dir, 'conf', conf + '.json'), '--disable', 'warn.warning', '--disable', 'warn.race', '--disable', 'dbg.compare_runs.diff', '--disable', 'dbg.compare_runs.eqsys', '--enable', 'dbg.compare_runs.node', '--compare_runs', compare_data_1, compare_data_2] + options = ['--conf', conf, '--disable', 'warn.warning', '--disable', 'warn.race', '--disable', 'dbg.compare_runs.diff', '--disable', 'dbg.compare_runs.eqsys', '--enable', 'dbg.compare_runs.node', '--compare_runs', compare_data_1, compare_data_2] analyze_command = [os.path.join(analyzer_dir, 'goblint'), *options, dummy_c_file] with open(os.path.join(outdir, comparelog), "w+") as outfile: subprocess.run(analyze_command, check=True, stdout=outfile, stderr=subprocess.STDOUT) From e3db195806c34790de9884f9ee83e26056165b53 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 15 Jun 2023 10:57:36 +0200 Subject: [PATCH 57/84] Relax constraint on numpy, to make constraints satisfiable, add compiledb as requirement. 
--- scripts/incremental/benchmarking/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/incremental/benchmarking/requirements.txt b/scripts/incremental/benchmarking/requirements.txt index 7d705bd6a..29e5c8733 100644 --- a/scripts/incremental/benchmarking/requirements.txt +++ b/scripts/incremental/benchmarking/requirements.txt @@ -1,7 +1,8 @@ brokenaxes==0.5.0 matplotlib==3.5.1 -numpy==1.19.5 +numpy>=1.19.5 pandas==1.4.1 psutil==5.9.0 PyDriller==2.1 pytz==2021.1 +compiledb>=0.10.1 From a6d27d9b59029c6f20e1d39b309e6b01d74cf0f0 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 15 Jun 2023 18:05:35 +0200 Subject: [PATCH 58/84] Refactor collect_data to reduce redundancy. --- .../incremental/benchmarking/efficiency.py | 49 +++++++++---------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/scripts/incremental/benchmarking/efficiency.py b/scripts/incremental/benchmarking/efficiency.py index 16e3b3bcc..1cf0f824e 100644 --- a/scripts/incremental/benchmarking/efficiency.py +++ b/scripts/incremental/benchmarking/efficiency.py @@ -199,31 +199,30 @@ def collect_data(outdir): data["Change in number of race warnings"].append(0) continue - parent_info = utils.extract_from_analyzer_log(parent_log) - child_non_incr_info = utils.extract_from_analyzer_log(child_non_incr_log) - child_info = utils.extract_from_analyzer_log(child_log) - child_posts_info = utils.extract_from_analyzer_log(child_posts_log) - child_posts_rel_info = utils.extract_from_analyzer_log(child_posts_rel_log) - data["Changed/Added/Removed functions"].append(int(child_info["changed"]) + int(child_info["added"]) + int(child_info["removed"])) - data[utils.runtime_header_parent].append(float(parent_info["runtime"])) - data[utils.runtime_header_non_incr_child].append(float(child_non_incr_info["runtime"])) - data[utils.runtime_header_incr_child].append(float(child_info["runtime"])) - 
data[utils.runtime_header_incr_posts_child].append(float(child_posts_info["runtime"])) - data[utils.runtime_header_incr_posts_rel_child].append(float(child_posts_rel_info["runtime"])) - - data[utils.analysis_header_parent].append(float(parent_info["analysis_time"])) - data[utils.analysis_header_non_incr_child].append(float(child_non_incr_info["analysis_time"])) - data[utils.analysis_header_incr_child].append(float(child_info["analysis_time"])) - data[utils.analysis_header_incr_posts_child].append(float(child_posts_info["analysis_time"])) - data[utils.analysis_header_incr_posts_rel_child].append(float(child_posts_rel_info["analysis_time"])) - - data[utils.solving_header_parent].append(float(parent_info["solving_time"])) - data[utils.solving_header_non_incr_child].append(float(child_non_incr_info["solving_time"])) - data[utils.solving_header_incr_child].append(float(child_info["solving_time"])) - data[utils.solving_header_incr_posts_child].append(float(child_posts_info["solving_time"])) - data[utils.solving_header_incr_posts_rel_child].append(float(child_posts_rel_info["solving_time"])) - - data["Change in number of race warnings"].append(int(child_info["race_warnings"] - int(parent_info["race_warnings"]))) + logs = [parent_log, child_non_incr_log, child_log, child_posts_log, child_posts_rel_log] + headers = [utils.header_parent, utils.header_non_incr_child, utils.header_incr_child, utils.header_incr_posts_child, utils.header_incr_posts_rel_child] + infos = list(map(utils.extract_from_analyzer_log, logs)) + + data["Changed/Added/Removed functions"].append(int(infos[1]["changed"]) + int(infos[1]["added"]) + int(infos[1]["removed"])) + + field_prefixes = [utils.runtime_prefix, utils.analysis_prefix, utils.solving_prefix] + field_indexes = ["runtime", "analysis_time", "solving_time"] + + for field in range(field_indexes.__len__()): + header_prefix = field_prefixes[field] + field_index = field_indexes[field] + for config in range(logs.__len__()): + header = header_prefix 
+ headers[config] + info = infos[config] + data[header].append(float(info[field_index])) + + parent_index = 0 + parent_info = infos[parent_index] + + child_non_incr_index = 2 + child_non_incr_info = infos[child_non_incr_index] + + data["Change in number of race warnings"].append(int(child_non_incr_info["race_warnings"] - int(parent_info["race_warnings"]))) return data def runperprocess(core, from_c, to_c): From 670cf3e13f29cdd39b561d0d37ec6b258c6a25dc Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 15 Jun 2023 18:18:54 +0200 Subject: [PATCH 59/84] Refactor collect_data further to reduce redundancy. --- .../incremental/benchmarking/efficiency.py | 35 +++++++------------ 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/scripts/incremental/benchmarking/efficiency.py b/scripts/incremental/benchmarking/efficiency.py index 1cf0f824e..2663cf2b7 100644 --- a/scripts/incremental/benchmarking/efficiency.py +++ b/scripts/incremental/benchmarking/efficiency.py @@ -178,41 +178,32 @@ def collect_data(outdir): data["Relevant changed LOC"].append(commit_prop["relCLOC"]) data["Failed?"].append(commit_prop["failed"]) data["Commit"].append(commit_prop["hash"][:7]) + + config_headers = [utils.header_parent, utils.header_non_incr_child, utils.header_incr_child, utils.header_incr_posts_child, utils.header_incr_posts_rel_child] + field_prefixes = [utils.runtime_prefix, utils.analysis_prefix, utils.solving_prefix] + field_indexes = ["runtime", "analysis_time", "solving_time"] + if commit_prop["failed"] == True: - data[utils.runtime_header_parent].append(0) - data[utils.runtime_header_non_incr_child].append(0) - data[utils.runtime_header_incr_child].append(0) - data[utils.runtime_header_incr_posts_child].append(0) - data[utils.runtime_header_incr_posts_rel_child].append(0) - data[utils.analysis_header_parent].append(0) - data[utils.analysis_header_non_incr_child].append(0) - data[utils.analysis_header_incr_child].append(0) - 
data[utils.analysis_header_incr_posts_child].append(0) - data[utils.analysis_header_incr_posts_rel_child].append(0) - data[utils.solving_header_parent].append(0) - data[utils.solving_header_non_incr_child].append(0) - data[utils.solving_header_incr_child].append(0) - data[utils.solving_header_incr_posts_child].append(0) - data[utils.solving_header_incr_posts_rel_child].append(0) + for field in range(field_indexes.__len__()): + header_prefix = field_prefixes[field] + field_index = field_indexes[field] + for config in range(config_headers.__len__()): + header = header_prefix + config_headers[config] + data[header].append(float(0)) data["Changed/Added/Removed functions"].append(0) data["Change in number of race warnings"].append(0) continue logs = [parent_log, child_non_incr_log, child_log, child_posts_log, child_posts_rel_log] - headers = [utils.header_parent, utils.header_non_incr_child, utils.header_incr_child, utils.header_incr_posts_child, utils.header_incr_posts_rel_child] infos = list(map(utils.extract_from_analyzer_log, logs)) - data["Changed/Added/Removed functions"].append(int(infos[1]["changed"]) + int(infos[1]["added"]) + int(infos[1]["removed"])) - field_prefixes = [utils.runtime_prefix, utils.analysis_prefix, utils.solving_prefix] - field_indexes = ["runtime", "analysis_time", "solving_time"] - for field in range(field_indexes.__len__()): header_prefix = field_prefixes[field] field_index = field_indexes[field] - for config in range(logs.__len__()): - header = header_prefix + headers[config] + for config in range(config_headers.__len__()): + header = header_prefix + config_headers[config] info = infos[config] data[header].append(float(info[field_index])) From 376a17603db3d274458f63c502badf485b6cdfd3 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Fri, 16 Jun 2023 13:38:00 +0200 Subject: [PATCH 60/84] Collec changed/added/removed functions from child (incremental) config. 
--- .../incremental/benchmarking/efficiency.py | 27 ++++++++++--------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/scripts/incremental/benchmarking/efficiency.py b/scripts/incremental/benchmarking/efficiency.py index 2663cf2b7..25581ab26 100644 --- a/scripts/incremental/benchmarking/efficiency.py +++ b/scripts/incremental/benchmarking/efficiency.py @@ -157,14 +157,15 @@ def analyze_small_commits_in_repo(cwd, outdir, from_c, to_c): print("Skipped: ", count_skipped) def collect_data(outdir): - data = {"Commit": [], "Failed?": [], "Changed LOC": [], "Relevant changed LOC": [], "Changed/Added/Removed functions": [], - utils.runtime_header_parent: [], utils.runtime_header_non_incr_child: [], utils.runtime_header_incr_child: [], - utils.runtime_header_incr_posts_child: [], utils.runtime_header_incr_posts_rel_child: [], - utils.analysis_header_parent: [], utils.analysis_header_non_incr_child: [], utils.analysis_header_incr_child: [], - utils.analysis_header_incr_posts_child: [], utils.analysis_header_incr_posts_rel_child: [], - utils.solving_header_parent: [], utils.solving_header_non_incr_child: [], utils.solving_header_incr_child: [], - utils.solving_header_incr_posts_child: [], utils.solving_header_incr_posts_rel_child: [], - "Change in number of race warnings": []} + data = {"Commit": [], "Failed?": [], "Changed LOC": [], "Relevant changed LOC": [], "Changed/Added/Removed functions": [], "Change in number of race warnings": []} + + config_headers = [utils.header_parent, utils.header_non_incr_child, utils.header_incr_child, utils.header_incr_posts_child, utils.header_incr_posts_rel_child] + field_prefixes = [utils.runtime_prefix, utils.analysis_prefix, utils.solving_prefix] + + for prefix in field_prefixes: + for config in config_headers: + data[prefix + config] = [] + for t in os.listdir(outdir): parent_log = os.path.join(outdir, t, 'parent', utils.analyzerlog) child_non_incr_log = os.path.join(outdir, t, 'child-non-incr', utils.analyzerlog) @@ 
-179,8 +180,6 @@ def collect_data(outdir): data["Failed?"].append(commit_prop["failed"]) data["Commit"].append(commit_prop["hash"][:7]) - config_headers = [utils.header_parent, utils.header_non_incr_child, utils.header_incr_child, utils.header_incr_posts_child, utils.header_incr_posts_rel_child] - field_prefixes = [utils.runtime_prefix, utils.analysis_prefix, utils.solving_prefix] field_indexes = ["runtime", "analysis_time", "solving_time"] if commit_prop["failed"] == True: @@ -197,7 +196,11 @@ def collect_data(outdir): logs = [parent_log, child_non_incr_log, child_log, child_posts_log, child_posts_rel_log] infos = list(map(utils.extract_from_analyzer_log, logs)) - data["Changed/Added/Removed functions"].append(int(infos[1]["changed"]) + int(infos[1]["added"]) + int(infos[1]["removed"])) + + + child_incr_index = 2 + child_incr_info = infos[child_incr_index] + data["Changed/Added/Removed functions"].append(int(child_incr_info["changed"]) + int(child_incr_info["added"]) + int(child_incr_info["removed"])) for field in range(field_indexes.__len__()): header_prefix = field_prefixes[field] @@ -210,7 +213,7 @@ def collect_data(outdir): parent_index = 0 parent_info = infos[parent_index] - child_non_incr_index = 2 + child_non_incr_index = 1 child_non_incr_info = infos[child_non_incr_index] data["Change in number of race warnings"].append(int(child_non_incr_info["race_warnings"] - int(parent_info["race_warnings"]))) From 9b67c0774c17eb690c8f7c9f8b77701b067a17b7 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Fri, 16 Jun 2023 13:38:27 +0200 Subject: [PATCH 61/84] Adapt match for changed/added/removed functions for changed goblint output. 
--- scripts/incremental/benchmarking/utils.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index b2c8a441a..4a4bce3f3 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -151,11 +151,13 @@ def find_line(pattern, log): def extract_from_analyzer_log(log): # First comes the cpu time (which is ignored); we look at the walltime. - runtime_pattern = 'Default[ ]+[0-9\.]+s[ ]+(?P[0-9\.]+)s' - analysis_time_pattern = 'analysis[ ]+[0-9\.]+s[ ]+(?P[0-9\.]+)s' - solving_time_pattern = 'solving[ ]+[0-9\.]+s[ ]+(?P[0-9\.]+)s' - change_info_pattern = 'change_info = { unchanged = (?P[0-9]*); changed = (?P[0-9]*); added = (?P[0-9]*); removed = (?P[0-9]*) }' + runtime_pattern = 'Default[ ]+(?P[0-9\.]+)s[ ]+(?P[0-9\.]+)s' + analysis_time_pattern = 'analysis[ ]+(?P[0-9\.]+)s[ ]+(?P[0-9\.]+)s' + solving_time_pattern = 'solving[ ]+(?P[0-9\.]+)s[ ]+(?P[0-9\.]+)s' + change_info_pattern = 'change_info = { unchanged = (?P[0-9]*); changed = (?P[0-9]*) \\(with unchangedHeader = (?P[0-9]*)\\); added = (?P[0-9]*); removed = (?P[0-9]*) }' + runtime = find_line(runtime_pattern, log) + analysis_time = find_line(analysis_time_pattern, log) solving_time = find_line(solving_time_pattern, log) ch = find_line(change_info_pattern, log) or {"unchanged": 0, "changed": 0, "added": 0, "removed": 0} From 4f24c8993f4d6d3f8afcda5b755c747302f4a6dd Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Fri, 16 Jun 2023 16:07:12 +0200 Subject: [PATCH 62/84] Extract CPU times for total runtimes, analysis and solving times. 
--- scripts/incremental/benchmarking/efficiency.py | 9 ++++++++- scripts/incremental/benchmarking/utils.py | 6 +++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/scripts/incremental/benchmarking/efficiency.py b/scripts/incremental/benchmarking/efficiency.py index 25581ab26..5060c2cc5 100644 --- a/scripts/incremental/benchmarking/efficiency.py +++ b/scripts/incremental/benchmarking/efficiency.py @@ -156,11 +156,19 @@ def analyze_small_commits_in_repo(cwd, outdir, from_c, to_c): print("Failed: ", count_failed) print("Skipped: ", count_skipped) +def add_version_with_cpu_suffix(strings): + string_with_cpu_suffix = list(map (lambda prefix : "CPU_" + prefix , strings)) + return strings + string_with_cpu_suffix + def collect_data(outdir): data = {"Commit": [], "Failed?": [], "Changed LOC": [], "Relevant changed LOC": [], "Changed/Added/Removed functions": [], "Change in number of race warnings": []} config_headers = [utils.header_parent, utils.header_non_incr_child, utils.header_incr_child, utils.header_incr_posts_child, utils.header_incr_posts_rel_child] field_prefixes = [utils.runtime_prefix, utils.analysis_prefix, utils.solving_prefix] + field_indexes = ["runtime", "analysis_time", "solving_time"] + + field_prefixes = add_version_with_cpu_suffix(field_prefixes) + field_indexes = add_version_with_cpu_suffix(field_indexes) for prefix in field_prefixes: for config in config_headers: @@ -180,7 +188,6 @@ def collect_data(outdir): data["Failed?"].append(commit_prop["failed"]) data["Commit"].append(commit_prop["hash"][:7]) - field_indexes = ["runtime", "analysis_time", "solving_time"] if commit_prop["failed"] == True: for field in range(field_indexes.__len__()): diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index 4a4bce3f3..da5898922 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -151,9 +151,9 @@ def find_line(pattern, log): def 
extract_from_analyzer_log(log): # First comes the cpu time (which is ignored); we look at the walltime. - runtime_pattern = 'Default[ ]+(?P[0-9\.]+)s[ ]+(?P[0-9\.]+)s' - analysis_time_pattern = 'analysis[ ]+(?P[0-9\.]+)s[ ]+(?P[0-9\.]+)s' - solving_time_pattern = 'solving[ ]+(?P[0-9\.]+)s[ ]+(?P[0-9\.]+)s' + runtime_pattern = 'Default[ ]+(?P[0-9\.]+)s[ ]+(?P[0-9\.]+)s' + analysis_time_pattern = 'analysis[ ]+(?P[0-9\.]+)s[ ]+(?P[0-9\.]+)s' + solving_time_pattern = 'solving[ ]+(?P[0-9\.]+)s[ ]+(?P[0-9\.]+)s' change_info_pattern = 'change_info = { unchanged = (?P[0-9]*); changed = (?P[0-9]*) \\(with unchangedHeader = (?P[0-9]*)\\); added = (?P[0-9]*); removed = (?P[0-9]*) }' runtime = find_line(runtime_pattern, log) From e4dc7feda4558397ef121d783f873db7f44e9f7e Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Fri, 16 Jun 2023 17:05:21 +0200 Subject: [PATCH 63/84] Collect_data: Check whether result directory exists before iterating over it. This avoids related errors in the command prompt. 
--- scripts/incremental/benchmarking/efficiency.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/incremental/benchmarking/efficiency.py b/scripts/incremental/benchmarking/efficiency.py index 5060c2cc5..71424d853 100644 --- a/scripts/incremental/benchmarking/efficiency.py +++ b/scripts/incremental/benchmarking/efficiency.py @@ -174,6 +174,9 @@ def collect_data(outdir): for config in config_headers: data[prefix + config] = [] + if not os.path.exists(outdir): + return + for t in os.listdir(outdir): parent_log = os.path.join(outdir, t, 'parent', utils.analyzerlog) child_non_incr_log = os.path.join(outdir, t, 'child-non-incr', utils.analyzerlog) @@ -236,6 +239,7 @@ def runperprocess(core, from_c, to_c): shutil.rmtree(outdir) analyze_small_commits_in_repo(cwd, outdir, from_c, to_c) data_set = collect_data(outdir) + df = pd.DataFrame(data_set) #df.sort_index(inplace=True, key=lambda idx: idx.map(lambda x: int(x.split(":")[0]))) print(df) From 7e6e8a72cee251b085b91631f7a714028cb2926f Mon Sep 17 00:00:00 2001 From: stilscher <66023521+stilscher@users.noreply.github.com> Date: Wed, 28 Jun 2023 17:42:54 +0200 Subject: [PATCH 64/84] additionally plot efficiency bar plots --- scripts/incremental/benchmarking/plot.py | 21 ++++++++++++++++----- scripts/incremental/benchmarking/utils.py | 13 +++++-------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index da4ef09fb..51cbf54cd 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -7,6 +7,19 @@ description_incr_post = "(3)" description_incr_rel ="(4)" +def efficiency_bar_plot_all4(results_dir, result_csv_filename, figure_dir): + outfile_nonincr_vs_incr = "figure_bar.pgf" + df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), filterDetectedChanges=True) + + data_set = df[["Relevant changed LOC", utils.runtime_header_non_incr_child, 
utils.runtime_header_incr_child, utils.runtime_header_incr_posts_child, utils.runtime_header_incr_posts_rel_child]] + data_set = data_set.rename(columns={utils.runtime_header_non_incr_child: description_non_incr, utils.runtime_header_incr_child: description_incr, utils.runtime_header_incr_posts_child: description_incr_post, utils.runtime_header_incr_posts_rel_child: description_incr_rel}) + + colors = ["tab:olive", "tab:blue", "tab:orange", "tab:green", "tab:red"] + textwidth = 7 + size = (textwidth,textwidth/3) + + utils.barplot(data_set, figure_dir, outfile_nonincr_vs_incr, size, colors) + def cummulative_distr_compare2(results_dir, result_csv_filename, figure_dir): num_bins = 2000 outfile_nonincr_vs_incr = "figure_cum_distr_incr.pdf" @@ -162,9 +175,6 @@ def paper_precision_graph(results_precision, filename, outdir): utils.scatter_plot(data, "\# Commits", "Share of less precise program points", os.path.join(outdir, "precision_figure.pgf"), size) - -# efficiency plots - def main(): projects = ["figlet", "chrony", "zstd"] results_efficiency = "result_efficiency_" @@ -191,7 +201,8 @@ def main(): print("Creating efficiency plots.") cummulative_distr_compare2(efficiency_results, efficieny_filename, figures_dir) cummulative_distr_all4(efficiency_results, efficieny_filename, figures_dir) - # paper_efficiency_graphs(results_efficiency, filename, outdir, filterRelCLOC=True, filterDetectedChanges=False) + efficiency_bar_plot_all4(efficiency_results, efficieny_filename, figures_dir) + # paper_efficiency_graphs(efficiency_results, efficieny_filename, figures_dir, filterRelCLOC=True, filterDetectedChanges=False) else: print("No efficiency results available.") @@ -203,4 +214,4 @@ def main(): else: print("No precision results available.") -main() \ No newline at end of file +main() diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index da5898922..c80282b19 100644 --- a/scripts/incremental/benchmarking/utils.py +++ 
b/scripts/incremental/benchmarking/utils.py @@ -173,17 +173,14 @@ def extract_precision_from_compare_log(log): precision = find_line(pattern, log) return {k: int(v) for k,v in precision.items()} if precision else None -def barplot(data_set): - df = pandas.DataFrame(data_set["data"], index=data_set["index"]) # TODO: index=analyzed_commits - df.sort_index(inplace=True, key=lambda idx: idx.map(lambda x: int(x.split(":")[0]))) - print(df) - df.to_csv('results.csv') - - df.plot.bar(rot=0, width=0.7, figsize=(25,10)) +def barplot(df, figure_dir, outfile, figsize=None, colors=None): + df.plot.bar(rot=0, width=0.7, figsize=figsize, color=colors) plt.xticks(rotation=45, ha='right', rotation_mode='anchor') plt.xlabel('Commit') plt.tight_layout() - plt.savefig("figure.pdf") + + outfile = os.path.join(figure_dir, outfile) + plt.savefig(outfile) def get_cleaned_filtered_data(result_csv_file, filterRelCLOC=False, filterDetectedChanges=False): df=pandas.read_csv(result_csv_file, index_col='Commit', sep=";") From ddaabd72786d3ab57769421da0f52f2f82e2c5a3 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Tue, 11 Jul 2023 16:17:36 +0200 Subject: [PATCH 65/84] Interactive confs: set incremental.detect-renames to false. 
--- scripts/incremental/benchmarking/conf/chrony-incrpostsolver.json | 1 + scripts/incremental/benchmarking/conf/chrony.json | 1 + scripts/incremental/benchmarking/conf/figlet-incrpostsolver.json | 1 + scripts/incremental/benchmarking/conf/figlet.json | 1 + .../incremental/benchmarking/conf/zstd-race-incrpostsolver.json | 1 + scripts/incremental/benchmarking/conf/zstd-race.json | 1 + 6 files changed, 6 insertions(+) diff --git a/scripts/incremental/benchmarking/conf/chrony-incrpostsolver.json b/scripts/incremental/benchmarking/conf/chrony-incrpostsolver.json index 8a97510dd..b87872714 100644 --- a/scripts/incremental/benchmarking/conf/chrony-incrpostsolver.json +++ b/scripts/incremental/benchmarking/conf/chrony-incrpostsolver.json @@ -104,6 +104,7 @@ "postsolver": { "enabled": true }, + "detect-renames": false, "restart": { "sided": { "enabled": false diff --git a/scripts/incremental/benchmarking/conf/chrony.json b/scripts/incremental/benchmarking/conf/chrony.json index a2fe392e4..8cfa70f2f 100644 --- a/scripts/incremental/benchmarking/conf/chrony.json +++ b/scripts/incremental/benchmarking/conf/chrony.json @@ -104,6 +104,7 @@ "postsolver": { "enabled": false }, + "detect-renames": false, "restart": { "sided": { "enabled": false diff --git a/scripts/incremental/benchmarking/conf/figlet-incrpostsolver.json b/scripts/incremental/benchmarking/conf/figlet-incrpostsolver.json index 46ad26fce..68d3fee50 100644 --- a/scripts/incremental/benchmarking/conf/figlet-incrpostsolver.json +++ b/scripts/incremental/benchmarking/conf/figlet-incrpostsolver.json @@ -93,6 +93,7 @@ "postsolver": { "enabled": true }, + "detect-renames": false, "restart": { "sided": { "enabled": false diff --git a/scripts/incremental/benchmarking/conf/figlet.json b/scripts/incremental/benchmarking/conf/figlet.json index 3e80b8ffe..0e93dc207 100644 --- a/scripts/incremental/benchmarking/conf/figlet.json +++ b/scripts/incremental/benchmarking/conf/figlet.json @@ -93,6 +93,7 @@ "postsolver": { "enabled": 
false }, + "detect-renames": false, "restart": { "sided": { "enabled": false diff --git a/scripts/incremental/benchmarking/conf/zstd-race-incrpostsolver.json b/scripts/incremental/benchmarking/conf/zstd-race-incrpostsolver.json index dbe858b98..4df9e9a2c 100644 --- a/scripts/incremental/benchmarking/conf/zstd-race-incrpostsolver.json +++ b/scripts/incremental/benchmarking/conf/zstd-race-incrpostsolver.json @@ -112,6 +112,7 @@ "postsolver": { "enabled": true }, + "detect-renames": false, "restart": { "sided": { "enabled": false diff --git a/scripts/incremental/benchmarking/conf/zstd-race.json b/scripts/incremental/benchmarking/conf/zstd-race.json index b3c4a49ac..095596d25 100644 --- a/scripts/incremental/benchmarking/conf/zstd-race.json +++ b/scripts/incremental/benchmarking/conf/zstd-race.json @@ -112,6 +112,7 @@ "postsolver": { "enabled": false }, + "detect-renames": false, "restart": { "sided": { "enabled": false From e169eff9e24d8647f2697313ac0bca003e3eecc0 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Tue, 11 Jul 2023 16:55:52 +0200 Subject: [PATCH 66/84] Extract incremental analysis of child commit in precision script. 
--- scripts/incremental/benchmarking/precision.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/scripts/incremental/benchmarking/precision.py b/scripts/incremental/benchmarking/precision.py index df92a6106..651f9b128 100644 --- a/scripts/incremental/benchmarking/precision.py +++ b/scripts/incremental/benchmarking/precision.py @@ -32,6 +32,7 @@ repo_name = project.repo_name build_compdb = project.build_compdb conf = project.conf_base +conf_incrpost = project.conf_incrpost begin = project.begin to = project.to diff_exclude = project.diff_exclude @@ -106,6 +107,15 @@ def find_sequences(): json.dump(seq_list, outfile, indent=4) return seq_list +def incremental_analyze(commit, out_commit, out_dir_name, compare_data_file, gr, repo_path): + # analyze commit incrementally based on the previous commit and save run for comparison + # print('Analyze', str(commit.hash), 'incrementally (#', commit_num, ').') + out_incr = os.path.join(out_commit, out_dir_name) + os.makedirs(out_incr) + file_incremental_run = os.path.join(out_incr, compare_data_file) + add_options = ['--enable', 'incremental.load', '--enable', 'incremental.save', '--enable', 'incremental.reluctant.enabled', '--set', 'save_run', file_incremental_run] + utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, out_incr, conf, add_options, files) + def analyze_series_in_repo(series): prev_commit = "" commit_num = 0 @@ -173,6 +183,8 @@ def analyze_series_in_repo(series): add_options = ['--enable', 'incremental.load', '--enable', 'incremental.save', '--enable', 'incremental.reluctant.enabled', '--set', 'save_run', file_incremental_run] utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, out_incr, conf, add_options, files) + incremental_analyze(commit, out_commit, 'incr', "compare-data-incr", gr, repo_path) + if commit_num in compare_commits or commit_num == len(series) - 1: # compare stored data of original and incremental run # print('Compare both runs.') 
From 249ebf5ad6fdf31ddc784fd47f829186e4e0019a Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Wed, 12 Jul 2023 17:15:32 +0200 Subject: [PATCH 67/84] Exclude figlet from precision run. --- scripts/incremental/benchmarking/run_precision.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/incremental/benchmarking/run_precision.sh b/scripts/incremental/benchmarking/run_precision.sh index e94a3d580..c0f8032ee 100755 --- a/scripts/incremental/benchmarking/run_precision.sh +++ b/scripts/incremental/benchmarking/run_precision.sh @@ -4,9 +4,9 @@ ANALYZER_DIR=$1 #Number of cores to be used NCORES=$2 -echo "Starting run on figlet" -python3 precision.py $ANALYZER_DIR figlet $NCORES -mv result_precision result_precision_figlet +# echo "Starting run on figlet" +# python3 precision.py $ANALYZER_DIR figlet $NCORES +# mv result_precision result_precision_figlet echo "Starting run on zstd" python3 precision.py $ANALYZER_DIR zstd $NCORES From de4208b3698d9ce554b64f2cf8bea6f1d5bc7b18 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Wed, 12 Jul 2023 17:19:01 +0200 Subject: [PATCH 68/84] Precision script: fix config file look-up. 
--- scripts/incremental/benchmarking/precision.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/incremental/benchmarking/precision.py b/scripts/incremental/benchmarking/precision.py index 651f9b128..077944e62 100644 --- a/scripts/incremental/benchmarking/precision.py +++ b/scripts/incremental/benchmarking/precision.py @@ -31,8 +31,9 @@ url = project.url repo_name = project.repo_name build_compdb = project.build_compdb -conf = project.conf_base -conf_incrpost = project.conf_incrpost +cwd = os.getcwd() +conf = os.path.join(cwd, project.conf_base + ".json") +conf_incrpost = os.path.join(cwd, project.conf_incrpost + ".json") begin = project.begin to = project.to diff_exclude = project.diff_exclude From bbb3e7d03dfc2609973bbb3d41e71bec6ca2351e Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 13 Jul 2023 10:57:08 +0200 Subject: [PATCH 69/84] Incremental efficiency script: Do not limit commits to 50 relevant LoC changes. --- scripts/incremental/benchmarking/efficiency.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/incremental/benchmarking/efficiency.py b/scripts/incremental/benchmarking/efficiency.py index 71424d853..873d00e62 100644 --- a/scripts/incremental/benchmarking/efficiency.py +++ b/scripts/incremental/benchmarking/efficiency.py @@ -46,7 +46,7 @@ # Project independent settings result_dir = os.path.join(os.getcwd(), 'result_efficiency') -maxCLOC = 50 # can be deactivated with None +maxCLOC = None # was 50; can be deactivated with None analyzer_dir = sys.argv[1] only_collect_results = False # can be turned on to collect results, if data collection was aborted before the creation of result tables ################################################################################ From 793496c5560a9fc5d73e19bcfd8d00b3b814761b Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 20 Jul 2023 11:05:07 +0200 Subject: [PATCH 70/84] Precision: run all configurations. 
--- scripts/incremental/benchmarking/precision.py | 21 +++++++++++-------- scripts/incremental/benchmarking/utils.py | 8 ++++--- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/scripts/incremental/benchmarking/precision.py b/scripts/incremental/benchmarking/precision.py index 077944e62..addae366c 100644 --- a/scripts/incremental/benchmarking/precision.py +++ b/scripts/incremental/benchmarking/precision.py @@ -108,14 +108,16 @@ def find_sequences(): json.dump(seq_list, outfile, indent=4) return seq_list -def incremental_analyze(commit, out_commit, out_dir_name, compare_data_file, gr, repo_path): +# returns the file where the incremental results are stored for comparison +def incremental_analyze(commit, out_commit, out_dir_name, compare_data_file, gr, repo_path, conf, add_options): # analyze commit incrementally based on the previous commit and save run for comparison # print('Analyze', str(commit.hash), 'incrementally (#', commit_num, ').') out_incr = os.path.join(out_commit, out_dir_name) os.makedirs(out_incr) file_incremental_run = os.path.join(out_incr, compare_data_file) - add_options = ['--enable', 'incremental.load', '--enable', 'incremental.save', '--enable', 'incremental.reluctant.enabled', '--set', 'save_run', file_incremental_run] + add_options = add_options + ['--enable', 'incremental.load', '--enable', 'incremental.save', '--set', 'save_run', file_incremental_run] utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, out_incr, conf, add_options, files) + return file_incremental_run def analyze_series_in_repo(series): prev_commit = "" commit_num = 0 @@ -178,20 +180,21 @@ def analyze_series_in_repo(series): # analyze commit incrementally based on the previous commit and save run for comparison # print('Analyze', str(commit.hash), 'incrementally (#', commit_num, ').') - out_incr = os.path.join(out_commit, 'incr') - os.makedirs(out_incr) - file_incremental_run = os.path.join(out_incr, "compare-data-incr") - add_options = 
['--enable', 'incremental.load', '--enable', 'incremental.save', '--enable', 'incremental.reluctant.enabled', '--set', 'save_run', file_incremental_run] - utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, out_incr, conf, add_options, files) - incremental_analyze(commit, out_commit, 'incr', "compare-data-incr", gr, repo_path) + file_incr_run = incremental_analyze(commit, out_commit, 'incr', "compare-data-incr", gr, repo_path, conf, []) + file_incr_post_run = incremental_analyze(commit, out_commit, 'incr-post', "compare-data-incr-post", gr, repo_path, conf_incrpost, []) + reluctant_option = ['--enable', 'incremental.reluctant.enabled'] + file_incr_rel_post_run = incremental_analyze(commit, out_commit, 'incr-post-rel', "compare-data-incr-post-rel", gr, repo_path, reluctant_option, []) if commit_num in compare_commits or commit_num == len(series) - 1: # compare stored data of original and incremental run # print('Compare both runs.') out_compare = os.path.join(out_commit, 'compare') os.makedirs(out_compare) - utils.compare_runs(analyzer_dir, dummy_c_file, out_compare, conf, file_incremental_run, file_original_run) + utils.compare_runs(analyzer_dir, dummy_c_file, out_compare, "incr", conf, file_incr_run, file_original_run) + utils.compare_runs(analyzer_dir, dummy_c_file, out_compare, "incr_post", conf, file_incr_post_run, file_original_run) + utils.compare_runs(analyzer_dir, dummy_c_file, out_compare, "inr_rel_post", conf, file_incr_rel_post_run, file_original_run) + except utils.subprocess.CalledProcessError as e: print('Aborted because command ', e.cmd, 'failed.') diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index c80282b19..77643be9e 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -61,7 +61,9 @@ preparelog = "prepare.log" analyzerlog = "analyzer.log" -comparelog = "compare.log" + +def comparelog_with_suffix (suffix): + return 
"compare_" +suffix + ".log" def reset_incremental_data(incr_data_dir): if os.path.exists(incr_data_dir) and os.path.isdir(incr_data_dir): @@ -116,10 +118,10 @@ def append_to_repo_path(file): subprocess.run(analyze_command, check=True, stdout=outfile, stderr=subprocess.STDOUT) outfile.close() -def compare_runs(analyzer_dir, dummy_c_file, outdir, conf, compare_data_1, compare_data_2): +def compare_runs(analyzer_dir, dummy_c_file, outdir, log_suffix, conf, compare_data_1, compare_data_2): options = ['--conf', conf, '--disable', 'warn.warning', '--disable', 'warn.race', '--disable', 'dbg.compare_runs.diff', '--disable', 'dbg.compare_runs.eqsys', '--enable', 'dbg.compare_runs.node', '--compare_runs', compare_data_1, compare_data_2] analyze_command = [os.path.join(analyzer_dir, 'goblint'), *options, dummy_c_file] - with open(os.path.join(outdir, comparelog), "w+") as outfile: + with open(os.path.join(outdir, comparelog_with_suffix(log_suffix)), "w+") as outfile: subprocess.run(analyze_command, check=True, stdout=outfile, stderr=subprocess.STDOUT) outfile.close() From 2e50ca1bd8e724b7ec7575308daccf5b761ee253 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 20 Jul 2023 11:11:15 +0200 Subject: [PATCH 71/84] Fix parameters for increm rel with incr. postsolver. 
--- scripts/incremental/benchmarking/precision.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/incremental/benchmarking/precision.py b/scripts/incremental/benchmarking/precision.py index addae366c..5969381d2 100644 --- a/scripts/incremental/benchmarking/precision.py +++ b/scripts/incremental/benchmarking/precision.py @@ -184,7 +184,7 @@ def analyze_series_in_repo(series): file_incr_run = incremental_analyze(commit, out_commit, 'incr', "compare-data-incr", gr, repo_path, conf, []) file_incr_post_run = incremental_analyze(commit, out_commit, 'incr-post', "compare-data-incr-post", gr, repo_path, conf_incrpost, []) reluctant_option = ['--enable', 'incremental.reluctant.enabled'] - file_incr_rel_post_run = incremental_analyze(commit, out_commit, 'incr-post-rel', "compare-data-incr-post-rel", gr, repo_path, reluctant_option, []) + file_incr_rel_post_run = incremental_analyze(commit, out_commit, 'incr-post-rel', "compare-data-incr-post-rel", gr, repo_path, conf_incrpost, reluctant_option) if commit_num in compare_commits or commit_num == len(series) - 1: # compare stored data of original and incremental run From 5c9d7e1e942d5ca38e1b7591f631087ecf3d7e44 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 20 Jul 2023 11:27:57 +0200 Subject: [PATCH 72/84] Incremental precision script: perform merge results for all configs. 
--- scripts/incremental/benchmarking/precision.py | 36 ++++++++++--------- scripts/incremental/benchmarking/utils.py | 1 + 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/scripts/incremental/benchmarking/precision.py b/scripts/incremental/benchmarking/precision.py index 5969381d2..1a3fae852 100644 --- a/scripts/incremental/benchmarking/precision.py +++ b/scripts/incremental/benchmarking/precision.py @@ -191,9 +191,9 @@ def analyze_series_in_repo(series): # print('Compare both runs.') out_compare = os.path.join(out_commit, 'compare') os.makedirs(out_compare) - utils.compare_runs(analyzer_dir, dummy_c_file, out_compare, "incr", conf, file_incr_run, file_original_run) - utils.compare_runs(analyzer_dir, dummy_c_file, out_compare, "incr_post", conf, file_incr_post_run, file_original_run) - utils.compare_runs(analyzer_dir, dummy_c_file, out_compare, "inr_rel_post", conf, file_incr_rel_post_run, file_original_run) + utils.compare_runs(analyzer_dir, dummy_c_file, out_compare, utils.compare_runs_suffixes[0], conf, file_incr_run, file_original_run) + utils.compare_runs(analyzer_dir, dummy_c_file, out_compare, utils.compare_runs_suffixes[1], conf, file_incr_post_run, file_original_run) + utils.compare_runs(analyzer_dir, dummy_c_file, out_compare, utils.compare_runs_suffixes[2], conf, file_incr_rel_post_run, file_original_run) except utils.subprocess.CalledProcessError as e: @@ -264,20 +264,22 @@ def merge_results(outfilename): relCLOC = 0 for i in filter(lambda x: x != "0", commits): ith_dir = os.path.join(outdir, i) - compare_log_path = os.path.join(ith_dir, "compare", utils.comparelog) - with open(os.path.join(outdir, i, "commit_properties.log"), "r") as f: - relCLOC += json.load(f)["relCLOC"] - if int(i) in compare_commits: - if os.path.isdir(ith_dir) and os.path.exists(compare_log_path): - int_prec[i]["precision"] = utils.extract_precision_from_compare_log(compare_log_path) - int_prec[i]["relCLOC"] = relCLOC - if int_prec[i]["precision"]: - 
result_sums[i]["precpertotal"] = {k: result_sums[i]["precpertotal"].get(k, 0) + (int_prec[i]["precision"].get(k, 0) / int_prec[i]["precision"]["total"]) for k in set(result_sums[i]["precpertotal"])} - result_sums[i]["number_of_commits"] += 1 - result_sums[i]["relCLOC"] += relCLOC - if int(i) != 0 and int(i) == len(commits) - 1: - if os.path.exists(compare_log_path): - final_prec = utils.extract_precision_from_compare_log(compare_log_path) + for suffix in utils.compare_runs_suffixes: + comparelog = utils.comparelog_with_suffix(suffix) + compare_log_path = os.path.join(ith_dir, "compare", comparelog) + with open(os.path.join(outdir, i, "commit_properties.log"), "r") as f: + relCLOC += json.load(f)["relCLOC"] + if int(i) in compare_commits: + if os.path.isdir(ith_dir) and os.path.exists(compare_log_path): + int_prec[i]["precision"] = utils.extract_precision_from_compare_log(compare_log_path) + int_prec[i]["relCLOC"] = relCLOC + if int_prec[i]["precision"]: + result_sums[i]["precpertotal"] = {k: result_sums[i]["precpertotal"].get(k, 0) + (int_prec[i]["precision"].get(k, 0) / int_prec[i]["precision"]["total"]) for k in set(result_sums[i]["precpertotal"])} + result_sums[i]["number_of_commits"] += 1 + result_sums[i]["relCLOC"] += relCLOC + if int(i) != 0 and int(i) == len(commits) - 1: + if os.path.exists(compare_log_path): + final_prec = utils.extract_precision_from_compare_log(compare_log_path) summary = {"name": os.path.basename(s), "sequence": seq, "length": len(seq), "intermediate precision": int_prec, "final precision": final_prec, "finalRelCLOC": relCLOC} seq_summaries.append(summary) os.chdir(wd) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index 77643be9e..c8afe2bc4 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -61,6 +61,7 @@ preparelog = "prepare.log" analyzerlog = "analyzer.log" +compare_runs_suffixes = ["incr", "incr_post", "incr_rel_post"] def 
comparelog_with_suffix (suffix): return "compare_" +suffix + ".log" From c7d9e15712bd5cba9b5ae2c77fce903ae5ece960 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 20 Jul 2023 13:24:06 +0200 Subject: [PATCH 73/84] Interactive precision: iterate over configurations in outer instead of inner loop. --- scripts/incremental/benchmarking/precision.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/scripts/incremental/benchmarking/precision.py b/scripts/incremental/benchmarking/precision.py index 1a3fae852..688c0b81d 100644 --- a/scripts/incremental/benchmarking/precision.py +++ b/scripts/incremental/benchmarking/precision.py @@ -262,9 +262,10 @@ def merge_results(outfilename): int_prec = {str(i): {"precision": None, "relCLOC": None} for i in compare_commits} final_prec = None relCLOC = 0 - for i in filter(lambda x: x != "0", commits): - ith_dir = os.path.join(outdir, i) - for suffix in utils.compare_runs_suffixes: + + for suffix in utils.compare_runs_suffixes: + for i in filter(lambda x: x != "0", commits): + ith_dir = os.path.join(outdir, i) comparelog = utils.comparelog_with_suffix(suffix) compare_log_path = os.path.join(ith_dir, "compare", comparelog) with open(os.path.join(outdir, i, "commit_properties.log"), "r") as f: @@ -280,9 +281,9 @@ def merge_results(outfilename): if int(i) != 0 and int(i) == len(commits) - 1: if os.path.exists(compare_log_path): final_prec = utils.extract_precision_from_compare_log(compare_log_path) - summary = {"name": os.path.basename(s), "sequence": seq, "length": len(seq), "intermediate precision": int_prec, "final precision": final_prec, "finalRelCLOC": relCLOC} - seq_summaries.append(summary) - os.chdir(wd) + summary = {"name": os.path.basename(s), "sequence": seq, "length": len(seq), "intermediate precision": int_prec, "final precision": final_prec, "finalRelCLOC": relCLOC} + seq_summaries.append(summary) + os.chdir(wd) result_avgs = {i: None for i in result_sums.keys()} for i, ps in 
result_sums.items(): if ps["number_of_commits"] != 0: From 54e6359961250e9d14f1702803c29650c7f37d07 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 20 Jul 2023 13:25:28 +0200 Subject: [PATCH 74/84] Use float instead of np.float as np.float has been removed. --- scripts/incremental/benchmarking/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index c8afe2bc4..55e5f9040 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -213,7 +213,7 @@ def create_cum_data(dataFrame, num_bins, relColumns): for c in relColumns: valuesc, basec = np.histogram(dataFrame.loc[:,c], bins=bins) base = basec - cum = np.cumsum(valuesc, dtype=np.float) + cum = np.cumsum(valuesc, dtype=float) cum[cum==0] = np.nan # If there is a tail of values that are the same, set the ones after its first occurrence to NaN. From 92af04787758915f32363ee1d39b4a03387001ad Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 20 Jul 2023 13:48:45 +0200 Subject: [PATCH 75/84] Plot separate efficiency graphs for <= 50 loc changes and > 50 loc changes. 
--- scripts/incremental/benchmarking/plot.py | 40 ++++++++++++++--------- scripts/incremental/benchmarking/utils.py | 5 +-- 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index 51cbf54cd..e10e9b54a 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -7,9 +7,9 @@ description_incr_post = "(3)" description_incr_rel ="(4)" -def efficiency_bar_plot_all4(results_dir, result_csv_filename, figure_dir): +def efficiency_bar_plot_all4(results_dir, changed_loc_filter, result_csv_filename, figure_dir): outfile_nonincr_vs_incr = "figure_bar.pgf" - df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), filterDetectedChanges=True) + df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), changed_loc_filter, filterDetectedChanges=True) data_set = df[["Relevant changed LOC", utils.runtime_header_non_incr_child, utils.runtime_header_incr_child, utils.runtime_header_incr_posts_child, utils.runtime_header_incr_posts_rel_child]] data_set = data_set.rename(columns={utils.runtime_header_non_incr_child: description_non_incr, utils.runtime_header_incr_child: description_incr, utils.runtime_header_incr_posts_child: description_incr_post, utils.runtime_header_incr_posts_rel_child: description_incr_rel}) @@ -20,11 +20,11 @@ def efficiency_bar_plot_all4(results_dir, result_csv_filename, figure_dir): utils.barplot(data_set, figure_dir, outfile_nonincr_vs_incr, size, colors) -def cummulative_distr_compare2(results_dir, result_csv_filename, figure_dir): +def cummulative_distr_compare2(results_dir, suffix, changed_loc_filter, result_csv_filename, figure_dir): num_bins = 2000 outfile_nonincr_vs_incr = "figure_cum_distr_incr.pdf" outfile_incr_vs_incrrel = "figure_cum_distr_rel.pdf" - df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), filterDetectedChanges=True) 
+ df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), changed_loc_filter, filterDetectedChanges=True) data, base = utils.create_cum_data(df, num_bins, [utils.runtime_header_non_incr_child, utils.runtime_header_incr_child, utils.runtime_header_incr_posts_rel_child]) datanonincr = {"values": data[0], "label": description_non_incr} @@ -38,10 +38,12 @@ def cummulative_distr_compare2(results_dir, result_csv_filename, figure_dir): utils.cummulative_distr_plot([dataincr, datarelincr], base, figure_dir, outfile_incr_vs_incrrel, logscale=True) -def cummulative_distr_all4(results_dir, result_csv_filename, figure_dir): + + +def cummulative_distr_all4_filter(results_dir, suffix, changed_loc_filter, result_csv_filename, figure_dir): num_bins = 2000 - outfile_nonincr_vs_incr = "figure_cum_distr_all3.pdf" - df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), filterDetectedChanges=True) + outfile_nonincr_vs_incr = "figure_cum_distr_all3"+ suffix + ".pdf" + df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), changed_loc_filter, filterDetectedChanges=True) data, base = utils.create_cum_data(df, num_bins, [utils.runtime_header_non_incr_child, utils.runtime_header_incr_child, utils.runtime_header_incr_posts_child, utils.runtime_header_incr_posts_rel_child]) data_non_incr = {"values": data[0], "label": description_non_incr} @@ -50,8 +52,16 @@ def cummulative_distr_all4(results_dir, result_csv_filename, figure_dir): data_incr_rel = {"values": data[3], "label": description_incr_rel} utils.cummulative_distr_plot([data_non_incr, data_incr, data_incr_post, data_incr_rel], base, figure_dir, outfile_nonincr_vs_incr, figsize=(6,4), logscale=True) -def distribution_absdiff_plot(title, result_csv_filename, outdir, cutoffs_incr=None, cutoffs_rel=None): - df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) +def cummulative_distr_all4(results_dir, 
results_csv_filenmane, figure_dir): + greater_50 = lambda x : x > 50 + cummulative_distr_all4_filter(results_dir, "_greater_50_loc_changed", greater_50, results_csv_filenmane, figure_dir) + + leq_50 = lambda x : x <= 50 + cummulative_distr_all4_filter(results_dir, "_leq_50_loc_changed", leq_50, results_csv_filenmane, figure_dir) + + +def distribution_absdiff_plot(title, changed_loc_filter, result_csv_filename, outdir, cutoffs_incr=None, cutoffs_rel=None): + df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), changed_loc_filter, filterDetectedChanges=True) # plot incremental vs non-incremental diff = df.loc[:,utils.runtime_header_non_incr_child] - df.loc[:,utils.runtime_header_incr_child] @@ -61,8 +71,8 @@ def distribution_absdiff_plot(title, result_csv_filename, outdir, cutoffs_incr=N diff = df.loc[:,utils.runtime_header_incr_child] - df.loc[:,utils.runtime_header_incr_posts_rel_child] utils.hist_plot(diff, 2, title, 'Improvement in s (reluctant compared to incremental)', 'Number of Commits', os.path.join(outdir, "figure_absdiff_distr_rel.pdf"), cutoffs_rel) -def distribution_reldiff_plot(title, result_csv_filename, outdir, cutoffs_incr=None, cutoffs_rel=None): - df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), filterDetectedChanges=True) +def distribution_reldiff_plot(title, changed_loc_filter, result_csv_filename, outdir, cutoffs_incr=None, cutoffs_rel=None): + df = utils.get_cleaned_filtered_data(os.path.join(outdir,result_csv_filename), changed_loc_filter, filterDetectedChanges=True) # plot incremental vs non-incremental print(df[utils.runtime_header_incr_child].astype('float')) @@ -73,8 +83,8 @@ def distribution_reldiff_plot(title, result_csv_filename, outdir, cutoffs_incr=N diff = 1 - df.loc[:,utils.runtime_header_incr_posts_rel_child] / df.loc[:,utils.runtime_header_incr_child] utils.hist_plot(diff, 0.005, title, 'Relative Improvement (reluctant compared to incremental)', 'Number of Commits', 
os.path.join(outdir, "figure_reldiff_distr_rel.pdf"), cutoffs_rel) -def paper_efficiency_graphs(dir_results, csv_filename, outdir, filterRelCLOC=False, filterDetectedChanges=False): - df = utils.get_cleaned_filtered_data(os.path.join(dir_results,csv_filename), filterRelCLOC=filterRelCLOC, filterDetectedChanges=filterDetectedChanges) +def paper_efficiency_graphs(dir_results, changed_loc_filter, csv_filename, outdir, filterRelCLOC=False, filterDetectedChanges=False): + df = utils.get_cleaned_filtered_data(os.path.join(dir_results,csv_filename), changed_loc_filter, filterRelCLOC=filterRelCLOC, filterDetectedChanges=filterDetectedChanges) diff1 = 1 - df[utils.runtime_header_incr_child].astype('float') / df[utils.runtime_header_non_incr_child].astype('float') diff2 = 1 - df[utils.runtime_header_incr_posts_child].astype('float') / df[utils.runtime_header_incr_child].astype('float') diff3 = 1 - df[utils.runtime_header_incr_posts_rel_child].astype('float') / df[utils.runtime_header_incr_posts_child].astype('float') @@ -199,9 +209,9 @@ def main(): if os.path.exists(efficiency_results): efficieny_filename = "total_results.csv" print("Creating efficiency plots.") - cummulative_distr_compare2(efficiency_results, efficieny_filename, figures_dir) + # cummulative_distr_compare2(efficiency_results, efficieny_filename, figures_dir) cummulative_distr_all4(efficiency_results, efficieny_filename, figures_dir) - efficiency_bar_plot_all4(efficiency_results, efficieny_filename, figures_dir) + # efficiency_bar_plot_all4(efficiency_results, efficieny_filename, figures_dir) # paper_efficiency_graphs(efficiency_results, efficieny_filename, figures_dir, filterRelCLOC=True, filterDetectedChanges=False) else: print("No efficiency results available.") diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index 55e5f9040..68d1ede9e 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -185,13 +185,14 
@@ def barplot(df, figure_dir, outfile, figsize=None, colors=None): outfile = os.path.join(figure_dir, outfile) plt.savefig(outfile) -def get_cleaned_filtered_data(result_csv_file, filterRelCLOC=False, filterDetectedChanges=False): - df=pandas.read_csv(result_csv_file, index_col='Commit', sep=";") +def get_cleaned_filtered_data(result_csv_file, changed_loc_filter, filterRelCLOC=False, filterDetectedChanges=False): + df = pandas.read_csv(result_csv_file, index_col='Commit', sep=";") df = df.loc[:, ~df.columns.str.contains('^Unnamed')] # clean dataset (remove all rows for which any of the runtime entries is 0 which means that the respective analysis # run failed) df = df[(df[runtime_header_parent] != 0)] + df = df[changed_loc_filter(df["Relevant changed LOC"])] if filterRelCLOC: df = df[df["Relevant changed LOC"] > 0] if filterDetectedChanges: From c020267c1b6b2ab67d11b1e5631e3ac6bf9b2a20 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 20 Jul 2023 14:06:30 +0200 Subject: [PATCH 76/84] Efficiency plots: add efficiency_bar_plots again. 
--- scripts/incremental/benchmarking/plot.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index e10e9b54a..3ad39dbc5 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -7,7 +7,8 @@ description_incr_post = "(3)" description_incr_rel ="(4)" -def efficiency_bar_plot_all4(results_dir, changed_loc_filter, result_csv_filename, figure_dir): +def efficiency_bar_plot_all4(results_dir, result_csv_filename, figure_dir): + changed_loc_filter = lambda x : x >= 0 # no filtering outfile_nonincr_vs_incr = "figure_bar.pgf" df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), changed_loc_filter, filterDetectedChanges=True) @@ -211,7 +212,7 @@ def main(): print("Creating efficiency plots.") # cummulative_distr_compare2(efficiency_results, efficieny_filename, figures_dir) cummulative_distr_all4(efficiency_results, efficieny_filename, figures_dir) - # efficiency_bar_plot_all4(efficiency_results, efficieny_filename, figures_dir) + efficiency_bar_plot_all4(efficiency_results, efficieny_filename, figures_dir) # paper_efficiency_graphs(efficiency_results, efficieny_filename, figures_dir, filterRelCLOC=True, filterDetectedChanges=False) else: print("No efficiency results available.") From ca79b69e4f536fd5e585615983bf0b1394af68b9 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Thu, 20 Jul 2023 14:23:35 +0200 Subject: [PATCH 77/84] Interactive efficiency: plot cpu_time instead of wall time. 
--- scripts/incremental/benchmarking/plot.py | 6 +++--- scripts/incremental/benchmarking/utils.py | 8 ++++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index 3ad39dbc5..51261fbd2 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -12,8 +12,8 @@ def efficiency_bar_plot_all4(results_dir, result_csv_filename, figure_dir): outfile_nonincr_vs_incr = "figure_bar.pgf" df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), changed_loc_filter, filterDetectedChanges=True) - data_set = df[["Relevant changed LOC", utils.runtime_header_non_incr_child, utils.runtime_header_incr_child, utils.runtime_header_incr_posts_child, utils.runtime_header_incr_posts_rel_child]] - data_set = data_set.rename(columns={utils.runtime_header_non_incr_child: description_non_incr, utils.runtime_header_incr_child: description_incr, utils.runtime_header_incr_posts_child: description_incr_post, utils.runtime_header_incr_posts_rel_child: description_incr_rel}) + data_set = df[["Relevant changed LOC", utils.cpu_runtime_header_non_incr_child, utils.cpu_runtime_header_incr_child, utils.cpu_runtime_header_incr_posts_child, utils.cpu_runtime_header_incr_posts_rel_child]] + data_set = data_set.rename(columns={utils.cpu_runtime_header_non_incr_child: description_non_incr, utils.cpu_runtime_header_incr_child: description_incr, utils.cpu_runtime_header_incr_posts_child: description_incr_post, utils.cpu_runtime_header_incr_posts_rel_child: description_incr_rel}) colors = ["tab:olive", "tab:blue", "tab:orange", "tab:green", "tab:red"] textwidth = 7 @@ -46,7 +46,7 @@ def cummulative_distr_all4_filter(results_dir, suffix, changed_loc_filter, resul outfile_nonincr_vs_incr = "figure_cum_distr_all3"+ suffix + ".pdf" df = utils.get_cleaned_filtered_data(os.path.join(results_dir,result_csv_filename), changed_loc_filter, 
filterDetectedChanges=True) - data, base = utils.create_cum_data(df, num_bins, [utils.runtime_header_non_incr_child, utils.runtime_header_incr_child, utils.runtime_header_incr_posts_child, utils.runtime_header_incr_posts_rel_child]) + data, base = utils.create_cum_data(df, num_bins, [utils.cpu_runtime_header_non_incr_child, utils.cpu_runtime_header_incr_child, utils.cpu_runtime_header_incr_posts_child, utils.cpu_runtime_header_incr_posts_rel_child]) data_non_incr = {"values": data[0], "label": description_non_incr} data_incr = {"values": data[1], "label": description_incr} data_incr_post = {"values": data[2], "label": description_incr_post} diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index 68d1ede9e..284346e72 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -47,6 +47,14 @@ runtime_header_incr_posts_child = runtime_prefix + header_incr_posts_child runtime_header_incr_posts_rel_child = runtime_prefix + header_incr_posts_rel_child +cpu_prefix = "CPU_" + +cpu_runtime_header_parent = cpu_prefix + runtime_header_parent +cpu_runtime_header_non_incr_child = cpu_prefix + runtime_header_non_incr_child +cpu_runtime_header_incr_child = cpu_prefix + runtime_header_incr_child +cpu_runtime_header_incr_posts_child = cpu_prefix + runtime_header_incr_posts_child +cpu_runtime_header_incr_posts_rel_child = cpu_prefix + runtime_header_incr_posts_rel_child + analysis_header_parent = analysis_prefix + header_parent analysis_header_non_incr_child = analysis_prefix + header_non_incr_child analysis_header_incr_child = analysis_prefix + header_incr_child From 086955ba0e5865aca6f60b1d57696b4c429244af Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Sun, 23 Jul 2023 17:28:33 +0200 Subject: [PATCH 78/84] Add printouts for debugging. 
--- scripts/incremental/benchmarking/precision.py | 1 + scripts/incremental/benchmarking/utils.py | 1 + 2 files changed, 2 insertions(+) diff --git a/scripts/incremental/benchmarking/precision.py b/scripts/incremental/benchmarking/precision.py index 688c0b81d..8b189d32e 100644 --- a/scripts/incremental/benchmarking/precision.py +++ b/scripts/incremental/benchmarking/precision.py @@ -114,6 +114,7 @@ def incremental_analyze(commit, out_commit, out_dir_name, compare_data_file, gr, # print('Analyze', str(commit.hash), 'incrementally (#', commit_num, ').') out_incr = os.path.join(out_commit, out_dir_name) os.makedirs(out_incr) + print("Creating directory" + out_incr) file_incremental_run = os.path.join(out_incr, compare_data_file) add_options = add_options + ['--enable', 'incremental.load', '--enable', 'incremental.save', '--enable', '--set', 'save_run', file_incremental_run] utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, out_incr, conf, add_options, files) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index 284346e72..02b179bbd 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -125,6 +125,7 @@ def append_to_repo_path(file): # Run the analysis with open(os.path.join(outdir, analyzerlog), "w+") as outfile: subprocess.run(analyze_command, check=True, stdout=outfile, stderr=subprocess.STDOUT) + print("Started run:\n" + analyze_command) outfile.close() def compare_runs(analyzer_dir, dummy_c_file, outdir, log_suffix, conf, compare_data_1, compare_data_2): From 02d9efc30a4b87ea622c94afdc9caa9eb34e8885 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Sun, 23 Jul 2023 17:47:56 +0200 Subject: [PATCH 79/84] Remove spurious --enable from parameters passed in precision script. 
--- scripts/incremental/benchmarking/precision.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/incremental/benchmarking/precision.py b/scripts/incremental/benchmarking/precision.py index 8b189d32e..2c9e84710 100644 --- a/scripts/incremental/benchmarking/precision.py +++ b/scripts/incremental/benchmarking/precision.py @@ -116,7 +116,7 @@ def incremental_analyze(commit, out_commit, out_dir_name, compare_data_file, gr, os.makedirs(out_incr) print("Creating directory" + out_incr) file_incremental_run = os.path.join(out_incr, compare_data_file) - add_options = add_options + ['--enable', 'incremental.load', '--enable', 'incremental.save', '--enable', '--set', 'save_run', file_incremental_run] + add_options = add_options + ['--enable', 'incremental.load', '--enable', 'incremental.save', '--set', 'save_run', file_incremental_run] utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, out_incr, conf, add_options, files) return file_incremental_run From 8657739a9b722ef060a102b41099f4e5ee087cb4 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Sun, 23 Jul 2023 18:33:29 +0200 Subject: [PATCH 80/84] Output precision results into different files, extend plot script to produce plots for each of them. 
--- scripts/incremental/benchmarking/plot.py | 12 +++-- scripts/incremental/benchmarking/precision.py | 53 ++++++++++--------- scripts/incremental/benchmarking/utils.py | 5 +- 3 files changed, 38 insertions(+), 32 deletions(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index 51261fbd2..8b07c6341 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -156,7 +156,7 @@ def paper_precision_graph_box(results_precision, filename, outdir): utils.box_plot(data, x, "\# Commits", "Share of less precise program points", os.path.join(outdir, "precision_figure.pgf"), size) -def paper_precision_graph(results_precision, filename, outdir): +def paper_precision_graph(results_precision, filename, outdir, suffix): df = utils.get_data_from_json(os.path.join(results_precision, filename)) # Plot precision loss after x commits, where x is in {1, 2, 5, 10, 15} @@ -183,7 +183,8 @@ def paper_precision_graph(results_precision, filename, outdir): data.append((x,y)) halftextwidth = 3.3 size=(halftextwidth,halftextwidth*2/3) - utils.scatter_plot(data, "\# Commits", "Share of less precise program points", os.path.join(outdir, "precision_figure.pgf"), size) + outfile = os.path.join(outdir, "precision_figure" + suffix + ".pgf") + utils.scatter_plot(data, "\# Commits", "Share of less precise program points", outfile, size) def main(): @@ -219,9 +220,10 @@ def main(): # precision plot if os.path.exists(precision_results): - precision_filename = "results.json" - print("Creating precision plots.") - paper_precision_graph(precision_results, precision_filename, figures_dir) + for suffix in utils.compare_runs_suffixes: + precision_filename = utils.precision_result_file_name_with_suffix(suffix) + print("Creating precision plots for configuration:" + suffix) + paper_precision_graph(precision_results, precision_filename, figures_dir, suffix) else: print("No precision results available.") diff --git 
a/scripts/incremental/benchmarking/precision.py b/scripts/incremental/benchmarking/precision.py index 2c9e84710..41c224bf5 100644 --- a/scripts/incremental/benchmarking/precision.py +++ b/scripts/incremental/benchmarking/precision.py @@ -114,7 +114,6 @@ def incremental_analyze(commit, out_commit, out_dir_name, compare_data_file, gr, # print('Analyze', str(commit.hash), 'incrementally (#', commit_num, ').') out_incr = os.path.join(out_commit, out_dir_name) os.makedirs(out_incr) - print("Creating directory" + out_incr) file_incremental_run = os.path.join(out_incr, compare_data_file) add_options = add_options + ['--enable', 'incremental.load', '--enable', 'incremental.save', '--set', 'save_run', file_incremental_run] utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, out_incr, conf, add_options, files) @@ -244,7 +243,7 @@ def analyze_seq_in_parallel(seq_list): p.join() -def merge_results(outfilename): +def merge_results(outfilename, suffix): wd = os.getcwd() seq_summaries = [] result_sums = {str(i): {"precpertotal": {"equal": 0, "moreprec": 0, "lessprec": 0, "incomp": 0, "total": 0}, "number_of_commits": 0, "relCLOC": 0} for i in compare_commits} @@ -264,27 +263,26 @@ def merge_results(outfilename): final_prec = None relCLOC = 0 - for suffix in utils.compare_runs_suffixes: - for i in filter(lambda x: x != "0", commits): - ith_dir = os.path.join(outdir, i) - comparelog = utils.comparelog_with_suffix(suffix) - compare_log_path = os.path.join(ith_dir, "compare", comparelog) - with open(os.path.join(outdir, i, "commit_properties.log"), "r") as f: - relCLOC += json.load(f)["relCLOC"] - if int(i) in compare_commits: - if os.path.isdir(ith_dir) and os.path.exists(compare_log_path): - int_prec[i]["precision"] = utils.extract_precision_from_compare_log(compare_log_path) - int_prec[i]["relCLOC"] = relCLOC - if int_prec[i]["precision"]: - result_sums[i]["precpertotal"] = {k: result_sums[i]["precpertotal"].get(k, 0) + (int_prec[i]["precision"].get(k, 0) 
/ int_prec[i]["precision"]["total"]) for k in set(result_sums[i]["precpertotal"])} - result_sums[i]["number_of_commits"] += 1 - result_sums[i]["relCLOC"] += relCLOC - if int(i) != 0 and int(i) == len(commits) - 1: - if os.path.exists(compare_log_path): - final_prec = utils.extract_precision_from_compare_log(compare_log_path) - summary = {"name": os.path.basename(s), "sequence": seq, "length": len(seq), "intermediate precision": int_prec, "final precision": final_prec, "finalRelCLOC": relCLOC} - seq_summaries.append(summary) - os.chdir(wd) + comparelog = utils.comparelog_with_suffix(suffix) + for i in filter(lambda x: x != "0", commits): + ith_dir = os.path.join(outdir, i) + compare_log_path = os.path.join(ith_dir, "compare", comparelog) + with open(os.path.join(outdir, i, "commit_properties.log"), "r") as f: + relCLOC += json.load(f)["relCLOC"] + if int(i) in compare_commits: + if os.path.isdir(ith_dir) and os.path.exists(compare_log_path): + int_prec[i]["precision"] = utils.extract_precision_from_compare_log(compare_log_path) + int_prec[i]["relCLOC"] = relCLOC + if int_prec[i]["precision"]: + result_sums[i]["precpertotal"] = {k: result_sums[i]["precpertotal"].get(k, 0) + (int_prec[i]["precision"].get(k, 0) / int_prec[i]["precision"]["total"]) for k in set(result_sums[i]["precpertotal"])} + result_sums[i]["number_of_commits"] += 1 + result_sums[i]["relCLOC"] += relCLOC + if int(i) != 0 and int(i) == len(commits) - 1: + if os.path.exists(compare_log_path): + final_prec = utils.extract_precision_from_compare_log(compare_log_path) + summary = {"name": os.path.basename(s), "sequence": seq, "length": len(seq), "intermediate precision": int_prec, "final precision": final_prec, "finalRelCLOC": relCLOC} + seq_summaries.append(summary) + os.chdir(wd) result_avgs = {i: None for i in result_sums.keys()} for i, ps in result_sums.items(): if ps["number_of_commits"] != 0: @@ -295,6 +293,11 @@ def merge_results(outfilename): json.dump(res, f, indent=4) res +def 
merge_all_results(): + print("\nmerge results") + for suffix in utils.compare_runs_suffixes: + results_filename = utils.precision_result_file_name_with_suffix(suffix) + merge_results(results_filename, suffix) if not only_collect_results: os.mkdir(res_dir) @@ -307,6 +310,4 @@ def merge_results(outfilename): print("\nanalyze sequences in parallel") analyze_seq_in_parallel(seq_list) -print("\nmerge results") -results_filename = "results.json" -merge_results(results_filename) +merge_all_results() diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index 02b179bbd..ca2033ce8 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -125,7 +125,6 @@ def append_to_repo_path(file): # Run the analysis with open(os.path.join(outdir, analyzerlog), "w+") as outfile: subprocess.run(analyze_command, check=True, stdout=outfile, stderr=subprocess.STDOUT) - print("Started run:\n" + analyze_command) outfile.close() def compare_runs(analyzer_dir, dummy_c_file, outdir, log_suffix, conf, compare_data_1, compare_data_2): @@ -185,6 +184,10 @@ def extract_precision_from_compare_log(log): precision = find_line(pattern, log) return {k: int(v) for k,v in precision.items()} if precision else None +def precision_result_file_name_with_suffix(suffix): + result_file_name = "results" + suffix + ".json" + return result_file_name + def barplot(df, figure_dir, outfile, figsize=None, colors=None): df.plot.bar(rot=0, width=0.7, figsize=figsize, color=colors) plt.xticks(rotation=45, ha='right', rotation_mode='anchor') From 26fc662ac398f14c039404dddd9529fb86ed774a Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Sun, 23 Jul 2023 18:44:45 +0200 Subject: [PATCH 81/84] Add underscore in file names of results produced by precision script and the precision plots. 
--- scripts/incremental/benchmarking/plot.py | 2 +- scripts/incremental/benchmarking/utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/incremental/benchmarking/plot.py b/scripts/incremental/benchmarking/plot.py index 8b07c6341..ede93d8b7 100644 --- a/scripts/incremental/benchmarking/plot.py +++ b/scripts/incremental/benchmarking/plot.py @@ -183,7 +183,7 @@ def paper_precision_graph(results_precision, filename, outdir, suffix): data.append((x,y)) halftextwidth = 3.3 size=(halftextwidth,halftextwidth*2/3) - outfile = os.path.join(outdir, "precision_figure" + suffix + ".pgf") + outfile = os.path.join(outdir, "precision_figure_" + suffix + ".pgf") utils.scatter_plot(data, "\# Commits", "Share of less precise program points", outfile, size) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index ca2033ce8..235dd484b 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -185,7 +185,7 @@ def extract_precision_from_compare_log(log): return {k: int(v) for k,v in precision.items()} if precision else None def precision_result_file_name_with_suffix(suffix): - result_file_name = "results" + suffix + ".json" + result_file_name = "results_" + suffix + ".json" return result_file_name def barplot(df, figure_dir, outfile, figsize=None, colors=None): df.plot.bar(rot=0, width=0.7, figsize=figsize, color=colors) plt.xticks(rotation=45, ha='right', rotation_mode='anchor') From 20571d34f44240fa5e39e5a026b67c587a770731 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Tue, 25 Jul 2023 10:25:09 +0200 Subject: [PATCH 82/84] Precision script: explicitly pass load-dir/save-dir for incremental data. 
--- scripts/incremental/benchmarking/precision.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/scripts/incremental/benchmarking/precision.py b/scripts/incremental/benchmarking/precision.py index 41c224bf5..cce567ebb 100644 --- a/scripts/incremental/benchmarking/precision.py +++ b/scripts/incremental/benchmarking/precision.py @@ -109,13 +109,13 @@ def find_sequences(): return seq_list # returns the file where the incremental results are stored for comparison -def incremental_analyze(commit, out_commit, out_dir_name, compare_data_file, gr, repo_path, conf, add_options): +def incremental_analyze(commit, out_commit, out_dir_name, incremental_dir, compare_data_file, gr, repo_path, conf, add_options): # analyze commit incrementally based on the previous commit and save run for comparison # print('Analyze', str(commit.hash), 'incrementally (#', commit_num, ').') out_incr = os.path.join(out_commit, out_dir_name) os.makedirs(out_incr) file_incremental_run = os.path.join(out_incr, compare_data_file) - add_options = add_options + ['--enable', 'incremental.load', '--enable', 'incremental.save', '--set', 'save_run', file_incremental_run] + add_options = add_options + ['--set','incremental.load-dir', incremental_dir, '--set','incremental.save-dir', incremental_dir, '--enable', 'incremental.load', '--enable', 'incremental.save', '--set', 'save_run', file_incremental_run] utils.analyze_commit(analyzer_dir, gr, repo_path, build_compdb, commit.hash, out_incr, conf, add_options, files) return file_incremental_run @@ -124,6 +124,13 @@ def analyze_series_in_repo(series): commit_num = 0 repo_path = os.path.abspath(repo_name) out_dir = os.path.abspath('out') + + incremental_data = "incremental_data_" + incr_data_dir = os.path.abspath(incremental_data + "incr") + incr_post_data_dir = os.path.abspath(incremental_data + "incr_post") + incr_post_rel_data_dir = os.path.abspath(incremental_data + "incr_post_rel") + + with open('sequence.json', 'w') as 
outfile: json.dump(series, outfile, indent=4) dummy_c_file = "file.c" @@ -181,10 +188,10 @@ def analyze_series_in_repo(series): # analyze commit incrementally based on the previous commit and save run for comparison # print('Analyze', str(commit.hash), 'incrementally (#', commit_num, ').') - file_incr_run = incremental_analyze(commit, out_commit, 'incr', "compare-data-incr", gr, repo_path, conf, []) - file_incr_post_run = incremental_analyze(commit, out_commit, 'incr-post', "compare-data-incr-post", gr, repo_path, conf_incrpost, []) + file_incr_run = incremental_analyze(commit, out_commit, 'incr', incr_data_dir, "compare-data-incr", gr, repo_path, conf, []) + file_incr_post_run = incremental_analyze(commit, out_commit, 'incr-post', incr_post_data_dir, "compare-data-incr-post", gr, repo_path, conf_incrpost, []) reluctant_option = ['--enable', 'incremental.reluctant.enabled'] - file_incr_rel_post_run = incremental_analyze(commit, out_commit, 'incr-post-rel', "compare-data-incr-post-rel", gr, repo_path, conf_incrpost, reluctant_option) + file_incr_rel_post_run = incremental_analyze(commit, out_commit, 'incr-post-rel', incr_post_rel_data_dir, "compare-data-incr-post-rel", gr, repo_path, conf_incrpost, reluctant_option) if commit_num in compare_commits or commit_num == len(series) - 1: # compare stored data of original and incremental run From 41d5558c1b5134f65a574583018e16aa7283c146 Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Fri, 28 Jul 2023 10:00:58 +0200 Subject: [PATCH 83/84] Remove detect-renames field for compatibility with sttt-2022 goblint. 
--- scripts/incremental/benchmarking/conf/chrony-incrpostsolver.json | 1 - scripts/incremental/benchmarking/conf/chrony.json | 1 - scripts/incremental/benchmarking/conf/figlet-incrpostsolver.json | 1 - scripts/incremental/benchmarking/conf/figlet.json | 1 - .../incremental/benchmarking/conf/zstd-race-incrpostsolver.json | 1 - scripts/incremental/benchmarking/conf/zstd-race.json | 1 - 6 files changed, 6 deletions(-) diff --git a/scripts/incremental/benchmarking/conf/chrony-incrpostsolver.json b/scripts/incremental/benchmarking/conf/chrony-incrpostsolver.json index b87872714..8a97510dd 100644 --- a/scripts/incremental/benchmarking/conf/chrony-incrpostsolver.json +++ b/scripts/incremental/benchmarking/conf/chrony-incrpostsolver.json @@ -104,7 +104,6 @@ "postsolver": { "enabled": true }, - "detect-renames": false, "restart": { "sided": { "enabled": false diff --git a/scripts/incremental/benchmarking/conf/chrony.json b/scripts/incremental/benchmarking/conf/chrony.json index 8cfa70f2f..a2fe392e4 100644 --- a/scripts/incremental/benchmarking/conf/chrony.json +++ b/scripts/incremental/benchmarking/conf/chrony.json @@ -104,7 +104,6 @@ "postsolver": { "enabled": false }, - "detect-renames": false, "restart": { "sided": { "enabled": false diff --git a/scripts/incremental/benchmarking/conf/figlet-incrpostsolver.json b/scripts/incremental/benchmarking/conf/figlet-incrpostsolver.json index 68d3fee50..46ad26fce 100644 --- a/scripts/incremental/benchmarking/conf/figlet-incrpostsolver.json +++ b/scripts/incremental/benchmarking/conf/figlet-incrpostsolver.json @@ -93,7 +93,6 @@ "postsolver": { "enabled": true }, - "detect-renames": false, "restart": { "sided": { "enabled": false diff --git a/scripts/incremental/benchmarking/conf/figlet.json b/scripts/incremental/benchmarking/conf/figlet.json index 0e93dc207..3e80b8ffe 100644 --- a/scripts/incremental/benchmarking/conf/figlet.json +++ b/scripts/incremental/benchmarking/conf/figlet.json @@ -93,7 +93,6 @@ "postsolver": { "enabled": 
false }, - "detect-renames": false, "restart": { "sided": { "enabled": false diff --git a/scripts/incremental/benchmarking/conf/zstd-race-incrpostsolver.json b/scripts/incremental/benchmarking/conf/zstd-race-incrpostsolver.json index 4df9e9a2c..dbe858b98 100644 --- a/scripts/incremental/benchmarking/conf/zstd-race-incrpostsolver.json +++ b/scripts/incremental/benchmarking/conf/zstd-race-incrpostsolver.json @@ -112,7 +112,6 @@ "postsolver": { "enabled": true }, - "detect-renames": false, "restart": { "sided": { "enabled": false diff --git a/scripts/incremental/benchmarking/conf/zstd-race.json b/scripts/incremental/benchmarking/conf/zstd-race.json index 095596d25..b3c4a49ac 100644 --- a/scripts/incremental/benchmarking/conf/zstd-race.json +++ b/scripts/incremental/benchmarking/conf/zstd-race.json @@ -112,7 +112,6 @@ "postsolver": { "enabled": false }, - "detect-renames": false, "restart": { "sided": { "enabled": false From 6158b17ed1d5f75ccee2ad0460a88a42f9fb275d Mon Sep 17 00:00:00 2001 From: Julian Erhard Date: Sun, 30 Jul 2023 11:14:04 +0200 Subject: [PATCH 84/84] Adapt script to extract changed functions from goblint version sttt-2022. 
--- scripts/incremental/benchmarking/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/incremental/benchmarking/utils.py b/scripts/incremental/benchmarking/utils.py index 235dd484b..cb29a652d 100644 --- a/scripts/incremental/benchmarking/utils.py +++ b/scripts/incremental/benchmarking/utils.py @@ -165,7 +165,7 @@ def extract_from_analyzer_log(log): runtime_pattern = 'Default[ ]+(?P[0-9\.]+)s[ ]+(?P[0-9\.]+)s' analysis_time_pattern = 'analysis[ ]+(?P[0-9\.]+)s[ ]+(?P[0-9\.]+)s' solving_time_pattern = 'solving[ ]+(?P[0-9\.]+)s[ ]+(?P[0-9\.]+)s' - change_info_pattern = 'change_info = { unchanged = (?P[0-9]*); changed = (?P[0-9]*) \\(with unchangedHeader = (?P[0-9]*)\\); added = (?P[0-9]*); removed = (?P[0-9]*) }' + change_info_pattern = 'change_info = { unchanged = (?P[0-9]*); changed = (?P[0-9]*); added = (?P[0-9]*); removed = (?P[0-9]*) }' runtime = find_line(runtime_pattern, log)