
chore: EXC: Heap benchmarks (take 3) #4801

Draft pull request: wants to merge 12 commits into master
1,472 changes: 1,472 additions & 0 deletions 3cc2d870fb.log

Large diffs are not rendered by default.

192 changes: 192 additions & 0 deletions EMBEDDERS_HEAP.min

Large diffs are not rendered by default.

1,386 changes: 1,386 additions & 0 deletions b52f12533a.log

Large diffs are not rendered by default.

1,439 changes: 1,439 additions & 0 deletions e4fba84ca1.log

Large diffs are not rendered by default.

8 changes: 5 additions & 3 deletions rs/embedders/benches/heap.rs
@@ -46,9 +46,11 @@ enum Step {
#[strum(serialize = "step_8")]
Small = 8,
#[strum(serialize = "step_4kb")]
Page = 4096,
Page = PAGE_SIZE as isize,
#[strum(serialize = "step_16kb")]
FourPages = 16384,
FourPages = 4 * PAGE_SIZE as isize,
#[strum(serialize = "step_2mb")]
HugePage = 512 * PAGE_SIZE as isize,
}

#[derive(Copy, Clone, Display, EnumIter)]
@@ -111,7 +113,7 @@ fn loop_body(op: &str, mem: Mem, dir: Dir, size: Size, step: Step) -> String {
(loop $loop
(local.set $address (i{mem}.sub (local.get $address) (i{mem}.const {step})))
{op}
(br_if $loop (i{mem}.gt_s (local.get $address) (i{mem}.const 0)))
(br_if $loop (i{mem}.ge_s (local.get $address) (i{mem}.const {step})))
)
"#
),
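
The loop termination check above changes from `gt_s` against 0 to `ge_s` against the step, and the new `HugePage` step is 512 * PAGE_SIZE, i.e. 2 MiB with 4 KiB pages, matching the `step_2mb` label. A minimal Rust sketch (illustrative only, not taken from the PR) of one observable difference between the two conditions when the starting address is not a multiple of the step:

    // Plain-Rust rendering of the loop body's termination condition
    // (i32 addresses, the benchmarked op itself elided). The old check
    // `address > 0` allows one extra iteration that underflows past 0 when
    // the remaining address is positive but smaller than `step`; the new
    // check `address >= step` stops while the next full step still fits.
    fn last_touched_address(start: i32, step: i32, new_check: bool) -> i32 {
        let mut address = start;
        loop {
            address -= step;
            // ... benchmark op at `address` ...
            let keep_going = if new_check { address >= step } else { address > 0 };
            if !keep_going {
                break;
            }
        }
        address
    }

    fn main() {
        // Start address not a multiple of the step size:
        assert_eq!(last_touched_address(6000, 4096, false), -2192); // old: underflows
        assert_eq!(last_touched_address(6000, 4096, true), 1904);   // new: stays in bounds
        // For multiples of the step, both conditions finish exactly at 0.
        assert_eq!(last_touched_address(16384, 4096, true), 0);
    }
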
9 changes: 9 additions & 0 deletions rs/embedders/src/wasmtime_embedder/host_memory.rs
@@ -172,6 +172,15 @@ impl MmapMemory {
size_in_bytes,
Error::last_os_error()
);
// SAFETY: the memory region was just successfully mapped.
// Enable Transparent Huge Pages (THP) for the newly allocated memory.
#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
unsafe {
use nix::sys::mman::{madvise, MmapAdvise};
madvise(start, size_in_bytes, MmapAdvise::MADV_HUGEPAGE).unwrap_or_else(|err| {
eprintln!("[EXC-BUG] Error in `madvise` addr:{start:?} len:{size_in_bytes}: {err}")
});
}

// SAFETY: The allocated region includes the prologue guard region.
let wasm_memory =
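
For context, a standalone sketch of the same pattern (assuming Linux x86_64 and the `libc` crate; this is not the embedder's code): map an anonymous region and hint the kernel to back it with 2 MiB transparent huge pages via `MADV_HUGEPAGE`.

    use std::ptr;

    fn main() {
        // 64 MiB, a multiple of the 2 MiB huge page size.
        let len: usize = 64 * 1024 * 1024;
        unsafe {
            let addr = libc::mmap(
                ptr::null_mut(),
                len,
                libc::PROT_READ | libc::PROT_WRITE,
                libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
                -1,
                0,
            );
            assert_ne!(addr, libc::MAP_FAILED, "mmap: {}", std::io::Error::last_os_error());
            // Ask for Transparent Huge Pages; failure is non-fatal, as in the diff above.
            if libc::madvise(addr, len, libc::MADV_HUGEPAGE) != 0 {
                eprintln!("madvise(MADV_HUGEPAGE): {}", std::io::Error::last_os_error());
            }
            // Touch the region so it is actually faulted in (ideally as huge pages).
            ptr::write_bytes(addr as *mut u8, 0xAB, len);
            libc::munmap(addr, len);
        }
    }

Whether the hint took effect can be checked on a running process via the AnonHugePages field of the mapping in /proc/<pid>/smaps.
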
336 changes: 192 additions & 144 deletions rs/execution_environment/benches/baseline/EMBEDDERS_HEAP.min

Large diffs are not rendered by default.

24 changes: 12 additions & 12 deletions rs/execution_environment/benches/run-all-benchmarks.sh
@@ -52,18 +52,18 @@ run() {
}

for i in $(seq 1 "${REPEAT}"); do
run "${i}" "Embedders Compilation" \
"//rs/embedders:compilation_bench" "EMBEDDERS_COMPILATION.min"
# run "${i}" "Embedders Compilation" \
# "//rs/embedders:compilation_bench" "EMBEDDERS_COMPILATION.min"
run "${i}" "Embedders Heap" \
"//rs/embedders:heap_bench" "EMBEDDERS_HEAP.min"
run "${i}" "Embedders Stable Memory" \
"//rs/embedders:stable_memory_bench" "EMBEDDERS_STABLE_MEMORY.min"
run "${i}" "System API Inspect Message" \
"//rs/execution_environment:execute_inspect_message_bench" "SYSTEM_API_INSPECT_MESSAGE.min"
run "${i}" "System API Query" \
"//rs/execution_environment:execute_query_bench" "SYSTEM_API_QUERY.min"
run "${i}" "System API Update" \
"//rs/execution_environment:execute_update_bench" "SYSTEM_API_UPDATE.min"
run "${i}" "Wasm Instructions" \
"//rs/execution_environment:wasm_instructions_bench" "WASM_INSTRUCTIONS.min"
# run "${i}" "Embedders Stable Memory" \
# "//rs/embedders:stable_memory_bench" "EMBEDDERS_STABLE_MEMORY.min"
# run "${i}" "System API Inspect Message" \
# "//rs/execution_environment:execute_inspect_message_bench" "SYSTEM_API_INSPECT_MESSAGE.min"
# run "${i}" "System API Query" \
# "//rs/execution_environment:execute_query_bench" "SYSTEM_API_QUERY.min"
# run "${i}" "System API Update" \
# "//rs/execution_environment:execute_update_bench" "SYSTEM_API_UPDATE.min"
# run "${i}" "Wasm Instructions" \
# "//rs/execution_environment:wasm_instructions_bench" "WASM_INSTRUCTIONS.min"
done
65 changes: 45 additions & 20 deletions rs/execution_environment/benches/summarize-results.sh
@@ -8,6 +8,9 @@ set -ue
DEPENDENCIES="awk rg sed"
which ${DEPENDENCIES} >/dev/null || (echo "Error checking dependencies: ${DEPENDENCIES}" >&2 && exit 1)

NOISE_THRESHOLD_PCT="2"
TOP_N="10"

printf " %-12s := %s\n" \
"MIN_FILE" "${MIN_FILE:=${0##*/}.min}" \
"BASELINE_DIR" "${BASELINE_DIR:=${0%/*}/baseline}" >&2
@@ -24,13 +27,20 @@ if [ ! -s "${BASELINE_FILE}" ]; then
echo " No baseline found in ${BASELINE_FILE}" >&2 && exit 1
fi

echo_diff() {
echo_diff_ms_pct() {
# The baseline file exists, but none of the benchmarks matched it.
if [ "${1}" -gt "0" ]; then
diff=$(((${2} - ${1}) * 100 * 10 / ${1}))
awk "BEGIN { print (${diff})^2 <= (2 * 10)^2 ? 0 : ${diff} / 10 }"
awk "BEGIN {
diff_pct = (${2} - ${1}) * 100 / ${1}
diff_ms = (${2} - ${1}) / 1000 / 1000
if (diff_pct ^ 2 <= ${NOISE_THRESHOLD_PCT} ^ 2) {
printf \"0 0\n\"
} else {
printf \"%.1f %.1f\n\", diff_ms, diff_pct
};
}"
else
echo "0"
echo "0 0"
fi
}

@@ -45,39 +55,54 @@ while read min_bench; do
name="${name% ... bench:*}"
new_result_ns="${min_bench#* ... bench: }"
new_result_ns="${new_result_ns% ns/iter*}"
total_new_ns=$((total_new_ns + new_result_ns))

baseline_bench=$(rg -F "test ${name} ... bench:" "${BASELINE_FILE}" || true)
baseline_result_ns="${baseline_bench#* ... bench: }"
baseline_result_ns="${baseline_result_ns% ns/iter*}"

if [ -n "${new_result_ns}" -a -n "${baseline_result_ns}" ]; then
total_baseline_ns=$((total_baseline_ns + baseline_result_ns))
total_new_ns=$((total_new_ns + new_result_ns))
echo "$(echo_diff "${baseline_result_ns}" "${new_result_ns}") ${name}" >>"${TMP_FILE}"
diff_ms_pct=$(echo_diff_ms_pct "${baseline_result_ns}" "${new_result_ns}")
echo "${diff_ms_pct} ${name}" >>"${TMP_FILE}"
fi
done <"${MIN_FILE}"

# Produce a summary.
baseline_commit=$(git rev-list --abbrev-commit -1 HEAD "${BASELINE_FILE}")
min_commit=$(git rev-list --abbrev-commit -1 HEAD)
total_diff=$(echo_diff "${total_baseline_ns}" "${total_new_ns}")
read total_diff_ms total_diff_pct < <(echo_diff_ms_pct "${total_baseline_ns}" "${total_new_ns}")
printf "= ${baseline_commit}..${min_commit}: ${NAME} total time: $((total_new_ns / 1000 / 1000)) ms "
case "${total_diff}" in
0) echo "(no change)" ;;
-*) echo "(improved by ${total_diff}%)" ;;
*) echo "(regressed by ${total_diff}%)" ;;
case "${total_diff_pct}/${total_baseline_ns}" in
0/0) echo "(new)" ;;
0*) echo "(no change)" ;;
-*) echo "(improved by ${total_diff_ms} ms / ${total_diff_pct}%)" ;;
*) echo "(regressed by ${total_diff_ms} ms / ${total_diff_pct}%)" ;;
esac

# Produce top regressed/improved details.
if [ "${total_diff}" != "0" ]; then
cat "${TMP_FILE}" | sort -rn | rg '^[1-9]' | head -5 | while read diff name; do
echo " + ${name} time regressed by ${diff}%"
done
cat "${TMP_FILE}" | sort -n | rg '^-' | head -5 | while read diff name; do
echo " - ${name} time improved by ${diff}%"
done
if [ "${total_diff_pct}" != "0" ]; then
echo " Top ${TOP_N} by time:"
cat "${TMP_FILE}" | sort -rn | rg '^[1-9]' | head -${TOP_N} \
| while read diff_ms diff_pct name; do
echo " + ${name} time regressed by ${diff_ms} ms (${diff_pct}%)"
done
cat "${TMP_FILE}" | sort -n | rg '^-' | head -${TOP_N} \
| while read diff_ms diff_pct name; do
echo " - ${name} time improved by ${diff_ms} ms (${diff_pct}%)"
done
echo " Top ${TOP_N} by percentage:"
cat "${TMP_FILE}" | sort -rnk 2 | rg '^[1-9]' | head -${TOP_N} \
| while read diff_ms diff_pct name; do
echo " + ${name} time regressed by ${diff_pct}% (${diff_ms} ms)"
done
cat "${TMP_FILE}" | sort -nk 2 | rg '^-' | head -${TOP_N} \
| while read diff_ms diff_pct name; do
echo " - ${name} time improved by ${diff_pct}% (${diff_ms} ms)"
done
fi
rm -f "${TMP_FILE}"

# Return an error if there are changes, so the calling script might retry or report an error.
[ "${total_diff}" == "0" ]
# Return an error if there are changes or there is no baseline (new benchmarks),
# so the calling script might retry or report an error.
[ "${total_diff_pct}" == "0" -a "${total_baseline_ns}" != "0" ]
78 changes: 76 additions & 2 deletions rs/memory_tracker/src/lib.rs
@@ -4,7 +4,7 @@ use ic_replicated_state::{
page_map::{FileDescriptor, MemoryInstructions},
PageIndex, PageMap,
};
use ic_sys::PAGE_SIZE;
use ic_sys::{HUGE_PAGE_SIZE, PAGE_SIZE};
use ic_types::{NumBytes, NumOsPages};
use nix::{
errno::Errno,
@@ -20,7 +20,13 @@ use std::{
// checkpoint file per signal handler call. Higher value gives higher
// throughput in memory intensive workloads, but may regress performance
// in other workloads because it increases work per signal handler call.
const MAX_PAGES_TO_MAP: usize = 128;
//
// The Transparent Huge Pages (THP) are enabled for the entire memory region
// (refer to `MmapMemory::new()`). To use them, we must prefetch large blocks,
// at least 2 MiB.
//
// The number of normal pages within a 2 MiB huge page is 2 MiB / 4 KiB = 512.
const MAX_PAGES_TO_MAP: usize = 512;

// The new signal handler requires `AccessKind`, which is currently available only
// on Linux without WSL.
@@ -408,6 +414,64 @@ impl SigsegvMemoryTracker {
self.accessed_bitmap.borrow().page_range()
}

/// Converts a range of page indices into a corresponding range of memory addresses.
fn addr_range_from(&self, range: &Range<PageIndex>) -> Range<usize> {
let start_addr = self.page_start_addr_from(range.start) as usize;
let end_addr = self.page_start_addr_from(range.end) as usize;
start_addr..end_addr
}

/// Attempts to shrink the given page index range to be aligned with
/// huge page boundaries.
///
/// If aligning either the start or end of the range would exclude
/// the `faulting_page`, that side of the range will remain unaligned
/// to ensure the faulting page stays within the adjusted range.
fn try_align_to_hugepage(
&self,
faulting_page: PageIndex,
range: Range<PageIndex>,
) -> Range<PageIndex> {
debug_assert!(
range.contains(&faulting_page),
"Error checking page:{faulting_page} ∈ range:{range:?}"
);
let hugepage_mask = HUGE_PAGE_SIZE - 1;
let faulting_addr = self.page_start_addr_from(faulting_page) as usize;
let Range {
start: start_addr,
end: end_addr,
} = self.addr_range_from(&range);
// Try to align the range start address up to the nearest Huge Page.
let aligned_start_addr = (start_addr + hugepage_mask) & !hugepage_mask;
let start_idx = if faulting_addr >= aligned_start_addr {
self.page_index_from(aligned_start_addr as *mut libc::c_void)
} else {
range.start
};
// Try to align the range end address down to the nearest Huge Page.
let aligned_end_addr = end_addr & !hugepage_mask;
let end_idx = if faulting_addr < aligned_end_addr {
self.page_index_from(aligned_end_addr as *mut libc::c_void)
} else {
range.end
};

let aligned_range = start_idx..end_idx;
debug_assert!(
aligned_range.contains(&faulting_page),
"Error checking page:{faulting_page} ∈ aligned range:{aligned_range:?}"
);
debug_assert!(
aligned_range.start >= range.start && aligned_range.end <= range.end,
"Error checking aligned range:{:?} ⊆ original range:{:?}",
aligned_range,
range
);

aligned_range
}

fn add_dirty_pages(&self, dirty_page: PageIndex, prefetched_range: Range<PageIndex>) {
let range = prefetched_range.start.get() as usize..prefetched_range.end.get() as usize;
let mut dirty_pages = self.dirty_pages.borrow_mut();
@@ -647,8 +711,12 @@ pub fn sigsegv_fault_handler_new(
range_from_count(faulting_page, NumOsPages::new(MAX_PAGES_TO_MAP as u64));
let max_prefetch_range =
accessed_bitmap.restrict_range_to_unmarked(faulting_page, prefetch_range);
let max_prefetch_range =
tracker.try_align_to_hugepage(faulting_page, max_prefetch_range);
let min_prefetch_range = accessed_bitmap
.restrict_range_to_predicted(faulting_page, max_prefetch_range.clone());
let min_prefetch_range =
tracker.try_align_to_hugepage(faulting_page, min_prefetch_range);
let prefetch_range = map_unaccessed_pages(
tracker,
ProtFlags::PROT_READ | ProtFlags::PROT_WRITE,
@@ -664,8 +732,12 @@ pub fn sigsegv_fault_handler_new(
range_from_count(faulting_page, NumOsPages::new(MAX_PAGES_TO_MAP as u64));
let max_prefetch_range =
accessed_bitmap.restrict_range_to_unmarked(faulting_page, prefetch_range);
let max_prefetch_range =
tracker.try_align_to_hugepage(faulting_page, max_prefetch_range);
let min_prefetch_range = accessed_bitmap
.restrict_range_to_predicted(faulting_page, max_prefetch_range.clone());
let min_prefetch_range =
tracker.try_align_to_hugepage(faulting_page, min_prefetch_range);
let prefetch_range = map_unaccessed_pages(
tracker,
ProtFlags::PROT_READ,
@@ -694,6 +766,7 @@ pub fn sigsegv_fault_handler_new(
// Amortize the prefetch work based on the previously written pages.
let prefetch_range =
dirty_bitmap.restrict_range_to_predicted(faulting_page, prefetch_range);
let prefetch_range = tracker.try_align_to_hugepage(faulting_page, prefetch_range);
let page_start_addr = tracker.page_start_addr_from(prefetch_range.start);
unsafe {
mprotect(
@@ -725,6 +798,7 @@ pub fn sigsegv_fault_handler_new(
// Amortize the prefetch work based on the previously written pages.
let prefetch_range =
dirty_bitmap.restrict_range_to_predicted(faulting_page, prefetch_range);
let prefetch_range = tracker.try_align_to_hugepage(faulting_page, prefetch_range);
let prefetch_range = map_unaccessed_pages(
tracker,
ProtFlags::PROT_READ | ProtFlags::PROT_WRITE,
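
The huge-page alignment in `try_align_to_hugepage` rounds the range start up and the range end down to 2 MiB boundaries using the `HUGE_PAGE_SIZE - 1` mask, falling back to the unaligned bound whenever alignment would drop the faulting page. A minimal sketch of just the mask arithmetic (assuming a 2 MiB huge page; not the tracker's code):

    const HUGE_PAGE_SIZE: usize = 2 * 1024 * 1024; // assumption for illustration

    // Round an address forward to the next huge-page boundary.
    fn align_up(addr: usize) -> usize {
        let mask = HUGE_PAGE_SIZE - 1;
        (addr + mask) & !mask
    }

    // Round an address back to the previous huge-page boundary.
    fn align_down(addr: usize) -> usize {
        addr & !(HUGE_PAGE_SIZE - 1)
    }

    fn main() {
        assert_eq!(align_up(0x20_0000), 0x20_0000);   // already aligned
        assert_eq!(align_up(0x20_0001), 0x40_0000);   // rounds forward
        assert_eq!(align_down(0x3f_ffff), 0x20_0000); // rounds back
        assert_eq!(align_down(0x40_0000), 0x40_0000); // already aligned
    }
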