mirror of
https://github.com/cirosantilli/linux-kernel-module-cheat.git
synced 2026-01-22 17:55:57 +01:00
bst vs heap: move in fully from cpp-cheat
This commit is contained in:
87
README.adoc
87
README.adoc
@@ -975,7 +975,7 @@ This setup:
|
||||
+
|
||||
--
|
||||
** can run most examples, including those for other CPU architectures, with the notable exception of examples that rely on kernel modules
|
||||
** can run reproducible approximate performance experiments with gem5, see e.g. <<bst-vs-heap>>
|
||||
** can run reproducible approximate performance experiments with gem5, see e.g. <<bst-vs-heap-vs-hashmap>>
|
||||
--
|
||||
* from full system simulation as shown at: <<qemu-buildroot-setup-getting-started>>.
|
||||
+
|
||||
@@ -9961,7 +9961,7 @@ Now you can play a fun little game with your friends:
|
||||
* make a program that solves the computation problem, and writes its output to stdout
|
||||
* write the code that runs the correct computation in the smallest number of cycles possible
|
||||
|
||||
To find out why your program is slow, a good first step is to have a look at <<stats-txt>> file.
|
||||
To find out why your program is slow, a good first step is to have a look at the <<gem5-stats-txt>> file.
|
||||
|
||||
==== Skip extra benchmark instructions
|
||||
|
||||
@@ -10210,36 +10210,79 @@ Buildroot built-in libraries, mostly under Libraries > Other:
|
||||
|
||||
These are not yet enabled, but it should be easy to do so, see: <<add-new-buildroot-packages>>
|
||||
|
||||
===== BST vs heap
|
||||
===== BST vs heap vs hashmap
|
||||
|
||||
https://stackoverflow.com/questions/6147242/heap-vs-binary-search-tree-bst/29548834#29548834
|
||||
The following benchmark setup works both:
|
||||
|
||||
First we build it with <<m5ops-instructions>> enabled, and then we extract the stats:
|
||||
* on host through timers + link:https://stackoverflow.com/questions/51952471/why-do-i-get-a-constant-instead-of-logarithmic-curve-for-an-insert-time-benchmar/51953081#51953081[granule]
|
||||
* gem5 with <<m5ops-instructions,dumpstats>>, which can get more precise results with `granule == 1`
|
||||
|
||||
It has been used to answer:
|
||||
|
||||
* BST vs heap: https://stackoverflow.com/questions/6147242/heap-vs-binary-search-tree-bst/29548834#29548834
|
||||
* `std::set`: https://stackoverflow.com/questions/2558153/what-is-the-underlying-data-structure-of-a-stl-set-in-c/51944661#51944661
|
||||
* `std::map`: https://stackoverflow.com/questions/18414579/what-data-structure-is-inside-stdmap-in-c/51945119#51945119
|
||||
|
||||
To benchmark on the host, we do:
|
||||
|
||||
....
|
||||
./build-userland-in-tree --force-rebuild --optimization-level 3 ./userland/cpp/bst_vs_heap_vs_hashmap.cpp
|
||||
./userland/cpp/bst_vs_heap_vs_hashmap.out | tee bst_vs_heap_vs_hashmap.dat
|
||||
gnuplot \
|
||||
-e 'input_noext="bst_vs_heap_vs_hashmap"' \
|
||||
-e 'heap_zoom_max=50' \
|
||||
-e 'hashmap_zoom_max=400' \
|
||||
./bst-vs-heap-vs-hashmap.gnuplot \
|
||||
;
|
||||
xdg-open bst_vs_heap_vs_hashmap.tmp.png
|
||||
....
|
||||
|
||||
The parameters `heap_zoom_max` and `hashmap_zoom_max` are chosen manually and interactively to best showcase the regions of interest in those plots.
|
||||
|
||||
First we build the benchmark with <<m5ops-instructions>> enabled, and then we run it and extract the stats:
|
||||
|
||||
....
|
||||
./build-userland \
|
||||
--arch aarch64 \
|
||||
--arch x86_64 \
|
||||
--ccflags='-DLKMC_M5OPS_ENABLE=1' \
|
||||
--force-rebuild cpp/bst_vs_heap \
|
||||
--force-rebuild userland/cpp/bst_vs_heap_vs_hashmap.cpp \
|
||||
--static \
|
||||
--optimization-level 3 \
|
||||
;
|
||||
./run \
|
||||
--arch aarch64 \
|
||||
--arch x86_64 \
|
||||
--emulator gem5 \
|
||||
--static \
|
||||
--userland userland/cpp/bst_vs_heap.cpp \
|
||||
--userland-args='1000' \
|
||||
--userland userland/cpp/bst_vs_heap_vs_hashmap.cpp \
|
||||
--userland-args='100000' \
|
||||
-- \
|
||||
--cpu-type=DerivO3CPU \
|
||||
--caches \
|
||||
--l2cache \
|
||||
--l1d_size=32kB \
|
||||
--l1i_size=32kB \
|
||||
--l2_size=256kB \
|
||||
--l3_size=20MB \
|
||||
;
|
||||
./bst-vs-heap --arch aarch64 > bst_vs_heap.dat
|
||||
./bst-vs-heap.gnuplot
|
||||
xdg-open bst-vs-heap.tmp.png
|
||||
./bst-vs-heap-vs-hashmap-gem5-stats --arch x86_64 | tee bst_vs_heap_vs_hashmap_gem5.dat
|
||||
gnuplot \
|
||||
-e 'input_noext="bst_vs_heap_vs_hashmap_gem5"' \
|
||||
-e 'heap_zoom_max=500' \
|
||||
-e 'hashmap_zoom_max=400' \
|
||||
./bst-vs-heap-vs-hashmap.gnuplot \
|
||||
;
|
||||
xdg-open bst_vs_heap_vs_hashmap_gem5.tmp.png
|
||||
....
|
||||
|
||||
The cache sizes were chosen to match the host <<p51>> to improve the comparison. Ideally we should also use the same standard library.
|
||||
|
||||
Note that this will take a long time, and will produce a humongous ~40Gb stats file due to: <<gem5-only-dump-selected-stats>>
|
||||
|
||||
Sources:
|
||||
|
||||
* link:userland/cpp/bst_vs_heap.cpp[]
|
||||
* link:bst-vs-heap[]
|
||||
* link:bst-vs-heap.gnuplot[]
|
||||
* link:userland/cpp/bst_vs_heap_vs_hashmap.cpp[]
|
||||
* link:bst-vs-heap-vs-hashmap-gem5-stats[]
|
||||
* link:bst-vs-heap-vs-hashmap.gnuplot[]
|
||||
|
||||
===== BLAS
|
||||
|
||||
@@ -11110,7 +11153,7 @@ Contains UART output, both from the Linux kernel or from the baremetal system.
|
||||
|
||||
Can also be seen live on <<m5term>>.
|
||||
|
||||
==== stats.txt
|
||||
==== gem5 stats.txt
|
||||
|
||||
This file contains important statistics about the run:
|
||||
|
||||
@@ -11136,6 +11179,14 @@ system.cpu.dtb.inst_hits
|
||||
|
||||
For x86, it is interesting to try and correlate `numCycles` with:
|
||||
|
||||
===== gem5 only dump selected stats
|
||||
|
||||
TODO
|
||||
|
||||
https://stackoverflow.com/questions/52014953/how-to-dump-only-a-single-or-certain-selected-stats-in-gem5
|
||||
|
||||
To prevent the stats file from becoming humongous.
|
||||
|
||||
==== config.ini
|
||||
|
||||
The `config.ini` file contains a very good high level description of the system:
|
||||
@@ -12974,7 +13025,7 @@ RDTSC stores its output to EDX:EAX, even in 64-bit mode, top bits are zeroed out
|
||||
|
||||
TODO: review this section, make a more controlled userland experiment with <<m5ops>> instrumentation.
|
||||
|
||||
Let's have some fun and try to correlate the gem5 <<stats-txt>> `system.cpu.numCycles` cycle count with the link:https://en.wikipedia.org/wiki/Time_Stamp_Counter[x86 RDTSC instruction] that is supposed to do the same thing:
|
||||
Let's have some fun and try to correlate the gem5 <<gem5-stats-txt>> `system.cpu.numCycles` cycle count with the link:https://en.wikipedia.org/wiki/Time_Stamp_Counter[x86 RDTSC instruction] that is supposed to do the same thing:
|
||||
|
||||
....
|
||||
./build-userland --static userland/arch/x86_64/inline_asm/rdtsc.S
|
||||
|
||||
@@ -18,13 +18,19 @@ Convert a BST vs heap stat file into a gnuplot input
|
||||
stats = self.get_stats()
|
||||
it = iter(stats)
|
||||
i = 1
|
||||
for stat in it:
|
||||
for heap_num_cycles in it:
|
||||
try:
|
||||
next_stat = next(it)
|
||||
bst_num_cycles = next(it)
|
||||
hashmap_num_cycles = next(it)
|
||||
except StopIteration:
|
||||
# Automatic dumpstats at end may lead to odd number of stats.
|
||||
# Automatic dumpstats at end may lead to one extra stat at the end.
|
||||
break
|
||||
print('{} {} {}'.format(i, stat, next_stat))
|
||||
print('{} {} {} {}'.format(
|
||||
i,
|
||||
heap_num_cycles,
|
||||
bst_num_cycles,
|
||||
hashmap_num_cycles,
|
||||
))
|
||||
i += 1
|
||||
|
||||
if __name__ == '__main__':
|
||||
27
bst-vs-heap-vs-hashmap.gnuplot
Executable file
27
bst-vs-heap-vs-hashmap.gnuplot
Executable file
@@ -0,0 +1,27 @@
|
||||
#!/usr/bin/env gnuplot

# Plot heap, BST and hash map insert measurements as five stacked panels.
#
# Reads  "<input_noext>.dat" and writes "<input_noext>.tmp.png".
# Data columns used: 1 = x value (container size), 2 = heap, 3 = BST,
# 4 = hash map.
#
# Variables, normally passed as `gnuplot -e 'name=value' ... this_script`:
#   input_noext      - data file path without the ".dat" extension
#   heap_zoom_max    - upper y limit of the zoomed heap panel
#   hashmap_zoom_max - upper y limit of the zoomed hash map panel
#   ylabel_text      - y axis label (override it when the data is not in ns)
#
# Every variable has a fallback so that the script also runs when invoked
# without any -e arguments instead of aborting on an undefined variable.
if (!exists("input_noext")) input_noext = "bst_vs_heap_vs_hashmap"
if (!exists("heap_zoom_max")) heap_zoom_max = 50
if (!exists("hashmap_zoom_max")) hashmap_zoom_max = 400
if (!exists("ylabel_text")) ylabel_text = "insert time (ns)"

set terminal png noenhanced size 800, 1400
set output input_noext . ".tmp.png"
set multiplot layout 5,1 title "\nC++ Heap vs BST vs Hash map insert time" font ",22"
set xlabel "container size"
set ylabel ylabel_text
set title font ",16"

set title "Heap (std::priority_queue)"
plot input_noext . ".dat" using 1:2 notitle

set title "Heap (zoom)"
set yrange [0:heap_zoom_max]
plot input_noext . ".dat" using 1:2 notitle

# Go back to autoscaling after the zoomed panel.
set title "BST (std::set)"
set yrange [*:*]
plot input_noext . ".dat" using 1:3 notitle

set title "Hash map (std::unordered_set)"
set yrange [*:*]
plot input_noext . ".dat" using 1:4 notitle

set title "Hash map zoom"
set yrange [0:hashmap_zoom_max]
plot input_noext . ".dat" using 1:4 notitle

# Leave multiplot mode explicitly so the page is completed before exit.
unset multiplot
|
||||
@@ -1,25 +0,0 @@
|
||||
#!/usr/bin/env gnuplot
|
||||
set terminal png size 1024, 2048
|
||||
set output "bst-vs-heap.tmp.png"
|
||||
set multiplot layout 5,1 title "Heap vs BST vs Hash map insert time"
|
||||
set xlabel "size"
|
||||
set ylabel "nanoseconds"
|
||||
|
||||
set title "Heap"
|
||||
plot "bst_vs_heap.dat" using 1:2 notitle
|
||||
|
||||
set title "Heap (zoom)"
|
||||
set yrange [0:25]
|
||||
plot "bst_vs_heap.dat" using 1:2 notitle
|
||||
|
||||
set title "BST"
|
||||
set yrange [*:*]
|
||||
plot "bst_vs_heap.dat" using 1:3 notitle
|
||||
|
||||
#set title "Hash map"
|
||||
#set yrange [*:*]
|
||||
#plot "bst_vs_heap.dat" using 1:4 notitle
|
||||
#
|
||||
#set title "Hash map zoom"
|
||||
#set yrange [0:350]
|
||||
#plot "bst_vs_heap.dat" using 1:4 notitle
|
||||
@@ -474,6 +474,12 @@ path_properties_tuples = (
|
||||
'return2.c': {'exit_status': 2},
|
||||
}
|
||||
),
|
||||
'cpp': (
|
||||
{},
|
||||
{
|
||||
'bst_vs_heap_vs_hashmap.cpp': {'more_than_1s': True},
|
||||
},
|
||||
),
|
||||
'gcc': (
|
||||
gnu_extension_properties,
|
||||
{
|
||||
|
||||
@@ -1,44 +0,0 @@
|
||||
// https://github.com/cirosantilli/linux-kernel-module-cheat#bst-vs-heap
|
||||
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <queue>
|
||||
#include <random>
|
||||
#include <set>
|
||||
|
||||
#include <lkmc/m5ops.h>
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
typedef uint64_t I;
|
||||
std::vector<I> randoms;
|
||||
size_t i, n;
|
||||
std::priority_queue<I> heap;
|
||||
std::set<I> bst;
|
||||
unsigned int seed = std::random_device()();
|
||||
|
||||
// CLI arguments.
|
||||
if (argc > 1) {
|
||||
n = std::stoi(argv[1]);
|
||||
} else {
|
||||
n = 1;
|
||||
}
|
||||
|
||||
// Action.
|
||||
for (i = 0; i < n; ++i) {
|
||||
randoms.push_back(i);
|
||||
}
|
||||
std::shuffle(randoms.begin(), randoms.end(), std::mt19937(seed));
|
||||
for (i = 0; i < n; ++i) {
|
||||
auto random = randoms[i];
|
||||
|
||||
// Heap.
|
||||
LKMC_M5OPS_RESETSTATS;
|
||||
heap.emplace(random);
|
||||
LKMC_M5OPS_DUMPSTATS;
|
||||
|
||||
// BST.
|
||||
LKMC_M5OPS_RESETSTATS;
|
||||
bst.insert(random);
|
||||
LKMC_M5OPS_DUMPSTATS;
|
||||
}
|
||||
}
|
||||
154
userland/cpp/bst_vs_heap_vs_hashmap.cpp
Normal file
154
userland/cpp/bst_vs_heap_vs_hashmap.cpp
Normal file
@@ -0,0 +1,154 @@
|
||||
// https://github.com/cirosantilli/linux-kernel-module-cheat#bst-vs-heap-vs-hashmap
|
||||
|
||||
//#include <algorithm>
|
||||
//#include <iostream>
|
||||
//#include <queue>
|
||||
//#include <random>
|
||||
//#include <set>
|
||||
//
|
||||
//#include <lkmc/m5ops.h>
|
||||
//
|
||||
//int main(int argc, char **argv) {
|
||||
// typedef uint64_t I;
|
||||
// std::vector<I> randoms;
|
||||
// size_t i, n;
|
||||
// std::priority_queue<I> heap;
|
||||
// std::set<I> bst;
|
||||
// unsigned int seed = std::random_device()();
|
||||
//
|
||||
// // CLI arguments.
|
||||
// if (argc > 1) {
|
||||
// n = std::stoi(argv[1]);
|
||||
// } else {
|
||||
// n = 1;
|
||||
// }
|
||||
//
|
||||
// // Action.
|
||||
// for (i = 0; i < n; ++i) {
|
||||
// randoms.push_back(i);
|
||||
// }
|
||||
// std::shuffle(randoms.begin(), randoms.end(), std::mt19937(seed));
|
||||
// for (i = 0; i < n; ++i) {
|
||||
// auto random = randoms[i];
|
||||
//
|
||||
// // Heap.
|
||||
// LKMC_M5OPS_RESETSTATS;
|
||||
// heap.emplace(random);
|
||||
// LKMC_M5OPS_DUMPSTATS;
|
||||
//
|
||||
// // BST.
|
||||
// LKMC_M5OPS_RESETSTATS;
|
||||
// bst.insert(random);
|
||||
// LKMC_M5OPS_DUMPSTATS;
|
||||
// }
|
||||
//}
|
||||
|
||||
#include <algorithm>
#include <cassert>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <queue>
#include <random>
#include <set>
#include <string>
#include <unordered_set>
#include <vector>

#include <lkmc/m5ops.h>
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
typedef uint64_t I;
|
||||
std::vector<I> randoms;
|
||||
size_t i, j, n, granule, base;
|
||||
std::priority_queue<I> heap;
|
||||
std::set<I> bst;
|
||||
std::unordered_set<I> hashmap;
|
||||
unsigned int seed = std::random_device()();
|
||||
|
||||
// CLI arguments.
|
||||
if (argc > 1) {
|
||||
n = std::stoi(argv[1]);
|
||||
} else {
|
||||
n = 10000000;
|
||||
}
|
||||
#ifdef LKMC_M5OPS_ENABLE
|
||||
// Let's comment useless stuff out to speed up gem5 simulations.
|
||||
granule = 1;
|
||||
j = 0;
|
||||
#else
|
||||
if (argc > 2) {
|
||||
granule = std::stoi(argv[2]);
|
||||
} else {
|
||||
granule = 10000;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Action.
|
||||
for (i = 0; i < n; ++i) {
|
||||
randoms.push_back(i);
|
||||
}
|
||||
std::shuffle(randoms.begin(), randoms.end(), std::mt19937(seed));
|
||||
for (i = 0; i < n / granule; ++i) {
|
||||
#ifndef LKMC_M5OPS_ENABLE
|
||||
using clk = std::chrono::high_resolution_clock;
|
||||
decltype(clk::now()) start, end;
|
||||
#endif
|
||||
base = i * granule;
|
||||
|
||||
// Heap.
|
||||
#ifndef LKMC_M5OPS_ENABLE
|
||||
start = clk::now();
|
||||
for (j = 0; j < granule; ++j) {
|
||||
#endif
|
||||
LKMC_M5OPS_RESETSTATS;
|
||||
heap.emplace(randoms[base + j]);
|
||||
LKMC_M5OPS_DUMPSTATS;
|
||||
#ifndef LKMC_M5OPS_ENABLE
|
||||
}
|
||||
end = clk::now();
|
||||
auto dt_heap = (end - start) / granule;
|
||||
#endif
|
||||
|
||||
// BST.
|
||||
#ifndef LKMC_M5OPS_ENABLE
|
||||
start = clk::now();
|
||||
for (j = 0; j < granule; ++j) {
|
||||
#endif
|
||||
LKMC_M5OPS_RESETSTATS;
|
||||
bst.insert(randoms[base + j]);
|
||||
LKMC_M5OPS_DUMPSTATS;
|
||||
#ifndef LKMC_M5OPS_ENABLE
|
||||
}
|
||||
end = clk::now();
|
||||
auto dt_bst = (end - start) / granule;
|
||||
#endif
|
||||
|
||||
// Hashmap.
|
||||
#ifndef LKMC_M5OPS_ENABLE
|
||||
start = clk::now();
|
||||
for (j = 0; j < granule; ++j) {
|
||||
#endif
|
||||
LKMC_M5OPS_RESETSTATS;
|
||||
hashmap.insert(randoms[base + j]);
|
||||
LKMC_M5OPS_DUMPSTATS;
|
||||
#ifndef LKMC_M5OPS_ENABLE
|
||||
}
|
||||
end = clk::now();
|
||||
auto dt_hashmap = (end - start) / granule;
|
||||
#endif
|
||||
|
||||
#ifndef LKMC_M5OPS_ENABLE
|
||||
// Output.
|
||||
std::cout
|
||||
<< base << " "
|
||||
<< std::chrono::duration_cast<std::chrono::nanoseconds>(dt_heap).count() << " "
|
||||
<< std::chrono::duration_cast<std::chrono::nanoseconds>(dt_bst).count() << " "
|
||||
<< std::chrono::duration_cast<std::chrono::nanoseconds>(dt_hashmap).count() << std::endl
|
||||
;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Sanity check.
|
||||
for (auto it = bst.rbegin(); it != bst.rend(); ++it) {
|
||||
assert(*it == heap.top());
|
||||
heap.pop();
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user