From 5562474994d0d50cb3dc8f76a1fba3e6b1ffe37d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciro=20Santilli=20=E5=85=AD=E5=9B=9B=E4=BA=8B=E4=BB=B6=20?= =?UTF-8?q?=E6=B3=95=E8=BD=AE=E5=8A=9F?= Date: Fri, 25 Sep 2020 01:00:00 +0000 Subject: [PATCH] cpp: map, multimap, move in from cpp-cheat --- README.adoc | 36 +++++- userland/cpp/map.cpp | 256 ++++++++++++++++++++++++++++++++++++++ userland/cpp/multimap.cpp | 100 +++++++++++++++ 3 files changed, 391 insertions(+), 1 deletion(-) create mode 100644 userland/cpp/map.cpp create mode 100644 userland/cpp/multimap.cpp diff --git a/README.adoc b/README.adoc index 5f870e1..555944f 100644 --- a/README.adoc +++ b/README.adoc @@ -2048,6 +2048,38 @@ Note however that early boot parts appear to be relocated in memory somehow, and Further discussion at: <>. +In the specific case of gem5 aarch64 at least: + +* gem5 relocates the kernel in memory to a fixed location, see e.g. https://gem5.atlassian.net/browse/GEM5-787 +* `--param 'system.workload.early_kernel_symbols=True'` should in theory duplicate the symbols to the correct physical location, but it was broken at one point: https://gem5.atlassian.net/browse/GEM5-785 +* gem5 executes directly from vmlinux, so there is no decompression code involved, so you actually immediately start running the "true" first instruction from `head.S` as described at: https://stackoverflow.com/questions/18266063/does-linux-kernel-have-main-function/33422401#33422401 +* once the MMU gets turned on at kernel symbol `__primary_switched`, the virtual address matches the ELF symbols, and you start seeing correct symbols without the need for `early_kernel_symbols`. This can be observed clearly with `function_trace = True`: https://stackoverflow.com/questions/64049487/how-to-trace-executed-guest-function-symbol-names-with-their-timestamp-in-gem5/64049488#64049488 which produces: ++ +.... 
+0: _kernel_flags_le_lo32 (12500) +12500: __crc_tcp_add_backlog (1000) +13500: __crc_crypto_alg_tested (6500) +20000: __crc_tcp_add_backlog (10000) +30000: __crc_crypto_alg_tested (500) +30500: __crc_scsi_is_host_device (5000) +35500: __crc_crypto_alg_tested (1500) +37000: __crc_scsi_is_host_device (4000) +41000: __crc_crypto_alg_tested (3000) +44000: __crc_tcp_add_backlog (263500) +307500: __crc_crypto_alg_tested (975500) +1283000: __crc_tcp_add_backlog (77191500) +78474500: __crc_crypto_alg_tested (1000) +78475500: __crc_scsi_is_host_device (19500) +78495000: __crc_crypto_alg_tested (500) +78495500: __crc_scsi_is_host_device (13500) +78509000: __primary_switched (14000) +78523000: memset (21118000) +99641000: __primary_switched (2500) +99643500: start_kernel (11000) +.... ++ +so we see that `__primary_switched` is the first non-trash symbol (non-`__crc_*` and non-`_kernel_flags_*`, which are just informative symbols, not actual executable code) + ==== Linux kernel entry point TODO https://stackoverflow.com/questions/2589845/what-are-the-first-operations-that-the-linux-kernel-executes-on-boot @@ -20286,8 +20318,10 @@ Programs under link:userland/cpp/[] are examples of https://en.wikipedia.org/wik ** link:userland/cpp/random.cpp[] * containers ** associative -*** <> contains a benchmark comparison of different c++ containers *** link:userland/cpp/set.cpp[]: `std::set` contains unique keys +*** link:userland/cpp/map.cpp[]: `std::map` +*** link:userland/cpp/multimap.cpp[]: `std::multimap` +** <> contains a benchmark comparison of different c++ containers [[cpp-initialization-types]] ==== C++ initialization types diff --git a/userland/cpp/map.cpp b/userland/cpp/map.cpp new file mode 100644 index 0000000..b7a42f8 --- /dev/null +++ b/userland/cpp/map.cpp @@ -0,0 +1,256 @@ +// https://cirosantilli.com/linux-kernel-module-cheat#cpp +// +// http://www.cplusplus.com/reference/map/map/ +// +// Also comes in an unordered version `unordered_map`. +// +// Ordered. 
+// +// Also comes in a multiple-value-per-key version `multimap`. +// +// Does not require a hash function. Usually implemented as a self balancing tree such as a red-black tree. +// +// # hashmap +// +// There seems to be no explicit hashmap container, only a generic map interface. +// +// However unordered_map is likely to be hashmap based. +// +// A nonstandard `hash_map` is already provided with gcc and msvc++. +// It is placed in the `std::` namespace, but it is *not* ISO. + +#include // map, multimap +#include // map, multimap +#include // stringstream + +template +std::string map_to_str(std::map map) { + std::stringstream result; + for (auto& pair : map) { + result << pair.first << ":" << pair.second << ", "; + } + return result.str(); +} + +int main() { + // Initializer list constructor. + { + std::map m{ + {0, "zero"}, + {1, "one"}, + }; + assert(m.at(0) == "zero"); + assert(m.at(1) == "one"); + } + + // # emplace + // + // Put a value pair into the map without creating the pair explicitly. + // + // Only inserts if not already present. + // + // Needs gcc 4.8: http://stackoverflow.com/questions/15812276/stdset-has-no-member-emplace + { + std::map m; + assert((m.emplace(0, "zero").second)); + assert((m.emplace(1, "one").second)); + assert(!(m.emplace(1, "one2").second)); + assert(m.at(0) == "zero"); + assert(m.at(1) == "one"); + } + + // # operator[] + // + // Get value from a given key. + // + // Creates if not present, so be very careful if that's not what you want! + // + // Use: + // + // - this to "add new or update existing" or "create default value and return it" + // - at() to find when you are sure it is there + // - find() to find when you are not sure it is there + // - emplace() for putting new values when you are sure they are not there + { + std::map m{ + {0, "zero"}, + {1, "one"}, + }; + assert(m[0] == "zero"); + assert(m[1] == "one"); + + // Returns a reference through which the value can be overwritten. 
+ m[1] = "one2"; + assert(m[1] == "one2"); + + // WARNING: if the key does not exist, it is inserted with a default-constructed value. + // This can be avoided by using `find` or `at` instead of `[]`. + // Inserts `(2,"")` because `""` is the value produced by the default `std::string` constructor. + // http://stackoverflow.com/questions/10124679/what-happens-if-i-read-a-maps-value-where-the-key-does-not-exist + { + assert(m[2] == ""); + assert(m.size() == 3); + + // This behaviour is however very convenient for nested containers. + { + std::map> m; + // Create the empty map at m[0], and immediately add a (0,0) pair to it. + m[0][0] = 0; + // map at m[0] already exists, now just add a new (1, 1) pair to it. + m[0][1] = 1; + m[1][0] = 2; + assert(m[0][0] == 0); + assert(m[0][1] == 1); + assert(m[1][0] == 2); + } + } + } + + // # find + // + // Similar to `std::set` find with respect to the keys: + // returns an iterator pointing to the pair which has the given key, not to the value. + // + // If not found, returns `map::end()` + // + // This is preferable to `[]` since it does not insert non-existent elements. + { + std::map m{ + {0, "zero"}, + {1, "one"}, + }; + + auto found = m.find(0); + assert(found != m.end()); + assert(found->first == 0); + assert(found->second == "zero"); + + assert(m.find(2) == m.end()); + assert(m.size() == 2); + + // Get a default provided value if key not present: https://stackoverflow.com/questions/2333728/stdmap-default-value + // + // TODO: any less verbose way than finding and checking `!= end()`? Like: + // + // m.get(key, default) + { + std::map m{}; + int default_ = 42; + int result; + auto f = m.find(1); + if (f == m.end()) { + result = default_; + } else { + result = f->second; + } + assert(result == 42); + } + } + + // # at + // + // A convenient version of find() that returns the item directly. + // + // Throws `std::out_of_range` if not present, so better when the key is supposed to be there. + // + // C++11. 
+ { + std::map m{ + {0, "zero"}, + {1, "one"}, + }; + // Returns a reference, so we can modify it. + m.at(1) = "one2"; + assert(m.at(1) == "one2"); + } + + // # insert + // + // Insert pair into map. + // + // The return value is similar to that of a set insertion with respect to the key. + // + // Just use emplace instead, less verbose as it was added after perfect forwarding. + // + // http://stackoverflow.com/questions/17172080/insert-vs-emplace-vs-operator-in-c-map + { + std::map m; + std::pair::iterator,bool> ret; + + ret = m.insert(std::make_pair(0, "zero")); + assert(ret.first == m.find(0)); + assert(ret.second == true); + + ret = m.insert(std::make_pair(1, "one")); + assert(ret.first == m.find(1)); + assert(ret.second == true); + + // Key already present: insert() leaves the existing value untouched. + ret = m.insert(std::make_pair(1, "one2")); + assert(m[1] == "one"); + assert(ret.first == m.find(1)); + assert(ret.second == false); + } + + // # iterate + // + // Map is ordered: + // http://stackoverflow.com/questions/7648756/is-the-order-of-iterating-through-stdmap-known-and-guaranteed-by-the-standard + // + // It is iterated in key `<` order. + // + // So this basically requires implementations to use balanced + // trees instead of hashmaps. + // + // Iteration returns key value pairs. + { + std::map m{ + {1, "one"}, + {0, "zero"}, + }; + + int i = 0; + int is[] = {0, 1}; + for (auto& im : m) { + assert(im.first == is[i]); + //std::cout << im.second << std::endl; + ++i; + } + assert(i == 2); + assert(map_to_str(m) == "0:zero, 1:one, "); + } + + // # erase + // + // Remove element from map. + { + // key version. Returns number of elements removed (0 or 1). + { + std::map m{ + {0, "zero"}, + {1, "one"}, + }; + int ret; + ret = m.erase(1); + assert(ret == 1); + assert((m == std::map{{0, "zero"}})); + + ret = m.erase(1); + assert(ret == 0); + } + + // iterator version. Returns iterator to next element. + // Does not invalidate other iterators. 
+ // http://stackoverflow.com/questions/6438086/iterator-invalidation-rules + { + std::map m{ + {0, "zero"}, + {1, "one"}, + }; + auto itNext = m.find(1); + auto it = m.find(0); + assert(m.erase(it) == itNext); + assert((m == std::map{{1, "one"}})); + } + } +} diff --git a/userland/cpp/multimap.cpp b/userland/cpp/multimap.cpp new file mode 100644 index 0000000..d57fe29 --- /dev/null +++ b/userland/cpp/multimap.cpp @@ -0,0 +1,100 @@ +// https://cirosantilli.com/linux-kernel-module-cheat#cpp +// +// Map where a key can have multiple values. +// +// Simple to implement with map of set or vector: +// +// - http://stackoverflow.com/questions/8602068/whats-the-difference-between-stdmultimapkey-value-and-stdmapkey-stds +// - http://stackoverflow.com/questions/4437862/whats-the-advantage-of-multimap-over-map-of-vectors + +#include // map, multimap +#include // map, multimap +#include // stringstream +#include // stringstream + +int main() { + // equal_range returns the [first, second) iterator range covering all key value pairs with a given key. + { + std::multimap m{ + {1, 2}, + {1, 3}, + {2, 4} + }; + auto range = m.equal_range(1); + auto it = range.first; + assert(it->second == 2); + it++; + assert(it->second == 3); + it++; + assert(it == range.second); + } + + // Iterating over the multimap visits every pair, including pairs that share a key. 
+ { + std::multimap m{ + {1, 2}, + {1, 3}, + {2, 4} + }; + + auto it = m.begin(); + assert(it->first == 1); + assert(it->second == 2); + + it++; + assert(it->first == 1); + assert(it->second == 3); + + it++; + assert(it->first == 2); + assert(it->second == 4); + + it++; + assert(it == m.end()); + } + + // # Iterate each key only once + // + // # Group by key + // + // - https://stackoverflow.com/questions/9371236/is-there-an-iterator-across-unique-keys-in-a-stdmultimap/41523639#41523639 + // - http://stackoverflow.com/questions/247818/stlmultimap-how-do-i-get-groups-of-data + { + std::multimap m{ + {1, 2}, + {1, 3}, + {2, 4} + }; + std::vector out; + // Commented-out alternative loop without goto: + //for (auto it = m.begin(), end = m.end(); it != end;) { + //auto key = it->first; + //out.push_back(key); + //do { + //it++; + //} while (it != end && it->first == key); + //} + + auto it = m.begin(); + auto end = m.end(); + auto pair = *it; // Dereferences begin(): only valid because m is not empty. + auto key = pair.first; + while (true) { + // Operate on key. + out.push_back(key); + decltype(key) next_key; + do { + // Operate on value. + it++; + if (it == end) { + goto end; // Leave both loops at once. + } + next_key = it->first; + } while (next_key == key); + key = next_key; + } + end: // Label; does not clash with the `end` iterator because labels have their own namespace. + + assert(out == std::vector({1, 2})); + } +}