From 32e4702dc7844761e6ccdbb0d65aaa0b19379598 Mon Sep 17 00:00:00 2001 From: Gary Talent Date: Wed, 24 Apr 2024 20:55:26 -0500 Subject: [PATCH] [ox] Improve hashing and MaybeView * Add CityHash dep * Use CityHash for HashMap * Make MaybeView more versatile * Cleanup some inappropriate MaybeView uses --- deps/ox/CMakeLists.txt | 1 + deps/ox/deps/cityhash/CMakeLists.txt | 30 + deps/ox/deps/cityhash/include/cityhash/city.h | 656 ++++++++++++++++++ deps/ox/src/ox/model/walk.hpp | 2 +- deps/ox/src/ox/std/CMakeLists.txt | 1 + deps/ox/src/ox/std/hash.hpp | 170 +++++ deps/ox/src/ox/std/hashmap.hpp | 37 +- deps/ox/src/ox/std/istring.hpp | 5 + deps/ox/src/ox/std/maybeview.hpp | 2 +- deps/ox/src/ox/std/string.hpp | 5 + deps/ox/src/ox/std/stringview.hpp | 5 - deps/ox/src/ox/std/utility.hpp | 31 +- deps/ox/src/ox/std/vector.hpp | 39 +- 13 files changed, 911 insertions(+), 73 deletions(-) create mode 100644 deps/ox/deps/cityhash/CMakeLists.txt create mode 100644 deps/ox/deps/cityhash/include/cityhash/city.h create mode 100644 deps/ox/src/ox/std/hash.hpp diff --git a/deps/ox/CMakeLists.txt b/deps/ox/CMakeLists.txt index 244a0bc2..cb286b0a 100644 --- a/deps/ox/CMakeLists.txt +++ b/deps/ox/CMakeLists.txt @@ -82,4 +82,5 @@ if(OX_USE_STDLIB) set(JSONCPP_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/deps/jsoncpp/include") add_subdirectory(deps/jsoncpp) endif() +add_subdirectory(deps/cityhash) add_subdirectory(src) diff --git a/deps/ox/deps/cityhash/CMakeLists.txt b/deps/ox/deps/cityhash/CMakeLists.txt new file mode 100644 index 00000000..0521c3e5 --- /dev/null +++ b/deps/ox/deps/cityhash/CMakeLists.txt @@ -0,0 +1,30 @@ +cmake_minimum_required(VERSION 3.19) +set(CMAKE_POLICY_DEFAULT_CMP0110 NEW) # requires CMake 3.19 + +project(CityHash CXX) + +set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + +set(CMAKE_POSITION_INDEPENDENT_CODE ON) + +set(NOSTALGIA_BUILD_PLAYER OFF) +set(NOSTALGIA_BUILD_STUDIO_APP OFF) + +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY
${CMAKE_BINARY_DIR}/lib) +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) + +if(APPLE) + set(CMAKE_MACOSX_RPATH OFF) +else() + if(UNIX) + set(BUILD_SHARED_LIBS ON) + endif() + set(CMAKE_INSTALL_RPATH "$ORIGIN/../lib") + set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) +endif() + +add_library(CityHash INTERFACE) +target_include_directories(CityHash INTERFACE include) diff --git a/deps/ox/deps/cityhash/include/cityhash/city.h b/deps/ox/deps/cityhash/include/cityhash/city.h new file mode 100644 index 00000000..d24101b3 --- /dev/null +++ b/deps/ox/deps/cityhash/include/cityhash/city.h @@ -0,0 +1,656 @@ +// Copyright (c) 2011 Google, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +// +// CityHash, by Geoff Pike and Jyrki Alakuijala +// +// http://code.google.com/p/cityhash/ +// +// This file provides a few functions for hashing strings. 
All of them are +// high-quality functions in the sense that they pass standard tests such +// as Austin Appleby's SMHasher. They are also fast. +// +// For 64-bit x86 code, on short strings, we don't know of anything faster than +// CityHash64 that is of comparable quality. We believe our nearest competitor +// is Murmur3. For 64-bit x86 code, CityHash64 is an excellent choice for hash +// tables and most other hashing (excluding cryptography). +// +// For 64-bit x86 code, on long strings, the picture is more complicated. +// On many recent Intel CPUs, such as Nehalem, Westmere, Sandy Bridge, etc., +// CityHashCrc128 appears to be faster than all competitors of comparable +// quality. CityHash128 is also good but not quite as fast. We believe our +// nearest competitor is Bob Jenkins' Spooky. We don't have great data for +// other 64-bit CPUs, but for long strings we know that Spooky is slightly +// faster than CityHash on some relatively recent AMD x86-64 CPUs, for example. +// Note that CityHashCrc128 is declared in citycrc.h. +// +// For 32-bit x86 code, we don't know of anything faster than CityHash32 that +// is of comparable quality. We believe our nearest competitor is Murmur3A. +// (On 64-bit CPUs, it is typically faster to use the other CityHash variants.) +// +// Functions in the CityHash family are not suitable for cryptography. +// +// Please see CityHash's README file for more details on our performance +// measurements and so on. +// +// WARNING: This code has been only lightly tested on big-endian platforms! +// It is known to work well on little-endian platforms that have a small penalty +// for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs. +// It should work on all 32-bit and 64-bit platforms that allow unaligned reads; +// bug reports are welcome. +// +// By the way, for some hash functions, given strings a and b, the hash +// of a+b is easily derived from the hashes of a and b. 
This property +// doesn't hold for any hash functions in this file. + +#ifndef CITY_HASH_H_ +#define CITY_HASH_H_ + +#if __has_include() + +#include + +#else + +typedef signed char int8_t; +typedef unsigned char uint8_t; +typedef short int16_t; +typedef unsigned short uint16_t; +typedef int int32_t; +typedef unsigned int uint32_t; +typedef unsigned uint_t; +#if defined(__arm__) || defined(__ppc__) +typedef long long int64_t; +typedef unsigned long long uint64_t; +typedef __INTMAX_TYPE__ intmax_t; +typedef __UINTMAX_TYPE__ uintmax_t; +#else +typedef long int64_t; +typedef unsigned long uint64_t; +typedef int64_t intmax_t; +typedef uint64_t uintmax_t; +#endif + +#if defined(_LP64) || defined(__ppc64__) || defined(__aarch64__) +typedef long intptr_t; +typedef unsigned long uintptr_t; +#elif defined(_WIN64) +typedef int64_t intptr_t; +typedef uint64_t uintptr_t; +#elif defined(_LP32) || defined(__ppc__) || defined(_WIN32) || defined(__arm__) +typedef int32_t intptr_t; +typedef uint32_t uintptr_t; +#else +#error intptr_t, and uintptr_t undefined +#endif + +#endif + + +using size_t = decltype(alignof(int)); + + + +#if __has_include() +#include +#endif + + + +namespace cityhash::detail { +template +struct pair { + T1 first{}; + T2 second{}; + constexpr pair() noexcept = default; + constexpr pair(T1 a, T2 b) noexcept: first(std::move(a)), second(std::move(b)) {} +}; +} + +namespace cityhash { + +using uint128 = cityhash::detail::pair; + +namespace detail { + +template +constexpr void swap(T &a, T &b) noexcept { + auto temp = std::move(a); + a = std::move(b); + b = std::move(temp); +} + +template +[[nodiscard]] +constexpr T byteSwap(T i) noexcept { + if constexpr(sizeof(T) == 1) { + return i; + } else if constexpr(sizeof(T) == 2) { + return static_cast(i << 8) | static_cast(i >> 8); + } else if constexpr(sizeof(T) == 4) { + return ((i >> 24) & 0x000000ff) | + ((i >> 8) & 0x0000ff00) | + ((i << 8) & 0x00ff0000) | + ((i << 24) & 0xff000000); + } else if constexpr(sizeof(T) == 
8) { + return ((i >> 56) & 0x00000000000000ff) | + ((i >> 40) & 0x000000000000ff00) | + ((i >> 24) & 0x0000000000ff0000) | + ((i >> 8) & 0x00000000ff000000) | + ((i << 8) & 0x000000ff00000000) | + ((i << 24) & 0x0000ff0000000000) | + ((i << 40) & 0x00ff000000000000) | + ((i << 56) & 0xff00000000000000); + } +} + + +[[nodiscard]] +constexpr uint64_t Uint128Low64(const uint128& x) noexcept { return x.first; } +[[nodiscard]] +constexpr uint64_t Uint128High64(const uint128& x) noexcept { return x.second; } + +// Hash 128 input bits down to 64 bits of output. +// This is intended to be a reasonably good hash function. +[[nodiscard]] +constexpr uint64_t Hash128to64(const uint128& x) noexcept { + // Murmur-inspired hashing. + const uint64_t kMul = 0x9ddfea08eb382d69ULL; + uint64_t a = (detail::Uint128Low64(x) ^ detail::Uint128High64(x)) * kMul; + a ^= (a >> 47); + uint64_t b = (detail::Uint128High64(x) ^ a) * kMul; + b ^= (b >> 47); + b *= kMul; + return b; +} + +[[nodiscard]] +constexpr uint64_t UNALIGNED_LOAD64(const char *p) noexcept { + uint64_t result{}; + result |= static_cast(p[0]); + result |= static_cast(p[1]) << 8; + result |= static_cast(p[2]) << 16; + result |= static_cast(p[3]) << 24; + result |= static_cast(p[4]) << 32; + result |= static_cast(p[5]) << 40; + result |= static_cast(p[6]) << 48; + result |= static_cast(p[7]) << 56; + //memcpy(&result, p, sizeof(result)); + return result; +} + +[[nodiscard]] +constexpr uint32_t UNALIGNED_LOAD32(const char *p) noexcept { + uint32_t result{}; + result |= static_cast(p[0]); + result |= static_cast(p[1]) << 8; + result |= static_cast(p[2]) << 16; + result |= static_cast(p[3]) << 24; + //memcpy(&result, p, sizeof(result)); + return result; +} + +#ifdef WORDS_BIGENDIAN +#define uint32_in_expected_order(x) (detail::byteSwap(x)) +#define uint64_in_expected_order(x) (detail::byteSwap(x)) +#else +#define uint32_in_expected_order(x) (x) +#define uint64_in_expected_order(x) (x) +#endif + +#if !defined(LIKELY) +#if 
HAVE_BUILTIN_EXPECT +#define LIKELY(x) (__builtin_expect(!!(x), 1)) +#else +#define LIKELY(x) (x) +#endif +#endif + +[[nodiscard]] +constexpr uint64_t Fetch64(const char *p) noexcept { + return uint64_in_expected_order(UNALIGNED_LOAD64(p)); +} + +[[nodiscard]] +constexpr uint32_t Fetch32(const char *p) noexcept { + return uint32_in_expected_order(UNALIGNED_LOAD32(p)); +} + +// Some primes between 2^63 and 2^64 for various uses. +constexpr uint64_t k0 = 0xc3a5c85c97cb3127ULL; +constexpr uint64_t k1 = 0xb492b66fbe98f273ULL; +constexpr uint64_t k2 = 0x9ae16a3b2f90404fULL; + +// Magic numbers for 32-bit hashing. Copied from Murmur3. +constexpr uint32_t c1 = 0xcc9e2d51; +constexpr uint32_t c2 = 0x1b873593; + +// A 32-bit to 32-bit integer hash copied from Murmur3. +[[nodiscard]] +constexpr uint32_t fmix(uint32_t h) noexcept { + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + return h; +} + +[[nodiscard]] +constexpr uint32_t Rotate32(uint32_t val, int shift) noexcept { + // Avoid shifting by 32: doing so yields an undefined result. + return shift == 0 ? val : ((val >> shift) | (val << (32 - shift))); +} + +#undef PERMUTE3 +#define PERMUTE3(a, b, c) do { detail::swap(a, b); detail::swap(a, c); } while (0) + +[[nodiscard]] +constexpr uint32_t Mur(uint32_t a, uint32_t h) noexcept { + // Helper from Murmur3 for combining two 32-bit values. 
+ a *= detail::c1; + a = Rotate32(a, 17); + a *= detail::c2; + h ^= a; + h = Rotate32(h, 19); + return h * 5 + 0xe6546b64; +} + +[[nodiscard]] +constexpr uint32_t Hash32Len13to24(const char *s, size_t len) noexcept { + uint32_t a = Fetch32(s - 4 + (len >> 1)); + uint32_t b = Fetch32(s + 4); + uint32_t c = Fetch32(s + len - 8); + uint32_t d = Fetch32(s + (len >> 1)); + uint32_t e = Fetch32(s); + uint32_t f = Fetch32(s + len - 4); + uint32_t h = static_cast(len); + + return detail::fmix(Mur(f, Mur(e, Mur(d, Mur(c, Mur(b, Mur(a, h))))))); +} + +[[nodiscard]] +constexpr uint32_t Hash32Len0to4(const char *s, size_t len) noexcept { + uint32_t b = 0; + uint32_t c = 9; + for (size_t i = 0; i < len; i++) { + signed char v = static_cast(s[i]); + b = b * detail::c1 + static_cast(v); + c ^= b; + } + return detail::fmix(Mur(b, Mur(static_cast(len), c))); +} + +[[nodiscard]] +constexpr uint32_t Hash32Len5to12(const char *s, size_t len) noexcept { + uint32_t a = static_cast(len), b = a * 5, c = 9, d = b; + a += Fetch32(s); + b += Fetch32(s + len - 4); + c += Fetch32(s + ((len >> 1) & 4)); + return detail::fmix(Mur(c, Mur(b, Mur(a, d)))); +} + +// Bitwise right rotate. Normally this will compile to a single +// instruction, especially if the shift is a manifest constant. +[[nodiscard]] +constexpr uint64_t Rotate(uint64_t val, int shift) noexcept { + // Avoid shifting by 64: doing so yields an undefined result. + return shift == 0 ? val : ((val >> shift) | (val << (64 - shift))); +} + +[[nodiscard]] +constexpr uint64_t ShiftMix(uint64_t val) noexcept { + return val ^ (val >> 47); +} + +[[nodiscard]] +constexpr uint64_t HashLen16(uint64_t u, uint64_t v) noexcept { + return Hash128to64(uint128(u, v)); +} + +[[nodiscard]] +constexpr uint64_t HashLen16(uint64_t u, uint64_t v, uint64_t mul) noexcept { + // Murmur-inspired hashing. 
+ uint64_t a = (u ^ v) * mul; + a ^= (a >> 47); + uint64_t b = (v ^ a) * mul; + b ^= (b >> 47); + b *= mul; + return b; +} + +[[nodiscard]] +constexpr uint64_t HashLen0to16(const char *s, size_t len) noexcept { + if (len >= 8) { + uint64_t mul = detail::k2 + len * 2; + uint64_t a = detail::Fetch64(s) + detail::k2; + uint64_t b = detail::Fetch64(s + len - 8); + uint64_t c = detail::Rotate(b, 37) * mul + a; + uint64_t d = (detail::Rotate(a, 25) + b) * mul; + return HashLen16(c, d, mul); + } + if (len >= 4) { + uint64_t mul = detail::k2 + len * 2; + uint64_t a = Fetch32(s); + return HashLen16(len + (a << 3), Fetch32(s + len - 4), mul); + } + if (len > 0) { + uint8_t a = static_cast(s[0]); + uint8_t b = static_cast(s[len >> 1]); + uint8_t c = static_cast(s[len - 1]); + uint32_t y = static_cast(a) + (static_cast(b) << 8); + uint32_t z = static_cast(len) + (static_cast(c) << 2); + return ShiftMix(y * detail::k2 ^ z * detail::k0) * detail::k2; + } + return detail::k2; +} + +// This probably works well for 16-byte strings as well, but it may be overkill +// in that case. +[[nodiscard]] +constexpr uint64_t HashLen17to32(const char *s, size_t len) noexcept { + uint64_t mul = detail::k2 + len * 2; + uint64_t a = detail::Fetch64(s) * detail::k1; + uint64_t b = detail::Fetch64(s + 8); + uint64_t c = detail::Fetch64(s + len - 8) * mul; + uint64_t d = detail::Fetch64(s + len - 16) * detail::k2; + return HashLen16(detail::Rotate(a + b, 43) + detail::Rotate(c, 30) + d, + a + detail::Rotate(b + detail::k2, 18) + c, mul); +} + +// Return a 16-byte hash for 48 bytes. Quick and dirty. +// Callers do best to use "random-looking" values for a and b. +[[nodiscard]] +constexpr detail::pair WeakHashLen32WithSeeds( + uint64_t w, uint64_t x, uint64_t y, uint64_t z, uint64_t a, uint64_t b) noexcept { + a += w; + b = detail::Rotate(b + a + z, 21); + uint64_t c = a; + a += x; + a += y; + b += detail::Rotate(a, 44); + return detail::pair(a + z, b + c); +} + +// Return a 16-byte hash for s[0] ... 
s[31], a, and b. Quick and dirty. +[[nodiscard]] +constexpr detail::pair WeakHashLen32WithSeeds( + const char* s, uint64_t a, uint64_t b) noexcept { + return WeakHashLen32WithSeeds(detail::Fetch64(s), + detail::Fetch64(s + 8), + detail::Fetch64(s + 16), + detail::Fetch64(s + 24), + a, + b); +} + +// Return an 8-byte hash for 33 to 64 bytes. +[[nodiscard]] +constexpr uint64_t HashLen33to64(const char *s, size_t len) noexcept { + uint64_t mul = detail::k2 + len * 2; + uint64_t a = detail::Fetch64(s) * detail::k2; + uint64_t b = detail::Fetch64(s + 8); + uint64_t c = detail::Fetch64(s + len - 24); + uint64_t d = detail::Fetch64(s + len - 32); + uint64_t e = detail::Fetch64(s + 16) * detail::k2; + uint64_t f = detail::Fetch64(s + 24) * 9; + uint64_t g = detail::Fetch64(s + len - 8); + uint64_t h = detail::Fetch64(s + len - 16) * mul; + uint64_t u = detail::Rotate(a + g, 43) + (detail::Rotate(b, 30) + c) * 9; + uint64_t v = ((a + g) ^ d) + f + 1; + uint64_t w = detail::byteSwap((u + v) * mul) + h; + uint64_t x = detail::Rotate(e + f, 42) + c; + uint64_t y = (detail::byteSwap((v + w) * mul) + g) * mul; + uint64_t z = e + f + c; + a = detail::byteSwap((x + z) * mul + y) + b; + b = ShiftMix((z + a) * mul + d + h) * mul; + return b + x; +} + +// A subroutine for CityHash128(). Returns a decent 128-bit hash for strings +// of any length representable in signed long. Based on City and Murmur. +[[nodiscard]] +constexpr uint128 CityMurmur(const char *s, size_t len, uint128 seed) noexcept { + uint64_t a = detail::Uint128Low64(seed); + uint64_t b = detail::Uint128High64(seed); + uint64_t c = 0; + uint64_t d = 0; + if (len <= 16) { + a = ShiftMix(a * detail::k1) * detail::k1; + c = b * detail::k1 + HashLen0to16(s, len); + d = ShiftMix(a + (len >= 8 ? 
detail::Fetch64(s) : c)); + } else { + c = HashLen16(detail::Fetch64(s + len - 8) + detail::k1, a); + d = HashLen16(b + len, c + detail::Fetch64(s + len - 16)); + a += d; + // len > 16 here, so do...while is safe + do { + a ^= ShiftMix(detail::Fetch64(s) * detail::k1) * detail::k1; + a *= detail::k1; + b ^= a; + c ^= ShiftMix(detail::Fetch64(s + 8) * detail::k1) * detail::k1; + c *= detail::k1; + d ^= c; + s += 16; + len -= 16; + } while (len > 16); + } + a = HashLen16(a, c); + b = HashLen16(d, b); + return uint128(a ^ b, HashLen16(b, a)); +} + +} + + +[[nodiscard]] +constexpr uint32_t CityHash32(const char *s, size_t len) noexcept { + if (len <= 24) { + return len <= 12 ? + (len <= 4 ? detail::Hash32Len0to4(s, len) : detail::Hash32Len5to12(s, len)) : + detail::Hash32Len13to24(s, len); + } + + // len > 24 + uint32_t h = static_cast(len), g = detail::c1 * h, f = g; + uint32_t a0 = detail::Rotate32(detail::Fetch32(s + len - 4) * detail::c1, 17) * detail::c2; + uint32_t a1 = detail::Rotate32(detail::Fetch32(s + len - 8) * detail::c1, 17) * detail::c2; + uint32_t a2 = detail::Rotate32(detail::Fetch32(s + len - 16) * detail::c1, 17) * detail::c2; + uint32_t a3 = detail::Rotate32(detail::Fetch32(s + len - 12) * detail::c1, 17) * detail::c2; + uint32_t a4 = detail::Rotate32(detail::Fetch32(s + len - 20) * detail::c1, 17) * detail::c2; + h ^= a0; + h = detail::Rotate32(h, 19); + h = h * 5 + 0xe6546b64; + h ^= a2; + h = detail::Rotate32(h, 19); + h = h * 5 + 0xe6546b64; + g ^= a1; + g = detail::Rotate32(g, 19); + g = g * 5 + 0xe6546b64; + g ^= a3; + g = detail::Rotate32(g, 19); + g = g * 5 + 0xe6546b64; + f += a4; + f = detail::Rotate32(f, 19); + f = f * 5 + 0xe6546b64; + size_t iters = (len - 1) / 20; + do { + uint32_t a0 = detail::Rotate32(detail::Fetch32(s) * detail::c1, 17) * detail::c2; + uint32_t a1 = detail::Fetch32(s + 4); + uint32_t a2 = detail::Rotate32(detail::Fetch32(s + 8) * detail::c1, 17) * detail::c2; + uint32_t a3 = detail::Rotate32(detail::Fetch32(s + 12) 
* detail::c1, 17) * detail::c2; + uint32_t a4 = detail::Fetch32(s + 16); + h ^= a0; + h = detail::Rotate32(h, 18); + h = h * 5 + 0xe6546b64; + f += a1; + f = detail::Rotate32(f, 19); + f = f * detail::c1; + g += a2; + g = detail::Rotate32(g, 18); + g = g * 5 + 0xe6546b64; + h ^= a3 + a1; + h = detail::Rotate32(h, 19); + h = h * 5 + 0xe6546b64; + g ^= a4; + g = detail::byteSwap(g) * 5; + h += a4 * 5; + h = detail::byteSwap(h); + f += a0; + PERMUTE3(f, h, g); + s += 20; + } while (--iters != 0); + g = detail::Rotate32(g, 11) * detail::c1; + g = detail::Rotate32(g, 17) * detail::c1; + f = detail::Rotate32(f, 11) * detail::c1; + f = detail::Rotate32(f, 17) * detail::c1; + h = detail::Rotate32(h + g, 19); + h = h * 5 + 0xe6546b64; + h = detail::Rotate32(h, 17) * detail::c1; + h = detail::Rotate32(h + f, 19); + h = h * 5 + 0xe6546b64; + h = detail::Rotate32(h, 17) * detail::c1; + return h; +} + +[[nodiscard]] +constexpr uint64_t CityHash64(const char *s, size_t len) noexcept { + if (len <= 32) { + if (len <= 16) { + return detail::HashLen0to16(s, len); + } else { + return detail::HashLen17to32(s, len); + } + } else if (len <= 64) { + return detail::HashLen33to64(s, len); + } + + // For strings over 64 bytes we hash the end first, and then as we + // loop we keep 56 bytes of state: v, w, x, y, and z. + uint64_t x = detail::Fetch64(s + len - 40); + uint64_t y = detail::Fetch64(s + len - 16) + detail::Fetch64(s + len - 56); + uint64_t z = detail::HashLen16(detail::Fetch64(s + len - 48) + len, detail::Fetch64(s + len - 24)); + detail::pair v = detail::WeakHashLen32WithSeeds(s + len - 64, len, z); + detail::pair w = detail::WeakHashLen32WithSeeds(s + len - 32, y + detail::k1, x); + x = x * detail::k1 + detail::Fetch64(s); + + // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks. 
+ len = (len - 1) & ~static_cast(63); + do { + x = detail::Rotate(x + y + v.first + detail::Fetch64(s + 8), 37) * detail::k1; + y = detail::Rotate(y + v.second + detail::Fetch64(s + 48), 42) * detail::k1; + x ^= w.second; + y += v.first + detail::Fetch64(s + 40); + z = detail::Rotate(z + w.first, 33) * detail::k1; + v = detail::WeakHashLen32WithSeeds(s, v.second * detail::k1, x + w.first); + w = detail::WeakHashLen32WithSeeds(s + 32, z + w.second, y + detail::Fetch64(s + 16)); + detail::swap(z, x); + s += 64; + len -= 64; + } while (len != 0); + return detail::HashLen16(detail::HashLen16(v.first, w.first) + detail::ShiftMix(y) * detail::k1 + z, + detail::HashLen16(v.second, w.second) + x); +} + +[[nodiscard]] +constexpr uint64_t CityHash64WithSeeds(const char *s, size_t len, + uint64_t seed0, uint64_t seed1) noexcept { + return detail::HashLen16(CityHash64(s, len) - seed0, seed1); +} + +[[nodiscard]] +constexpr uint64_t CityHash64WithSeed(const char *s, size_t len, uint64_t seed) noexcept { + return CityHash64WithSeeds(s, len, detail::k2, seed); +} + +[[nodiscard]] +constexpr uint128 CityHash128WithSeed(const char *s, size_t len, uint128 seed) noexcept { + if (len < 128) { + return detail::CityMurmur(s, len, seed); + } + + // We expect len >= 128 to be the common case. Keep 56 bytes of state: + // v, w, x, y, and z. + detail::pair v, w; + uint64_t x = detail::Uint128Low64(seed); + uint64_t y = detail::Uint128High64(seed); + uint64_t z = len * detail::k1; + v.first = detail::Rotate(y ^ detail::k1, 49) * detail::k1 + detail::Fetch64(s); + v.second = detail::Rotate(v.first, 42) * detail::k1 + detail::Fetch64(s + 8); + w.first = detail::Rotate(y + z, 35) * detail::k1 + x; + w.second = detail::Rotate(x + detail::Fetch64(s + 88), 53) * detail::k1; + + // This is the same inner loop as CityHash64(), manually unrolled. 
+ do { + x = detail::Rotate(x + y + v.first + detail::Fetch64(s + 8), 37) * detail::k1; + y = detail::Rotate(y + v.second + detail::Fetch64(s + 48), 42) * detail::k1; + x ^= w.second; + y += v.first + detail::Fetch64(s + 40); + z = detail::Rotate(z + w.first, 33) * detail::k1; + v = detail::WeakHashLen32WithSeeds(s, v.second * detail::k1, x + w.first); + w = detail::WeakHashLen32WithSeeds(s + 32, z + w.second, y + detail::Fetch64(s + 16)); + detail::swap(z, x); + s += 64; + x = detail::Rotate(x + y + v.first + detail::Fetch64(s + 8), 37) * detail::k1; + y = detail::Rotate(y + v.second + detail::Fetch64(s + 48), 42) * detail::k1; + x ^= w.second; + y += v.first + detail::Fetch64(s + 40); + z = detail::Rotate(z + w.first, 33) * detail::k1; + v = detail::WeakHashLen32WithSeeds(s, v.second * detail::k1, x + w.first); + w = detail::WeakHashLen32WithSeeds(s + 32, z + w.second, y + detail::Fetch64(s + 16)); + detail::swap(z, x); + s += 64; + len -= 128; + } while (LIKELY(len >= 128)); + x += detail::Rotate(v.first + z, 49) * detail::k0; + y = y * detail::k0 + detail::Rotate(w.second, 37); + z = z * detail::k0 + detail::Rotate(w.first, 27); + w.first *= 9; + v.first *= detail::k0; + // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s. + for (size_t tail_done = 0; tail_done < len; ) { + tail_done += 32; + y = detail::Rotate(x + y, 42) * detail::k0 + v.second; + w.first += detail::Fetch64(s + len - tail_done + 16); + x = x * detail::k0 + w.first; + z += w.second + detail::Fetch64(s + len - tail_done); + w.second += v.first; + v = detail::WeakHashLen32WithSeeds(s + len - tail_done, v.first + z, v.second); + v.first *= detail::k0; + } + // At this point our 56 bytes of state should contain more than + // enough information for a strong 128-bit hash. We use two + // different 56-byte-to-8-byte hashes to get a 16-byte final result. 
+ x = detail::HashLen16(x, v.first); + y = detail::HashLen16(y + z, w.first); + return uint128(detail::HashLen16(x + v.second, w.second) + y, + detail::HashLen16(x + w.second, y + v.second)); +} + +[[nodiscard]] +constexpr uint128 CityHash128(const char *s, size_t len) noexcept { + return len >= 16 ? + CityHash128WithSeed(s + 16, len - 16, + uint128(detail::Fetch64(s), detail::Fetch64(s + 8) + detail::k0)) : + CityHash128WithSeed(s, len, uint128(detail::k0, detail::k1)); +} + +} + +#endif // CITY_HASH_H_ diff --git a/deps/ox/src/ox/model/walk.hpp b/deps/ox/src/ox/model/walk.hpp index d1bc29dc..784bddc7 100644 --- a/deps/ox/src/ox/model/walk.hpp +++ b/deps/ox/src/ox/model/walk.hpp @@ -64,7 +64,7 @@ constexpr Error DataWalker::read(const DescriptorField &f, Reader *rd template constexpr void DataWalker::pushNamePath(const FieldName &fn) noexcept { - m_path.push_back(fn); + m_path.emplace_back(fn); } template diff --git a/deps/ox/src/ox/std/CMakeLists.txt b/deps/ox/src/ox/std/CMakeLists.txt index 6c958be6..6c88ca49 100644 --- a/deps/ox/src/ox/std/CMakeLists.txt +++ b/deps/ox/src/ox/std/CMakeLists.txt @@ -81,6 +81,7 @@ target_link_libraries( OxStd PUBLIC $<$:gcc> OxTraceHook + CityHash ) install( diff --git a/deps/ox/src/ox/std/hash.hpp b/deps/ox/src/ox/std/hash.hpp new file mode 100644 index 00000000..8f7e9233 --- /dev/null +++ b/deps/ox/src/ox/std/hash.hpp @@ -0,0 +1,170 @@ +/* + * Copyright 2015 - 2024 gary@drinkingtea.net + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ */ + +#pragma once + +#if __has_include() +#include +#else +namespace std { +template +struct hash; +} +#endif + +#include + +#include "istring.hpp" +#include "string.hpp" +#include "stringview.hpp" +#include "types.hpp" + +namespace ox { + +template +struct hash { + [[nodiscard]] + constexpr size_t operator()(T const&v) const noexcept { + return std::hash{}(v); + } +}; + +template<> +struct hash { + [[nodiscard]] + constexpr size_t operator()(bool v) const noexcept { + return static_cast(v); + } +}; + +template<> +struct hash { + [[nodiscard]] + constexpr size_t operator()(char v) const noexcept { + return static_cast(v); + } +}; + +template<> +struct hash { + [[nodiscard]] + constexpr size_t operator()(short v) const noexcept { + return static_cast(v); + } +}; + +template<> +struct hash { + [[nodiscard]] + constexpr size_t operator()(int v) const noexcept { + return static_cast(v); + } +}; + +template<> +struct hash { + [[nodiscard]] + constexpr size_t operator()(long v) const noexcept { + return static_cast(v); + } +}; + +template<> +struct hash { + [[nodiscard]] + constexpr size_t operator()(long long v) const noexcept { + return static_cast(v); + } +}; + + + +template<> +struct hash { + [[nodiscard]] + constexpr size_t operator()(unsigned char v) const noexcept { + return static_cast(v); + } +}; + +template<> +struct hash { + [[nodiscard]] + constexpr size_t operator()(unsigned short v) const noexcept { + return static_cast(v); + } +}; + +template<> +struct hash { + [[nodiscard]] + constexpr size_t operator()(unsigned int v) const noexcept { + return static_cast(v); + } +}; + +template<> +struct hash { + [[nodiscard]] + constexpr size_t operator()(unsigned long v) const noexcept { + return static_cast(v); + } +}; + +template<> +struct hash { + [[nodiscard]] + constexpr size_t operator()(unsigned long long v) const noexcept { + return static_cast(v); + } +}; + + +[[nodiscard]] +constexpr auto hashString(ox::StringView const&v) noexcept { + if 
constexpr(sizeof(void*) == 8) { + return cityhash::CityHash64(v.data(), v.bytes()); + } else { + return cityhash::CityHash32(v.data(), v.bytes()); + } +} + + +template<> +struct hash { + [[nodiscard]] + constexpr size_t operator()(ox::StringView v) const noexcept { + return hashString(v); + } +}; + +template<> +struct hash { + [[nodiscard]] + constexpr size_t operator()(ox::StringView v) const noexcept { + return hashString(v); + } +}; + +template +struct hash> { + [[nodiscard]] + constexpr size_t operator()(ox::StringView v) const noexcept { + return hashString(v); + } +}; + +template<> +struct hash { + [[nodiscard]] + constexpr size_t operator()(ox::StringView v) const noexcept { + return hashString(v); + } +}; + +} diff --git a/deps/ox/src/ox/std/hashmap.hpp b/deps/ox/src/ox/std/hashmap.hpp index 7ddee6c3..2475359d 100644 --- a/deps/ox/src/ox/std/hashmap.hpp +++ b/deps/ox/src/ox/std/hashmap.hpp @@ -9,6 +9,7 @@ #pragma once #include "algorithm.hpp" +#include "hash.hpp" #include "ignore.hpp" #include "stringview.hpp" #include "strops.hpp" @@ -32,7 +33,7 @@ class HashMap { Vector m_pairs; public: - explicit constexpr HashMap(std::size_t size = 100); + explicit constexpr HashMap(std::size_t size = 127); constexpr HashMap(HashMap const&other); @@ -68,10 +69,6 @@ class HashMap { private: constexpr void expand(); - constexpr static uint64_t hash(Integral_c auto) noexcept; - - constexpr static uint64_t hash(StringView const&) noexcept; - template constexpr Pair *const&access(Vector const&pairs, KK const&key) const; @@ -172,14 +169,14 @@ constexpr void HashMap::erase(MaybeView_t const&k) { if (!contains(k)) { return; } - auto h = hash(k) % m_pairs.size(); + auto h = ox::hash>{}(k) % m_pairs.size(); while (true) { const auto &p = m_pairs[h]; if (p == nullptr || p->key == k) { std::ignore = m_pairs.erase(h); break; } else { - h = hash(h) % m_pairs.size(); + h = ox::hash>{}(k) % m_pairs.size(); } } std::ignore = m_keys.erase(ox::find(m_keys.begin(), m_keys.end(), k)); @@ 
-206,7 +203,7 @@ constexpr void HashMap::clear() { delete m_pairs[i]; } m_pairs.clear(); - m_pairs.resize(100); + m_pairs.resize(127); } template @@ -219,30 +216,10 @@ constexpr void HashMap::expand() { m_pairs = std::move(r); } -template -constexpr uint64_t HashMap::hash(Integral_c auto k) noexcept { - uint64_t sum = 1; - for (auto i = 0u; i < sizeof(K); ++i) { - const auto shift = i * 8; - const auto v = static_cast(k >> shift & 0xff); - sum += (sum + v) * 7 * sum; - } - return sum; -} - -template -constexpr uint64_t HashMap::hash(StringView const&k) noexcept { - uint64_t sum = 1; - for (auto i = 0u; i < k.len(); ++i) { - sum += ((sum + static_cast(k[i])) * 7) * sum; - } - return sum; -} - template template constexpr typename HashMap::Pair *const&HashMap::access(Vector const&pairs, KK const&k) const { - auto h = static_cast(hash(k) % pairs.size()); + auto h = static_cast(ox::hash{}(k) % pairs.size()); while (true) { const auto &p = pairs[h]; if (p == nullptr || p->key == k) { @@ -256,7 +233,7 @@ constexpr typename HashMap::Pair *const&HashMap::access(Vector template constexpr typename HashMap::Pair *&HashMap::access(Vector &pairs, KK const&k) { - auto h = static_cast(hash(k) % pairs.size()); + auto h = static_cast(ox::hash{}(k) % pairs.size()); while (true) { auto &p = pairs[h]; if (p == nullptr || p->key == k) { diff --git a/deps/ox/src/ox/std/istring.hpp b/deps/ox/src/ox/std/istring.hpp index eb02dc7a..b7eef79a 100644 --- a/deps/ox/src/ox/std/istring.hpp +++ b/deps/ox/src/ox/std/istring.hpp @@ -282,4 +282,9 @@ constexpr std::size_t IString::cap() const noexcept { return buffLen; } +template +struct MaybeView> { + using type = ox::StringView; +}; + } diff --git a/deps/ox/src/ox/std/maybeview.hpp b/deps/ox/src/ox/std/maybeview.hpp index 01ad86cf..149cd97f 100644 --- a/deps/ox/src/ox/std/maybeview.hpp +++ b/deps/ox/src/ox/std/maybeview.hpp @@ -15,7 +15,7 @@ namespace ox { // Maybe StringView. If T is a string type, MaybeType::type/MaybeView_t is a // StringView. 
This avoids creating unnecessary Strings when taking a // StringView or C string as a function argument. -template> +template struct MaybeView { using type = T; }; diff --git a/deps/ox/src/ox/std/string.hpp b/deps/ox/src/ox/std/string.hpp index 20ebeeaa..c3908681 100644 --- a/deps/ox/src/ox/std/string.hpp +++ b/deps/ox/src/ox/std/string.hpp @@ -589,4 +589,9 @@ constexpr auto alignOf(const ox::BasicString&) noexcept { return alignOf(&v); } +template +struct MaybeView> { + using type = ox::StringView; +}; + } diff --git a/deps/ox/src/ox/std/stringview.hpp b/deps/ox/src/ox/std/stringview.hpp index 1b3c83e9..63f57a9a 100644 --- a/deps/ox/src/ox/std/stringview.hpp +++ b/deps/ox/src/ox/std/stringview.hpp @@ -98,11 +98,6 @@ constexpr auto toStdStringView(CRStringView sv) noexcept { #endif -template -struct MaybeView { - using type = ox::StringView; -}; - constexpr ox::Result atoi(ox::CRStringView str) noexcept { int total = 0; int multiplier = 1; diff --git a/deps/ox/src/ox/std/utility.hpp b/deps/ox/src/ox/std/utility.hpp index d01283eb..5a69405a 100644 --- a/deps/ox/src/ox/std/utility.hpp +++ b/deps/ox/src/ox/std/utility.hpp @@ -8,7 +8,27 @@ #pragma once +#if __has_include() +#include #include "typetraits.hpp" +#else +#include "typetraits.hpp" +namespace std { + +template +constexpr typename ox::remove_reference::type &&move(T &&t) noexcept { + return static_cast::type&&>(t); +} + +template +constexpr void swap(T &a, T &b) noexcept { + auto temp = std::move(a); + a = std::move(b); + b = std::move(temp); +} + +} +#endif namespace ox { @@ -30,14 +50,3 @@ constexpr T &&forward(remove_reference_t &&t) noexcept { } - -#if __has_include() -#include -#else -namespace std { -template -constexpr typename ox::remove_reference::type &&move(T &&t) noexcept { - return static_cast::type&&>(t); -} -} -#endif diff --git a/deps/ox/src/ox/std/vector.hpp b/deps/ox/src/ox/std/vector.hpp index ae5de71c..7811e1e7 100644 --- a/deps/ox/src/ox/std/vector.hpp +++ b/deps/ox/src/ox/std/vector.hpp 
@@ -272,9 +272,9 @@ class Vector: detail::VectorAllocator { constexpr bool contains(MaybeView_t const&) const noexcept(useNoexcept); constexpr iterator insert( - std::size_t pos, std::size_t cnt, MaybeView_t const&val) noexcept(useNoexcept); + std::size_t pos, std::size_t cnt, T val) noexcept(useNoexcept); - constexpr iterator insert(std::size_t pos, MaybeView_t const&val) noexcept(useNoexcept); + constexpr iterator insert(std::size_t pos, T val) noexcept(useNoexcept); template constexpr iterator emplace(std::size_t pos, Args&&... args) noexcept(useNoexcept); @@ -282,9 +282,7 @@ class Vector: detail::VectorAllocator { template constexpr T &emplace_back(Args&&... args) noexcept(useNoexcept); - constexpr void push_back(T &&item) noexcept(useNoexcept); - - constexpr void push_back(MaybeView_t const&item) noexcept(useNoexcept); + constexpr void push_back(T item) noexcept(useNoexcept); constexpr void pop_back() noexcept(useNoexcept); @@ -534,14 +532,14 @@ constexpr bool Vector::contains(MaybeView_t co template constexpr typename Vector::template iterator Vector::insert( - std::size_t pos, std::size_t cnt, MaybeView_t const&val) noexcept(useNoexcept) { + std::size_t pos, std::size_t cnt, T val) noexcept(useNoexcept) { if (m_size + cnt > m_cap) { reserveInsert(m_cap ? 
m_size + cnt : initialCap, pos, cnt); if (pos < m_size) { - m_items[pos] = val; + m_items[pos] = std::move(val); } else { for (auto i = 0u; i < cnt; ++i) { - std::construct_at(&m_items[pos + i], val); + std::construct_at(&m_items[pos + i], m_items[pos]); } } } else { @@ -549,10 +547,10 @@ Vector::insert( for (auto i = m_size + cnt - 1; i > pos; --i) { std::construct_at(&m_items[i], std::move(m_items[i - cnt])); } - m_items[pos] = val; + m_items[pos] = std::move(val); } else { for (auto i = 0u; i < cnt; ++i) { - std::construct_at(&m_items[pos + i], val); + std::construct_at(&m_items[pos + i], m_items[pos]); } } } @@ -562,22 +560,22 @@ Vector::insert( template constexpr typename Vector::template iterator -Vector::insert(std::size_t pos, MaybeView_t const&val) noexcept(useNoexcept) { +Vector::insert(std::size_t pos, T val) noexcept(useNoexcept) { if (m_size == m_cap) { reserveInsert(m_cap ? m_cap * 2 : initialCap, pos); if (pos < m_size) { - m_items[pos] = val; + m_items[pos] = std::move(val); } else { - std::construct_at(&m_items[pos], val); + std::construct_at(&m_items[pos], m_items[pos]); } } else { if (pos < m_size) { for (auto i = m_size; i > pos; --i) { std::construct_at(&m_items[i], std::move(m_items[i - 1])); } - m_items[pos] = val; + m_items[pos] = std::move(val); } else { - std::construct_at(&m_items[pos], val); + std::construct_at(&m_items[pos], m_items[pos]); } } ++m_size; @@ -619,7 +617,7 @@ constexpr T &Vector::emplace_back(Args&&... args) } template -constexpr void Vector::push_back(T &&item) noexcept(useNoexcept) { +constexpr void Vector::push_back(T item) noexcept(useNoexcept) { if (m_size == m_cap) { reserve(m_cap ? m_cap * 2 : initialCap); } @@ -627,15 +625,6 @@ constexpr void Vector::push_back(T &&item) noexce ++m_size; } -template -constexpr void Vector::push_back(MaybeView_t const&item) noexcept(useNoexcept) { - if (m_size == m_cap) { - reserve(m_cap ? 
m_cap * 2 : initialCap); - } - std::construct_at(&m_items[m_size], item); - ++m_size; -} - template constexpr void Vector::pop_back() noexcept(useNoexcept) { --m_size;