/************************************************************************* * * Copyright 2016 Realm Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * **************************************************************************/ #ifndef REALM_STRING_HPP #define REALM_STRING_HPP #include #include #include #include #include #include #include #include #include #include #include namespace realm { /// Selects CityHash64 on 64-bit platforms, and Murmur2 on 32-bit platforms. /// This is what libc++ does, and it is a good general choice for a /// non-cryptographic hash function (suitable for std::unordered_map etc.). size_t murmur2_or_cityhash(const unsigned char* data, size_t len) noexcept; uint_least32_t murmur2_32(const unsigned char* data, size_t len) noexcept; uint_least64_t cityhash_64(const unsigned char* data, size_t len) noexcept; /// A reference to a chunk of character data. /// /// An instance of this class can be thought of as a type tag on a region of /// memory. It does not own the referenced memory, nor does it in any other way /// attempt to manage the lifetime of it. /// /// A null character inside the referenced region is considered a part of the /// string by Realm. /// /// For compatibility with C-style strings, when a string is stored in a Realm /// database, it is always followed by a terminating null character, regardless /// of whether the string itself has internal null characters. This means that /// when a StringData object is extracted from Realm, the referenced region is /// guaranteed to be followed immediately by an extra null character, but that /// null character is not inside the referenced region. Therefore, all of the /// following forms are guaranteed to return a pointer to a null-terminated /// string: /// /// \code{.cpp} /// /// group.get_table_name(...).data() /// table.get_column_name().data() /// table.get_string(...).data() /// table.get_mixed(...).get_string().data() /// /// \endcode /// /// Note that in general, no assumptions can be made about what follows a string /// that is referenced by a StringData object, or whether anything follows it at /// all. In particular, the receiver of a StringData object cannot assume that /// the referenced string is followed by a null character unless there is an /// externally provided guarantee. /// /// This class makes it possible to distinguish between a 'null' reference and a /// reference to the empty string (see is_null()). /// /// \sa BinaryData /// \sa Mixed class StringData { public: /// Construct a null reference. StringData() noexcept; StringData(int) = delete; /// If \a external_data is 'null', \a data_size must be zero. StringData(const char* external_data, size_t data_size) noexcept; template StringData(const std::basic_string&); template operator std::basic_string() const; template StringData(const util::Optional>&); StringData(std::string_view sv); StringData(const null&) noexcept; /// Initialize from a zero terminated C style string. Pass null to construct /// a null reference. StringData(const char* c_str) noexcept; char operator[](size_t i) const noexcept; const char* data() const noexcept; size_t size() const noexcept; /// Is this a null reference? /// /// An instance of StringData is a null reference when, and only when the /// stored size is zero (size()) and the stored pointer is the null pointer /// (data()). /// /// In the case of the empty string, the stored size is still zero, but the /// stored pointer is **not** the null pointer. It could for example point /// to the empty string literal. Note that the actual value of the pointer /// is immaterial in this case (as long as it is not zero), because when the /// size is zero, it is an error to dereference the pointer. /// /// Conversion of a StringData object to `bool` yields the logical negation /// of the result of calling this function. In other words, a StringData /// object is converted to true if it is not the null reference, otherwise /// it is converted to false. bool is_null() const noexcept; friend bool operator==(const StringData&, const StringData&) noexcept; friend bool operator!=(const StringData&, const StringData&) noexcept; //@{ /// Trivial bytewise lexicographical comparison. friend bool operator<(const StringData&, const StringData&) noexcept; friend bool operator>(const StringData&, const StringData&) noexcept; friend bool operator<=(const StringData&, const StringData&) noexcept; friend bool operator>=(const StringData&, const StringData&) noexcept; //@} bool begins_with(StringData) const noexcept; bool ends_with(StringData) const noexcept; bool contains(StringData) const noexcept; bool contains(StringData d, const std::array &charmap) const noexcept; // Wildcard matching ('?' for single char, '*' for zero or more chars) // case insensitive version in unicode.hpp bool like(StringData) const noexcept; //@{ /// Undefined behavior if \a n, \a i, or i+n is greater than /// size(). StringData prefix(size_t n) const noexcept; StringData suffix(size_t n) const noexcept; StringData substr(size_t i, size_t n) const noexcept; StringData substr(size_t i) const noexcept; //@} template friend std::basic_ostream& operator<<(std::basic_ostream&, const StringData&); explicit operator bool() const noexcept; explicit operator std::string_view() const noexcept { return std::string_view(m_data); } /// If the StringData is NULL, the hash is 0. Otherwise, the function /// `murmur2_or_cityhash()` is called on the data. size_t hash() const noexcept; private: const char* m_data; size_t m_size; static bool matchlike(const StringData& text, const StringData& pattern) noexcept; static bool matchlike_ins(const StringData& text, const StringData& pattern_upper, const StringData& pattern_lower) noexcept; friend bool string_like_ins(StringData, StringData) noexcept; friend bool string_like_ins(StringData, StringData, StringData) noexcept; }; // Implementation: inline StringData::StringData() noexcept : m_data(nullptr) , m_size(0) { } inline StringData::StringData(const char* external_data, size_t data_size) noexcept : m_data(external_data) , m_size(data_size) { REALM_ASSERT_DEBUG(external_data || data_size == 0); } inline StringData::StringData(std::string_view sv) : m_data(sv.data()) , m_size(sv.size()) { } template inline StringData::StringData(const std::basic_string& s) : m_data(s.data()) , m_size(s.size()) { } template inline StringData::operator std::basic_string() const { return std::basic_string(m_data, m_size); } template inline StringData::StringData(const util::Optional>& s) : m_data(s ? s->data() : nullptr) , m_size(s ? s->size() : 0) { } inline StringData::StringData(const null&) noexcept : m_data(nullptr) , m_size(0) { } inline StringData::StringData(const char* c_str) noexcept : m_data(c_str) , m_size(0) { if (c_str) m_size = std::char_traits::length(c_str); } inline char StringData::operator[](size_t i) const noexcept { return m_data[i]; } inline const char* StringData::data() const noexcept { return m_data; } inline size_t StringData::size() const noexcept { return m_size; } inline bool StringData::is_null() const noexcept { return !m_data; } inline bool operator==(const StringData& a, const StringData& b) noexcept { return a.m_size == b.m_size && a.is_null() == b.is_null() && safe_equal(a.m_data, a.m_data + a.m_size, b.m_data); } inline bool operator!=(const StringData& a, const StringData& b) noexcept { return !(a == b); } inline bool operator<(const StringData& a, const StringData& b) noexcept { if (a.is_null() && !b.is_null()) { // Null strings are smaller than all other strings, and not // equal to empty strings. return true; } return std::lexicographical_compare(a.m_data, a.m_data + a.m_size, b.m_data, b.m_data + b.m_size); } inline bool operator>(const StringData& a, const StringData& b) noexcept { return b < a; } inline bool operator<=(const StringData& a, const StringData& b) noexcept { return !(b < a); } inline bool operator>=(const StringData& a, const StringData& b) noexcept { return !(a < b); } inline bool StringData::begins_with(StringData d) const noexcept { if (is_null() && !d.is_null()) return false; return d.m_size <= m_size && safe_equal(m_data, m_data + d.m_size, d.m_data); } inline bool StringData::ends_with(StringData d) const noexcept { if (is_null() && !d.is_null()) return false; return d.m_size <= m_size && safe_equal(m_data + m_size - d.m_size, m_data + m_size, d.m_data); } inline bool StringData::contains(StringData d) const noexcept { if (is_null() && !d.is_null()) return false; return d.m_size == 0 || std::search(m_data, m_data + m_size, d.m_data, d.m_data + d.m_size) != m_data + m_size; } /// This method takes an array that maps chars to distance that can be moved (and zero for chars not in needle), /// allowing the method to apply Boyer-Moore for quick substring search /// The map is calculated in the StringNode class (so it can be reused across searches) inline bool StringData::contains(StringData d, const std::array &charmap) const noexcept { if (is_null() && !d.is_null()) return false; size_t needle_size = d.size(); if (needle_size == 0) return true; // Prepare vars to avoid lookups in loop size_t last_char_pos = d.size()-1; unsigned char lastChar = d[last_char_pos]; // Do Boyer-Moore search size_t p = last_char_pos; while (p < m_size) { unsigned char c = m_data[p]; // Get candidate for last char if (c == lastChar) { StringData candidate = substr(p-needle_size+1, needle_size); if (candidate == d) return true; // text found! } // If we don't have a match, see how far we can move char_pos if (charmap[c] == 0) p += needle_size; // char was not present in search string else p += charmap[c]; } return false; } inline bool StringData::like(StringData d) const noexcept { if (is_null() || d.is_null()) { return (is_null() && d.is_null()); } return matchlike(*this, d); } inline StringData StringData::prefix(size_t n) const noexcept { return substr(0, n); } inline StringData StringData::suffix(size_t n) const noexcept { return substr(m_size - n); } inline StringData StringData::substr(size_t i, size_t n) const noexcept { return StringData(m_data + i, n); } inline StringData StringData::substr(size_t i) const noexcept { return substr(i, m_size - i); } template inline std::basic_ostream& operator<<(std::basic_ostream& out, const StringData& d) { if (d.is_null()) { out << ""; } else { for (const char* i = d.m_data; i != d.m_data + d.m_size; ++i) out << *i; } return out; } inline StringData::operator bool() const noexcept { return !is_null(); } inline size_t StringData::hash() const noexcept { if (is_null()) return 0; auto unsigned_data = reinterpret_cast(m_data); return murmur2_or_cityhash(unsigned_data, m_size); } } // namespace realm namespace std { template <> struct hash<::realm::StringData> { inline size_t operator()(const ::realm::StringData& str) const noexcept { return str.hash(); } }; } // namespace std #endif // REALM_STRING_HPP