Blame view

Pods/Realm/include/core/realm/unicode.hpp 7.01 KB
75d24c15   yangbin   123
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
  /*************************************************************************
   *
   * Copyright 2016 Realm Inc.
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   *
   * http://www.apache.org/licenses/LICENSE-2.0
   *
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   *
   **************************************************************************/
  
  #ifndef REALM_UNICODE_HPP
  #define REALM_UNICODE_HPP
  
  #include <array>
  #include <cstdint>
  #include <locale>
  #include <string>
  
  #include <realm/string_data.hpp>
  #include <realm/util/features.h>
  #include <realm/utilities.hpp>
  
  
  namespace realm {
  
  // Selects how strings are compared (see the description of
  // set_string_compare_method() in this header). The enumerators have no
  // explicit values, so they are numbered 0, 1, 2, 3 in declaration order;
  // 0..2 match the numeric 'method' values used in that description.
  enum string_compare_method_t {
      // 0: fast core-only compare (no OS/framework calls)
      STRING_COMPARE_CORE,
      // 1: native C++11 compare according to the user's current locale
      STRING_COMPARE_CPP11,
      // 2: compare through a user-supplied callback
      STRING_COMPARE_CALLBACK,
      // 3: NOTE(review): not described in this header -- confirm semantics in the implementation
      STRING_COMPARE_CORE_SIMILAR
  };
  
  // Global string comparison settings, declared here and defined elsewhere.
  // NOTE(review): presumably these hold the callback and method most recently
  // passed to set_string_compare_method() -- confirm against the implementation.
  // Per the notes on set_string_compare_method(), the setting is per-process
  // and changing it is not thread safe.
  extern StringCompareCallback string_compare_callback;
  extern string_compare_method_t string_compare_method;
  
  // Description for set_string_compare_method():
  //
  // Short summary: iOS language binding: call
  //     set_string_compare_method() for fast but slightly inaccurate sort in some countries, or
  //     set_string_compare_method(2, callbackptr) for slow but precise sort (see callbackptr below)
  //
  // Different countries ('locales') have different sorting order for strings and letters. Because there unfortunately
  // doesn't exist any unified standardized way to compare strings in C++ on multiple platforms, we need this method.
  //
  // It determines how sorting a TableView by a String column must take place. The 'method' argument can be:
  //
  // 0 (STRING_COMPARE_CORE): Fast core-only compare (no OS/framework calls). LIMITATIONS: Works only up to
  // 'Latin Extended 2' (unicodes 0...591). Also, sorting order is according to 'en_US' so it may be slightly
  // inaccurate for some countries. 'callback' argument is ignored.
  //
  // Return value: Always 'true'
  //
  // 1 (STRING_COMPARE_CPP11): Native C++11 method if core is compiled as C++11. Gives precise sorting according
  // to user's current locale. LIMITATIONS: Currently works only on Windows and on Linux with clang. Does NOT work on
  // iOS (due to only 'C' locale being available in CoreFoundation, which puts 'Z' before 'a'). Unknown if works on
  // Windows Phone / Android. Furthermore it does NOT work on Linux with gcc 4.7 or 4.8 (lack of c++11 feature that
  // can convert utf8->wstring without calls to setlocale()).
  //
  // Return value: 'true' if supported, otherwise 'false' (if so, then previous setting, if any, is preserved).
  //
  // 2 (STRING_COMPARE_CALLBACK): Callback method. Language binding / C++ user must provide a utf-8 callback method
  // of prototype:
  // bool callback(const char* string1, const char* string2) where 'callback' must return bool(string1 < string2).
  //
  // Return value: Always 'true'
  //
  // Default is method = 0 if the function is never called
  //
  // NOT THREAD SAFE! Call once during initialization or make sure it's not called simultaneously with different
  // arguments. The setting is remembered per-process; it does NOT need to be called prior to each sort
  bool set_string_compare_method(string_compare_method_t method, StringCompareCallback callback);
  
  
  // Return size in bytes of the utf8 character whose first byte is 'lead'. No error checking
  // (NOTE(review): the result for a byte that is not a valid utf8 lead byte is not specified here -- confirm).
  size_t sequence_length(char lead);
  
  // Limitations for case insensitive string search
  // Case insensitive search (equal, begins_with, ends_with, like and contains)
  // only works for unicodes 0...0x7f which is the same as the 0...127
  // ASCII character set (letters a-z and A-Z).
  
  // It does *not* work for the 0...255 ANSI character set that contains
  // characters from many European countries like Germany, France, Denmark,
  // etc.
  
  // It also does not work for characters from non-western countries like
  // Japan, Russia, Arabia, etc.
  
  // If there exist characters outside the ASCII range either in the text
  // to be searched for, or in the Realm string column which is searched
  // in, then the compare yields a random result such that the row may or
  // may not be included in the result set.
  
  // Return bool(string1 < string2), i.e. 'true' when string1 orders before string2.
  // NOTE(review): presumably the ordering follows the method selected with
  // set_string_compare_method() -- confirm against the implementation.
  bool utf8_compare(StringData string1, StringData string2);
  
  // Return unicode value of the character that 'character' points to.
  // NOTE(review): presumably decodes a single utf8 sequence starting at the pointer;
  // whether the encoding is validated is not visible in this header -- confirm.
  uint32_t utf8value(const char* character);
  
  // NOTE(review): no description is given in this header. 'begin' is passed by
  // reference to a pointer, so the callee is able to modify the caller's pointer --
  // confirm the exact contract against the definition.
  // The function is declared 'inline' but no definition is visible in this header;
  // an inline function must be defined in every translation unit that odr-uses it,
  // so verify that no other source file calls it through this declaration alone.
  inline bool equal_sequence(const char*& begin, const char* end, const char* begin2);
  
  // FIXME: The current approach to case insensitive comparison requires
  // that case mappings can be done in a way that does not change the
  // number of bytes used to encode the individual Unicode
  // character. This is not generally the case, so, as far as I can see,
  // this approach has no future.
  //
  // FIXME: The current approach to case insensitive comparison relies
  // on checking each "haystack" character against the corresponding
  // character in both a lower cased and an upper cased version of the
  // "needle". While this leads to efficient comparison, it ignores the
  // fact that "case folding" is the only correct approach to case
  // insensitive comparison in a locale agnostic Unicode
  // environment.
  //
  // See
  //   http://www.w3.org/International/wiki/Case_folding
  //   http://userguide.icu-project.org/transforms/casemappings#TOC-Case-Folding.
  //
  // The ideal API would probably be something like this:
  //
  //   case_fold:        utf_8 -> case_folded
  //   equal_case_fold:  (needle_case_folded, single_haystack_entry_utf_8) -> found
  //   search_case_fold: (needle_case_folded, huge_haystack_string_utf_8) -> found_at_position
  //
  // The case folded form would probably be using UTF-32 or UTF-16.
  
  
  /// Case-maps \a source.
  /// NOTE(review): presumably converts to upper case when \a upper is true and
  /// to lower case otherwise -- confirm against the implementation.
  /// If successful, returns a string of the same size as \a source.
  /// Returns none if invalid UTF-8 encoding was encountered.
  util::Optional<std::string> case_map(StringData source, bool upper);
  
  /// Tag type used only to select the overload of case_map() below; pass the
  /// single enumerator IgnoreErrors as the third argument.
  enum IgnoreErrorsTag { IgnoreErrors };
  /// Overload of case_map() that returns a plain std::string instead of an
  /// Optional, so it has no way to report failure through its return value.
  /// NOTE(review): presumably invalid UTF-8 in \a source is tolerated rather
  /// than reported; how such input is mapped is not visible here -- confirm.
  std::string case_map(StringData source, bool upper, IgnoreErrorsTag);
  
  /// Case insensitive equality test of \a haystack against a needle that is
  /// supplied in two forms, \a needle_upper and \a needle_lower (the upper
  /// cased and lower cased versions of the needle, as described in the FIXME
  /// notes in this header).
  ///
  /// Assumes that the sizes of \a needle_upper and \a needle_lower are
  /// identical to the size of \a haystack. Returns false if the needle
  /// is different from the haystack.
  bool equal_case_fold(StringData haystack, const char* needle_upper, const char* needle_lower);
  
  /// Case insensitive search in \a haystack for a needle that is supplied in
  /// two forms, \a needle_upper and \a needle_lower (the upper cased and lower
  /// cased versions of the needle, as described in the FIXME notes in this
  /// header).
  ///
  /// Assumes that the sizes of \a needle_upper and \a needle_lower are
  /// both equal to \a needle_size. Returns haystack.size() if the
  /// needle was not found.
  /// NOTE(review): presumably returns the position of the match when the
  /// needle is found -- confirm against the implementation.
  size_t search_case_fold(StringData haystack, const char* needle_upper, const char* needle_lower, size_t needle_size);
      
  /// Case insensitive test of whether \a haystack contains a needle that is
  /// supplied in two forms, \a needle_upper and \a needle_lower.
  ///
  /// Assumes that the sizes of \a needle_upper and \a needle_lower are
  /// both equal to \a needle_size. Returns false if the
  /// needle was not found.
  ///
  /// NOTE(review): \a charmap is a 256-entry table indexed by byte value; its
  /// contents and how the caller must build it are not described in this
  /// header -- confirm against the implementation.
  bool contains_ins(StringData haystack, const char* needle_upper, const char* needle_lower, size_t needle_size, const std::array<uint8_t, 256> &charmap);
  
  /// Case insensitive wildcard matching ('?' for single char, '*' for zero or more chars)
  /// Both overloads are declared noexcept.
  /// NOTE(review): in the three-argument overload, \a upper and \a lower are
  /// presumably the upper cased and lower cased versions of the pattern,
  /// prepared by the caller -- confirm against the implementation.
  bool string_like_ins(StringData text, StringData pattern) noexcept;
  bool string_like_ins(StringData text, StringData upper, StringData lower) noexcept;
  
  } // namespace realm
  
  #endif // REALM_UNICODE_HPP