segment.hpp
14.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
//
// Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
//
// Distributed under the Boost Software License, Version 1.0.
// https://www.boost.org/LICENSE_1_0.txt
#ifndef BOOST_LOCALE_BOUNDARY_SEGMENT_HPP_INCLUDED
#define BOOST_LOCALE_BOUNDARY_SEGMENT_HPP_INCLUDED
#include <boost/locale/config.hpp>
#include <boost/locale/util/string.hpp>
#include <iosfwd>
#include <iterator>
#include <locale>
#include <string>
#ifdef BOOST_MSVC
# pragma warning(push)
# pragma warning(disable : 4275 4251 4231 4660)
#endif
namespace boost { namespace locale { namespace boundary {
/// \cond INTERNAL
namespace detail {
/// Three-way lexicographic comparison of [l_begin, l_end) against [r_begin, r_end).
///
/// Characters are compared through std::char_traits of the left range's value type;
/// every character read from the right range is converted to that type first.
///
/// \return -1 when the left range orders first, 1 when the right range orders first,
///         0 when both ranges have the same length and all characters are equal.
template<typename LeftIterator, typename RightIterator>
int compare_text(LeftIterator l_begin, LeftIterator l_end, RightIterator r_begin, RightIterator r_end)
{
    typedef typename std::iterator_traits<LeftIterator>::value_type char_type;
    typedef std::char_traits<char_type> traits;

    while(l_begin != l_end && r_begin != r_end) {
        // Both iterators are advanced before the characters are inspected.
        const char_type left = *l_begin++;
        const char_type right = *r_begin++;
        if(!traits::eq(left, right))
            return traits::lt(left, right) ? -1 : 1;
    }

    // At least one range is exhausted here; the shorter one orders first.
    if(l_begin != l_end)
        return 1;
    return (r_begin == r_end) ? 0 : -1;
}
/// Three-way comparison of two ranges; forwards their begin()/end() iterators
/// to the four-argument compare_text and returns its result unchanged.
template<typename Left, typename Right>
int compare_text(const Left& l, const Right& r)
{
    const int order = compare_text(l.begin(), l.end(), r.begin(), r.end());
    return order;
}
/// Three-way comparison of the range \a l against the C string starting at \a begin.
/// The end of the C string is whatever util::str_end reports for it.
template<typename Left, typename Char>
int compare_string(const Left& l, const Char* begin)
{
    const Char* const str_last = util::str_end(begin);
    return compare_text(l.begin(), l.end(), begin, str_last);
}
/// Three-way comparison of the C string starting at \a begin against the range \a r.
/// The end of the C string is whatever util::str_end reports for it.
template<typename Right, typename Char>
int compare_string(const Char* begin, const Right& r)
{
    const Char* const str_last = util::str_end(begin);
    return compare_text(begin, str_last, r.begin(), r.end());
}
} // namespace detail
/// \endcond
/// \addtogroup boundary
/// @{
/// \brief a segment object that represents a pair of two iterators that define the range where
/// this segment exists and a rule that defines it.
///
/// This type of object is dereferenced by the iterators of segment_index. Using the rule() member function
/// you can get the specific rule this segment was selected with. For example, when you use
/// word boundary analysis, you can check if the specific word contains Kana letters by checking
/// (rule() & \ref word_kana) != 0. For a sentence analysis you can check if the sentence is selected because
/// a sentence terminator is found (\ref sentence_term) or there is a line break (\ref sentence_sep).
///
/// This object can be automatically converted to std::basic_string with the same type of character. It is also
/// a valid range that has begin() and end() member functions returning iterators on the location of the segment.
///
/// \see
///
/// - \ref segment_index
/// - \ref boundary_point
/// - \ref boundary_point_index
template<typename IteratorType>
class segment : public std::pair<IteratorType, IteratorType> {
public:
    /// Character type, taken from the iterator's value_type
    typedef typename std::iterator_traits<IteratorType>::value_type char_type;
    /// The std::basic_string type returned by str()
    typedef std::basic_string<char_type> string_type;
    /// Element type of the range - the character itself
    typedef char_type value_type;
    /// Iterator type used to walk the range
    typedef IteratorType iterator;
    /// Same type as iterator, so the segment can be used as a constant range
    typedef IteratorType const_iterator;
    /// Type of the difference between two iterators
    typedef typename std::iterator_traits<IteratorType>::difference_type difference_type;

    /// Create a segment with value-initialized iterators and rule 0
    segment() : rule_(0) {}

    /// Create a segment covering [b, e) that was selected by rule \a r
    segment(iterator b, iterator e, rule_type r) :
        std::pair<IteratorType, IteratorType>(b, e),
        rule_(r)
    {}

    /// Set the start of the range
    void begin(const iterator& v)
    {
        this->first = v;
    }

    /// Set the end of the range
    void end(const iterator& v)
    {
        this->second = v;
    }

    /// Get the start of the range
    IteratorType begin() const
    {
        return this->first;
    }

    /// Get the end of the range
    IteratorType end() const
    {
        return this->second;
    }

    /// Implicit conversion to any std::basic_string of char_type; copies the characters of the range
    template<class T, class A>
    operator std::basic_string<char_type, T, A>() const
    {
        return std::basic_string<char_type, T, A>(begin(), end());
    }

    /// Explicitly build a string_type holding a copy of the characters of the range
    string_type str() const
    {
        return string_type(this->first, this->second);
    }

    /// Number of characters between begin() and end()
    size_t length() const
    {
        const difference_type count = std::distance(this->first, this->second);
        return static_cast<size_t>(count);
    }

    /// True when begin() and end() are the same position
    bool empty() const
    {
        return this->first == this->second;
    }

    /// Get the rule that was used to select this segment
    rule_type rule() const
    {
        return rule_;
    }

    /// Set the rule that was used to select this segment
    void rule(rule_type r)
    {
        rule_ = r;
    }

    // The member comparisons below make sure std::pair's operator== / operator!= are overridden:
    // segments are compared by their text, not by iterator identity.

    /// True when both segments contain the same text
    bool operator==(const segment& other) const
    {
        const int order = detail::compare_text(*this, other);
        return order == 0;
    }

    /// True when the segments contain different text
    bool operator!=(const segment& other) const
    {
        const int order = detail::compare_text(*this, other);
        return order != 0;
    }

private:
    rule_type rule_;
};
/// Segment == segment: true when both contain the same text (the rule is not compared)
template<typename IteratorL, typename IteratorR>
bool operator==(const segment<IteratorL>& l, const segment<IteratorR>& r)
{
    const int order = detail::compare_text(l, r);
    return order == 0;
}
/// Segment != segment: true when their texts differ (the rule is not compared)
template<typename IteratorL, typename IteratorR>
bool operator!=(const segment<IteratorL>& l, const segment<IteratorR>& r)
{
    const int order = detail::compare_text(l, r);
    return order != 0;
}
/// Segment < segment: true when the text of \a l orders before the text of \a r
template<typename IteratorL, typename IteratorR>
bool operator<(const segment<IteratorL>& l, const segment<IteratorR>& r)
{
    const int order = detail::compare_text(l, r);
    return order < 0;
}
/// Segment <= segment: true when the text of \a l does not order after the text of \a r
template<typename IteratorL, typename IteratorR>
bool operator<=(const segment<IteratorL>& l, const segment<IteratorR>& r)
{
    const int order = detail::compare_text(l, r);
    return order <= 0;
}
/// Segment > segment: true when the text of \a l orders after the text of \a r
template<typename IteratorL, typename IteratorR>
bool operator>(const segment<IteratorL>& l, const segment<IteratorR>& r)
{
    const int order = detail::compare_text(l, r);
    return order > 0;
}
/// Segment >= segment: true when the text of \a l does not order before the text of \a r
template<typename IteratorL, typename IteratorR>
bool operator>=(const segment<IteratorL>& l, const segment<IteratorR>& r)
{
    const int order = detail::compare_text(l, r);
    return order >= 0;
}
/// String == segment: true when the string and the segment contain the same text
template<typename CharType, typename Traits, typename Alloc, typename IteratorR>
bool operator==(const std::basic_string<CharType, Traits, Alloc>& l, const segment<IteratorR>& r)
{
    const int order = detail::compare_text(l, r);
    return order == 0;
}
/// String != segment: true when the string and the segment contain different text
template<typename CharType, typename Traits, typename Alloc, typename IteratorR>
bool operator!=(const std::basic_string<CharType, Traits, Alloc>& l, const segment<IteratorR>& r)
{
    const int order = detail::compare_text(l, r);
    return order != 0;
}
/// String < segment: true when the string orders before the text of the segment
template<typename CharType, typename Traits, typename Alloc, typename IteratorR>
bool operator<(const std::basic_string<CharType, Traits, Alloc>& l, const segment<IteratorR>& r)
{
    const int order = detail::compare_text(l, r);
    return order < 0;
}
/// String <= segment: true when the string does not order after the text of the segment
template<typename CharType, typename Traits, typename Alloc, typename IteratorR>
bool operator<=(const std::basic_string<CharType, Traits, Alloc>& l, const segment<IteratorR>& r)
{
    const int order = detail::compare_text(l, r);
    return order <= 0;
}
/// String > segment: true when the string orders after the text of the segment
template<typename CharType, typename Traits, typename Alloc, typename IteratorR>
bool operator>(const std::basic_string<CharType, Traits, Alloc>& l, const segment<IteratorR>& r)
{
    const int order = detail::compare_text(l, r);
    return order > 0;
}
/// String >= segment: true when the string does not order before the text of the segment
template<typename CharType, typename Traits, typename Alloc, typename IteratorR>
bool operator>=(const std::basic_string<CharType, Traits, Alloc>& l, const segment<IteratorR>& r)
{
    const int order = detail::compare_text(l, r);
    return order >= 0;
}
/// Segment == string: true when the segment and the string contain the same text
template<typename Iterator, typename CharType, typename Traits, typename Alloc>
bool operator==(const segment<Iterator>& l, const std::basic_string<CharType, Traits, Alloc>& r)
{
    const int order = detail::compare_text(l, r);
    return order == 0;
}
/// Segment != string: true when the segment and the string contain different text
template<typename Iterator, typename CharType, typename Traits, typename Alloc>
bool operator!=(const segment<Iterator>& l, const std::basic_string<CharType, Traits, Alloc>& r)
{
    const int order = detail::compare_text(l, r);
    return order != 0;
}
/// Segment < string: true when the text of the segment orders before the string
template<typename Iterator, typename CharType, typename Traits, typename Alloc>
bool operator<(const segment<Iterator>& l, const std::basic_string<CharType, Traits, Alloc>& r)
{
    const int order = detail::compare_text(l, r);
    return order < 0;
}
/// Segment <= string: true when the text of the segment does not order after the string
template<typename Iterator, typename CharType, typename Traits, typename Alloc>
bool operator<=(const segment<Iterator>& l, const std::basic_string<CharType, Traits, Alloc>& r)
{
    const int order = detail::compare_text(l, r);
    return order <= 0;
}
/// Segment > string: true when the text of the segment orders after the string
template<typename Iterator, typename CharType, typename Traits, typename Alloc>
bool operator>(const segment<Iterator>& l, const std::basic_string<CharType, Traits, Alloc>& r)
{
    const int order = detail::compare_text(l, r);
    return order > 0;
}
/// Segment >= string: true when the text of the segment does not order before the string
template<typename Iterator, typename CharType, typename Traits, typename Alloc>
bool operator>=(const segment<Iterator>& l, const std::basic_string<CharType, Traits, Alloc>& r)
{
    const int order = detail::compare_text(l, r);
    return order >= 0;
}
/// C string == segment: true when the C string and the segment contain the same text
template<typename CharType, typename IteratorR>
bool operator==(const CharType* l, const segment<IteratorR>& r)
{
    const int order = detail::compare_string(l, r);
    return order == 0;
}
/// C string != segment: true when the C string and the segment contain different text
template<typename CharType, typename IteratorR>
bool operator!=(const CharType* l, const segment<IteratorR>& r)
{
    const int order = detail::compare_string(l, r);
    return order != 0;
}
/// C string < segment: true when the C string orders before the text of the segment
template<typename CharType, typename IteratorR>
bool operator<(const CharType* l, const segment<IteratorR>& r)
{
    const int order = detail::compare_string(l, r);
    return order < 0;
}
/// C string <= segment: true when the C string does not order after the text of the segment
template<typename CharType, typename IteratorR>
bool operator<=(const CharType* l, const segment<IteratorR>& r)
{
    const int order = detail::compare_string(l, r);
    return order <= 0;
}
/// C string > segment: true when the C string orders after the text of the segment
template<typename CharType, typename IteratorR>
bool operator>(const CharType* l, const segment<IteratorR>& r)
{
    const int order = detail::compare_string(l, r);
    return order > 0;
}
/// C string >= segment: true when the C string does not order before the text of the segment
template<typename CharType, typename IteratorR>
bool operator>=(const CharType* l, const segment<IteratorR>& r)
{
    const int order = detail::compare_string(l, r);
    return order >= 0;
}
/// Segment == C string: true when the segment and the C string contain the same text
template<typename Iterator, typename CharType>
bool operator==(const segment<Iterator>& l, const CharType* r)
{
    const int order = detail::compare_string(l, r);
    return order == 0;
}
/// Segment != C string: true when the segment and the C string contain different text
template<typename Iterator, typename CharType>
bool operator!=(const segment<Iterator>& l, const CharType* r)
{
    const int order = detail::compare_string(l, r);
    return order != 0;
}
/// Segment < C string: true when the text of the segment orders before the C string
template<typename Iterator, typename CharType>
bool operator<(const segment<Iterator>& l, const CharType* r)
{
    const int order = detail::compare_string(l, r);
    return order < 0;
}
/// Segment <= C string: true when the text of the segment does not order after the C string
template<typename Iterator, typename CharType>
bool operator<=(const segment<Iterator>& l, const CharType* r)
{
    const int order = detail::compare_string(l, r);
    return order <= 0;
}
/// Segment > C string: true when the text of the segment orders after the C string
template<typename Iterator, typename CharType>
bool operator>(const segment<Iterator>& l, const CharType* r)
{
    const int order = detail::compare_string(l, r);
    return order > 0;
}
/// Segment >= C string: true when the text of the segment does not order before the C string
template<typename Iterator, typename CharType>
bool operator>=(const segment<Iterator>& l, const CharType* r)
{
    const int order = detail::compare_string(l, r);
    return order >= 0;
}
// Convenience typedefs, grouped by character type: one segment over the standard string's
// const_iterator and one over a raw const character pointer.
typedef segment<std::string::const_iterator> ssegment; ///< segment over std::string::const_iterator
typedef segment<const char*> csegment;                 ///< segment over const char*

typedef segment<std::wstring::const_iterator> wssegment; ///< segment over std::wstring::const_iterator
typedef segment<const wchar_t*> wcsegment;               ///< segment over const wchar_t*

#ifdef BOOST_LOCALE_ENABLE_CHAR16_T
typedef segment<std::u16string::const_iterator> u16ssegment; ///< segment over std::u16string::const_iterator
typedef segment<const char16_t*> u16csegment;                ///< segment over const char16_t*
#endif

#ifdef BOOST_LOCALE_ENABLE_CHAR32_T
typedef segment<std::u32string::const_iterator> u32ssegment; ///< segment over std::u32string::const_iterator
typedef segment<const char32_t*> u32csegment;                ///< segment over const char32_t*
#endif
/// Write the segment to the stream character by character.
///
/// Each character of [tok.begin(), tok.end()) is inserted on its own with `out << c`;
/// the text is not written as a single string.
///
/// \return the stream \a out
template<typename CharType, typename TraitsType, typename Iterator>
std::basic_ostream<CharType, TraitsType>& operator<<(std::basic_ostream<CharType, TraitsType>& out,
                                                     const segment<Iterator>& tok)
{
    Iterator cur = tok.begin();
    Iterator last = tok.end();
    while(cur != last) {
        out << *cur;
        ++cur;
    }
    return out;
}
/// @}
}}} // namespace boost::locale::boundary
#ifdef BOOST_MSVC
# pragma warning(pop)
#endif
#endif