libstdc++
regex_compiler.h
Go to the documentation of this file.
1 // class template regex -*- C++ -*-
2 
3 // Copyright (C) 2010-2014 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /**
26  * @file bits/regex_compiler.h
27  * This is an internal header file, included by other library headers.
28  * Do not attempt to use it directly. @headername{regex}
29  */
30 
31 namespace std _GLIBCXX_VISIBILITY(default)
32 {
33 namespace __detail
34 {
35 _GLIBCXX_BEGIN_NAMESPACE_VERSION
36 
37  /**
38  * @addtogroup regex-detail
39  * @{
40  */
41 
/// Forward declaration of the bracket-expression matcher.  _Compiler
/// names _BracketMatcher<_TraitsT> before the full definition appears
/// later in this header.
template<typename _TraitsT>
  struct _BracketMatcher;
44 
45  /// Builds an NFA from an input iterator interval.
46  template<typename _FwdIter, typename _TraitsT>
47  class _Compiler
48  {
49  public:
50  typedef typename _TraitsT::string_type _StringT;
51  typedef _NFA<_TraitsT> _RegexT;
53 
54  _Compiler(_FwdIter __b, _FwdIter __e,
55  const _TraitsT& __traits, _FlagT __flags);
56 
58  _M_get_nfa()
59  { return make_shared<_RegexT>(std::move(_M_nfa)); }
60 
61  private:
62  typedef _Scanner<_FwdIter> _ScannerT;
63  typedef typename _ScannerT::_TokenT _TokenT;
68 
69  // accepts a specific token or returns false.
70  bool
71  _M_match_token(_TokenT __token);
72 
73  void
74  _M_disjunction();
75 
76  void
77  _M_alternative();
78 
79  bool
80  _M_term();
81 
82  bool
83  _M_assertion();
84 
85  void
86  _M_quantifier();
87 
88  bool
89  _M_atom();
90 
91  bool
92  _M_bracket_expression();
93 
94  void
95  _M_expression_term(_BMatcherT& __matcher);
96 
97  bool
98  _M_range_expression(_BMatcherT& __matcher);
99 
100  bool
101  _M_collating_symbol(_BMatcherT& __matcher);
102 
103  bool
104  _M_equivalence_class(_BMatcherT& __matcher);
105 
106  bool
107  _M_character_class(_BMatcherT& __matcher);
108 
109  int
110  _M_cur_int_value(int __radix);
111 
112  bool
113  _M_try_char();
114 
115  _StateSeqT
116  _M_pop()
117  {
118  auto ret = _M_stack.top();
119  _M_stack.pop();
120  return ret;
121  }
122 
123  _FlagT _M_flags;
124  const _TraitsT& _M_traits;
125  const _CtypeT& _M_ctype;
126  _ScannerT _M_scanner;
127  _RegexT _M_nfa;
128  _StringT _M_value;
129  _StackT _M_stack;
130  };
131 
/// Trait telling whether a container type keeps its elements in
/// contiguous storage.  Unknown types are treated as non-contiguous.
template<typename _Tp>
  struct __has_contiguous_iter
  : std::integral_constant<bool, false>
  { };

/// basic_string storage is contiguous.
template<typename _Ch, typename _Tr, typename _Alloc>
  struct __has_contiguous_iter<std::basic_string<_Ch, _Tr, _Alloc>>
  : std::integral_constant<bool, true>
  { };

/// vector<_Tp> storage is contiguous ...
template<typename _Tp, typename _Alloc>
  struct __has_contiguous_iter<std::vector<_Tp, _Alloc>>
  : std::integral_constant<bool, true>
  { };

/// ... except for vector<bool>, whose storage is not contiguous.
template<typename _Alloc>
  struct __has_contiguous_iter<std::vector<bool, _Alloc>>
  : std::integral_constant<bool, false>
  { };
149 
150  template<typename _Tp>
151  struct __is_contiguous_normal_iter : std::false_type { };
152 
153  template<typename _Tp, typename _Cont>
154  struct
155  __is_contiguous_normal_iter<__gnu_cxx::__normal_iterator<_Tp, _Cont>>
156  : __has_contiguous_iter<_Cont>::type
157  { };
158 
159  template<typename _Iter, typename _TraitsT>
160  using __enable_if_contiguous_normal_iter
161  = typename enable_if< __is_contiguous_normal_iter<_Iter>::value,
163 
164  template<typename _Iter, typename _TraitsT>
165  using __disable_if_contiguous_normal_iter
166  = typename enable_if< !__is_contiguous_normal_iter<_Iter>::value,
168 
169  template<typename _FwdIter, typename _TraitsT>
170  inline __disable_if_contiguous_normal_iter<_FwdIter, _TraitsT>
171  __compile_nfa(_FwdIter __first, _FwdIter __last, const _TraitsT& __traits,
173  {
174  using _Cmplr = _Compiler<_FwdIter, _TraitsT>;
175  return _Cmplr(__first, __last, __traits, __flags)._M_get_nfa();
176  }
177 
178  template<typename _Iter, typename _TraitsT>
179  inline __enable_if_contiguous_normal_iter<_Iter, _TraitsT>
180  __compile_nfa(_Iter __first, _Iter __last, const _TraitsT& __traits,
182  {
183  size_t __len = __last - __first;
184  const auto* __cfirst = __len ? std::__addressof(*__first) : nullptr;
185  return __compile_nfa(__cfirst, __cfirst + __len, __traits, __flags);
186  }
187 
/// Predicate that accepts every character except the four line
/// terminators '\n', '\r', U+2028 and U+2029.  The character is passed
/// through _TraitsT::translate() before it is compared.
template<typename _TraitsT>
  struct _AnyMatcher
  {
    typedef typename _TraitsT::char_type _CharT;

    /// Keeps a reference to __traits; the traits object must outlive
    /// this matcher.
    explicit
    _AnyMatcher(const _TraitsT& __traits)
    : _M_traits(__traits)
    { }

    /// Returns true when the translated __ch is not a line terminator.
    bool
    operator()(_CharT __ch) const
    {
      // Translate once and reuse the result for all four comparisons.
      const auto __c = _M_traits.translate(__ch);
      return __c != '\n'
        && __c != '\r'
        && __c != u'\u2028'
        && __c != u'\u2029';
    }

    const _TraitsT& _M_traits;
  };
209 
210  template<typename _TraitsT>
211  struct _CharMatcher
212  {
213  typedef typename _TraitsT::char_type _CharT;
215 
216  explicit
217  _CharMatcher(_CharT __ch, const _TraitsT& __traits, _FlagT __flags)
218  : _M_traits(__traits), _M_flags(__flags), _M_ch(_M_translate(__ch))
219  { }
220 
221  bool
222  operator()(_CharT __ch) const
223  { return _M_ch == _M_translate(__ch); }
224 
225  _CharT
226  _M_translate(_CharT __ch) const
227  {
228  if (_M_flags & regex_constants::icase)
229  return _M_traits.translate_nocase(__ch);
230  else
231  return _M_traits.translate(__ch);
232  }
233 
234  const _TraitsT& _M_traits;
235  _FlagT _M_flags;
236  _CharT _M_ch;
237  };
238 
239  /// Matches a character range (bracket expression)
240  // TODO: Convert used _M_flags fields to template parameters, including
241  // collate and icase. Avoid using std::set, could use flat_set
242  // (sorted vector and binary search) instead; use an fixed sized (256)
243  // vector<bool> for char specialization if necessary.
244  template<typename _TraitsT>
245  struct _BracketMatcher
246  {
247  typedef typename _TraitsT::char_type _CharT;
248  typedef typename _TraitsT::char_class_type _CharClassT;
249  typedef typename _TraitsT::string_type _StringT;
251 
252  explicit
253  _BracketMatcher(bool __is_non_matching,
254  const _TraitsT& __traits,
255  _FlagT __flags)
256  : _M_traits(__traits), _M_class_set(0), _M_flags(__flags),
257  _M_is_non_matching(__is_non_matching)
258  { }
259 
260  bool
261  operator()(_CharT) const;
262 
263  void
264  _M_add_char(_CharT __c)
265  { _M_char_set.insert(_M_translate(__c)); }
266 
267  void
268  _M_add_collating_element(const _StringT& __s)
269  {
270  auto __st = _M_traits.lookup_collatename(__s.data(),
271  __s.data() + __s.size());
272  if (__st.empty())
273  __throw_regex_error(regex_constants::error_collate);
274  _M_char_set.insert(_M_translate(__st[0]));
275  }
276 
277  void
278  _M_add_equivalence_class(const _StringT& __s)
279  {
280  auto __st = _M_traits.lookup_collatename(__s.data(),
281  __s.data() + __s.size());
282  if (__st.empty())
283  __throw_regex_error(regex_constants::error_collate);
284  __st = _M_traits.transform_primary(__st.data(),
285  __st.data() + __st.size());
286  _M_equiv_set.insert(__st);
287  }
288 
289  void
290  _M_add_character_class(const _StringT& __s)
291  {
292  auto __mask = _M_traits.lookup_classname(__s.data(),
293  __s.data() + __s.size(),
294  _M_is_icase());
295  if (__mask == 0)
296  __throw_regex_error(regex_constants::error_ctype);
297  _M_class_set |= __mask;
298  }
299 
300  void
301  _M_make_range(_CharT __l, _CharT __r)
302  {
303  if (_M_flags & regex_constants::collate)
304  _M_range_set.insert(
305  make_pair(_M_get_str(_M_translate(__l)),
306  _M_get_str(_M_translate(__r))));
307  else
308  _M_range_set.insert(make_pair(_M_get_str(__l), _M_get_str(__r)));
309  }
310 
311  _CharT
312  _M_translate(_CharT __c) const
313  {
314  if (_M_is_icase())
315  return _M_traits.translate_nocase(__c);
316  else
317  return _M_traits.translate(__c);
318  }
319 
320  bool
321  _M_is_icase() const
322  { return _M_flags & regex_constants::icase; }
323 
324  _StringT
325  _M_get_str(_CharT __c) const
326  {
327  _StringT __s(1, __c);
328  return _M_traits.transform(__s.begin(), __s.end());
329  }
330 
331  std::set<_CharT> _M_char_set;
332  std::set<_StringT> _M_equiv_set;
333  std::set<pair<_StringT, _StringT>> _M_range_set;
334  const _TraitsT& _M_traits;
335  _CharClassT _M_class_set;
336  _FlagT _M_flags;
337  bool _M_is_non_matching;
338  };
339 
340  //@} regex-detail
341 _GLIBCXX_END_NAMESPACE_VERSION
342 } // namespace __detail
343 } // namespace std
344 
345 #include <bits/regex_compiler.tcc>
Primary class template ctype facet. This template class defines classification and conversion function...
struct _Scanner. Scans an input range for regex tokens.
Definition: regex_scanner.h:53
constexpr error_type error_ctype(_S_error_ctype)
constexpr pair< typename __decay_and_strip< _T1 >::__type, typename __decay_and_strip< _T2 >::__type > make_pair(_T1 &&__x, _T2 &&__y)
A convenience wrapper for creating a pair from two objects.
Definition: stl_pair.h:276
reference top()
Definition: stl_stack.h:159
Builds an NFA from an input iterator interval.
void pop()
Removes first element.
Definition: stl_stack.h:212
integral_constant
Definition: type_traits:57
std::pair< iterator, bool > insert(const value_type &__x)
Attempts to insert an element into the set.
Definition: stl_set.h:498
syntax_option_type
This is a bitmask type indicating how to interpret the regex.
A smart pointer with reference-counted copy semantics.
Definition: shared_ptr.h:93
constexpr error_type error_collate(_S_error_collate)
Matches a character range (bracket expression)
_Tp * __addressof(_Tp &__r) noexcept
Same as C++11 std::addressof.
Definition: move.h:47
Managing sequences of characters and character-like objects.
Definition: basic_string.h:112
Describes a sequence of one or more _State, its current start and end(s). This structure contains fra...
A standard container giving FILO behavior.
Definition: stl_stack.h:96