libstdc++
codecvt.h
Go to the documentation of this file.
1 // Locale support (codecvt) -*- C++ -*-
2 
3 // Copyright (C) 2000-2014 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /** @file bits/codecvt.h
26  * This is an internal header file, included by other library headers.
27  * Do not attempt to use it directly. @headername{locale}
28  */
29 
30 //
31 // ISO C++ 14882: 22.2.1.5 Template class codecvt
32 //
33 
34 // Written by Benjamin Kosnik <bkoz@redhat.com>
35 
36 #ifndef _CODECVT_H
37 #define _CODECVT_H 1
38 
39 #pragma GCC system_header
40 
41 namespace std _GLIBCXX_VISIBILITY(default)
42 {
43 _GLIBCXX_BEGIN_NAMESPACE_VERSION
44 
45  /// Empty base class for codecvt facet [22.2.1.5].
46  class codecvt_base
47  {
48  public:
49  enum result
50  {
51  ok,
52  partial,
53  error,
54  noconv
55  };
56  };
57 
58  /**
59  * @brief Common base for codecvt functions.
60  *
61  * This template class provides implementations of the public functions
62  * that forward to the protected virtual functions.
63  *
64  * This template also provides abstract stubs for the protected virtual
65  * functions.
66  */
67  template<typename _InternT, typename _ExternT, typename _StateT>
68  class __codecvt_abstract_base
69  : public locale::facet, public codecvt_base
70  {
71  public:
72  // Types:
73  typedef codecvt_base::result result;
74  typedef _InternT intern_type;
75  typedef _ExternT extern_type;
76  typedef _StateT state_type;
77 
78  // 22.2.1.5.1 codecvt members
79  /**
80  * @brief Convert from internal to external character set.
81  *
82  * Converts input string of intern_type to output string of
83  * extern_type. This is analogous to wcsrtombs. It does this by
84  * calling codecvt::do_out.
85  *
86  * The source and destination character sets are determined by the
87  * facet's locale, internal and external types.
88  *
89  * The characters in [from,from_end) are converted and written to
90  * [to,to_end). from_next and to_next are set to point to the
91  * character following the last successfully converted character,
92  * respectively. If the result needed no conversion, from_next and
93  * to_next are not affected.
94  *
95  * The @a state argument should be initialized if the input is at the
96  * beginning and carried from a previous call if continuing
97  * conversion. There are no guarantees about how @a state is used.
98  *
99  * The result returned is a member of codecvt_base::result. If
100  * all the input is converted, returns codecvt_base::ok. If no
101  * conversion is necessary, returns codecvt_base::noconv. If
102  * the input ends early or there is insufficient space in the
103  * output, returns codecvt_base::partial. Otherwise the
104  * conversion failed and codecvt_base::error is returned.
105  *
106  * @param __state Persistent conversion state data.
107  * @param __from Start of input.
108  * @param __from_end End of input.
109  * @param __from_next Returns start of unconverted data.
110  * @param __to Start of output buffer.
111  * @param __to_end End of output buffer.
112  * @param __to_next Returns start of unused output area.
113  * @return codecvt_base::result.
114  */
115  result
116  out(state_type& __state, const intern_type* __from,
117  const intern_type* __from_end, const intern_type*& __from_next,
118  extern_type* __to, extern_type* __to_end,
119  extern_type*& __to_next) const
120  {
121  return this->do_out(__state, __from, __from_end, __from_next,
122  __to, __to_end, __to_next);
123  }
124 
125  /**
126  * @brief Reset conversion state.
127  *
128  * Writes characters to output that would restore @a state to initial
129  * conditions. The idea is that if a partial conversion occurs, then
130  * converting the characters written by this function would leave
131  * the state in initial conditions, rather than partial conversion
132  * state. It does this by calling codecvt::do_unshift().
133  *
134  * For example, if 4 external characters always converted to 1 internal
135  * character, and input to in() had 6 external characters with state
136  * saved, this function would write two characters to the output and
137  * set the state to initialized conditions.
138  *
139  * The source and destination character sets are determined by the
140  * facet's locale, internal and external types.
141  *
142  * The result returned is a member of codecvt_base::result. If the
143  * state could be reset and data written, returns codecvt_base::ok. If
144  * no conversion is necessary, returns codecvt_base::noconv. If the
145  * output has insufficient space, returns codecvt_base::partial.
146  * Otherwise the reset failed and codecvt_base::error is returned.
147  *
148  * @param __state Persistent conversion state data.
149  * @param __to Start of output buffer.
150  * @param __to_end End of output buffer.
151  * @param __to_next Returns start of unused output area.
152  * @return codecvt_base::result.
153  */
154  result
155  unshift(state_type& __state, extern_type* __to, extern_type* __to_end,
156  extern_type*& __to_next) const
157  { return this->do_unshift(__state, __to,__to_end,__to_next); }
158 
159  /**
160  * @brief Convert from external to internal character set.
161  *
162  * Converts input string of extern_type to output string of
163  * intern_type. This is analogous to mbsrtowcs. It does this by
164  * calling codecvt::do_in.
165  *
166  * The source and destination character sets are determined by the
167  * facet's locale, internal and external types.
168  *
169  * The characters in [from,from_end) are converted and written to
170  * [to,to_end). from_next and to_next are set to point to the
171  * character following the last successfully converted character,
172  * respectively. If the result needed no conversion, from_next and
173  * to_next are not affected.
174  *
175  * The @a state argument should be initialized if the input is at the
176  * beginning and carried from a previous call if continuing
177  * conversion. There are no guarantees about how @a state is used.
178  *
179  * The result returned is a member of codecvt_base::result. If
180  * all the input is converted, returns codecvt_base::ok. If no
181  * conversion is necessary, returns codecvt_base::noconv. If
182  * the input ends early or there is insufficient space in the
183  * output, returns codecvt_base::partial. Otherwise the
184  * conversion failed and codecvt_base::error is returned.
185  *
186  * @param __state Persistent conversion state data.
187  * @param __from Start of input.
188  * @param __from_end End of input.
189  * @param __from_next Returns start of unconverted data.
190  * @param __to Start of output buffer.
191  * @param __to_end End of output buffer.
192  * @param __to_next Returns start of unused output area.
193  * @return codecvt_base::result.
194  */
195  result
196  in(state_type& __state, const extern_type* __from,
197  const extern_type* __from_end, const extern_type*& __from_next,
198  intern_type* __to, intern_type* __to_end,
199  intern_type*& __to_next) const
200  {
201  return this->do_in(__state, __from, __from_end, __from_next,
202  __to, __to_end, __to_next);
203  }
204 
205  int
206  encoding() const throw()
207  { return this->do_encoding(); }
208 
209  bool
210  always_noconv() const throw()
211  { return this->do_always_noconv(); }
212 
213  int
214  length(state_type& __state, const extern_type* __from,
215  const extern_type* __end, size_t __max) const
216  { return this->do_length(__state, __from, __end, __max); }
217 
218  int
219  max_length() const throw()
220  { return this->do_max_length(); }
221 
222  protected:
223  explicit
224  __codecvt_abstract_base(size_t __refs = 0) : locale::facet(__refs) { }
225 
226  virtual
227  ~__codecvt_abstract_base() { }
228 
229  /**
230  * @brief Convert from internal to external character set.
231  *
232  * Converts input string of intern_type to output string of
233  * extern_type. This function is a hook for derived classes to change
234  * the value returned. @see out for more information.
235  */
236  virtual result
237  do_out(state_type& __state, const intern_type* __from,
238  const intern_type* __from_end, const intern_type*& __from_next,
239  extern_type* __to, extern_type* __to_end,
240  extern_type*& __to_next) const = 0;
241 
242  virtual result
243  do_unshift(state_type& __state, extern_type* __to,
244  extern_type* __to_end, extern_type*& __to_next) const = 0;
245 
246  virtual result
247  do_in(state_type& __state, const extern_type* __from,
248  const extern_type* __from_end, const extern_type*& __from_next,
249  intern_type* __to, intern_type* __to_end,
250  intern_type*& __to_next) const = 0;
251 
252  virtual int
253  do_encoding() const throw() = 0;
254 
255  virtual bool
256  do_always_noconv() const throw() = 0;
257 
258  virtual int
259  do_length(state_type&, const extern_type* __from,
260  const extern_type* __end, size_t __max) const = 0;
261 
262  virtual int
263  do_max_length() const throw() = 0;
264  };
265 
266 
267 
268  /**
269  * @brief Primary class template codecvt.
270  * @ingroup locales
271  *
272  * NB: Generic, mostly useless implementation.
273  *
274  */
275  template<typename _InternT, typename _ExternT, typename _StateT>
276  class codecvt
277  : public __codecvt_abstract_base<_InternT, _ExternT, _StateT>
278  {
279  public:
280  // Types:
281  typedef codecvt_base::result result;
282  typedef _InternT intern_type;
283  typedef _ExternT extern_type;
284  typedef _StateT state_type;
285 
286  protected:
287  __c_locale _M_c_locale_codecvt;
288 
289  public:
290  static locale::id id;
291 
292  explicit
293  codecvt(size_t __refs = 0)
294  : __codecvt_abstract_base<_InternT, _ExternT, _StateT> (__refs),
295  _M_c_locale_codecvt(0)
296  { }
297 
298  explicit
299  codecvt(__c_locale __cloc, size_t __refs = 0);
300 
301  protected:
302  virtual
303  ~codecvt() { }
304 
305  virtual result
306  do_out(state_type& __state, const intern_type* __from,
307  const intern_type* __from_end, const intern_type*& __from_next,
308  extern_type* __to, extern_type* __to_end,
309  extern_type*& __to_next) const;
310 
311  virtual result
312  do_unshift(state_type& __state, extern_type* __to,
313  extern_type* __to_end, extern_type*& __to_next) const;
314 
315  virtual result
316  do_in(state_type& __state, const extern_type* __from,
317  const extern_type* __from_end, const extern_type*& __from_next,
318  intern_type* __to, intern_type* __to_end,
319  intern_type*& __to_next) const;
320 
321  virtual int
322  do_encoding() const throw();
323 
324  virtual bool
325  do_always_noconv() const throw();
326 
327  virtual int
328  do_length(state_type&, const extern_type* __from,
329  const extern_type* __end, size_t __max) const;
330 
331  virtual int
332  do_max_length() const throw();
333  };
334 
335  template<typename _InternT, typename _ExternT, typename _StateT>
336  locale::id codecvt<_InternT, _ExternT, _StateT>::id;
337 
338  /// class codecvt<char, char, mbstate_t> specialization.
339  template<>
340  class codecvt<char, char, mbstate_t>
341  : public __codecvt_abstract_base<char, char, mbstate_t>
342  {
343  public:
344  // Types:
345  typedef char intern_type;
346  typedef char extern_type;
347  typedef mbstate_t state_type;
348 
349  protected:
350  __c_locale _M_c_locale_codecvt;
351 
352  public:
353  static locale::id id;
354 
355  explicit
356  codecvt(size_t __refs = 0);
357 
358  explicit
359  codecvt(__c_locale __cloc, size_t __refs = 0);
360 
361  protected:
362  virtual
363  ~codecvt();
364 
365  virtual result
366  do_out(state_type& __state, const intern_type* __from,
367  const intern_type* __from_end, const intern_type*& __from_next,
368  extern_type* __to, extern_type* __to_end,
369  extern_type*& __to_next) const;
370 
371  virtual result
372  do_unshift(state_type& __state, extern_type* __to,
373  extern_type* __to_end, extern_type*& __to_next) const;
374 
375  virtual result
376  do_in(state_type& __state, const extern_type* __from,
377  const extern_type* __from_end, const extern_type*& __from_next,
378  intern_type* __to, intern_type* __to_end,
379  intern_type*& __to_next) const;
380 
381  virtual int
382  do_encoding() const throw();
383 
384  virtual bool
385  do_always_noconv() const throw();
386 
387  virtual int
388  do_length(state_type&, const extern_type* __from,
389  const extern_type* __end, size_t __max) const;
390 
391  virtual int
392  do_max_length() const throw();
393  };
394 
395 #ifdef _GLIBCXX_USE_WCHAR_T
396  /// class codecvt<wchar_t, char, mbstate_t> specialization.
397  template<>
398  class codecvt<wchar_t, char, mbstate_t>
399  : public __codecvt_abstract_base<wchar_t, char, mbstate_t>
400  {
401  public:
402  // Types:
403  typedef wchar_t intern_type;
404  typedef char extern_type;
405  typedef mbstate_t state_type;
406 
407  protected:
408  __c_locale _M_c_locale_codecvt;
409 
410  public:
411  static locale::id id;
412 
413  explicit
414  codecvt(size_t __refs = 0);
415 
416  explicit
417  codecvt(__c_locale __cloc, size_t __refs = 0);
418 
419  protected:
420  virtual
421  ~codecvt();
422 
423  virtual result
424  do_out(state_type& __state, const intern_type* __from,
425  const intern_type* __from_end, const intern_type*& __from_next,
426  extern_type* __to, extern_type* __to_end,
427  extern_type*& __to_next) const;
428 
429  virtual result
430  do_unshift(state_type& __state,
431  extern_type* __to, extern_type* __to_end,
432  extern_type*& __to_next) const;
433 
434  virtual result
435  do_in(state_type& __state,
436  const extern_type* __from, const extern_type* __from_end,
437  const extern_type*& __from_next,
438  intern_type* __to, intern_type* __to_end,
439  intern_type*& __to_next) const;
440 
441  virtual
442  int do_encoding() const throw();
443 
444  virtual
445  bool do_always_noconv() const throw();
446 
447  virtual
448  int do_length(state_type&, const extern_type* __from,
449  const extern_type* __end, size_t __max) const;
450 
451  virtual int
452  do_max_length() const throw();
453  };
454 #endif //_GLIBCXX_USE_WCHAR_T
455 
456  /// class codecvt_byname [22.2.1.6].
457  template<typename _InternT, typename _ExternT, typename _StateT>
458  class codecvt_byname : public codecvt<_InternT, _ExternT, _StateT>
459  {
460  public:
461  explicit
462  codecvt_byname(const char* __s, size_t __refs = 0)
463  : codecvt<_InternT, _ExternT, _StateT>(__refs)
464  {
465  if (__builtin_strcmp(__s, "C") != 0
466  && __builtin_strcmp(__s, "POSIX") != 0)
467  {
468  this->_S_destroy_c_locale(this->_M_c_locale_codecvt);
469  this->_S_create_c_locale(this->_M_c_locale_codecvt, __s);
470  }
471  }
472 
473  protected:
474  virtual
475  ~codecvt_byname() { }
476  };
477 
478  // Inhibit implicit instantiations for required instantiations,
479  // which are defined via explicit instantiations elsewhere.
480 #if _GLIBCXX_EXTERN_TEMPLATE
481  extern template class codecvt_byname<char, char, mbstate_t>;
482 
483  extern template
484  const codecvt<char, char, mbstate_t>&
485  use_facet<codecvt<char, char, mbstate_t> >(const locale&);
486 
487  extern template
488  bool
489  has_facet<codecvt<char, char, mbstate_t> >(const locale&);
490 
491 #ifdef _GLIBCXX_USE_WCHAR_T
492  extern template class codecvt_byname<wchar_t, char, mbstate_t>;
493 
494  extern template
495  const codecvt<wchar_t, char, mbstate_t>&
496  use_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);
497 
498  extern template
499  bool
500  has_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);
501 #endif
502 #endif
503 
504 _GLIBCXX_END_NAMESPACE_VERSION
505 } // namespace std
506 
507 #endif // _CODECVT_H
result in(state_type &__state, const extern_type *__from, const extern_type *__from_end, const extern_type *&__from_next, intern_type *__to, intern_type *__to_end, intern_type *&__to_next) const
Convert from external to internal character set.
Definition: codecvt.h:196
Common base for codecvt functions.
Definition: codecvt.h:68
class codecvt<char, char, mbstate_t> specialization.
Definition: codecvt.h:340
class codecvt_byname [22.2.1.6].
Definition: codecvt.h:458
Localization functionality base class. The facet class is the base class for a localization feature...
Empty base class for codecvt facet [22.2.1.5].
Definition: codecvt.h:46
result out(state_type &__state, const intern_type *__from, const intern_type *__from_end, const intern_type *&__from_next, extern_type *__to, extern_type *__to_end, extern_type *&__to_next) const
Convert from internal to external character set.
Definition: codecvt.h:116
Container class for localization functionality. The locale class is first a class wrapper for C librar...
facet(size_t __refs=0)
Facet constructor.
virtual result do_out(state_type &__state, const intern_type *__from, const intern_type *__from_end, const intern_type *&__from_next, extern_type *__to, extern_type *__to_end, extern_type *&__to_next) const =0
Convert from internal to external character set.
Primary class template codecvt. NB: Generic, mostly useless implementation.
Definition: codecvt.h:276
Extension to use iconv for dealing with character encodings.
class codecvt<wchar_t, char, mbstate_t> specialization.
Definition: codecvt.h:398
result unshift(state_type &__state, extern_type *__to, extern_type *__to_end, extern_type *&__to_next) const
Reset conversion state.
Definition: codecvt.h:155
Facet ID class. The ID class provides facets with an index used to identify them. Every facet class mu...