libstdc++
codecvt_specializations.h
Go to the documentation of this file.
1 // Locale support (codecvt) -*- C++ -*-
2 
3 // Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
4 // 2008, 2009, 2010
5 // Free Software Foundation, Inc.
6 //
7 // This file is part of the GNU ISO C++ Library. This library is free
8 // software; you can redistribute it and/or modify it under the
9 // terms of the GNU General Public License as published by the
10 // Free Software Foundation; either version 3, or (at your option)
11 // any later version.
12 
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 // GNU General Public License for more details.
17 
18 // Under Section 7 of GPL version 3, you are granted additional
19 // permissions described in the GCC Runtime Library Exception, version
20 // 3.1, as published by the Free Software Foundation.
21 
22 // You should have received a copy of the GNU General Public License and
23 // a copy of the GCC Runtime Library Exception along with this program;
24 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 // <http://www.gnu.org/licenses/>.
26 
27 //
28 // ISO C++ 14882: 22.2.1.5 Template class codecvt
29 //
30 
31 // Written by Benjamin Kosnik <bkoz@redhat.com>
32 
33 /** @file ext/codecvt_specializations.h
34  * This file is a GNU extension to the Standard C++ Library.
35  */
36 
37 #ifndef _EXT_CODECVT_SPECIALIZATIONS_H
38 #define _EXT_CODECVT_SPECIALIZATIONS_H 1
39 
40 #include <bits/c++config.h>
41 #include <locale>
42 #include <iconv.h>
43 
44 namespace __gnu_cxx _GLIBCXX_VISIBILITY(default)
45 {
46 _GLIBCXX_BEGIN_NAMESPACE_VERSION
47 
/// Extension to use iconv for dealing with character encodings.
// This includes conversions and comparisons between various character
// sets. This object encapsulates data that may need to be shared between
// char_traits, codecvt and ctype.
//
// An encoding_state owns up to two iconv conversion descriptors (one per
// direction) and closes them in its destructor.  Copying duplicates only
// the encoding names, byte-order markers and byte ratio; the copy opens
// fresh descriptors of its own.
class encoding_state
{
public:
  // Types:
  // NB: A conversion descriptor subsumes and enhances the
  // functionality of a simple state type such as mbstate_t.
  typedef iconv_t descriptor_type;

protected:
  // Name of internal character set encoding.
  std::string _M_int_enc;

  // Name of external character set encoding.
  std::string _M_ext_enc;

  // Conversion descriptor between external encoding to internal encoding.
  descriptor_type _M_in_desc;

  // Conversion descriptor between internal encoding to external encoding.
  descriptor_type _M_out_desc;

  // The byte-order marker for the external encoding, if necessary.
  int _M_ext_bom;

  // The byte-order marker for the internal encoding, if necessary.
  int _M_int_bom;

  // Number of external bytes needed to construct one complete
  // character in the internal encoding.
  // NB: -1 indicates variable, or stateful, encodings.
  int _M_bytes;

public:
  // Creates a state with no encodings and no open descriptors;
  // good() is false for such an object.
  explicit
  encoding_state()
  : _M_in_desc(0), _M_out_desc(0), _M_ext_bom(0), _M_int_bom(0), _M_bytes(0)
  { }

  // Creates a state converting between the internal encoding __int and
  // the external encoding __ext and opens both descriptors.
  // __ibom / __ebom are the internal / external byte-order markers
  // (0 means none); __bytes is the value reported by character_ratio().
  // Reports failure through std::__throw_runtime_error if iconv_open fails.
  explicit
  encoding_state(const char* __int, const char* __ext,
                 int __ibom = 0, int __ebom = 0, int __bytes = 1)
  : _M_int_enc(__int), _M_ext_enc(__ext), _M_in_desc(0), _M_out_desc(0),
    _M_ext_bom(__ebom), _M_int_bom(__ibom), _M_bytes(__bytes)
  { init(); }

  // 21.1.2 traits typedefs
  // p4
  // typedef STATE_T state_type
  // requires: state_type shall meet the requirements of
  // CopyConstructible types (20.1.3)
  // NB: This does not preserve the actual state of the conversion
  // descriptor member, but it does duplicate the encoding
  // information.
  encoding_state(const encoding_state& __obj) : _M_in_desc(0), _M_out_desc(0)
  { construct(__obj); }

  // Need assignment operator as well.
  // Self-assignment is a no-op: without the guard the object would close
  // and reopen its own descriptors, and could be left without descriptors
  // if reopening failed.
  encoding_state&
  operator=(const encoding_state& __obj)
  {
    if (this != &__obj)
      construct(__obj);
    return *this;
  }

  ~encoding_state()
  { destroy(); }

  // True when both descriptors are open, i.e. neither is null nor the
  // iconv error value (iconv_t)(-1).
  bool
  good() const throw()
  {
    const descriptor_type __err = (iconv_t)(-1);
    bool __test = _M_in_desc && _M_in_desc != __err;
    __test &= _M_out_desc && _M_out_desc != __err;
    return __test;
  }

  int
  character_ratio() const
  { return _M_bytes; }

  const std::string
  internal_encoding() const
  { return _M_int_enc; }

  int
  internal_bom() const
  { return _M_int_bom; }

  const std::string
  external_encoding() const
  { return _M_ext_enc; }

  int
  external_bom() const
  { return _M_ext_bom; }

  // Descriptor converting external -> internal.
  const descriptor_type&
  in_descriptor() const
  { return _M_in_desc; }

  // Descriptor converting internal -> external.
  const descriptor_type&
  out_descriptor() const
  { return _M_out_desc; }

protected:
  // Opens whichever descriptor is still null, provided both encoding
  // names are non-empty.  A failed iconv_open is reported through
  // std::__throw_runtime_error.
  void
  init()
  {
    const descriptor_type __err = (iconv_t)(-1);
    const bool __have_encodings = _M_int_enc.size() && _M_ext_enc.size();
    if (!_M_in_desc && __have_encodings)
      {
        // iconv_open(tocode, fromcode): external -> internal.
        _M_in_desc = iconv_open(_M_int_enc.c_str(), _M_ext_enc.c_str());
        if (_M_in_desc == __err)
          std::__throw_runtime_error(__N("encoding_state::_M_init "
                                  "creating iconv input descriptor failed"));
      }
    if (!_M_out_desc && __have_encodings)
      {
        // internal -> external.
        _M_out_desc = iconv_open(_M_ext_enc.c_str(), _M_int_enc.c_str());
        if (_M_out_desc == __err)
          std::__throw_runtime_error(__N("encoding_state::_M_init "
                                  "creating iconv output descriptor failed"));
      }
  }

  // Replaces this object's configuration with __obj's: closes any open
  // descriptors, copies names / markers / ratio, then opens new descriptors.
  void
  construct(const encoding_state& __obj)
  {
    destroy();
    _M_int_enc = __obj._M_int_enc;
    _M_ext_enc = __obj._M_ext_enc;
    _M_ext_bom = __obj._M_ext_bom;
    _M_int_bom = __obj._M_int_bom;
    _M_bytes = __obj._M_bytes;
    init();
  }

  // Closes each descriptor that is open and resets it to null, so a
  // later init() can reopen it.
  void
  destroy() throw()
  {
    const descriptor_type __err = (iconv_t)(-1);
    if (_M_in_desc && _M_in_desc != __err)
      {
        iconv_close(_M_in_desc);
        _M_in_desc = 0;
      }
    if (_M_out_desc && _M_out_desc != __err)
      {
        iconv_close(_M_out_desc);
        _M_out_desc = 0;
      }
  }
};
206 
207  /// encoding_char_traits
208  // Custom traits type with encoding_state for the state type, and the
209  // associated fpos<encoding_state> for the position type, all other
210  // bits equivalent to the required char_traits instantiations.
211  template<typename _CharT>
212  struct encoding_char_traits : public std::char_traits<_CharT>
213  {
214  typedef encoding_state state_type;
215  typedef typename std::fpos<state_type> pos_type;
216  };
217 
218 _GLIBCXX_END_NAMESPACE_VERSION
219 } // namespace
220 
221 
222 namespace std _GLIBCXX_VISIBILITY(default)
223 {
224 _GLIBCXX_BEGIN_NAMESPACE_VERSION
225 
227 
228  /// codecvt<InternT, _ExternT, encoding_state> specialization.
229  // This partial specialization takes advantage of iconv to provide
230  // code conversions between a large number of character encodings.
231  template<typename _InternT, typename _ExternT>
232  class codecvt<_InternT, _ExternT, encoding_state>
233  : public __codecvt_abstract_base<_InternT, _ExternT, encoding_state>
234  {
235  public:
236  // Types:
237  typedef codecvt_base::result result;
238  typedef _InternT intern_type;
239  typedef _ExternT extern_type;
241  typedef state_type::descriptor_type descriptor_type;
242 
243  // Data Members:
244  static locale::id id;
245 
246  explicit
247  codecvt(size_t __refs = 0)
249  { }
250 
251  explicit
252  codecvt(state_type& __enc, size_t __refs = 0)
254  { }
255 
256  protected:
257  virtual
258  ~codecvt() { }
259 
260  virtual result
261  do_out(state_type& __state, const intern_type* __from,
262  const intern_type* __from_end, const intern_type*& __from_next,
263  extern_type* __to, extern_type* __to_end,
264  extern_type*& __to_next) const;
265 
266  virtual result
267  do_unshift(state_type& __state, extern_type* __to,
268  extern_type* __to_end, extern_type*& __to_next) const;
269 
270  virtual result
271  do_in(state_type& __state, const extern_type* __from,
272  const extern_type* __from_end, const extern_type*& __from_next,
273  intern_type* __to, intern_type* __to_end,
274  intern_type*& __to_next) const;
275 
276  virtual int
277  do_encoding() const throw();
278 
279  virtual bool
280  do_always_noconv() const throw();
281 
282  virtual int
283  do_length(state_type&, const extern_type* __from,
284  const extern_type* __end, size_t __max) const;
285 
286  virtual int
287  do_max_length() const throw();
288  };
289 
290  template<typename _InternT, typename _ExternT>
291  locale::id
293 
// This adaptor works around the signature problems of the second
// argument to iconv(): SUSv2 and others use 'const char**', but glibc 2.2
// uses 'char**', which matches the POSIX 1003.1-2001 standard.
// Using this adaptor, g++ will do the work for us.
//
// _Tp is deduced from the second parameter of __func, and __inbuf is
// adjusted to that type before the call.  const_cast is used so that
// only a difference in cv-qualification can be bridged; any other
// mismatch is a compile-time error instead of a silent reinterpretation.
// Returns whatever __func returns.
template<typename _Tp>
  inline size_t
  __iconv_adaptor(size_t(*__func)(iconv_t, _Tp, size_t*, char**, size_t*),
                  iconv_t __cd, char** __inbuf, size_t* __inbytes,
                  char** __outbuf, size_t* __outbytes)
  { return __func(__cd, const_cast<_Tp>(__inbuf), __inbytes, __outbuf,
                  __outbytes); }
304 
305  template<typename _InternT, typename _ExternT>
306  codecvt_base::result
308  do_out(state_type& __state, const intern_type* __from,
309  const intern_type* __from_end, const intern_type*& __from_next,
310  extern_type* __to, extern_type* __to_end,
311  extern_type*& __to_next) const
312  {
313  result __ret = codecvt_base::error;
314  if (__state.good())
315  {
316  const descriptor_type& __desc = __state.out_descriptor();
317  const size_t __fmultiple = sizeof(intern_type);
318  size_t __fbytes = __fmultiple * (__from_end - __from);
319  const size_t __tmultiple = sizeof(extern_type);
320  size_t __tbytes = __tmultiple * (__to_end - __to);
321 
322  // Argument list for iconv specifies a byte sequence. Thus,
323  // all to/from arrays must be brutally casted to char*.
324  char* __cto = reinterpret_cast<char*>(__to);
325  char* __cfrom;
326  size_t __conv;
327 
328  // Some encodings need a byte order marker as the first item
329  // in the byte stream, to designate endian-ness. The default
330  // value for the byte order marker is NULL, so if this is
331  // the case, it's not necessary and we can just go on our
332  // merry way.
333  int __int_bom = __state.internal_bom();
334  if (__int_bom)
335  {
336  size_t __size = __from_end - __from;
337  intern_type* __cfixed = static_cast<intern_type*>
338  (__builtin_alloca(sizeof(intern_type) * (__size + 1)));
339  __cfixed[0] = static_cast<intern_type>(__int_bom);
340  char_traits<intern_type>::copy(__cfixed + 1, __from, __size);
341  __cfrom = reinterpret_cast<char*>(__cfixed);
342  __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
343  &__fbytes, &__cto, &__tbytes);
344  }
345  else
346  {
347  intern_type* __cfixed = const_cast<intern_type*>(__from);
348  __cfrom = reinterpret_cast<char*>(__cfixed);
349  __conv = __iconv_adaptor(iconv, __desc, &__cfrom, &__fbytes,
350  &__cto, &__tbytes);
351  }
352 
353  if (__conv != size_t(-1))
354  {
355  __from_next = reinterpret_cast<const intern_type*>(__cfrom);
356  __to_next = reinterpret_cast<extern_type*>(__cto);
357  __ret = codecvt_base::ok;
358  }
359  else
360  {
361  if (__fbytes < __fmultiple * (__from_end - __from))
362  {
363  __from_next = reinterpret_cast<const intern_type*>(__cfrom);
364  __to_next = reinterpret_cast<extern_type*>(__cto);
365  __ret = codecvt_base::partial;
366  }
367  else
368  __ret = codecvt_base::error;
369  }
370  }
371  return __ret;
372  }
373 
374  template<typename _InternT, typename _ExternT>
375  codecvt_base::result
377  do_unshift(state_type& __state, extern_type* __to,
378  extern_type* __to_end, extern_type*& __to_next) const
379  {
380  result __ret = codecvt_base::error;
381  if (__state.good())
382  {
383  const descriptor_type& __desc = __state.in_descriptor();
384  const size_t __tmultiple = sizeof(intern_type);
385  size_t __tlen = __tmultiple * (__to_end - __to);
386 
387  // Argument list for iconv specifies a byte sequence. Thus,
388  // all to/from arrays must be brutally casted to char*.
389  char* __cto = reinterpret_cast<char*>(__to);
390  size_t __conv = __iconv_adaptor(iconv,__desc, 0, 0,
391  &__cto, &__tlen);
392 
393  if (__conv != size_t(-1))
394  {
395  __to_next = reinterpret_cast<extern_type*>(__cto);
396  if (__tlen == __tmultiple * (__to_end - __to))
397  __ret = codecvt_base::noconv;
398  else if (__tlen == 0)
399  __ret = codecvt_base::ok;
400  else
401  __ret = codecvt_base::partial;
402  }
403  else
404  __ret = codecvt_base::error;
405  }
406  return __ret;
407  }
408 
409  template<typename _InternT, typename _ExternT>
410  codecvt_base::result
411  codecvt<_InternT, _ExternT, encoding_state>::
412  do_in(state_type& __state, const extern_type* __from,
413  const extern_type* __from_end, const extern_type*& __from_next,
414  intern_type* __to, intern_type* __to_end,
415  intern_type*& __to_next) const
416  {
417  result __ret = codecvt_base::error;
418  if (__state.good())
419  {
420  const descriptor_type& __desc = __state.in_descriptor();
421  const size_t __fmultiple = sizeof(extern_type);
422  size_t __flen = __fmultiple * (__from_end - __from);
423  const size_t __tmultiple = sizeof(intern_type);
424  size_t __tlen = __tmultiple * (__to_end - __to);
425 
426  // Argument list for iconv specifies a byte sequence. Thus,
427  // all to/from arrays must be brutally casted to char*.
428  char* __cto = reinterpret_cast<char*>(__to);
429  char* __cfrom;
430  size_t __conv;
431 
432  // Some encodings need a byte order marker as the first item
433  // in the byte stream, to designate endian-ness. The default
434  // value for the byte order marker is NULL, so if this is
435  // the case, it's not necessary and we can just go on our
436  // merry way.
437  int __ext_bom = __state.external_bom();
438  if (__ext_bom)
439  {
440  size_t __size = __from_end - __from;
441  extern_type* __cfixed = static_cast<extern_type*>
442  (__builtin_alloca(sizeof(extern_type) * (__size + 1)));
443  __cfixed[0] = static_cast<extern_type>(__ext_bom);
444  char_traits<extern_type>::copy(__cfixed + 1, __from, __size);
445  __cfrom = reinterpret_cast<char*>(__cfixed);
446  __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
447  &__flen, &__cto, &__tlen);
448  }
449  else
450  {
451  extern_type* __cfixed = const_cast<extern_type*>(__from);
452  __cfrom = reinterpret_cast<char*>(__cfixed);
453  __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
454  &__flen, &__cto, &__tlen);
455  }
456 
457 
458  if (__conv != size_t(-1))
459  {
460  __from_next = reinterpret_cast<const extern_type*>(__cfrom);
461  __to_next = reinterpret_cast<intern_type*>(__cto);
462  __ret = codecvt_base::ok;
463  }
464  else
465  {
466  if (__flen < static_cast<size_t>(__from_end - __from))
467  {
468  __from_next = reinterpret_cast<const extern_type*>(__cfrom);
469  __to_next = reinterpret_cast<intern_type*>(__cto);
470  __ret = codecvt_base::partial;
471  }
472  else
473  __ret = codecvt_base::error;
474  }
475  }
476  return __ret;
477  }
478 
479  template<typename _InternT, typename _ExternT>
480  int
481  codecvt<_InternT, _ExternT, encoding_state>::
482  do_encoding() const throw()
483  {
484  int __ret = 0;
485  if (sizeof(_ExternT) <= sizeof(_InternT))
486  __ret = sizeof(_InternT) / sizeof(_ExternT);
487  return __ret;
488  }
489 
490  template<typename _InternT, typename _ExternT>
491  bool
492  codecvt<_InternT, _ExternT, encoding_state>::
493  do_always_noconv() const throw()
494  { return false; }
495 
496  template<typename _InternT, typename _ExternT>
497  int
498  codecvt<_InternT, _ExternT, encoding_state>::
499  do_length(state_type&, const extern_type* __from,
500  const extern_type* __end, size_t __max) const
501  { return std::min(__max, static_cast<size_t>(__end - __from)); }
502 
503  // _GLIBCXX_RESOLVE_LIB_DEFECTS
504  // 74. Garbled text for codecvt::do_max_length
505  template<typename _InternT, typename _ExternT>
506  int
507  codecvt<_InternT, _ExternT, encoding_state>::
508  do_max_length() const throw()
509  { return 1; }
510 
511 _GLIBCXX_END_NAMESPACE_VERSION
512 } // namespace
513 
514 #endif
Primary class template codecvt. NB: Generic, mostly useless implementation.
Definition: codecvt.h:277
Common base for codecvt functions.
Definition: codecvt.h:69
virtual result do_out(state_type &__state, const intern_type *__from, const intern_type *__from_end, const intern_type *&__from_next, extern_type *__to, extern_type *__to_end, extern_type *&__to_next) const
Convert from internal to external character set.
const _Tp & min(const _Tp &, const _Tp &)
This does what you think it does.
Definition: stl_algobase.h:187
const _CharT * c_str() const _GLIBCXX_NOEXCEPT
Return const pointer to null-terminated contents.
Class representing stream positions.
Definition: postypes.h:114
Extension to use iconv for dealing with character encodings.
Facet ID class. The ID class provides facets with an index used to identify them. Every facet class mu...
Basis for explicit traits specializations.
Definition: char_traits.h:229
size_type size() const _GLIBCXX_NOEXCEPT
Returns the number of characters in the string, not including any null-termination.
Definition: basic_string.h:712