// -*- c++ -*-
#ifndef _GLIBMM_USTRING_H
#define _GLIBMM_USTRING_H

/* $Id: ustring.h,v 1.23 2002/04/09 13:26:04 daniel Exp $ */

/* ustring.h
 *
 * Copyright (C) 2002 The gtkmm Development Team
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the Free
 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <glib/gunicode.h>

#include <iosfwd>
#include <iterator>
#include <string>

#include <glibmmconfig.h>
GTKMM_USING_STD(bidirectional_iterator_tag)
GTKMM_USING_STD(reverse_iterator)
GTKMM_USING_STD(string)
GTKMM_USING_STD(istream)
GTKMM_USING_STD(ostream)


namespace Glib
{

class ustring;

/** The iterator type of Glib::ustring.  Note this is not a random
 * access iterator but a bidirectional one, since all index operations
 * need to iterate over the UTF-8 data.  Use std::advance() to move to
 * a certain position.  However, all of the relational operators are
 * available:  ==  !=  <  >  <=  >=
 *
 * A writeable iterator isn't provided because:  The number of bytes of
 * the old UTF-8 character and the new one to write could be different.
 * Therefore, any write operation would invalidate all other iterators
 * pointing into the same string.
 *
 * @tparam T The wrapped std::string iterator type, i.e. either
 * std::string::iterator or std::string::const_iterator.
 */
template <class T>
class ustring_Iterator
{
public:
  typedef std::bidirectional_iterator_tag   iterator_category;
  typedef gunichar                          value_type;
  typedef std::string::difference_type      difference_type;
  // operator*() hands out the character by value, so 'reference' has to
  // name value_type.  Iterator adaptors such as std::reverse_iterator
  // declare their own operator*() as returning this typedef; with 'void'
  // here, dereferencing a Glib::ustring::reverse_iterator does not compile.
  typedef value_type                        reference;
  // There is no operator->(), hence no meaningful pointer type.
  typedef void                              pointer;

  /** Construct an iterator whose wrapped position is value-initialized. */
  inline ustring_Iterator();

  /** Construct from a mutable-string iterator.  For
   * T = std::string::iterator this is the copy constructor; for
   * T = std::string::const_iterator it is the iterator -> const_iterator
   * conversion.
   */
  inline ustring_Iterator(const ustring_Iterator<std::string::iterator>& other);

  /** Assign from a mutable-string iterator (same roles as the constructor above). */
  inline ustring_Iterator<T>& operator=(const ustring_Iterator<std::string::iterator>& other);

  /** Decode and return the character at the current position, by value. */
  inline value_type operator*() const;

  // Pre- and post-increment/decrement.  The postfix forms return the
  // previous position as a const copy.
  inline ustring_Iterator<T> &     operator++();
  inline const ustring_Iterator<T> operator++(int);
  inline ustring_Iterator<T> &     operator--();
  inline const ustring_Iterator<T> operator--(int);

#ifndef DOXYGEN_SHOULD_SKIP_THIS
  // Implementation detail: wrap / unwrap the underlying std::string iterator.
  explicit inline ustring_Iterator(T pos);
  inline T base() const;
#endif /* DOXYGEN_SHOULD_SKIP_THIS */

private:
  T pos_;  // Position in the underlying std::string.
};


/** Extract a UCS-4 character from UTF-8 data.
 * Convert a single UTF-8 (multibyte) character starting at @p pos to
 * a UCS-4 wide character.  This may read up to 6 bytes after the start
 * position, depending on the UTF-8 character width.  You have to make
 * sure the source contains at least one valid UTF-8 character.
 *
 * This is mainly used by the implementation of Glib::ustring::iterator,
 * but it might be useful as a utility function if you prefer using
 * std::string even for UTF-8 encoding.
 *
 * @param pos Iterator pointing at the first byte of the character to convert.
 * @return The converted character as a gunichar.
 */
gunichar get_unichar_from_std_iterator(std::string::const_iterator pos);


/** Glib::ustring has much the same interface as std::string,
 * but contains UTF-8 strings.
 * Note that a normal 7-bit ASCII string is also a UTF-8 string, so,
 * if you want to, you can use this class without even thinking about UTF-8.
 * (Except that any character values must be in range 0 to 127,
 *  therefore you cannot use 8-bit encodings like ISO-8859-1.)
 *
 * You can find a detailed UTF-8 and Unicode FAQ here:
 * http://www.cl.cam.ac.uk/~mgk25/unicode.html
 *
 * Also, this class has conversions to and from std::string,
 * so you can use a std::string instead of a Glib::ustring --
 * However, this will not work with multi-byte translations,
 * just as normal C char* code wouldn't.
 *
 * In a perfect world the C++ Standard Library would contain
 * a UTF-8 string, but it doesn't.  Note that std::wstring is not
 * a UTF-8 string class because it contains only fixed-length characters.
 *
 * Glib::ustring does not inherit from std::string, because std::string
 * was intended to be a final class.  For instance, it does not have
 * a virtual destructor.
 *
 * The only data member is a std::string holding the data; most member
 * functions are declared here and defined out of line.
 */
class ustring
{
public:
  // Size and offset types are taken over from std::string.
  typedef std::string::size_type                        size_type;
  typedef std::string::difference_type                  difference_type;

  // The element type is gunichar rather than char.
  typedef gunichar                                      value_type;
  typedef gunichar &                                    reference;
  typedef const gunichar &                              const_reference;

  // Both iterator flavours are ustring_Iterator wrappers around the
  // corresponding std::string iterator; the reverse iterators adapt those.
  typedef ustring_Iterator<std::string::iterator>       iterator;
  typedef ustring_Iterator<std::string::const_iterator> const_iterator;
  typedef std::reverse_iterator<iterator>               reverse_iterator;
  typedef std::reverse_iterator<const_iterator>         const_reverse_iterator;

  // Same sentinel value as std::string::npos.
  static const size_type npos = std::string::npos;

  // Constructors.  Single characters can be given either as gunichar
  // or as plain char.
  // NOTE(review): whether size_type arguments (i, n) count characters or
  // bytes cannot be told from this header -- confirm in ustring.cc.
  ustring();
  ustring(const ustring& src);
  ustring(const ustring& src, size_type i, size_type n=npos);
  ustring(const char* src, size_type n);
  ustring(const char* src);
  ustring(size_type n, gunichar uc);
  ustring(size_type n, char c);
  // Range constructor, defined inline further down: ustring iterators are
  // unwrapped via IteratorConvert_ and the range is handed to std::string.
  template <class In> inline
    ustring(In pbegin, In pend);

  // Implicit conversion from std::string.
  ustring(const std::string& src);
  // Construction from gunichar data must be requested explicitly.
  explicit ustring(const gunichar* src, size_type n=npos);

  ~ustring();

  void swap(ustring& other);

  // Assignment operators.
  ustring& operator=(const ustring& src);
  ustring& operator=(const std::string& src);
  ustring& operator=(const char* src);
  ustring& operator=(gunichar uc);
  ustring& operator=(char c);

  // assign(): overload set parallel to the constructors above.
  ustring& assign(const ustring& src);
  ustring& assign(const ustring& src, size_type i, size_type n);
  ustring& assign(const char* src, size_type n);
  ustring& assign(const char* src);
  ustring& assign(size_type n, gunichar uc);
  ustring& assign(size_type n, char c);
  template <class In> inline
    ustring& assign(In pbegin, In pend);

  // Appending: operator+=, push_back() and append().
  ustring& operator+=(const ustring& src);
  ustring& operator+=(const char* src);
  ustring& operator+=(gunichar uc);
  ustring& operator+=(char c);
  void push_back(gunichar uc);
  void push_back(char c);

  ustring& append(const ustring& src);
  ustring& append(const ustring& src, size_type i, size_type n);
  ustring& append(const char* src, size_type n);
  ustring& append(const char* src);
  ustring& append(size_type n, gunichar uc);
  ustring& append(size_type n, char c);
  template <class In> inline
    ustring& append(In pbegin, In pend);

  // insert() addressed by index.
  ustring& insert(size_type i, const ustring& src);
  ustring& insert(size_type i, const ustring& src, size_type i2, size_type n);
  ustring& insert(size_type i, const char* src, size_type n);
  ustring& insert(size_type i, const char* src);
  ustring& insert(size_type i, size_type n, gunichar uc);
  ustring& insert(size_type i, size_type n, char c);

  // insert() addressed by iterator.
  iterator insert(iterator p, gunichar uc);
  iterator insert(iterator p, char c);
  void     insert(iterator p, size_type n, gunichar uc);
  void     insert(iterator p, size_type n, char c);
  template <class In> inline
    void insert(iterator p, In pbegin, In pend);

  // replace() addressed by index.
  ustring& replace(size_type i, size_type n, const ustring& src);
  ustring& replace(size_type i, size_type n, const ustring& src, size_type i2, size_type n2);
  ustring& replace(size_type i, size_type n, const char* src, size_type n2);
  ustring& replace(size_type i, size_type n, const char* src);
  ustring& replace(size_type i, size_type n, size_type n2, gunichar uc);
  ustring& replace(size_type i, size_type n, size_type n2, char c);

  // replace() addressed by iterator range.
  ustring& replace(iterator pbegin, iterator pend, const ustring& src);
  ustring& replace(iterator pbegin, iterator pend, const char* src, size_type n);
  ustring& replace(iterator pbegin, iterator pend, const char* src);
  ustring& replace(iterator pbegin, iterator pend, size_type n, gunichar uc);
  ustring& replace(iterator pbegin, iterator pend, size_type n, char c);
  template <class In> inline
    ustring& replace(iterator pbegin, iterator pend, In pbegin2, In pend2);

  // Erasing by index range, completely, or by iterator.
  ustring& erase(size_type i, size_type n=npos);
  ustring& erase();
  iterator erase(iterator p);
  iterator erase(iterator pbegin, iterator pend);

  // Three-way comparison.  The free relational operators defined after the
  // class test the sign of the returned int (<0, ==0, >0).
  int compare(const ustring& rhs) const;
  int compare(const char* rhs)    const;
  int compare(size_type i, size_type n, const ustring& rhs) const;
  int compare(size_type i, size_type n, const ustring& rhs, size_type i2, size_type n2) const;
  int compare(size_type i, size_type n, const char* rhs, size_type n2) const;
  int compare(size_type i, size_type n, const char* rhs) const;

  /** @note No reference return; use replace() to write characters. */
  value_type operator[](size_type i) const;
  value_type at(size_type i) const;

  // Defined inline below as ustring(*this, i, n).
  inline ustring substr(size_type i=0, size_type n=npos) const;

  // Iterator access.
  iterator begin();
  iterator end();
  const_iterator begin() const;
  const_iterator end()   const;
  reverse_iterator rbegin();
  reverse_iterator rend();
  const_reverse_iterator rbegin() const;
  const_reverse_iterator rend()   const;

  // Search functions; the overload sets mirror each other.
  size_type find(const ustring& str, size_type i=0) const;
  size_type find(const char* str, size_type i, size_type n) const;
  size_type find(const char* str, size_type i=0) const;
  size_type find(gunichar uc, size_type i=0) const;
  size_type find(char c, size_type i=0) const;

  size_type rfind(const ustring& str, size_type i=npos) const;
  size_type rfind(const char* str, size_type i, size_type n) const;
  size_type rfind(const char* str, size_type i=npos) const;
  size_type rfind(gunichar uc, size_type i=npos) const;
  size_type rfind(char c, size_type i=npos) const;

  size_type find_first_of(const ustring& match, size_type i=0) const;
  size_type find_first_of(const char* match, size_type i, size_type n) const;
  size_type find_first_of(const char* match, size_type i=0) const;
  size_type find_first_of(gunichar uc, size_type i=0) const;
  size_type find_first_of(char c, size_type i=0) const;

  size_type find_last_of(const ustring& match, size_type i=npos) const;
  size_type find_last_of(const char* match, size_type i, size_type n) const;
  size_type find_last_of(const char* match, size_type i=npos) const;
  size_type find_last_of(gunichar uc, size_type i=npos) const;
  size_type find_last_of(char c, size_type i=npos) const;

  size_type find_first_not_of(const ustring& match, size_type i=0) const;
  size_type find_first_not_of(const char* match, size_type i, size_type n) const;
  size_type find_first_not_of(const char* match, size_type i=0) const;
  size_type find_first_not_of(gunichar uc, size_type i=0) const;
  size_type find_first_not_of(char c, size_type i=0) const;

  size_type find_last_not_of(const ustring& match, size_type i=npos) const;
  size_type find_last_not_of(const char* match, size_type i, size_type n) const;
  size_type find_last_not_of(const char* match, size_type i=npos) const;
  size_type find_last_not_of(gunichar uc, size_type i=npos) const;
  size_type find_last_not_of(char c, size_type i=npos) const;

  // Size queries.
  // NOTE(review): presumably size()/length() count characters while bytes()
  // counts the bytes of the UTF-8 data -- confirm in ustring.cc.
  bool      empty()  const;
  size_type size()   const;
  size_type length() const;
  size_type bytes()  const;

  size_type capacity() const;
  size_type max_size() const;

  void resize(size_type n, gunichar uc);
  void resize(size_type n, char c='\0');
  void reserve(size_type n=0);

  // operator std::string() returns a copy of the internal std::string,
  // raw() a const reference to it (both are defined inline below).
  inline operator std::string() const; // e.g. std::string str = ustring();
  inline const std::string& raw() const;

  // Not necessarily an ASCII char*. Use g_utf8_*() where necessary.
  const char* data()  const;
  const char* c_str() const;

  // Copy out into a caller-provided char or gunichar buffer.
  size_type copy(char*     p, size_type n, size_type i=0) const;
  size_type copy(gunichar* p, size_type n, size_type i=0) const;

  /** Check whether the string contains valid UTF-8 data. */
  bool validate() const;
  // NOTE(review): judging by the parameter name, these overloads report the
  // position of the first invalid data through the iterator -- confirm in
  // ustring.cc.
  bool validate(iterator& first_invalid);
  bool validate(const_iterator& first_invalid) const;

  /** Check whether the string is plain 7-bit ASCII.
   * Unlike any other ustring method, is_ascii() is safe to use on invalid
   * UTF-8 strings.  If the string isn't valid UTF-8, it cannot be valid
   * ASCII either, therefore is_ascii() will just return @c false then.
   * @return Whether the string contains only ASCII characters.
   */
  bool is_ascii() const;

  /** 'Normalize' the Unicode character values of the string. */
  ustring normalize(GNormalizeMode mode = G_NORMALIZE_DEFAULT_COMPOSE) const;

  // Case conversions; each returns a new string and is const.
  ustring uppercase() const;
  ustring lowercase() const;
  ustring casefold()  const;

  /** Create a unique sorting key for the UTF-8 string. */
  std::string collate_key() const;
  std::string casefold_collate_key() const;

private:
  // Helper used by the iterator-range templates: ustring iterators are
  // unwrapped with base(), any other iterator type is passed through
  // unchanged (see the definitions after the class).
  template <class Iterator>
    struct IteratorConvert_;

  // The string data; the only data member.
  std::string string_;
};

/** Stream input operator.
 * @relates Glib::ustring
 * @param is The input stream to extract from.
 * @param utf8_string The string that receives the extracted data.
 * @return A std::istream reference (presumably @p is, to allow chaining;
 * the definition is not in this header).
 * @throw Glib::ConvertError
 */
std::istream& operator>>(std::istream& is, Glib::ustring& utf8_string);

/** Stream output operator.
 * @relates Glib::ustring
 * @param os The output stream to insert into.
 * @param utf8_string The string to write.
 * @return A std::ostream reference (presumably @p os, to allow chaining;
 * the definition is not in this header).
 * @throw Glib::ConvertError
 */
std::ostream& operator<<(std::ostream& os, const Glib::ustring& utf8_string);


/***************************************************************************/
/*  Inline implementation                                                  */
/***************************************************************************/


/**** Glib::ustring_Iterator<> *********************************************/

#ifndef DOXYGEN_SHOULD_SKIP_THIS

// Wrap an iterator of the underlying std::string.
template <class T>
inline ustring_Iterator<T>::ustring_Iterator(T pos) : pos_(pos)
{
}

// Hand back a copy of the wrapped std::string iterator.
template <class T>
inline T ustring_Iterator<T>::base() const
{
  return this->pos_;
}

#endif /* DOXYGEN_SHOULD_SKIP_THIS */

// Default constructor: the wrapped iterator is value-initialized.
template <class T>
inline ustring_Iterator<T>::ustring_Iterator() : pos_()
{
}

// Construct from a mutable-string iterator by taking over its position.
// With T = std::string::const_iterator this is the conversion from
// iterator to const_iterator.
template <class T>
inline ustring_Iterator<T>::ustring_Iterator(
    const ustring_Iterator<std::string::iterator>& other)
  : pos_(other.base())
{
}

// Assign from a mutable-string iterator: take over its position and
// return *this.
template <class T>
inline ustring_Iterator<T>& ustring_Iterator<T>::operator=(
    const ustring_Iterator<std::string::iterator>& other)
{
  this->pos_ = other.base();
  return *this;
}

// Dereference: convert the UTF-8 data at the current position to a
// character, returned by value.
template <class T>
inline typename ustring_Iterator<T>::value_type
ustring_Iterator<T>::operator*() const
{
  const value_type character = Glib::get_unichar_from_std_iterator(pos_);
  return character;
}

// Pre-increment: look up the step width for the byte at the current
// position in g_utf8_skip and advance by that many bytes.
template <class T>
inline ustring_Iterator<T>& ustring_Iterator<T>::operator++()
{
  // Index the table with the unsigned byte value.
  const unsigned char lead_byte = static_cast<unsigned char>(*pos_);
  pos_ += g_utf8_skip[lead_byte];
  return *this;
}

// Post-increment: advance, but return the position held before the step.
template <class T>
inline const ustring_Iterator<T> ustring_Iterator<T>::operator++(int)
{
  const ustring_Iterator<T> previous (*this);
  ++(*this);
  return previous;
}

// Pre-decrement: step back one byte, then keep stepping back while the
// byte under the iterator has the bit pattern 10xxxxxx.
template <class T>
inline ustring_Iterator<T>& ustring_Iterator<T>::operator--()
{
  --pos_;
  while((*pos_ & '\xC0') == '\x80')
    --pos_;

  return *this;
}

// Post-decrement: step back, but return the position held before the step.
template <class T>
inline const ustring_Iterator<T> ustring_Iterator<T>::operator--(int)
{
  const ustring_Iterator<T> previous (*this);
  --(*this);
  return previous;
}

/** Equality of two iterators, decided on the wrapped std::string iterators.
 * @relates Glib::ustring_Iterator
 */
template <class T>
inline bool operator==(const ustring_Iterator<T>& lhs, const ustring_Iterator<T>& rhs)
{
  const T left  = lhs.base();
  const T right = rhs.base();
  return left == right;
}

/** Inequality of two iterators, decided on the wrapped std::string iterators.
 * @relates Glib::ustring_Iterator
 */
template <class T>
inline bool operator!=(const ustring_Iterator<T>& lhs, const ustring_Iterator<T>& rhs)
{
  const T left  = lhs.base();
  const T right = rhs.base();
  return left != right;
}

/** Less-than for two iterators, decided on the wrapped std::string iterators.
 * @relates Glib::ustring_Iterator
 */
template <class T>
inline bool operator<(const ustring_Iterator<T>& lhs, const ustring_Iterator<T>& rhs)
{
  const T left  = lhs.base();
  const T right = rhs.base();
  return left < right;
}

/** Greater-than for two iterators, decided on the wrapped std::string iterators.
 * @relates Glib::ustring_Iterator
 */
template <class T>
inline bool operator>(const ustring_Iterator<T>& lhs, const ustring_Iterator<T>& rhs)
{
  const T left  = lhs.base();
  const T right = rhs.base();
  return left > right;
}

/** Less-or-equal for two iterators, decided on the wrapped std::string iterators.
 * @relates Glib::ustring_Iterator
 */
template <class T>
inline bool operator<=(const ustring_Iterator<T>& lhs, const ustring_Iterator<T>& rhs)
{
  const T left  = lhs.base();
  const T right = rhs.base();
  return left <= right;
}

/** Greater-or-equal for two iterators, decided on the wrapped std::string iterators.
 * @relates Glib::ustring_Iterator
 */
template <class T>
inline bool operator>=(const ustring_Iterator<T>& lhs, const ustring_Iterator<T>& rhs)
{
  const T left  = lhs.base();
  const T right = rhs.base();
  return left >= right;
}


/**** Glib::ustring::IteratorConvert_<> ************************************/

// Primary template: an arbitrary iterator type is passed through unchanged.
template <class Iterator>
struct ustring::IteratorConvert_
{
  static Iterator get_base(Iterator it)
  {
    return it;
  }
};

// Specialization for Glib::ustring::iterator: hand out the wrapped
// std::string::iterator.
template <>
struct ustring::IteratorConvert_<Glib::ustring::iterator>
{
  static std::string::iterator get_base(Glib::ustring::iterator it)
  {
    return it.base();
  }
};

// Specialization for Glib::ustring::const_iterator: hand out the wrapped
// std::string::const_iterator.
template <>
struct ustring::IteratorConvert_<Glib::ustring::const_iterator>
{
  static std::string::const_iterator get_base(Glib::ustring::const_iterator it)
  {
    return it.base();
  }
};


/**** Glib::ustring ********************************************************/

// Range constructor: both ends are run through IteratorConvert_ and the
// resulting range initializes the internal std::string.
template <class In>
inline ustring::ustring(In pbegin, In pend)
  : string_(ustring::IteratorConvert_<In>::get_base(pbegin),
            ustring::IteratorConvert_<In>::get_base(pend))
{
}

// Range assign: both ends are run through IteratorConvert_ and the
// resulting range is assigned to the internal std::string.
template <class In>
inline ustring& ustring::assign(In pbegin, In pend)
{
  typedef IteratorConvert_<In> Unwrap;

  string_.assign(
      Unwrap::get_base(pbegin),
      Unwrap::get_base(pend));

  return *this;
}

// Range append: both ends are run through IteratorConvert_ and the
// resulting range is appended to the internal std::string.
template <class In>
inline ustring& ustring::append(In pbegin, In pend)
{
  typedef IteratorConvert_<In> Unwrap;

  string_.append(
      Unwrap::get_base(pbegin),
      Unwrap::get_base(pend));

  return *this;
}

// Range insert: the source range is run through IteratorConvert_ and
// inserted into the internal std::string at the position wrapped by p.
template <class In>
inline void ustring::insert(ustring::iterator p, In pbegin, In pend)
{
  typedef IteratorConvert_<In> Unwrap;

  string_.insert(
      p.base(),
      Unwrap::get_base(pbegin),
      Unwrap::get_base(pend));
}

// Range replace: the part [pbegin, pend) of the internal std::string is
// replaced by the source range, which is run through IteratorConvert_.
template <class In>
inline ustring& ustring::replace(ustring::iterator pbegin, ustring::iterator pend,
                                 In pbegin2, In pend2)
{
  typedef IteratorConvert_<In> Unwrap;

  string_.replace(pbegin.base(), pend.base(),
                  Unwrap::get_base(pbegin2), Unwrap::get_base(pend2));

  return *this;
}

// Builds the result with the (src, i, n) constructor.  Kept inline so
// that the compiler has a fair chance to optimize the copy ctor away.
inline ustring ustring::substr(ustring::size_type i, ustring::size_type n) const
{
  ustring piece (*this, i, n);
  return piece;
}

// Conversion to std::string: returns a copy of the internal string.
// Kept inline so that the compiler has a fair chance to optimize the
// copy ctor away.
inline ustring::operator std::string() const
{
  return std::string(string_);
}

inline
const std::string& ustring::raw() const
{
  return string_;
}

/** Free-function form of swap; forwards to ustring::swap().
 * @relates Glib::ustring
 */
inline void swap(ustring& lhs, ustring& rhs)
{
  lhs.swap(rhs);
}


/**** Glib::ustring -- comparison operators ********************************/

/** Equality, decided by ustring::compare().
 * @relates Glib::ustring
 */
inline bool operator==(const ustring& lhs, const ustring& rhs)
{
  const int order = lhs.compare(rhs);
  return order == 0;
}

/** Equality with a C string on the right-hand side.
 * @relates Glib::ustring
 */
inline bool operator==(const ustring& lhs, const char* rhs)
{
  const int order = lhs.compare(rhs);
  return order == 0;
}

/** Equality with a C string on the left-hand side.
 * @relates Glib::ustring
 */
inline bool operator==(const char* lhs, const ustring& rhs)
{
  // compare() is a member of ustring, so it is called on rhs.
  const int order = rhs.compare(lhs);
  return order == 0;
}


/** Inequality, decided by ustring::compare().
 * @relates Glib::ustring
 */
inline bool operator!=(const ustring& lhs, const ustring& rhs)
{
  const int order = lhs.compare(rhs);
  return order != 0;
}

/** Inequality with a C string on the right-hand side.
 * @relates Glib::ustring
 */
inline bool operator!=(const ustring& lhs, const char* rhs)
{
  const int order = lhs.compare(rhs);
  return order != 0;
}

/** Inequality with a C string on the left-hand side.
 * @relates Glib::ustring
 */
inline bool operator!=(const char* lhs, const ustring& rhs)
{
  // compare() is a member of ustring, so it is called on rhs.
  const int order = rhs.compare(lhs);
  return order != 0;
}


/** Less-than, decided by ustring::compare().
 * @relates Glib::ustring
 */
inline bool operator<(const ustring& lhs, const ustring& rhs)
{
  const int order = lhs.compare(rhs);
  return order < 0;
}

/** Less-than with a C string on the right-hand side.
 * @relates Glib::ustring
 */
inline bool operator<(const ustring& lhs, const char* rhs)
{
  const int order = lhs.compare(rhs);
  return order < 0;
}

/** Less-than with a C string on the left-hand side.
 * @relates Glib::ustring
 */
inline bool operator<(const char* lhs, const ustring& rhs)
{
  // compare() is called on rhs, so the sign test is mirrored.
  const int order = rhs.compare(lhs);
  return order > 0;
}


/** Greater-than, decided by ustring::compare().
 * @relates Glib::ustring
 */
inline bool operator>(const ustring& lhs, const ustring& rhs)
{
  const int order = lhs.compare(rhs);
  return order > 0;
}

/** Greater-than with a C string on the right-hand side.
 * @relates Glib::ustring
 */
inline bool operator>(const ustring& lhs, const char* rhs)
{
  const int order = lhs.compare(rhs);
  return order > 0;
}

/** Greater-than with a C string on the left-hand side.
 * @relates Glib::ustring
 */
inline bool operator>(const char* lhs, const ustring& rhs)
{
  // compare() is called on rhs, so the sign test is mirrored.
  const int order = rhs.compare(lhs);
  return order < 0;
}


/** Less-or-equal, decided by ustring::compare().
 * @relates Glib::ustring
 */
inline bool operator<=(const ustring& lhs, const ustring& rhs)
{
  const int order = lhs.compare(rhs);
  return order <= 0;
}

/** Less-or-equal with a C string on the right-hand side.
 * @relates Glib::ustring
 */
inline bool operator<=(const ustring& lhs, const char* rhs)
{
  const int order = lhs.compare(rhs);
  return order <= 0;
}

/** Less-or-equal with a C string on the left-hand side.
 * @relates Glib::ustring
 */
inline bool operator<=(const char* lhs, const ustring& rhs)
{
  // compare() is called on rhs, so the sign test is mirrored.
  const int order = rhs.compare(lhs);
  return order >= 0;
}


/** Greater-or-equal, decided by ustring::compare().
 * @relates Glib::ustring
 */
inline bool operator>=(const ustring& lhs, const ustring& rhs)
{
  const int order = lhs.compare(rhs);
  return order >= 0;
}

/** Greater-or-equal with a C string on the right-hand side.
 * @relates Glib::ustring
 */
inline bool operator>=(const ustring& lhs, const char* rhs)
{
  const int order = lhs.compare(rhs);
  return order >= 0;
}

/** Greater-or-equal with a C string on the left-hand side.
 * @relates Glib::ustring
 */
inline bool operator>=(const char* lhs, const ustring& rhs)
{
  // compare() is called on rhs, so the sign test is mirrored.
  const int order = rhs.compare(lhs);
  return order <= 0;
}


/**** Glib::ustring -- concatenation operators *****************************/

/** Concatenate two ustrings into a new string.
 * @relates Glib::ustring
 */
inline ustring operator+(const ustring& lhs, const ustring& rhs)
{
  ustring joined (lhs);
  joined += rhs;
  return joined;
}

/** Concatenate a ustring and a C string into a new string.
 * @relates Glib::ustring
 */
inline ustring operator+(const ustring& lhs, const char* rhs)
{
  ustring joined (lhs);
  joined += rhs;
  return joined;
}

/** Concatenate a C string and a ustring into a new string.
 * @relates Glib::ustring
 */
inline ustring operator+(const char* lhs, const ustring& rhs)
{
  ustring joined (lhs);
  joined += rhs;
  return joined;
}

/** Append a single gunichar to a copy of a ustring.
 * @relates Glib::ustring
 */
inline ustring operator+(const ustring& lhs, gunichar rhs)
{
  ustring joined (lhs);
  joined += rhs;
  return joined;
}

/** Prepend a single gunichar: start from a one-character string, then append.
 * @relates Glib::ustring
 */
inline ustring operator+(gunichar lhs, const ustring& rhs)
{
  ustring joined (1, lhs);
  joined += rhs;
  return joined;
}

/** Append a single char to a copy of a ustring.
 * @relates Glib::ustring
 */
inline ustring operator+(const ustring& lhs, char rhs)
{
  ustring joined (lhs);
  joined += rhs;
  return joined;
}

/** Prepend a single char: start from a one-character string, then append.
 * @relates Glib::ustring
 */
inline ustring operator+(char lhs, const ustring& rhs)
{
  ustring joined (1, lhs);
  joined += rhs;
  return joined;
}

} // namespace Glib


#endif /* _GLIBMM_USTRING_H */

