/*
    SPDX-FileCopyrightText: 2000-2001 Dawit Alemayehu <adawit@kde.org>
    SPDX-FileCopyrightText: 2001 Rik Hemsley (rikkus) <rik@kde.org>
    SPDX-FileCopyrightText: 2001-2002 Marc Mutz <mutz@kde.org>

    SPDX-License-Identifier: LGPL-2.0-only

    The quoted-printable codec as described in RFC 2045, section 6.7. is by
    Rik Hemsley (C) 2001.
*/

#ifndef KCODECS_H
#define KCODECS_H

#include <kcodecs_export.h>

#include <QString>

#include <memory>

class QByteArray;
class QIODevice;

/*!
 * \namespace KCodecs
 * \inmodule KCodecs
 *
 * A collection of functions and classes wrapping the most commonly
 * used encoding and decoding algorithms.
 *
 * Currently there is support for encoding
 * and decoding input using base64, uu and the quoted-printable
 * specifications.
 *
 * \code
 * QByteArray input = "Aladdin:open sesame";
 * QByteArray result = KCodecs::base64Encode(input);
 * cout << "Result: " << result.data() << endl;
 * \endcode
 *
 * Output should be
 * \badcode
 * Result: QWxhZGRpbjpvcGVuIHNlc2FtZQ==
 * \endcode
 *
 * The above example makes use of the convenience functions
 * (ones that return the result as a new QByteArray) to encode/decode
 * a string.  If what you need is to encode or decode binary data, then
 * it is highly recommended that you use the functions that take an input
 * and an output QByteArray as arguments.  These functions are specifically
 * tailored for encoding and decoding binary data.
 *
 * \brief A collection of commonly used encoding and decoding algorithms.
 */
namespace KCodecs
{
/*!
 * Encodes the given data using the quoted-printable algorithm.
 *
 * This convenience overload returns the result as a new QByteArray.
 *
 * \a in the data to be encoded.
 *
 * \a useCRLF if true, the input data is expected to have
 *                CRLF line breaks and the output will have CRLF line
 *                breaks, too. Defaults to true.
 *
 * Returns the quoted-printable encoded data.
 */
KCODECS_EXPORT QByteArray quotedPrintableEncode(QByteArrayView in, bool useCRLF = true);

/*!
 * Encodes the given data using the quoted-printable algorithm.
 *
 * Use this function if you want the result of the encoding
 * to be placed in another array, which cuts down the number
 * of copy operations that have to be performed in the process.
 * This is also the preferred method for encoding binary data.
 *
 * \note The output array is first reset and then resized
 * appropriately before use; hence, all data previously stored in the
 * output array will be lost.
 *
 * \a in      the data to be encoded.
 *
 * \a out     receives the encoded data.
 *
 * \a useCRLF if true, the input data is expected to have
 *                CRLF line breaks and the output will have CRLF line
 *                breaks, too. Unlike the overload that returns a
 *                QByteArray, this parameter has no default value.
 */
KCODECS_EXPORT void quotedPrintableEncode(QByteArrayView in, QByteArray &out, bool useCRLF);

/*!
 * Decodes quoted-printable encoded data.
 *
 * Accepts data with CRLF or standard unix line breaks.
 *
 * This convenience overload returns the result as a new QByteArray.
 *
 * \a in  the data to be decoded.
 *
 * Returns the decoded data.
 *
 * \since 5.5
 */
KCODECS_EXPORT QByteArray quotedPrintableDecode(QByteArrayView in);

/*!
 * Decodes quoted-printable encoded data.
 *
 * Accepts data with CRLF or standard unix line breaks.
 * Use this function if you want the result of the decoding
 * to be placed in another array, which cuts down the number
 * of copy operations that have to be performed in the process.
 * This is also the preferred method for decoding encoded
 * binary data.
 *
 * \note The output array is first reset and then resized
 * appropriately before use; hence, all data previously stored in the
 * output array will be lost.
 *
 * \a in   the data to be decoded.
 *
 * \a out  receives the decoded data.
 */
KCODECS_EXPORT void quotedPrintableDecode(QByteArrayView in, QByteArray &out);

/*!
 * Decodes the given data using the uudecode algorithm.
 *
 * Any 'begin' and 'end' lines like those generated by
 * the utilities in unix and unix-like OS will be
 * automatically ignored.
 *
 * This convenience overload returns the result as a new QByteArray.
 *
 * \a in the data to be decoded.
 *
 * Returns the decoded data.
 */
KCODECS_EXPORT QByteArray uudecode(QByteArrayView in);

/*!
 * Decodes the given data using the uudecode algorithm.
 *
 * Use this function if you want the result of the decoding
 * to be placed in another array, which cuts down the number
 * of copy operations that have to be performed in the process.
 * This is the preferred method for decoding binary data.
 *
 * Any 'begin' and 'end' lines like those generated by
 * the utilities in unix and unix-like OS will be
 * automatically ignored.
 *
 * \note The output array is first reset and then resized
 * appropriately before use; hence, all data previously stored in the
 * output array will be lost.
 *
 * \a in   the data to be decoded.
 *
 * \a out  receives the uudecoded data.
 */
KCODECS_EXPORT void uudecode(QByteArrayView in, QByteArray &out);

/*!
 * Encodes the given data using the base64 algorithm.
 *
 * This overload has no parameter to control line breaking; the
 * overload that writes into an output array takes an \c insertLFs
 * flag to restrict the encoded data to 76 characters or less per
 * line as specified by RFC 2045.
 *
 * \a in         the data to be encoded.
 *
 * Returns the base64 encoded data.
 * \since 5.5
 */
KCODECS_EXPORT QByteArray base64Encode(QByteArrayView in);

/*!
 * Encodes the given data using the base64 algorithm.
 *
 * Use this function if you want the result of the encoding
 * to be placed in another array, which cuts down the number
 * of copy operations that have to be performed in the process.
 * This is also the preferred method for encoding binary data.
 *
 * \a insertLFs determines if the encoded data is going
 * to be restricted to 76 characters or less per line as specified
 * by RFC 2045.  If \a insertLFs is true, then there will be 76
 * characters or less per line.
 *
 * \note The output array is first reset and then resized
 * appropriately before use; hence, all data previously stored in the
 * output array will be lost.
 *
 * \a in        the data to be encoded.
 *
 * \a out       receives the encoded data.
 *
 * \a insertLFs limit the number of characters per line
 *                  (default is false).
 */
KCODECS_EXPORT void base64Encode(QByteArrayView in, QByteArray &out, bool insertLFs = false);

/*!
 * Decodes the given data that was encoded using the
 * base64 algorithm.
 *
 * This convenience overload returns the result as a new QByteArray.
 *
 * \a in   the data to be decoded.
 *
 * Returns the decoded data.
 */
KCODECS_EXPORT QByteArray base64Decode(QByteArrayView in);

/*!
 * Decodes the given data that was encoded with the base64
 * algorithm.
 *
 * Use this function if you want the result of the decoding
 * to be placed in another array, which cuts down the number
 * of copy operations that have to be performed in the process.
 * This is also the preferred method for decoding encoded
 * binary data.
 *
 * \note The output array is first reset and then resized
 * appropriately before use; hence, all data previously stored in the
 * output array will be lost.
 *
 * \a in   the data to be decoded.
 *
 * \a out  receives the decoded data.
 */
KCODECS_EXPORT void base64Decode(QByteArrayView in, QByteArray &out);

/*!
 * Decodes string \a text according to RFC2047,
 * i.e., the construct =?charset?[qb]?encoded?=
 *
 * \a text the source string.
 *
 * Returns the decoded string.
 */
KCODECS_EXPORT QString decodeRFC2047String(QStringView text);

/*!
 * Charset options for the RFC2047 functions. In this header the
 * option is accepted by decodeRFC2047String().
 * \since 5.5
 *
 * \value NoOption No special option
 * \value ForceDefaultCharset Force use of the default charset
 */
enum CharsetOption {
    NoOption = 0,
    ForceDefaultCharset = 1,
};

/*!
 * Decodes string \a src according to RFC2047, i.e. the construct
 *  =?charset?[qb]?encoded?=
 *
 * \a src       the source string.
 *
 * \a usedCS    the name of any detected charset or, in case of multiple
 *                  different ones, "UTF-8" as that of a super charset is
 *                  returned here.
 *
 * \a defaultCS the charset to use in case the detected
 *                  one isn't known to us (default is an empty
 *                  QByteArray).
 *
 * \a option    charset options for the decoder, see CharsetOption
 *                  (default is NoOption).
 *
 * Returns the decoded string.
 * \since 5.5
 */
KCODECS_EXPORT QString decodeRFC2047String(QByteArrayView src, QByteArray *usedCS, const QByteArray &defaultCS = QByteArray(), CharsetOption option = NoOption);

/*!
 * Encodes string \a src according to RFC2047 using charset \a charset.
 *
 * This function also makes commas, quotes and other characters part of the encoded name, for example
 * the string "Jöhn Döe" <john@example.com> would be encoded as <encoded word for "Jöhn Döe"> <john@example.com>,
 * i.e. the opening and closing quote mark would be part of the encoded word.
 * Therefore don't use this function for input strings that contain semantically meaningful characters,
 * like the quoting marks in this example.
 *
 * \a src           the source string.
 *
 * \a charset       the charset to use. If it can't encode the string, UTF-8 will be used instead.
 *
 * Returns the encoded string.
 * \since 5.5
 */
KCODECS_EXPORT QByteArray encodeRFC2047String(QStringView src, const QByteArray &charset);

/*!
 * Decodes the given data that was encoded using the
 * base45 codec.
 *
 * \a in   the data to be decoded.
 *
 * Returns the decoded data.
 *
 * \since 5.84
 *
 * \sa https://datatracker.ietf.org/doc/draft-faltstrom-base45/
 */
KCODECS_EXPORT QByteArray base45Decode(QByteArrayView in);

// Forward declarations of the stateful coder classes and their private
// implementation classes, which are referenced by the class declarations
// below.
class Encoder;
class EncoderPrivate;
class Decoder;
class DecoderPrivate;

/*!
  \class KCodecs::Codec
  \inheaderfile KCodecs
  \inmodule KCodecs

  \section1 Glossary:
  \section2 MIME:
  Multipurpose Internet Mail Extensions or MIME is an
  Internet Standard that extends the format of e-mail to support text in
  character sets other than US-ASCII, non-text attachments, multi-part message
  bodies, and header information in non-ASCII character sets. Virtually all
  human-written Internet e-mail and a fairly large proportion of automated
  e-mail is transmitted via SMTP in MIME format. Internet e-mail is
  so closely associated with the SMTP and MIME standards that it is sometimes
  called SMTP/MIME e-mail. The content types defined by MIME standards are
  also of growing importance outside of e-mail, such as in communication
  protocols like HTTP for the World Wide Web. MIME is also a
  fundamental component of communication protocols such as HTTP, which
  requires that data be transmitted in the context of e-mail-like messages,
  even though the data may not actually be e-mail.

  \section2 Codec:
  a program capable of performing encoding and decoding on a digital data
  stream. Codecs encode data for storage or encryption and decode it for
  viewing or editing.

  \section2 CRLF:
  A "Carriage Return (0x0D)" followed by a
  "Line Feed (0x0A)", two ASCII control characters used to represent a
  newline on some operating systems, notably DOS and Microsoft Windows.

  \section2 LF:
  a "Line Feed (0x0A)" ASCII control character used
  to represent a newline on some operating systems, notably Unix, Unix-like,
  and Linux.

  \brief An abstract base class of codecs for common mail transfer encodings.

  Provides an abstract base class of codecs like base64 and quoted-printable.
  Implemented as a singleton.

  \since 5.5
*/
class KCODECS_EXPORT Codec
{
public:
    /*!
     * Newline convention to use when encoding or decoding.
     *
     * \value NewlineLF Line Feed
     * \value NewlineCRLF Carriage Return Line Feed
     */
    enum NewlineType {
        NewlineLF,
        NewlineCRLF,
    };

    /*!
      Returns a codec associated with the specified \a name.

      \a name is a valid codec name.

      The usage examples in this class check the returned pointer for
      null before using it; callers should do the same.
    */
    static Codec *codecForName(QByteArrayView name);

    /*!
      Computes the maximum size, in characters, needed for the encoding.

      \a insize is the number of input characters to be encoded.

      \a newline whether to make new lines using CRLF or LF (default is LF).

      Returns the maximum number of characters in the encoding.
    */
    virtual qsizetype maxEncodedSizeFor(qsizetype insize, NewlineType newline = NewlineLF) const = 0;

    /*!
      Computes the maximum size, in characters, needed for the decoding.

      \a insize is the number of input characters to be decoded.

      \a newline whether to make new lines using CRLF or LF (default is LF).

      Returns the maximum number of characters in the decoding.
    */
    virtual qsizetype maxDecodedSizeFor(qsizetype insize, NewlineType newline = NewlineLF) const = 0;

    /*!
      Creates the encoder for the codec.

      \a newline whether to make new lines using CRLF or LF (default is LF).

      Returns a pointer to an instance of the codec's encoder.
    */
    virtual Encoder *makeEncoder(NewlineType newline = NewlineLF) const = 0;

    /*!
      Creates the decoder for the codec.

      \a newline whether to make new lines using CRLF or LF (default is LF).

      Returns a pointer to an instance of the codec's decoder. The usage
      example in KCodecs::Decoder deletes the decoder (but not the codec)
      when done with it.
    */
    virtual Decoder *makeDecoder(NewlineType newline = NewlineLF) const = 0;

    /*!
      Convenience wrapper that can be used for small chunks of data
      when you can provide a large enough buffer. The default
      implementation creates an Encoder and uses it.

      Encodes a chunk of bytes starting at \a scursor and extending to
      \a send into the buffer described by \a dcursor and \a dend.

      This function doesn't support chaining of blocks. The returned
      block cannot be added to, but you don't need to finalize it, either.

      Example usage (\c in contains the input data):
      \code
      KCodecs::Codec *codec = KCodecs::Codec::codecForName("base64");
      if (!codec) {
          qFatal() << "no base64 codec found!?";
      }
      // worst-case size of the encoding
      QByteArray out(codec->maxEncodedSizeFor(in.size()), Qt::Uninitialized);
      QByteArray::ConstIterator iit = in.constBegin();
      QByteArray::Iterator oit = out.begin();
      if (!codec->encode(iit, in.constEnd(), oit, out.end())) {
          qDebug() << "output buffer too small";
          return;
      }
      qDebug() << "Size of encoded data:" << oit - out.begin();
      \endcode

      \a scursor is a pointer to the start of the input buffer.

      \a send is a pointer to the end of the input buffer.

      \a dcursor is a pointer to the start of the output buffer. As the
      example shows, the distance it has moved from the start of the
      output buffer after the call is the size of the encoded data.

      \a dend is a pointer to the end of the output buffer.

      \a newline whether to make new lines using CRLF or LF (default is LF).

      Returns false if the encoded data didn't fit into the output buffer;
      true otherwise.
    */
    virtual bool encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend, NewlineType newline = NewlineLF) const;

    /*!
      Convenience wrapper that can be used for small chunks of data
      when you can provide a large enough buffer. The default
      implementation creates a Decoder and uses it.

      Decodes a chunk of bytes starting at \a scursor and extending to
      \a send into the buffer described by \a dcursor and \a dend.

      This function doesn't support chaining of blocks. The returned
      block cannot be added to, but you don't need to finalize it, either.

      Example usage (\c in contains the input data):
      \code
      KCodecs::Codec *codec = KCodecs::Codec::codecForName("base64");
      if (!codec) {
          qFatal() << "no base64 codec found!?";
      }
      // worst-case size of the decoding
      QByteArray out(codec->maxDecodedSizeFor(in.size()), Qt::Uninitialized);
      QByteArray::ConstIterator iit = in.constBegin();
      QByteArray::Iterator oit = out.begin();
      if (!codec->decode(iit, in.constEnd(), oit, out.end())) {
          qDebug() << "output buffer too small";
          return;
      }
      qDebug() << "Size of decoded data:" << oit - out.begin();
      \endcode

      \a scursor is a pointer to the start of the input buffer.

      \a send is a pointer to the end of the input buffer.

      \a dcursor is a pointer to the start of the output buffer. As the
      example shows, the distance it has moved from the start of the
      output buffer after the call is the size of the decoded data.

      \a dend is a pointer to the end of the output buffer.

      \a newline whether to make new lines using CRLF or LF (default is LF).

      Returns false if the decoded data didn't fit into the output buffer;
      true otherwise.
    */
    virtual bool decode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend, NewlineType newline = NewlineLF) const;

    /*!
      Even more convenient, but also a bit slower and more memory
      intensive, since it allocates storage for the worst case and then
      shrinks the result QByteArray to the actual size again.

      For use with small \a src.

      \a src is the data to encode.

      \a newline whether to make new lines using CRLF or LF (default is LF).

      Returns the encoded data.
    */
    QByteArray encode(QByteArrayView src, NewlineType newline = NewlineLF) const;

    /*!
      Even more convenient, but also a bit slower and more memory
      intensive, since it allocates storage for the worst case and then
      shrinks the result QByteArray to the actual size again.

      For use with small \a src.

      \a src is the data to decode.

      \a newline whether to make new lines using CRLF or LF (default is LF).

      Returns the decoded data.
    */
    QByteArray decode(QByteArrayView src, NewlineType newline = NewlineLF) const;

    /*!
      Returns the name of the encoding. Guaranteed to be lowercase.
    */
    virtual const char *name() const = 0;

    // Virtual so that codecs can be destroyed through a pointer to this
    // base class; there is nothing to release here.
    virtual ~Codec() = default;

protected:
    // Protected: this class is abstract and only constructed as the base
    // of a concrete codec.
    Codec() = default;
};

/*!
  \class KCodecs::Decoder
  \inheaderfile KCodecs
  \inmodule KCodecs

  \brief Stateful CTE decoder class.

  Stateful decoder class, modelled after QTextDecoder.

  \section1 Overview

  KCodecs decoders are designed to be able to process encoded data in
  chunks of arbitrary size and to work with output buffers of also
  arbitrary size. They maintain any state necessary to go on where
  the previous call left off.

  The class consists of only two methods of interest: see decode,
  which decodes an input block and finalize, which flushes any
  remaining data to the output stream.

  Typically, you will create a decoder instance, call decode as
  often as necessary, then call finalize (most often a single
  call suffices, but it might be that during that call the output
  buffer is filled, so you should be prepared to call finalize
  as often as necessary, i.e. until it returns \c true).

  \section1 Return Values

  Both methods return \c true to indicate that they've finished their
  job. For decode, a return value of \c true means that the
  current input block has been finished (\c false most often means
  that the output buffer is full, but that isn't required
  behavior. The decode call is free to return at arbitrary
  times during processing).

  For finalize, a return value of \c true means that all data
  implicitly or explicitly stored in the decoder instance has been
  flushed to the output buffer. A \c false return value should be
  interpreted as "check if the output buffer is full and call me
  again", just as with decode.

  \section1 Usage Pattern

  Since the decoder maintains state, you can only use it once. After
  a sequence of input blocks has been processed, you finalize
  the output and then delete the decoder instance. If you want to
  process another input block sequence, you create a new instance.

  Typical usage (\a in contains the (base64-encoded) input data),
  taking into account all the conventions detailed above:

  \code
  KCodecs::Codec *codec = KCodecs::Codec::codecForName("base64");
  if (!codec) {
      qFatal() << "No codec found for base64!";
  }
  KCodecs::Decoder *dec = codec->makeDecoder();
  Q_ASSERT(dec); // should not happen
  QByteArray out(256); // small buffer is enough ;-)
  QByteArray::Iterator iit = in.begin();
  QByteArray::Iterator oit = out.begin();
  // decode the chunk
  while (!dec->decode(iit, in.end(), oit, out.end()))
    if (oit == out.end()) { // output buffer full, process contents
      do_something_with(out);
      oit = out.begin();
    }
  // repeat while loop for each input block
  // ...
  // finish (flush remaining data from decoder):
  while (!dec->finish(oit, out.end()))
    if (oit == out.end()) { // output buffer full, process contents
      do_something_with(out);
      oit = out.begin();
    }
  // now process last chunk:
  out.resize(oit - out.begin());
  do_something_with(out);
  // _delete_ the decoder, but not the codec:
  delete dec;
  \endcode

  \since 5.5
*/
class KCODECS_EXPORT Decoder
{
protected:
    friend class Codec;
    friend class DecoderPrivate;

    /*!
      Protected constructor. Use KCodecs::Codec::makeDecoder to create an
      instance.

      \a newline whether make new lines using CRLF, or LF (default is LF).
    */
    Decoder(Codec::NewlineType newline = Codec::NewlineLF);

public:
    virtual ~Decoder();

    /*!
      Decodes a chunk of data, maintaining state information between
      calls. See class decumentation for calling conventions.

      \a scursor is a pointer to the start of the input buffer.

      \a send is a pointer to the end of the input buffer.

      \a dcursor is a pointer to the start of the output buffer.

      \a dend is a pointer to the end of the output buffer.

      Returns true on success
    */
    virtual bool decode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) = 0;

    /*!
      Call this method to finalize the output stream. Writes all
      remaining data and resets the decoder. See KCodecs::Codec for
      calling conventions.

      \a dcursor is a pointer to the start of the output buffer.

      \a dend is a pointer to the end of the output buffer.

      Returns true on success
    */
    virtual bool finish(char *&dcursor, const char *const dend) = 0;

protected:
    //@cond PRIVATE
    std::unique_ptr<DecoderPrivate> const d;
    //@endcond
};

/*!
  \class KCodecs::Encoder
  \inheaderfile KCodecs
  \inmodule KCodecs

  \brief Stateful encoder class.

  Stateful encoder class, modeled after QTextEncoder.

  The constructor is protected; instances are obtained through
  KCodecs::Codec::makeEncoder.

  \since 5.5
*/
class KCODECS_EXPORT Encoder
{
protected:
    friend class Codec;
    friend class EncoderPrivate;

    /*!
      Protected constructor. Use KCodecs::Codec::makeEncoder if you want one.

      \a newline whether to make new lines using CRLF or LF (default is LF).
    */
    explicit Encoder(Codec::NewlineType newline = Codec::NewlineLF);

public:
    virtual ~Encoder();

    /*!
      Encodes a chunk of data, maintaining state information between
      calls. See KCodecs::Codec for calling conventions.

      \a scursor is a pointer to the start of the input buffer.

      \a send is a pointer to the end of the input buffer.

      \a dcursor is a pointer to the start of the output buffer.

      \a dend is a pointer to the end of the output buffer.

      Returns true on success.
    */
    virtual bool encode(const char *&scursor, const char *const send, char *&dcursor, const char *const dend) = 0;

    /*!
      Call this method to finalize the output stream. Writes all remaining
      data and resets the encoder. See KCodecs::Codec for calling conventions.

      \a dcursor is a pointer to the start of the output buffer.

      \a dend is a pointer to the end of the output buffer.

      Returns true on success.
    */
    virtual bool finish(char *&dcursor, const char *const dend) = 0;

protected:
    /*
      The maximum number of characters permitted in the output buffer.
    */
    enum {
        maxBufferedChars = 8,
    };

    /*!
      Writes character \a ch to the output stream or the output buffer,
      depending on whether or not the output stream has space left.

      \a ch is the character to write.

      \a dcursor is a pointer to the start of the output buffer.

      \a dend is a pointer to the end of the output buffer.

      Returns true if written to the output stream; else false if buffered.
    */
    bool write(char ch, char *&dcursor, const char *const dend);

    /*!
      Writes characters from the output buffer to the output stream.
      Implementations of encode() and finish() should call this
      at the very beginning and for each iteration of the while loop.

      \a dcursor is a pointer to the start of the output buffer.

      \a dend is a pointer to the end of the output buffer.

      Returns true if all chars could be written, false otherwise.
    */
    bool flushOutputBuffer(char *&dcursor, const char *const dend);

    /*!
      Convenience function. Outputs LF or CRLF, based on the
      state of mWithCRLF (not declared in this header; presumably the
      newline setting kept in the private implementation).

      \a dcursor is a pointer to the start of the output buffer.

      \a dend is a pointer to the end of the output buffer.

      Returns true on success.
    */
    bool writeCRLF(char *&dcursor, const char *const dend);

protected:
    //@cond PRIVATE
    // Private implementation object (EncoderPrivate is only
    // forward-declared in this header).
    std::unique_ptr<EncoderPrivate> const d;
    //@endcond
};

} // namespace KCodecs

#endif // KCODECS_H
