RetroZilla/xpcom/string/public/nsUTF8Utils.h

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is mozilla.org code.
 *
 * The Initial Developer of the Original Code is
 * Netscape Communications Corporation.
 * Portions created by the Initial Developer are Copyright (C) 2001
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Peter Annema <jaggernaut@netscape.com> (original author)
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either of the GNU General Public License Version 2 or later (the "GPL"),
 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */

#ifndef nsUTF8Utils_h_
#define nsUTF8Utils_h_

#include "nsCharTraits.h"

/**
 * Bit-pattern tests that classify one byte of a UTF-8 stream: a plain
 * ASCII byte, a continuation byte inside a multi-byte sequence, or the
 * lead byte that announces a 2- to 6-byte sequence.
 */
class UTF8traits
  {
    private:
      // True when the bits of |c| selected by |mask| are equal to |pattern|.
      static PRBool hasPrefix(char c, int mask, int pattern)
        {
          return (c & mask) == pattern;
        }

    public:
      // 0xxxxxxx
      static PRBool isASCII(char c) { return hasPrefix(c, 0x80, 0x00); }
      // 10xxxxxx
      static PRBool isInSeq(char c) { return hasPrefix(c, 0xC0, 0x80); }
      // 110xxxxx
      static PRBool is2byte(char c) { return hasPrefix(c, 0xE0, 0xC0); }
      // 1110xxxx
      static PRBool is3byte(char c) { return hasPrefix(c, 0xF0, 0xE0); }
      // 11110xxx
      static PRBool is4byte(char c) { return hasPrefix(c, 0xF8, 0xF0); }
      // 111110xx
      static PRBool is5byte(char c) { return hasPrefix(c, 0xFC, 0xF8); }
      // 1111110x
      static PRBool is6byte(char c) { return hasPrefix(c, 0xFE, 0xFC); }
  };

// NS_ALWAYS_INLINE expands to GCC's always_inline function attribute when
// __GNUC__ is defined, and to nothing for every other compiler.
#ifdef __GNUC__
#define NS_ALWAYS_INLINE __attribute__((always_inline))
#else
#define NS_ALWAYS_INLINE
#endif

/**
 * Character sink (in the sense of |copy_string| in nsAlgorithm.h) that
 * decodes UTF-8 input into a caller-supplied UTF-16 buffer.
 *
 * Overlong forms, encoded surrogates, U+FFFE/U+FFFF and values at or
 * above UCS_END are each written as a single UCS2_REPLACEMENT_CHAR.
 * An unrecognised lead byte, a bad continuation byte, or a sequence cut
 * off by the end of the fragment latches an error state: output stops at
 * that point and every later |write| does nothing but report its input
 * as consumed.
 */
class ConvertUTF8toUTF16
  {
    public:
      typedef nsACString::char_type value_type;
      typedef nsAString::char_type  buffer_type;

    // |aBuffer| is where output goes; its capacity is never checked here.
    ConvertUTF8toUTF16( buffer_type* aBuffer )
        : mStart(aBuffer), mBuffer(aBuffer), mErrorEncountered(PR_FALSE) {}

    // Number of UTF-16 code units written so far.
    size_t Length() const { return mBuffer - mStart; }

    /**
     * Decodes the |N| bytes beginning at |start|.
     *
     * Returns the number of input bytes consumed; after an error (now or
     * in an earlier call) that is always |N|.
     */
    PRUint32 NS_ALWAYS_INLINE write( const value_type* start, PRUint32 N )
      {
        if ( mErrorEncountered )
          return N;

        // The decoder assumes a multi-byte sequence is never split across
        // two fragments.
        const value_type* src = start;
        const value_type* const srcEnd = start + N;
        buffer_type* dest = mBuffer;

        while ( src != srcEnd )
          {
            char unit = *src++;

            if ( UTF8traits::isASCII(unit) )
              {
                *dest++ = buffer_type(unit);
                continue;
              }

            PRUint32 codePoint;      // value assembled from the sequence
            PRUint32 smallestLegal;  // anything below this is overlong
            PRInt32 trailing;        // continuation bytes still expected

            // The lead byte fixes the sequence length and contributes the
            // top payload bits of the code point.
            if ( UTF8traits::is2byte(unit) )
              {
                codePoint = (PRUint32(unit) & 0x1F) << 6;
                trailing = 1;
                smallestLegal = 0x00000080;
              }
            else if ( UTF8traits::is3byte(unit) )
              {
                codePoint = (PRUint32(unit) & 0x0F) << 12;
                trailing = 2;
                smallestLegal = 0x00000800;
              }
            else if ( UTF8traits::is4byte(unit) )
              {
                codePoint = (PRUint32(unit) & 0x07) << 18;
                trailing = 3;
                smallestLegal = 0x00010000;
              }
            else if ( UTF8traits::is5byte(unit) )
              {
                codePoint = (PRUint32(unit) & 0x03) << 24;
                trailing = 4;
                smallestLegal = 0x00200000;
              }
            else if ( UTF8traits::is6byte(unit) )
              {
                codePoint = (PRUint32(unit) & 0x01) << 30;
                trailing = 5;
                smallestLegal = 0x04000000;
              }
            else
              {
                NS_ERROR("Not a UTF-8 string. This code should only be used for converting from known UTF-8 strings.");
                return GiveUp(dest, N);
              }

            // Fold in six payload bits per continuation byte, most
            // significant group first.
            for ( PRInt32 pending = trailing; pending > 0; --pending )
              {
                if ( src == srcEnd )
                  {
                    NS_ERROR("Buffer ended in the middle of a multibyte sequence");
                    return GiveUp(dest, N);
                  }

                unit = *src++;

                if ( !UTF8traits::isInSeq(unit) )
                  {
                    NS_ERROR("not a UTF8 string");
                    return GiveUp(dest, N);
                  }

                codePoint |= (PRUint32(unit) & 0x3F) << ((pending - 1) * 6);
              }

            // Overlong sequence
            if ( codePoint < smallestLegal )
              {
                *dest++ = UCS2_REPLACEMENT_CHAR;
                continue;
              }

            // Ordinary character below the surrogate range
            if ( codePoint <= 0xD7FF )
              {
                *dest++ = codePoint;
                continue;
              }

            // Encoded surrogates (0xD800 - 0xDFFF) and the prohibited
            // characters 0xFFFE / 0xFFFF
            if ( codePoint <= 0xDFFF || codePoint == 0xFFFE || codePoint == 0xFFFF )
              {
                *dest++ = UCS2_REPLACEMENT_CHAR;
                continue;
              }

            // Remaining characters that fit in one UTF-16 unit
            if ( !( codePoint >= PLANE1_BASE ) )
              {
                *dest++ = codePoint;
                continue;
              }

            // Too large to be represented
            if ( codePoint >= UCS_END )
              {
                *dest++ = UCS2_REPLACEMENT_CHAR;
                continue;
              }

            // Everything else becomes a surrogate pair
            *dest++ = (buffer_type)H_SURROGATE(codePoint);
            *dest++ = (buffer_type)L_SURROGATE(codePoint);
          }

        mBuffer = dest;
        return src - start;
      }

    // Stores a zero code unit at the current output position without
    // advancing it.
    void write_terminator()
      {
        *mBuffer = buffer_type(0);
      }

    private:
      // Common error exit: latch the error flag, publish how far the
      // output got, and report the whole fragment as consumed.
      PRUint32 GiveUp( buffer_type* aOut, PRUint32 aCount )
        {
          mErrorEncountered = PR_TRUE;
          mBuffer = aOut;
          return aCount;
        }

      buffer_type* const mStart;   // first slot of the output buffer
      buffer_type* mBuffer;        // next slot to be written
      PRBool mErrorEncountered;    // set once malformed input is seen
  };

/**
 * A character sink (see |copy_string| in nsAlgorithm.h) for computing
 * the length, in UTF-16 code units, of the UTF-16 string equivalent to a
 * UTF-8 string.
 *
 * Only lead bytes (and, for 4-byte sequences, the second byte) are
 * inspected; the remaining continuation bytes are skipped without being
 * validated.  Once an unrecognised lead byte is met, or a sequence runs
 * past the end of a fragment, the error state is latched and every later
 * |write| is ignored.
 */
class CalculateUTF8Length
  {
    public:
      typedef nsACString::char_type value_type;

    CalculateUTF8Length() : mLength(0), mErrorEncountered(PR_FALSE) { }

    // Number of UTF-16 code units counted so far.
    size_t Length() const { return mLength; }

    /**
     * Accounts for the |N| bytes beginning at |start|.
     *
     * Returns the number of input bytes consumed, which is |N| both on
     * success and once an error has been encountered.
     */
    PRUint32 NS_ALWAYS_INLINE write( const value_type* start, PRUint32 N )
      {
          // ignore any further requests
        if ( mErrorEncountered )
            return N;

        // algorithm assumes utf8 units won't
        // be spread across fragments
        const value_type* p = start;
        const value_type* end = start + N;
        for ( ; p < end /* && *p */; ++mLength )
          {
            if ( UTF8traits::isASCII(*p) )
                p += 1;
            else if ( UTF8traits::is2byte(*p) )
                p += 2;
            else if ( UTF8traits::is3byte(*p) )
                p += 3;
            else if ( UTF8traits::is4byte(*p) )
              {
                // A 4-byte sequence normally encodes a code point above
                // 0xFFFF, which needs a surrogate pair in UTF-16, so it
                // counts as two code units.  5- and 6-byte sequences never
                // do: they always decode to more than 0x10FFFF and are
                // converted to a single replacement character.
                //
                // A 4-byte sequence whose value is too small (non-shortest
                // form, below 0x10000) or too large (0x110000 and up) is
                // also converted to a single replacement character by
                // |ConvertUTF8toUTF16|, so it must count as one unit only.
                //
                // The layout is 11110aaa 10bbbbbb 10cccccc 10dddddd, so
                // "aaabbbbbb" is the decoded value shifted right by 12.
                // The value lies in [0x10000, 0x110000) exactly when those
                // nine bits lie in [0x010, 0x110).
                if ( end - p < 4 )
                  {
                    // Truncated sequence: the second byte may not exist,
                    // so keep the traditional count of two.  The |p != end|
                    // check below reports the error.
                    ++mLength;
                  }
                else
                  {
                    const PRUint32 top = ( PRUint32(p[0] & 0x07) << 6 ) |
                                           PRUint32(p[1] & 0x3F);
                    if ( top >= 0x010 && top < 0x110 )
                      ++mLength;
                  }
                p += 4;
              }
            else if ( UTF8traits::is5byte(*p) )
                p += 5;
            else if ( UTF8traits::is6byte(*p) )
                p += 6;
            else
              {
                // Not a valid lead byte; leave |p| short of |end| so the
                // check below flags the error.
                break;
              }
          }
        if ( p != end )
          {
            // Either the loop stopped on a bad lead byte (p < end) or the
            // last sequence claimed more bytes than the fragment holds
            // (p > end).
            NS_ERROR("Not a UTF-8 string. This code should only be used for converting from known UTF-8 strings.");
            mErrorEncountered = PR_TRUE;
            return N;
          }
        return p - start;
      }

    private:
      size_t mLength;            // UTF-16 code units counted so far
      PRBool mErrorEncountered;  // set once malformed input is seen
  };

/**
 * Character sink (in the sense of |copy_string| in nsAlgorithm.h) that
 * encodes UTF-16 input as UTF-8 into a caller-supplied buffer.
 */
class ConvertUTF16toUTF8
  {
    public:
      typedef nsAString::char_type  value_type;
      typedef nsACString::char_type buffer_type;

    // Error handling is deliberately more forgiving than in
    // |ConvertUTF8toUTF16|; it is kept that way for backwards
    // compatibility.

    // |aBuffer| is where output goes; its capacity is never checked here.
    ConvertUTF16toUTF8( buffer_type* aBuffer )
        : mStart(aBuffer), mBuffer(aBuffer) {}

    // Number of bytes written so far.
    size_t Size() const { return mBuffer - mStart; }

    /**
     * Encodes the |N| code units beginning at |start|.  Always returns |N|.
     *
     * A low surrogate with no preceding high surrogate produces no output.
     * A high surrogate that is not followed by a low surrogate produces no
     * output either, and the code unit that followed it is consumed as
     * well.  A high surrogate that is the last unit of the fragment ends
     * the call.
     */
    PRUint32 NS_ALWAYS_INLINE write( const value_type* start, PRUint32 N )
      {
        // Advance a local cursor and store it back on exit.
        buffer_type* dest = mBuffer;
        const value_type* const limit = start + N;

        for ( const value_type* src = start; src < limit; ++src )
          {
            value_type unit = *src;

            if ( (unit & 0xFF80) == 0 ) // U+0000 - U+007F: one byte
              {
                *dest++ = buffer_type(unit);
                continue;
              }

            if ( (unit & 0xF800) == 0 ) // U+0080 - U+07FF: two bytes
              {
                *dest++ = buffer_type(0xC0 | (unit >> 6));
                *dest++ = buffer_type(0x80 | (unit & 0x003F));
                continue;
              }

            if ( !IS_SURROGATE(unit) ) // U+0800 - U+D7FF, U+E000 - U+FFFF: three bytes
              {
                *dest++ = buffer_type(0xE0 | (unit >> 12));
                *dest++ = buffer_type(0x80 | ((unit >> 6) & 0x003F));
                *dest++ = buffer_type(0x80 | (unit & 0x003F));
                continue;
              }

            if ( IS_HIGH_SURROGATE(unit) ) // U+D800 - U+DBFF
              {
                const value_type lead = unit;

                ++src;
                if ( src == limit )
                  {
                    NS_ERROR("Surrogate pair split between fragments");
                    mBuffer = dest;
                    return N;
                  }

                const value_type trail = *src;

                if ( !IS_LOW_SURROGATE(trail) )
                  {
                    NS_ERROR("got a High Surrogate but no low surrogate");
                    // Nothing is written; |trail| has been consumed.
                    continue;
                  }

                // Combine the pair (trail is in U+DC00 - U+DFFF):
                // value = (H - D800) * 400 + 10000 + (L - DC00), in hex.
                PRUint32 ucs4 = SURROGATE_TO_UCS4(lead, trail);

                // U+10000 and above: four bytes
                *dest++ = buffer_type(0xF0 | (ucs4 >> 18));
                *dest++ = buffer_type(0x80 | ((ucs4 >> 12) & 0x003F));
                *dest++ = buffer_type(0x80 | ((ucs4 >> 6) & 0x003F));
                *dest++ = buffer_type(0x80 | (ucs4 & 0x003F));
                continue;
              }

            // U+DC00 - U+DFFF: a low surrogate on its own
            NS_ERROR("got a low Surrogate but no high surrogate");
            // Nothing is written.
          }

        mBuffer = dest;
        return N;
      }

    // Stores a zero byte at the current output position without
    // advancing it.
    void write_terminator()
      {
        *mBuffer = buffer_type(0);
      }

    private:
      buffer_type* const mStart;  // first slot of the output buffer
      buffer_type* mBuffer;       // next slot to be written
  };

/**
 * Character sink (in the sense of |copy_string| in nsAlgorithm.h) that
 * adds up how many bytes a UTF-16 string would occupy once encoded as
 * UTF-8.
 */
class CalculateUTF8Size
  {
    public:
      typedef nsAString::char_type value_type;

    CalculateUTF8Size()
      : mSize(0) { }

    // Number of UTF-8 bytes counted so far.
    size_t Size() const { return mSize; }

    /**
     * Accounts for the |N| code units beginning at |start|.  Always
     * returns |N|.
     *
     * Unpaired surrogates contribute nothing to the total, and the code
     * unit following an unpaired high surrogate is skipped as well.
     */
    PRUint32 NS_ALWAYS_INLINE write( const value_type* start, PRUint32 N )
      {
        // Surrogate pairs are assumed never to be split across fragments.
        const value_type* const limit = start + N;

        for ( const value_type* src = start; src < limit; ++src )
          {
            value_type unit = *src;

            if ( (unit & 0xFF80) == 0 ) // U+0000 - U+007F
              {
                mSize += 1;
                continue;
              }

            if ( (unit & 0xF800) == 0 ) // U+0080 - U+07FF
              {
                mSize += 2;
                continue;
              }

            if ( (unit & 0xF800) != 0xD800 ) // U+0800 - U+D7FF, U+E000 - U+FFFF
              {
                mSize += 3;
                continue;
              }

            if ( (unit & 0xFC00) != 0xD800 ) // U+DC00 - U+DFFF
              {
                NS_ERROR("got a low Surrogate but no high surrogate");
                continue;
              }

            // U+D800 - U+DBFF: high surrogate, so look at the next unit
            ++src;
            if ( src == limit )
              {
                NS_ERROR("Surrogate pair split between fragments");
                return N;
              }
            unit = *src;

            if ( (unit & 0xFC00) == 0xDC00 )
              {
                mSize += 4;
              }
            else
              {
                NS_ERROR("got a high Surrogate but no low surrogate");
              }
          }

        return N;
      }

    private:
      size_t mSize;  // UTF-8 bytes counted so far
  };

/**
 * Character sink that copies code units from one character type to
 * another with a plain cast (a |reinterpret_cast| style conversion).
 * Each input unit is first converted to the unsigned counterpart of the
 * input type and then to the output type.
 */
template <class FromCharT, class ToCharT>
class LossyConvertEncoding
  {
    public:
      typedef FromCharT value_type;

      typedef FromCharT input_type;
      typedef ToCharT   output_type;

      typedef typename nsCharTraits<FromCharT>::unsigned_char_type unsigned_input_type;

    public:
      // |aDestination| is where output goes; its capacity is never
      // checked here.
      LossyConvertEncoding( output_type* aDestination ) : mDestination(aDestination) { }

      // Converts |aSourceLength| units starting at |aSource| and returns
      // |aSourceLength|.
      PRUint32
      write( const input_type* aSource, PRUint32 aSourceLength )
        {
          const input_type* const stop = aSource + aSourceLength;
          for ( const input_type* cursor = aSource; cursor < stop; ++cursor, ++mDestination )
            {
              // old-style casts on purpose, to mimic old |ns[C]String| behavior
              *mDestination = (output_type)(unsigned_input_type)(*cursor);
            }
          return aSourceLength;
        }

      // Stores a zero unit at the current output position without
      // advancing it.
      void
      write_terminator()
        {
          *mDestination = output_type(0);
        }

    private:
      output_type* mDestination;  // next slot to be written
  };

#endif /* !defined(nsUTF8Utils_h_) */
first commit 2015-10-21 05:03:22 +02:00			`/* -- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -- */`
			`/* *** BEGIN LICENSE BLOCK ***`
			`* Version: MPL 1.1/GPL 2.0/LGPL 2.1`
			`*`
			`* The contents of this file are subject to the Mozilla Public License Version`
			`* 1.1 (the "License"); you may not use this file except in compliance with`
			`* the License. You may obtain a copy of the License at`
			`* http://www.mozilla.org/MPL/`
			`*`
			`* Software distributed under the License is distributed on an "AS IS" basis,`
			`* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License`
			`* for the specific language governing rights and limitations under the`
			`* License.`
			`*`
			`* The Original Code is mozilla.org code.`
			`*`
			`* The Initial Developer of the Original Code is`
			`* Netscape Communications Corporation.`
			`* Portions created by the Initial Developer are Copyright (C) 2001`
			`* the Initial Developer. All Rights Reserved.`
			`*`
			`* Contributor(s):`
			`* Peter Annema <jaggernaut@netscape.com> (original author)`
			`*`
			`* Alternatively, the contents of this file may be used under the terms of`
			`* either of the GNU General Public License Version 2 or later (the "GPL"),`
			`* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),`
			`* in which case the provisions of the GPL or the LGPL are applicable instead`
			`* of those above. If you wish to allow use of your version of this file only`
			`* under the terms of either the GPL or the LGPL, and not to allow others to`
			`* use your version of this file under the terms of the MPL, indicate your`
			`* decision by deleting the provisions above and replace them with the notice`
			`* and other provisions required by the GPL or the LGPL. If you do not delete`
			`* the provisions above, a recipient may use your version of this file under`
			`* the terms of any one of the MPL, the GPL or the LGPL.`
			`*`
			`* *** END LICENSE BLOCK *** */`

			`#ifndef nsUTF8Utils_h_`
			`#define nsUTF8Utils_h_`

			`#include "nsCharTraits.h"`

			`class UTF8traits`
			`{`
			`public:`
			`static PRBool isASCII(char c) { return (c & 0x80) == 0x00; }`
			`static PRBool isInSeq(char c) { return (c & 0xC0) == 0x80; }`
			`static PRBool is2byte(char c) { return (c & 0xE0) == 0xC0; }`
			`static PRBool is3byte(char c) { return (c & 0xF0) == 0xE0; }`
			`static PRBool is4byte(char c) { return (c & 0xF8) == 0xF0; }`
			`static PRBool is5byte(char c) { return (c & 0xFC) == 0xF8; }`
			`static PRBool is6byte(char c) { return (c & 0xFE) == 0xFC; }`
			`};`

			`#ifdef __GNUC__`
			`#define NS_ALWAYS_INLINE __attribute__((always_inline))`
			`#else`
			`#define NS_ALWAYS_INLINE`
			`#endif`

			`/**`
			`* A character sink (see \|copy_string\| in nsAlgorithm.h) for converting`
			`* UTF-8 to UTF-16`
			`*/`
			`class ConvertUTF8toUTF16`
			`{`
			`public:`
			`typedef nsACString::char_type value_type;`
			`typedef nsAString::char_type buffer_type;`

			`ConvertUTF8toUTF16( buffer_type* aBuffer )`
			`: mStart(aBuffer), mBuffer(aBuffer), mErrorEncountered(PR_FALSE) {}`

			`size_t Length() const { return mBuffer - mStart; }`

			`PRUint32 NS_ALWAYS_INLINE write( const value_type* start, PRUint32 N )`
			`{`
			`if ( mErrorEncountered )`
			`return N;`

			`// algorithm assumes utf8 units won't`
			`// be spread across fragments`
			`const value_type* p = start;`
			`const value_type* end = start + N;`
			`buffer_type* out = mBuffer;`
			`for ( ; p != end /* && p /; )`
			`{`
			`char c = *p++;`

			`if ( UTF8traits::isASCII(c) )`
			`{`
			`*out++ = buffer_type(c);`
			`continue;`
			`}`

			`PRUint32 ucs4;`
			`PRUint32 minUcs4;`
			`PRInt32 state = 0;`

			`if ( UTF8traits::is2byte(c) )`
			`{`
			`ucs4 = (PRUint32(c) << 6) & 0x000007C0L;`
			`state = 1;`
			`minUcs4 = 0x00000080;`
			`}`
			`else if ( UTF8traits::is3byte(c) )`
			`{`
			`ucs4 = (PRUint32(c) << 12) & 0x0000F000L;`
			`state = 2;`
			`minUcs4 = 0x00000800;`
			`}`
			`else if ( UTF8traits::is4byte(c) )`
			`{`
			`ucs4 = (PRUint32(c) << 18) & 0x001F0000L;`
			`state = 3;`
			`minUcs4 = 0x00010000;`
			`}`
			`else if ( UTF8traits::is5byte(c) )`
			`{`
			`ucs4 = (PRUint32(c) << 24) & 0x03000000L;`
			`state = 4;`
			`minUcs4 = 0x00200000;`
			`}`
			`else if ( UTF8traits::is6byte(c) )`
			`{`
			`ucs4 = (PRUint32(c) << 30) & 0x40000000L;`
			`state = 5;`
			`minUcs4 = 0x04000000;`
			`}`
			`else`
			`{`
			`NS_ERROR("Not a UTF-8 string. This code should only be used for converting from known UTF-8 strings.");`
			`mErrorEncountered = PR_TRUE;`
			`mBuffer = out;`
			`return N;`
			`}`

			`while ( state-- )`
			`{`
			`if (p == end)`
			`{`
			`NS_ERROR("Buffer ended in the middle of a multibyte sequence");`
			`mErrorEncountered = PR_TRUE;`
			`mBuffer = out;`
			`return N;`
			`}`

			`c = *p++;`

			`if ( UTF8traits::isInSeq(c) )`
			`{`
			`PRInt32 shift = state * 6;`
			`ucs4 \|= (PRUint32(c) & 0x3F) << shift;`
			`}`
			`else`
			`{`
			`NS_ERROR("not a UTF8 string");`
			`mErrorEncountered = PR_TRUE;`
			`mBuffer = out;`
			`return N;`
			`}`
			`}`

			`if ( ucs4 < minUcs4 )`
			`{`
			`// Overlong sequence`
			`*out++ = UCS2_REPLACEMENT_CHAR;`
			`}`
			`else if ( ucs4 <= 0xD7FF )`
			`{`
			`*out++ = ucs4;`
			`}`
			`else if ( /* ucs4 >= 0xD800 && */ ucs4 <= 0xDFFF )`
			`{`
			`// Surrogates`
			`*out++ = UCS2_REPLACEMENT_CHAR;`
			`}`
			`else if ( ucs4 == 0xFFFE \|\| ucs4 == 0xFFFF )`
			`{`
			`// Prohibited characters`
			`*out++ = UCS2_REPLACEMENT_CHAR;`
			`}`
			`else if ( ucs4 >= PLANE1_BASE )`
			`{`
			`if ( ucs4 >= UCS_END )`
			`*out++ = UCS2_REPLACEMENT_CHAR;`
			`else {`
			`*out++ = (buffer_type)H_SURROGATE(ucs4);`
			`*out++ = (buffer_type)L_SURROGATE(ucs4);`
			`}`
			`}`
			`else`
			`{`
			`*out++ = ucs4;`
			`}`
			`}`
			`mBuffer = out;`
			`return p - start;`
			`}`

			`void write_terminator()`
			`{`
			`*mBuffer = buffer_type(0);`
			`}`

			`private:`
			`buffer_type* const mStart;`
			`buffer_type* mBuffer;`
			`PRBool mErrorEncountered;`
			`};`

			`/**`
			`* A character sink (see \|copy_string\| in nsAlgorithm.h) for computing`
			`* the length of the UTF-16 string equivalent to a UTF-8 string.`
			`*/`
			`class CalculateUTF8Length`
			`{`
			`public:`
			`typedef nsACString::char_type value_type;`

			`CalculateUTF8Length() : mLength(0), mErrorEncountered(PR_FALSE) { }`

			`size_t Length() const { return mLength; }`

			`PRUint32 NS_ALWAYS_INLINE write( const value_type* start, PRUint32 N )`
			`{`
			`// ignore any further requests`
			`if ( mErrorEncountered )`
			`return N;`

			`// algorithm assumes utf8 units won't`
			`// be spread across fragments`
			`const value_type* p = start;`
			`const value_type* end = start + N;`
			`for ( ; p < end /* && p /; ++mLength )`
			`{`
			`if ( UTF8traits::isASCII(*p) )`
			`p += 1;`
			`else if ( UTF8traits::is2byte(*p) )`
			`p += 2;`
			`else if ( UTF8traits::is3byte(*p) )`
			`p += 3;`
			`else if ( UTF8traits::is4byte(*p) ) {`
			`p += 4;`
			`// Because a UTF-8 sequence of 4 bytes represents a codepoint`
			`// greater than 0xFFFF, it will become a surrogate pair in the`
			`// UTF-16 string, so add 1 more to mLength.`
			`// This doesn't happen with is5byte and is6byte because they`
			`// are illegal UTF-8 sequences (greater than 0x10FFFF) so get`
			`// converted to a single replacement character.`
			`//`
			`// XXX: if the 4-byte sequence is an illegal non-shortest form,`
			`// it also gets converted to a replacement character, so`
			`// mLength will be off by one in this case.`
			`++mLength;`
			`}`
			`else if ( UTF8traits::is5byte(*p) )`
			`p += 5;`
			`else if ( UTF8traits::is6byte(*p) )`
			`p += 6;`
			`else`
			`{`
			`break;`
			`}`
			`}`
			`if ( p != end )`
			`{`
			`NS_ERROR("Not a UTF-8 string. This code should only be used for converting from known UTF-8 strings.");`
			`mErrorEncountered = PR_TRUE;`
			`return N;`
			`}`
			`return p - start;`
			`}`

			`private:`
			`size_t mLength;`
			`PRBool mErrorEncountered;`
			`};`

			`/**`
			`* A character sink (see \|copy_string\| in nsAlgorithm.h) for converting`
			`* UTF-16 to UTF-8.`
			`*/`
			`class ConvertUTF16toUTF8`
			`{`
			`public:`
			`typedef nsAString::char_type value_type;`
			`typedef nsACString::char_type buffer_type;`

			`// The error handling here is more lenient than that in`
			`// \|ConvertUTF8toUTF16\|, but it's that way for backwards`
			`// compatibility.`

			`ConvertUTF16toUTF8( buffer_type* aBuffer )`
			`: mStart(aBuffer), mBuffer(aBuffer) {}`

			`size_t Size() const { return mBuffer - mStart; }`

			`PRUint32 NS_ALWAYS_INLINE write( const value_type* start, PRUint32 N )`
			`{`
			`buffer_type *out = mBuffer; // gcc isn't smart enough to do this!`

			`for (const value_type p = start, end = start + N; p < end; ++p )`
			`{`
			`value_type c = *p;`
			`if (! (c & 0xFF80)) // U+0000 - U+007F`
			`{`
			`*out++ = (char)c;`
			`}`
			`else if (! (c & 0xF800)) // U+0100 - U+07FF`
			`{`
			`*out++ = 0xC0 \| (char)(c >> 6);`
			`*out++ = 0x80 \| (char)(0x003F & c);`
			`}`
			`else if (!IS_SURROGATE(c)) // U+0800 - U+D7FF,U+E000 - U+FFFF`
			`{`
			`*out++ = 0xE0 \| (char)(c >> 12);`
			`*out++ = 0x80 \| (char)(0x003F & (c >> 6));`
			`*out++ = 0x80 \| (char)(0x003F & c );`
			`}`
			`else if (IS_HIGH_SURROGATE(c)) // U+D800 - U+DBFF`
			`{`
			`// D800- DBFF - High Surrogate`
			`value_type h = c;`

			`++p;`
			`if (p == end)`
			`{`
			`NS_ERROR("Surrogate pair split between fragments");`
			`mBuffer = out;`
			`return N;`
			`}`
			`c = *p;`

			`if (IS_LOW_SURROGATE(c))`
			`{`
			`// DC00- DFFF - Low Surrogate`
			`// N = (H - D800) *400 + 10000 + ( L - DC00 )`
			`PRUint32 ucs4 = SURROGATE_TO_UCS4(h, c);`

			`// 0001 0000-001F FFFF`
			`*out++ = 0xF0 \| (char)(ucs4 >> 18);`
			`*out++ = 0x80 \| (char)(0x003F & (ucs4 >> 12));`
			`*out++ = 0x80 \| (char)(0x003F & (ucs4 >> 6));`
			`*out++ = 0x80 \| (char)(0x003F & ucs4);`
			`}`
			`else`
			`{`
			`NS_ERROR("got a High Surrogate but no low surrogate");`
			`// output nothing.`
			`}`
			`}`
			`else // U+DC00 - U+DFFF`
			`{`
			`// DC00- DFFF - Low Surrogate`
			`NS_ERROR("got a low Surrogate but no high surrogate");`
			`// output nothing.`
			`}`
			`}`

			`mBuffer = out;`
			`return N;`
			`}`

			`void write_terminator()`
			`{`
			`*mBuffer = buffer_type(0);`
			`}`

			`private:`
			`buffer_type* const mStart;`
			`buffer_type* mBuffer;`
			`};`

			`/**`
			`* A character sink (see \|copy_string\| in nsAlgorithm.h) for computing`
			`* the number of bytes a UTF-16 would occupy in UTF-8.`
			`*/`
			`class CalculateUTF8Size`
			`{`
			`public:`
			`typedef nsAString::char_type value_type;`

			`CalculateUTF8Size()`
			`: mSize(0) { }`

			`size_t Size() const { return mSize; }`

			`PRUint32 NS_ALWAYS_INLINE write( const value_type* start, PRUint32 N )`
			`{`
			`// Assume UCS2 surrogate pairs won't be spread across fragments.`
			`for (const value_type p = start, end = start + N; p < end; ++p )`
			`{`
			`value_type c = *p;`
			`if (! (c & 0xFF80)) // U+0000 - U+007F`
			`mSize += 1;`
			`else if (! (c & 0xF800)) // U+0100 - U+07FF`
			`mSize += 2;`
			`else if (0xD800 != (0xF800 & c)) // U+0800 - U+D7FF,U+E000 - U+FFFF`
			`mSize += 3;`
			`else if (0xD800 == (0xFC00 & c)) // U+D800 - U+DBFF`
			`{`
			`++p;`
			`if (p == end)`
			`{`
			`NS_ERROR("Surrogate pair split between fragments");`
			`return N;`
			`}`
			`c = *p;`

			`if (0xDC00 == (0xFC00 & c))`
			`mSize += 4;`
			`else`
			`NS_ERROR("got a high Surrogate but no low surrogate");`
			`}`
			`else // U+DC00 - U+DFFF`
			`NS_ERROR("got a low Surrogate but no high surrogate");`
			`}`

			`return N;`
			`}`

			`private:`
			`size_t mSize;`
			`};`

			`/**`
			`* A character sink that performs a \|reinterpret_cast\| style conversion`
			`* between character types.`
			`*/`
			`template <class FromCharT, class ToCharT>`
			`class LossyConvertEncoding`
			`{`
			`public:`
			`typedef FromCharT value_type;`

			`typedef FromCharT input_type;`
			`typedef ToCharT output_type;`

			`typedef typename nsCharTraits<FromCharT>::unsigned_char_type unsigned_input_type;`

			`public:`
			`LossyConvertEncoding( output_type* aDestination ) : mDestination(aDestination) { }`

			`PRUint32`
			`write( const input_type* aSource, PRUint32 aSourceLength )`
			`{`
			`const input_type* done_writing = aSource + aSourceLength;`
			`while ( aSource < done_writing )`
			`mDestination++ = (output_type)(unsigned_input_type)(aSource++); // use old-style cast to mimic old \|ns[C]String\| behavior`
			`return aSourceLength;`
			`}`

			`void`
			`write_terminator()`
			`{`
			`*mDestination = output_type(0);`
			`}`

			`private:`
			`output_type* mDestination;`
			`};`

			`#endif /* !defined(nsUTF8Utils_h_) */`