/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is mozilla.org code.
 *
 * The Initial Developer of the Original Code is
 * Netscape Communications Corporation.
 * Portions created by the Initial Developer are Copyright (C) 1998
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Daniel Bratell
 *   Ben Bucksch
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either of the GNU General Public License Version 2 or later (the "GPL"),
 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */

#include "nsPlainTextSerializer.h"
#include "nsILineBreakerFactory.h"
#include "nsLWBrkCIID.h"
#include "nsIServiceManager.h"
#include "nsHTMLAtoms.h"
#include "nsIDOMText.h"
#include "nsIDOMCDATASection.h"
#include "nsIDOMElement.h"
#include "nsINameSpaceManager.h"
#include "nsITextContent.h"
#include "nsTextFragment.h"
#include "nsContentUtils.h"
#include "nsReadableUtils.h"
#include "nsUnicharUtils.h"
#include "nsCRT.h"
#include "nsIParserService.h"

static NS_DEFINE_CID(kLWBrkCID, NS_LWBRK_CID);

// Preference names read by Init() when formatted output is requested.
#define PREF_STRUCTS "converter.html2txt.structs"
#define PREF_HEADER_STRATEGY "converter.html2txt.header_strategy"

static const PRInt32 kTabSize=4;
static const PRInt32 kOLNumberWidth = 3;
static const PRInt32 kIndentSizeHeaders = 2;  /* Indention of h1, if
                                                 mHeaderStrategy = 1 or = 2.
                                                 Indention of other headers
                                                 is derived from that.
                                                 XXX center h1? */
static const PRInt32 kIndentIncrementHeaders = 2;  /* If mHeaderStrategy = 1,
                                                      indent h(x+1) this many
                                                      columns more than h(x) */
static const PRInt32 kIndentSizeList = (kTabSize > kOLNumberWidth+3) ? kTabSize: kOLNumberWidth+3;
                               // Indention of non-first lines of ul and ol
static const PRInt32 kIndentSizeDD = kTabSize;  // Indention of <dd>

static PRInt32 HeaderLevel(eHTMLTags aTag); static PRInt32 GetUnicharWidth(PRUnichar ucs); static PRInt32 GetUnicharStringWidth(const PRUnichar* pwcs, PRInt32 n); // Someday may want to make this non-const: static const PRUint32 TagStackSize = 500; static const PRUint32 OLStackSize = 100; nsresult NS_NewPlainTextSerializer(nsIContentSerializer** aSerializer) { nsPlainTextSerializer* it = new nsPlainTextSerializer(); if (!it) { return NS_ERROR_OUT_OF_MEMORY; } return CallQueryInterface(it, aSerializer); } nsPlainTextSerializer::nsPlainTextSerializer() : kSpace(NS_LITERAL_STRING(" ")) // Init of "constant" { mOutputString = nsnull; mInHead = PR_FALSE; mAtFirstColumn = PR_TRUE; mIndent = 0; mCiteQuoteLevel = 0; mStructs = PR_TRUE; // will be read from prefs later mHeaderStrategy = 1 /*indent increasingly*/; // ditto mQuotesPreformatted = PR_FALSE; // ditto mDontWrapAnyQuotes = PR_FALSE; // ditto mHasWrittenCiteBlockquote = PR_FALSE; mSpanLevel = 0; for (PRInt32 i = 0; i <= 6; i++) { mHeaderCounter[i] = 0; } // Line breaker mWrapColumn = 72; // XXX magic number, we expect someone to reset this mCurrentLineWidth = 0; // Flow mEmptyLines = 1; // The start of the document is an "empty line" in itself, mInWhitespace = PR_TRUE; mPreFormatted = PR_FALSE; mStartedOutput = PR_FALSE; // initialize the tag stack to zero: mTagStack = new nsHTMLTag[TagStackSize]; mTagStackIndex = 0; mIgnoreAboveIndex = (PRUint32)kNotFound; // initialize the OL stack, where numbers for ordered lists are kept: mOLStack = new PRInt32[OLStackSize]; mOLStackIndex = 0; mULCount = 0; } nsPlainTextSerializer::~nsPlainTextSerializer() { delete[] mTagStack; delete[] mOLStack; } NS_IMPL_ISUPPORTS4(nsPlainTextSerializer, nsIContentSerializer, nsIContentSink, nsIHTMLContentSink, nsIHTMLToTextSink) NS_IMETHODIMP nsPlainTextSerializer::Init(PRUint32 aFlags, PRUint32 aWrapColumn, const char* aCharSet, PRBool aIsCopying) { #ifdef DEBUG // Check if the major control flags are set correctly. 
if(aFlags & nsIDocumentEncoder::OutputFormatFlowed) { NS_ASSERTION(aFlags & nsIDocumentEncoder::OutputFormatted, "If you want format=flowed, you must combine it with " "nsIDocumentEncoder::OutputFormatted"); } if(aFlags & nsIDocumentEncoder::OutputFormatted) { NS_ASSERTION(!(aFlags & nsIDocumentEncoder::OutputPreformatted), "Can't do formatted and preformatted output at the same time!"); } #endif NS_ENSURE_TRUE(nsContentUtils::GetParserServiceWeakRef(), NS_ERROR_UNEXPECTED); nsresult rv; mFlags = aFlags; mWrapColumn = aWrapColumn; // Only create a linebreaker if we will handle wrapping. if (MayWrap()) { nsCOMPtr lf(do_GetService(kLWBrkCID, &rv)); if (NS_SUCCEEDED(rv)) { nsAutoString lbarg; rv = lf->GetBreaker(lbarg, getter_AddRefs(mLineBreaker)); if (NS_FAILED(rv)) return NS_ERROR_FAILURE; } } // Set the line break character: if ((mFlags & nsIDocumentEncoder::OutputCRLineBreak) && (mFlags & nsIDocumentEncoder::OutputLFLineBreak)) { // Windows mLineBreak.AssignLiteral("\r\n"); } else if (mFlags & nsIDocumentEncoder::OutputCRLineBreak) { // Mac mLineBreak.Assign(PRUnichar('\r')); } else if (mFlags & nsIDocumentEncoder::OutputLFLineBreak) { // Unix/DOM mLineBreak.Assign(PRUnichar('\n')); } else { // Platform/default mLineBreak.AssignLiteral(NS_LINEBREAK); } mLineBreakDue = PR_FALSE; mFloatingLines = -1; if (mFlags & nsIDocumentEncoder::OutputFormatted) { // Get some prefs that controls how we do formatted output mStructs = nsContentUtils::GetBoolPref(PREF_STRUCTS, mStructs); mHeaderStrategy = nsContentUtils::GetIntPref(PREF_HEADER_STRATEGY, mHeaderStrategy); // The quotesPreformatted pref is a temporary measure. See bug 69638. mQuotesPreformatted = nsContentUtils::GetBoolPref("editor.quotesPreformatted", mQuotesPreformatted); // DontWrapAnyQuotes is set according to whether plaintext mail // is wrapping to window width -- see bug 134439. // We'll only want this if we're wrapping and formatted. 
if (mFlags & nsIDocumentEncoder::OutputWrap || mWrapColumn > 0) { mDontWrapAnyQuotes = nsContentUtils::GetBoolPref("mail.compose.wrap_to_window_width", mDontWrapAnyQuotes); } } // XXX We should let the caller pass this in. if (nsContentUtils::GetBoolPref("browser.frames.enabled")) { mFlags &= ~nsIDocumentEncoder::OutputNoFramesContent; } else { mFlags |= nsIDocumentEncoder::OutputNoFramesContent; } return NS_OK; } PRBool nsPlainTextSerializer::GetLastBool(const nsVoidArray& aStack) { PRUint32 size = aStack.Count(); if (size == 0) { return PR_FALSE; } return (aStack.ElementAt(size-1) != NS_REINTERPRET_CAST(void*, PR_FALSE)); } void nsPlainTextSerializer::SetLastBool(nsVoidArray& aStack, PRBool aValue) { PRUint32 size = aStack.Count(); if (size > 0) { aStack.ReplaceElementAt(NS_REINTERPRET_CAST(void*, aValue), size-1); } else { NS_ERROR("There is no \"Last\" value"); } } void nsPlainTextSerializer::PushBool(nsVoidArray& aStack, PRBool aValue) { aStack.AppendElement(NS_REINTERPRET_CAST(void*, aValue)); } PRBool nsPlainTextSerializer::PopBool(nsVoidArray& aStack) { PRBool returnValue = PR_FALSE; PRUint32 size = aStack.Count(); if (size > 0) { returnValue = (aStack.ElementAt(size-1) != NS_REINTERPRET_CAST(void*, PR_FALSE)); aStack.RemoveElementAt(size-1); } return returnValue; } NS_IMETHODIMP nsPlainTextSerializer::Initialize(nsAString* aOutString, PRUint32 aFlags, PRUint32 aWrapCol) { nsresult rv = Init(aFlags, aWrapCol, nsnull, PR_FALSE); NS_ENSURE_SUCCESS(rv, rv); // XXX This is wrong. It violates XPCOM string ownership rules. // We're only getting away with this because instances of this // class are restricted to single function scope. 
mOutputString = aOutString; return NS_OK; } NS_IMETHODIMP nsPlainTextSerializer::AppendText(nsIDOMText* aText, PRInt32 aStartOffset, PRInt32 aEndOffset, nsAString& aStr) { if (mIgnoreAboveIndex != (PRUint32)kNotFound) { return NS_OK; } NS_ASSERTION(aStartOffset >= 0, "Negative start offset for text fragment!"); if ( aStartOffset < 0 ) return NS_ERROR_INVALID_ARG; NS_ENSURE_ARG(aText); nsresult rv = NS_OK; PRInt32 length = 0; nsAutoString textstr; nsCOMPtr content = do_QueryInterface(aText); if (!content) return NS_ERROR_FAILURE; const nsTextFragment* frag = content->Text(); if (frag) { PRInt32 endoffset = (aEndOffset == -1) ? frag->GetLength() : aEndOffset; NS_ASSERTION(aStartOffset <= endoffset, "A start offset is beyond the end of the text fragment!"); length = endoffset - aStartOffset; if (length <= 0) { return NS_OK; } if (frag->Is2b()) { textstr.Assign(frag->Get2b() + aStartOffset, length); } else { textstr.AssignWithConversion(frag->Get1b()+aStartOffset, length); } } mOutputString = &aStr; // We have to split the string across newlines // to match parser behavior PRInt32 start = 0; PRInt32 offset = textstr.FindCharInSet("\n\r"); while (offset != kNotFound) { if(offset>start) { // Pass in the line rv = DoAddLeaf(nsnull, eHTMLTag_text, Substring(textstr, start, offset-start)); if (NS_FAILED(rv)) break; } // Pass in a newline rv = DoAddLeaf(nsnull, eHTMLTag_newline, mLineBreak); if (NS_FAILED(rv)) break; start = offset+1; offset = textstr.FindCharInSet("\n\r", start); } // Consume the last bit of the string if there's any left if (NS_SUCCEEDED(rv) && start < length) { if (start) { rv = DoAddLeaf(nsnull, eHTMLTag_text, Substring(textstr, start, length-start)); } else { rv = DoAddLeaf(nsnull, eHTMLTag_text, textstr); } } mOutputString = nsnull; return rv; } NS_IMETHODIMP nsPlainTextSerializer::AppendCDATASection(nsIDOMCDATASection* aCDATASection, PRInt32 aStartOffset, PRInt32 aEndOffset, nsAString& aStr) { return AppendText(aCDATASection, aStartOffset, aEndOffset, 
aStr); } NS_IMETHODIMP nsPlainTextSerializer::AppendElementStart(nsIDOMElement *aElement, PRBool aHasChildren, nsAString& aStr) { NS_ENSURE_ARG(aElement); mContent = do_QueryInterface(aElement); if (!mContent) return NS_ERROR_FAILURE; nsresult rv; PRInt32 id = GetIdForContent(mContent); PRBool isContainer = IsContainer(id); mOutputString = &aStr; if (isContainer) { rv = DoOpenContainer(nsnull, id); } else { nsAutoString empty; rv = DoAddLeaf(nsnull, id, empty); } mContent = 0; mOutputString = nsnull; if (!mInHead && id == eHTMLTag_head) mInHead = PR_TRUE; return rv; } NS_IMETHODIMP nsPlainTextSerializer::AppendElementEnd(nsIDOMElement *aElement, nsAString& aStr) { NS_ENSURE_ARG(aElement); mContent = do_QueryInterface(aElement); if (!mContent) return NS_ERROR_FAILURE; nsresult rv; PRInt32 id = GetIdForContent(mContent); PRBool isContainer = IsContainer(id); mOutputString = &aStr; rv = NS_OK; if (isContainer) { rv = DoCloseContainer(id); } mContent = 0; mOutputString = nsnull; if (mInHead && id == eHTMLTag_head) mInHead = PR_FALSE; return rv; } NS_IMETHODIMP nsPlainTextSerializer::Flush(nsAString& aStr) { mOutputString = &aStr; FlushLine(); mOutputString = nsnull; return NS_OK; } NS_IMETHODIMP nsPlainTextSerializer::AppendDocumentStart(nsIDOMDocument *aDocument, nsAString& aStr) { return NS_OK; } NS_IMETHODIMP nsPlainTextSerializer::OpenContainer(const nsIParserNode& aNode) { PRInt32 type = aNode.GetNodeType(); return DoOpenContainer(&aNode, type); } NS_IMETHODIMP nsPlainTextSerializer::CloseContainer(const nsHTMLTag aTag) { return DoCloseContainer(aTag); } NS_IMETHODIMP nsPlainTextSerializer::AddHeadContent(const nsIParserNode& aNode) { if (eHTMLTag_title == aNode.GetNodeType()) { // XXX collect the skipped content return NS_OK; } OpenHead(aNode); nsresult rv = AddLeaf(aNode); CloseHead(); return rv; } NS_IMETHODIMP nsPlainTextSerializer::AddLeaf(const nsIParserNode& aNode) { if (mIgnoreAboveIndex != (PRUint32)kNotFound) { return NS_OK; } eHTMLTags type = 
(eHTMLTags)aNode.GetNodeType(); const nsAString& text = aNode.GetText(); if ((type == eHTMLTag_text) || (type == eHTMLTag_whitespace) || (type == eHTMLTag_newline)) { // Copy the text out, stripping out CRs nsAutoString str; PRUint32 length; str.SetCapacity(text.Length()); nsReadingIterator srcStart, srcEnd; length = nsContentUtils::CopyNewlineNormalizedUnicodeTo(text.BeginReading(srcStart), text.EndReading(srcEnd), str); str.SetLength(length); return DoAddLeaf(&aNode, type, str); } else { return DoAddLeaf(&aNode, type, text); } } NS_IMETHODIMP nsPlainTextSerializer::OpenHTML(const nsIParserNode& aNode) { return OpenContainer(aNode); } NS_IMETHODIMP nsPlainTextSerializer::CloseHTML() { return CloseContainer(eHTMLTag_html); } NS_IMETHODIMP nsPlainTextSerializer::OpenHead(const nsIParserNode& aNode) { mInHead = PR_TRUE; return NS_OK; } NS_IMETHODIMP nsPlainTextSerializer::CloseHead() { mInHead = PR_FALSE; return NS_OK; } NS_IMETHODIMP nsPlainTextSerializer::OpenBody(const nsIParserNode& aNode) { return OpenContainer(aNode); } NS_IMETHODIMP nsPlainTextSerializer::CloseBody() { return CloseContainer(eHTMLTag_body); } NS_IMETHODIMP nsPlainTextSerializer::OpenForm(const nsIParserNode& aNode) { return OpenContainer(aNode); } NS_IMETHODIMP nsPlainTextSerializer::CloseForm() { return CloseContainer(eHTMLTag_form); } NS_IMETHODIMP nsPlainTextSerializer::OpenMap(const nsIParserNode& aNode) { return OpenContainer(aNode); } NS_IMETHODIMP nsPlainTextSerializer::CloseMap() { return CloseContainer(eHTMLTag_map); } NS_IMETHODIMP nsPlainTextSerializer::OpenFrameset(const nsIParserNode& aNode) { return OpenContainer(aNode); } NS_IMETHODIMP nsPlainTextSerializer::CloseFrameset() { return CloseContainer(eHTMLTag_frameset); } NS_IMETHODIMP nsPlainTextSerializer::IsEnabled(PRInt32 aTag, PRBool* aReturn) { nsHTMLTag theHTMLTag = nsHTMLTag(aTag); if (theHTMLTag == eHTMLTag_script) { *aReturn = !(mFlags & nsIDocumentEncoder::OutputNoScriptContent); } else if (theHTMLTag == 
eHTMLTag_frameset) { *aReturn = !(mFlags & nsIDocumentEncoder::OutputNoFramesContent); } else { *aReturn = PR_FALSE; } return NS_OK; } /** * aNode may be null when we're working with the DOM, but then mContent is * useable instead. */ nsresult nsPlainTextSerializer::DoOpenContainer(const nsIParserNode* aNode, PRInt32 aTag) { if (mFlags & nsIDocumentEncoder::OutputRaw) { // Raw means raw. Don't even think about doing anything fancy // here like indenting, adding line breaks or any other // characters such as list item bullets, quote characters // around , etc. I mean it! Don't make me smack you! return NS_OK; } eHTMLTags type = (eHTMLTags)aTag; if (mTagStackIndex < TagStackSize) { mTagStack[mTagStackIndex++] = type; } if (mIgnoreAboveIndex != (PRUint32)kNotFound) { return NS_OK; } // Reset this so that

doesn't affect the whitespace // above random

s below it.
  mHasWrittenCiteBlockquote = mHasWrittenCiteBlockquote && aTag == eHTMLTag_pre;

  PRBool isInCiteBlockquote = PR_FALSE;

  // XXX special-case  so that we don't add additional
  // newlines before the text.
  if (aTag == eHTMLTag_blockquote) {
    nsAutoString value;
    nsresult rv = GetAttributeValue(aNode, nsHTMLAtoms::type, value);
    isInCiteBlockquote = NS_SUCCEEDED(rv) && value.EqualsIgnoreCase("cite");
  }

  if (mLineBreakDue && !isInCiteBlockquote)
    EnsureVerticalSpace(mFloatingLines);

  // Check if this tag's content that should not be output
  if ((type == eHTMLTag_noscript &&
       !(mFlags & nsIDocumentEncoder::OutputNoScriptContent)) ||
      ((type == eHTMLTag_iframe || type == eHTMLTag_noframes) &&
       !(mFlags & nsIDocumentEncoder::OutputNoFramesContent))) {
    // Ignore everything that follows the current tag in 
    // question until a matching end tag is encountered.
    mIgnoreAboveIndex = mTagStackIndex - 1;
    return NS_OK;
  }

  if (type == eHTMLTag_body) {
    // Try to figure out here whether we have a
    // preformatted style attribute.
    //
    // Trigger on the presence of a "-moz-pre-wrap" in the
    // style attribute. That's a very simplistic way to do
    // it, but better than nothing.
    // Also set mWrapColumn to the value given there
    // (which arguably we should only do if told to do so).
    nsAutoString style;
    PRInt32 whitespace;
    if(NS_SUCCEEDED(GetAttributeValue(aNode, nsHTMLAtoms::style, style)) &&
       (kNotFound != (whitespace = style.Find("white-space:")))) {

      if (kNotFound != style.Find("pre-wrap", PR_TRUE, whitespace)) {
#ifdef DEBUG_preformatted
        printf("Set mPreFormatted based on style moz-pre-wrap\n");
#endif
        mPreFormatted = PR_TRUE;
        PRInt32 widthOffset = style.Find("width:");
        if (widthOffset >= 0) {
          // We have to search for the ch before the semicolon,
          // not for the semicolon itself, because nsString::ToInteger()
          // considers 'c' to be a valid numeric char (even if radix=10)
          // but then gets confused if it sees it next to the number
          // when the radix specified was 10, and returns an error code.
          PRInt32 semiOffset = style.Find("ch", widthOffset+6);
          PRInt32 length = (semiOffset > 0 ? semiOffset - widthOffset - 6
                            : style.Length() - widthOffset);
          nsAutoString widthstr;
          style.Mid(widthstr, widthOffset+6, length);
          PRInt32 err;
          PRInt32 col = widthstr.ToInteger(&err);

          if (NS_SUCCEEDED(err)) {
            mWrapColumn = (PRUint32)col;
#ifdef DEBUG_preformatted
            printf("Set wrap column to %d based on style\n", mWrapColumn);
#endif
          }
        }
      }
      else if (kNotFound != style.Find("pre", PR_TRUE, whitespace)) {
#ifdef DEBUG_preformatted
        printf("Set mPreFormatted based on style pre\n");
#endif
        mPreFormatted = PR_TRUE;
        mWrapColumn = 0;
      }
    } 
    else {
      mPreFormatted = PR_FALSE;
    }

    return NS_OK;
  }

  if (!DoOutput()) {
    return NS_OK;
  }

  if (type == eHTMLTag_p)
    EnsureVerticalSpace(1);
  else if (type == eHTMLTag_pre) {
    if (GetLastBool(mIsInCiteBlockquote))
      EnsureVerticalSpace(0);
    else if (mHasWrittenCiteBlockquote) {
      EnsureVerticalSpace(0);
      mHasWrittenCiteBlockquote = PR_FALSE;
    }
    else
      EnsureVerticalSpace(1);
  }
  else if (type == eHTMLTag_tr) {
    PushBool(mHasWrittenCellsForRow, PR_FALSE);
  }
  else if (type == eHTMLTag_td || type == eHTMLTag_th) {
    // We must make sure that the content of two table cells get a
    // space between them.

    // To make the separation between cells most obvious and
    // importable, we use a TAB.
    if (GetLastBool(mHasWrittenCellsForRow)) {
      // Bypass |Write| so that the TAB isn't compressed away.
      AddToLine(NS_LITERAL_STRING("\t").get(), 1);
      mInWhitespace = PR_TRUE;
    }
    else if (mHasWrittenCellsForRow.Count() == 0) {
      // We don't always see a  (nor a ) before the  if we're
      // copying part of a table
      PushBool(mHasWrittenCellsForRow, PR_TRUE); // will never be popped
    }
    else {
      SetLastBool(mHasWrittenCellsForRow, PR_TRUE);
    }
  }
  else if (type == eHTMLTag_ul) {
    // Indent here to support nested lists, which aren't included in li :-(
    EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0);
         // Must end the current line before we change indention
    mIndent += kIndentSizeList;
    mULCount++;
  }
  else if (type == eHTMLTag_ol) {
    EnsureVerticalSpace(mULCount + mOLStackIndex == 0 ? 1 : 0);
    // Must end the current line before we change indention
    if (mOLStackIndex < OLStackSize) {
      nsAutoString startAttr;
      PRInt32 startVal = 1;
      if(NS_SUCCEEDED(GetAttributeValue(aNode, nsHTMLAtoms::start, startAttr))){
        PRInt32 rv = 0;
        startVal = startAttr.ToInteger(&rv);
        if (NS_FAILED(rv))
          startVal = 1;
      }
      mOLStack[mOLStackIndex++] = startVal;
    }
    mIndent += kIndentSizeList;  // see ul
  }
  else if (type == eHTMLTag_li) {
    if (mTagStackIndex > 1 && IsInOL()) {
      if (mOLStackIndex > 0) {
        nsAutoString valueAttr;
        if(NS_SUCCEEDED(GetAttributeValue(aNode, nsHTMLAtoms::value, valueAttr))){
          PRInt32 rv = 0;
          PRInt32 valueAttrVal = valueAttr.ToInteger(&rv);
          if (NS_SUCCEEDED(rv))
            mOLStack[mOLStackIndex-1] = valueAttrVal;
        }
        // This is what nsBulletFrame does for OLs:
        mInIndentString.AppendInt(mOLStack[mOLStackIndex-1]++, 10);
      }
      else {
        mInIndentString.Append(PRUnichar('#'));
      }

      mInIndentString.Append(PRUnichar('.'));

    }
    else {
      static char bulletCharArray[] = "*o+#";
      PRUint32 index = mULCount > 0 ? (mULCount - 1) : 3;
      char bulletChar = bulletCharArray[index % 4];
      mInIndentString.Append(PRUnichar(bulletChar));
    }
    
    mInIndentString.Append(PRUnichar(' '));
  }
  else if (type == eHTMLTag_dl) {
    EnsureVerticalSpace(1);
  }
  else if (type == eHTMLTag_dt) {
    EnsureVerticalSpace(0);
  }
  else if (type == eHTMLTag_dd) {
    EnsureVerticalSpace(0);
    mIndent += kIndentSizeDD;
  }
  else if (type == eHTMLTag_span) {
    ++mSpanLevel;
  }
  else if (type == eHTMLTag_blockquote) {
    // Push
    PushBool(mIsInCiteBlockquote, isInCiteBlockquote);
    if (isInCiteBlockquote) {
      EnsureVerticalSpace(0);
      mCiteQuoteLevel++;
    }
    else {
      EnsureVerticalSpace(1);
      mIndent += kTabSize; // Check for some maximum value?
    }
  }

  // Else make sure we'll separate block level tags,
  // even if we're about to leave, before doing any other formatting.
  else if (IsBlockLevel(aTag)) {
    EnsureVerticalSpace(0);
  }

  //////////////////////////////////////////////////////////////
  if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) {
    return NS_OK;
  }
  //////////////////////////////////////////////////////////////
  // The rest of this routine is formatted output stuff,
  // which we should skip if we're not formatted:
  //////////////////////////////////////////////////////////////

  // Push on stack
  PRBool currentNodeIsConverted = IsCurrentNodeConverted(aNode);
  PushBool(mCurrentNodeIsConverted, currentNodeIsConverted);

  if (type == eHTMLTag_h1 || type == eHTMLTag_h2 ||
      type == eHTMLTag_h3 || type == eHTMLTag_h4 ||
      type == eHTMLTag_h5 || type == eHTMLTag_h6)
  {
    EnsureVerticalSpace(2);
    if (mHeaderStrategy == 2) {  // numbered
      mIndent += kIndentSizeHeaders;
      // Caching
      PRInt32 level = HeaderLevel(type);
      // Increase counter for current level
      mHeaderCounter[level]++;
      // Reset all lower levels
      PRInt32 i;

      for (i = level + 1; i <= 6; i++) {
        mHeaderCounter[i] = 0;
      }

      // Construct numbers
      nsAutoString leadup;
      for (i = 1; i <= level; i++) {
        leadup.AppendInt(mHeaderCounter[i]);
        leadup.Append(PRUnichar('.'));
      }
      leadup.Append(PRUnichar(' '));
      Write(leadup);
    }
    else if (mHeaderStrategy == 1) { // indent increasingly
      mIndent += kIndentSizeHeaders;
      for (PRInt32 i = HeaderLevel(type); i > 1; i--) {
           // for h(x), run x-1 times
        mIndent += kIndentIncrementHeaders;
      }
    }
  }
  else if (type == eHTMLTag_a && !currentNodeIsConverted) {
    nsAutoString url;
    if (NS_SUCCEEDED(GetAttributeValue(aNode, nsHTMLAtoms::href, url))
        && !url.IsEmpty()) {
      mURL = url;
    }
  }
  else if (type == eHTMLTag_q) {
    Write(NS_LITERAL_STRING("\""));
  }
  else if (type == eHTMLTag_sup && mStructs && !currentNodeIsConverted) {
    Write(NS_LITERAL_STRING("^"));
  }
  else if (type == eHTMLTag_sub && mStructs && !currentNodeIsConverted) { 
    Write(NS_LITERAL_STRING("_"));
  }
  else if (type == eHTMLTag_code && mStructs && !currentNodeIsConverted) {
    Write(NS_LITERAL_STRING("|"));
  }
  else if ((type == eHTMLTag_strong || type == eHTMLTag_b)
           && mStructs && !currentNodeIsConverted) {
    Write(NS_LITERAL_STRING("*"));
  }
  else if ((type == eHTMLTag_em || type == eHTMLTag_i)
           && mStructs && !currentNodeIsConverted) {
    Write(NS_LITERAL_STRING("/"));
  }
  else if (type == eHTMLTag_u && mStructs && !currentNodeIsConverted) {
    Write(NS_LITERAL_STRING("_"));
  }

  return NS_OK;
}

/**
 * Handles the closing of a container element; the counterpart of
 * DoOpenContainer().
 *
 * Order of work: (1) nothing at all in OutputRaw mode; (2) pop the tag
 * stack; (3) swallow the close tag while inside an ignored subtree, and
 * leave that subtree when the tag that started it is closed; (4) per-tag
 * block layout (line flushes, pending vertical space, undoing indentation
 * and list counters); (5) only in OutputFormatted mode, undo header
 * indentation and emit closing inline markers / the link URL.
 *
 * The list counters are only decremented while they are positive. The
 * open path pushes an OL entry only while fewer than OLStackSize are in
 * use, so deeply nested or unbalanced markup can produce more closes than
 * recorded opens; decrementing past zero would corrupt the index later
 * used to address mOLStack when an <li> is opened.
 *
 * @param aTag the element's eHTMLTags value.
 * @return always NS_OK.
 */
nsresult
nsPlainTextSerializer::DoCloseContainer(PRInt32 aTag)
{
  if (mFlags & nsIDocumentEncoder::OutputRaw) {
    // Raw means raw.  Don't even think about doing anything fancy
    // here like indenting, adding line breaks or any other
    // characters such as list item bullets, quote characters
    // around <q>, etc.  I mean it!  Don't make me smack you!

    return NS_OK;
  }

  if (mTagStackIndex > 0) {
    --mTagStackIndex;
  }

  if (mTagStackIndex >= mIgnoreAboveIndex) {
    if (mTagStackIndex == mIgnoreAboveIndex) {
      // We're dealing with the close tag whose matching
      // open tag had set the mIgnoreAboveIndex value.
      // Reset mIgnoreAboveIndex before discarding this tag.
      mIgnoreAboveIndex = (PRUint32)kNotFound;
    }
    return NS_OK;
  }

  eHTMLTags type = (eHTMLTags)aTag;
  // End current line if we're ending a block level tag
  if((type == eHTMLTag_body) || (type == eHTMLTag_html)) {
    // We want the output to end with a new line,
    // but in preformatted areas like text fields,
    // we can't emit newlines that weren't there.
    // So add the newline only in the case of formatted output.
    if (mFlags & nsIDocumentEncoder::OutputFormatted) {
      EnsureVerticalSpace(0);
    }
    else {
      FlushLine();
    }
    // We won't want to do anything with these in formatted mode either,
    // so just return now:
    return NS_OK;
  }
  else if (type == eHTMLTag_tr) {
    PopBool(mHasWrittenCellsForRow);
    // Should always end a line, but get no more whitespace
    if (mFloatingLines < 0)
      mFloatingLines = 0;
    mLineBreakDue = PR_TRUE;
  } 
  else if ((type == eHTMLTag_li) ||
           (type == eHTMLTag_dt)) {
    // Items that should always end a line, but get no more whitespace
    if (mFloatingLines < 0)
      mFloatingLines = 0;
    mLineBreakDue = PR_TRUE;
  } 
  else if (type == eHTMLTag_pre) {
    mFloatingLines = GetLastBool(mIsInCiteBlockquote) ? 0 : 1;
    mLineBreakDue = PR_TRUE;
  }
  else if (type == eHTMLTag_ul) {
    FlushLine();
    mIndent -= kIndentSizeList;
    // Never count below zero on an unmatched </ul>.
    if (mULCount > 0) {
      --mULCount;
    }
    if (mULCount + mOLStackIndex == 0) {
      mFloatingLines = 1;
      mLineBreakDue = PR_TRUE;
    }
  }
  else if (type == eHTMLTag_ol) {
    FlushLine(); // Doing this after decreasing OLStackIndex would be wrong.
    mIndent -= kIndentSizeList;
    // Never pop an empty OL stack: the open path stops pushing at
    // OLStackSize, so closes can outnumber recorded opens.
    if (mOLStackIndex > 0) {
      mOLStackIndex--;
    }
    if (mULCount + mOLStackIndex == 0) {
      mFloatingLines = 1;
      mLineBreakDue = PR_TRUE;
    }
  }  
  else if (type == eHTMLTag_dl) {
    mFloatingLines = 1;
    mLineBreakDue = PR_TRUE;
  }
  else if (type == eHTMLTag_dd) {
    FlushLine();
    mIndent -= kIndentSizeDD;
  }
  else if (type == eHTMLTag_span) {
    --mSpanLevel;
  }
  else if (type == eHTMLTag_div) {
    if (mFloatingLines < 0)
      mFloatingLines = 0;
    mLineBreakDue = PR_TRUE;
  }
  else if (type == eHTMLTag_blockquote) {
    FlushLine();    // Is this needed?

    // Pop
    PRBool isInCiteBlockquote = PopBool(mIsInCiteBlockquote);

    if (isInCiteBlockquote) {
      mCiteQuoteLevel--;
      mFloatingLines = 0;
      mHasWrittenCiteBlockquote = PR_TRUE;
    }
    else {
      mIndent -= kTabSize;
      mFloatingLines = 1;
    }
    mLineBreakDue = PR_TRUE;
  }
  else if (IsBlockLevel(aTag)
           && type != eHTMLTag_script
           && type != eHTMLTag_doctypeDecl
           && type != eHTMLTag_markupDecl) {
    // All other blocks get 1 vertical space after them
    // in formatted mode, otherwise 0.
    // This is hard. Sometimes 0 is a better number, but
    // how to know?
    if (mFlags & nsIDocumentEncoder::OutputFormatted)
      EnsureVerticalSpace(1);
    else {
      if (mFloatingLines < 0)
        mFloatingLines = 0;
      mLineBreakDue = PR_TRUE;
    }
  }

  //////////////////////////////////////////////////////////////
  if (!(mFlags & nsIDocumentEncoder::OutputFormatted)) {
    return NS_OK;
  }
  //////////////////////////////////////////////////////////////
  // The rest of this routine is formatted output stuff,
  // which we should skip if we're not formatted:
  //////////////////////////////////////////////////////////////

  // Pop the currentConverted stack
  PRBool currentNodeIsConverted = PopBool(mCurrentNodeIsConverted);
  
  if (type == eHTMLTag_h1 || type == eHTMLTag_h2 ||
      type == eHTMLTag_h3 || type == eHTMLTag_h4 ||
      type == eHTMLTag_h5 || type == eHTMLTag_h6) {
    
    if (mHeaderStrategy) {  /*numbered or indent increasingly*/ 
      mIndent -= kIndentSizeHeaders;
    }
    if (mHeaderStrategy == 1 /*indent increasingly*/ ) {
      for (PRInt32 i = HeaderLevel(type); i > 1; i--) {
           // for h(x), run x-1 times
        mIndent -= kIndentIncrementHeaders;
      }
    }
    EnsureVerticalSpace(1);
  }
  else if (type == eHTMLTag_a && !currentNodeIsConverted && !mURL.IsEmpty()) {
    // Append the link target remembered at open time as " <url>".
    nsAutoString temp; 
    temp.AssignLiteral(" <");
    temp += mURL;
    temp.Append(PRUnichar('>'));
    Write(temp);
    mURL.Truncate();
  }
  else if (type == eHTMLTag_q) {
    Write(NS_LITERAL_STRING("\""));
  }
  else if ((type == eHTMLTag_sup || type == eHTMLTag_sub) 
           && mStructs && !currentNodeIsConverted) {
    Write(kSpace);
  }
  else if (type == eHTMLTag_code && mStructs && !currentNodeIsConverted) {
    Write(NS_LITERAL_STRING("|"));
  }
  else if ((type == eHTMLTag_strong || type == eHTMLTag_b)
           && mStructs && !currentNodeIsConverted) {
    Write(NS_LITERAL_STRING("*"));
  }
  else if ((type == eHTMLTag_em || type == eHTMLTag_i)
           && mStructs && !currentNodeIsConverted) {
    Write(NS_LITERAL_STRING("/"));
  }
  else if (type == eHTMLTag_u && mStructs && !currentNodeIsConverted) {
    Write(NS_LITERAL_STRING("_"));
  }

  return NS_OK;
}

/**
 * aNode may be null when we're working with the DOM, but then mContent is
 * useable instead.
 */
nsresult
nsPlainTextSerializer::DoAddLeaf(const nsIParserNode *aNode, PRInt32 aTag, 
                                 const nsAString& aText)
{
  // If we don't want any output, just return
  if (!DoOutput()) {
    return NS_OK;
  }

  if (aTag != eHTMLTag_whitespace && aTag != eHTMLTag_newline) {
    // Make sure to reset this, since it's no longer true.
    mHasWrittenCiteBlockquote = PR_FALSE;
  }
  
  if (mLineBreakDue)
    EnsureVerticalSpace(mFloatingLines);

  eHTMLTags type = (eHTMLTags)aTag;
  
  if ((mTagStackIndex > 1 &&
       mTagStack[mTagStackIndex-2] == eHTMLTag_select) ||
      (mTagStackIndex > 0 &&
        mTagStack[mTagStackIndex-1] == eHTMLTag_select)) {
    // Don't output the contents of SELECT elements;
    // Might be nice, eventually, to output just the selected element.
    // Read more in bug 31994.
    return NS_OK;
  }
  else if (mTagStackIndex > 0 && mTagStack[mTagStackIndex-1] == eHTMLTag_script) {
    // Don't output the contents of