/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- * * ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the * License. * * The Original Code is Mozilla Communicator client code. * * The Initial Developer of the Original Code is * Netscape Communications Corporation. * Portions created by the Initial Developer are Copyright (C) 1998 * the Initial Developer. All Rights Reserved. * * Contributor(s): * * Alternatively, the contents of this file may be used under the terms of * either of the GNU General Public License Version 2 or later (the "GPL"), * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), * in which case the provisions of the GPL or the LGPL are applicable instead * of those above. If you wish to allow use of your version of this file only * under the terms of either the GPL or the LGPL, and not to allow others to * use your version of this file under the terms of the MPL, indicate your * decision by deleting the provisions above and replace them with the notice * and other provisions required by the GPL or the LGPL. If you do not delete * the provisions above, a recipient may use your version of this file under * the terms of any one of the MPL, the GPL or the LGPL. * * ***** END LICENSE BLOCK ***** * This Original Code has been modified by IBM Corporation. Modifications made by IBM * described herein are Copyright (c) International Business Machines Corporation, 2000. 
 * Modifications to Mozilla code or documentation identified per MPL Section 3.3
 *
 * Date         Modified by     Description of modification
 * 04/20/2000   IBM Corp.       OS/2 VisualAge build.
 */

#include "nscore.h"
#include "nsCOMPtr.h"
#include "nsWebCrawler.h"
#include "nsViewerApp.h"
#include "nsIContentViewer.h"
#include "nsIDocumentViewer.h"
#include "nsIDocument.h"
#include "nsIContent.h"
#include "nsIPresShell.h"
#include "nsPresContext.h"
#include "nsIViewManager.h"
#include "nsIFrame.h"
#include "nsIFrameDebug.h"
#include "nsIURL.h"
#include "nsNetUtil.h"
#include "nsITimer.h"
#include "nsIAtom.h"
#include "nsIFrameUtil.h"
#include "nsIComponentManager.h"
#include "nsLayoutCID.h"
#include "nsRect.h"
#include "plhash.h"
#include "nsINameSpaceManager.h"
#include "nsXPIDLString.h"
#include "nsReadableUtils.h"
#include "nsIServiceManager.h"
#include "nsIEventQueueService.h"
#include "nsIEventQueue.h"
#include "prprf.h"
#include "nsIContentViewer.h"
#include "nsIContentViewerFile.h"
#include "nsIDocShell.h"
#include "nsIWebNavigation.h"
#include "nsIWebProgress.h"

static NS_DEFINE_IID(kFrameUtilCID, NS_FRAME_UTIL_CID);

// Hash an atom by its pointer identity (atoms are interned, so pointer
// equality is atom equality). Note the pointer is truncated to 32 bits.
static PLHashNumber
HashKey(nsIAtom* key)
{
  return NS_PTR_TO_INT32(key);
}

// Atoms are interned: pointer comparison suffices for equality.
static PRIntn
CompareKeys(nsIAtom* key1, nsIAtom* key2)
{
  return key1 == key2;
}

// A tiny map from nsIAtom* to opaque data, built on NSPR's PLHashTable.
// Put() addrefs the key atom; the destructor releases every stored key.
class AtomHashTable {
public:
  AtomHashTable();
  ~AtomHashTable();

  const void* Get(nsIAtom* aKey);
  const void* Put(nsIAtom* aKey, const void* aValue);
  const void* Remove(nsIAtom* aKey);

protected:
  PLHashTable* mTable;  // owned; keys are addref'd nsIAtom pointers
};

AtomHashTable::AtomHashTable()
{
  // 8 = initial bucket-count hint; default allocators (nsnull) are used.
  mTable = PL_NewHashTable(8, (PLHashFunction) HashKey,
                           (PLHashComparator) CompareKeys,
                           (PLHashComparator) nsnull,
                           nsnull, nsnull);
}

// Enumeration callback: release the addref'd key atom of each entry.
static PRIntn PR_CALLBACK
DestroyEntry(PLHashEntry *he, PRIntn i, void *arg)
{
  ((nsIAtom*)he->key)->Release();
  return HT_ENUMERATE_NEXT;
}

AtomHashTable::~AtomHashTable()
{
  // Release every key before tearing the table down.
  PL_HashTableEnumerateEntries(mTable, DestroyEntry, 0);
  PL_HashTableDestroy(mTable);
}

/**
 * Get the data associated with an Atom.
 */
const void*
AtomHashTable::Get(nsIAtom* aKey)
{
  PRInt32 hashCode = NS_PTR_TO_INT32(aKey);
  PLHashEntry** hep = PL_HashTableRawLookup(mTable, hashCode, aKey);
  PLHashEntry* he = *hep;
  if (nsnull != he) {
    return he->value;
  }
  // Not present.
  return nsnull;
}

/**
 * Create an association between an Atom and some data. This call
 * returns an old association if there was one (or nsnull if there
 * wasn't).
 */
const void*
AtomHashTable::Put(nsIAtom* aKey, const void* aData)
{
  PRInt32 hashCode = NS_PTR_TO_INT32(aKey);
  PLHashEntry** hep = PL_HashTableRawLookup(mTable, hashCode, aKey);
  PLHashEntry* he = *hep;
  if (nsnull != he) {
    // Key already present: swap in the new value, hand back the old one.
    const void* oldValue = he->value;
    he->value = NS_CONST_CAST(void*, aData);
    return oldValue;
  }
  // New key: the table keeps a reference to the atom (released in the
  // destructor's DestroyEntry pass).
  NS_ADDREF(aKey);
  PL_HashTableRawAdd(mTable, hep, hashCode, aKey,
                     NS_CONST_CAST(void*, aData));
  return nsnull;
}

/**
 * Remove an association between an Atom and its data. This returns
 * the old associated data.
 * NOTE(review): this does not Release() the key atom that Put()
 * addref'd — callers relying on Remove() alone would leak the atom;
 * in this file Remove() is never called, so the dtor cleans up.
 */
const void*
AtomHashTable::Remove(nsIAtom* aKey)
{
  PRInt32 hashCode = NS_PTR_TO_INT32(aKey);
  PLHashEntry** hep = PL_HashTableRawLookup(mTable, hashCode, aKey);
  PLHashEntry* he = *hep;
  void* oldValue = nsnull;
  if (nsnull != he) {
    oldValue = he->value;
    PL_HashTableRawRemove(mTable, hep, he);
  }
  return oldValue;
}

//----------------------------------------------------------------------

// Crawler construction: initialize flags/counters and intern the tag and
// attribute atoms used when scanning documents for links.
nsWebCrawler::nsWebCrawler(nsViewerApp* aViewer)
  : mHaveURLList(PR_FALSE),
    mQueuedLoadURLs(0)
{
  mBrowser = nsnull;
  mViewer = aViewer;
  mCrawl = PR_FALSE;
  mJiggleLayout = PR_FALSE;
  mPostExit = PR_FALSE;
  mDelay = 200 /*msec*/;  // XXXwaterson arbitrary default
  mMaxPages = -1;         // -1 == unlimited
  mRecord = nsnull;
  // Atoms for the elements/attributes that can reference further URLs.
  mLinkTag = do_GetAtom("a");
  mFrameTag = do_GetAtom("frame");
  mIFrameTag = do_GetAtom("iframe");
  mHrefAttr = do_GetAtom("href");
  mSrcAttr = do_GetAtom("src");
  mBaseHrefAttr = do_GetAtom("_base_href");
  mVisited = new AtomHashTable();
  mVerbose = nsnull;
  LL_I2L(mStartLoad, 0);
  mRegressing = PR_FALSE;
  mPrinterTestType = 0;
  mRegressionOutputLevel = 0; // full output
  mIncludeStyleInfo = PR_TRUE;
}

static
void FreeStrings(nsVoidArray& aArray) { PRInt32 i, n = aArray.Count(); for (i = 0; i < n; i++) { nsString* s = (nsString*) aArray.ElementAt(i); delete s; } aArray.Clear(); } nsWebCrawler::~nsWebCrawler() { FreeStrings(mSafeDomains); FreeStrings(mAvoidDomains); NS_IF_RELEASE(mBrowser); delete mVisited; } NS_IMPL_ISUPPORTS2(nsWebCrawler, nsIWebProgressListener, nsISupportsWeakReference) void nsWebCrawler::DumpRegressionData() { #ifdef NS_DEBUG nsCOMPtr docshell; mBrowser->GetDocShell(*getter_AddRefs(docshell)); if (! docshell) return; if (mOutputDir.Length() > 0) { nsCOMPtr shell = GetPresShell(docshell); if (!shell) return; if ( mPrinterTestType > 0 ) { nsCOMPtr viewer; docshell->GetContentViewer(getter_AddRefs(viewer)); if (viewer){ nsCOMPtr viewerFile = do_QueryInterface(viewer); if (viewerFile) { nsAutoString regressionFileName; FILE *fp = GetOutputFile(mLastURL, regressionFileName); switch (mPrinterTestType) { case 1: // dump print data to a file for regression testing viewerFile->Print(PR_TRUE, fp, nsnull); break; case 2: // visual printing tests, all go to the printer, no printer dialog viewerFile->Print(PR_TRUE, nsnull, nsnull); break; case 3: // visual printing tests, all go to the printer, with a printer dialog viewerFile->Print(PR_FALSE, nsnull, nsnull); break; default: break; } fclose(fp); if( mPrinterTestType == 1) { if (mRegressing) { PerformRegressionTest(regressionFileName); } else { fputs(NS_LossyConvertUCS2toASCII(regressionFileName).get(), stdout); printf(" - being written\n"); } } } } } else { nsIFrame* root = shell->GetRootFrame(); if (nsnull != root) { nsPresContext *presContext = shell->GetPresContext(); if (mOutputDir.Length() > 0) { nsAutoString regressionFileName; FILE *fp = GetOutputFile(mLastURL, regressionFileName); if (fp) { nsIFrameDebug* fdbg; if (NS_SUCCEEDED(root->QueryInterface(NS_GET_IID(nsIFrameDebug), (void**) &fdbg))) { fdbg->DumpRegressionData(presContext, fp, 0, mIncludeStyleInfo); } fclose(fp); if (mRegressing) { 
PerformRegressionTest(regressionFileName); } else { fputs(NS_LossyConvertUCS2toASCII(regressionFileName).get(), stdout); printf(" - being written\n"); } } else { nsCAutoString file; (void)mLastURL->GetPath(file); printf("could not open output file for %s\n", file.get()); } } else { nsIFrameDebug* fdbg; if (NS_SUCCEEDED(root->QueryInterface(NS_GET_IID(nsIFrameDebug), (void**) &fdbg))) { fdbg->DumpRegressionData(presContext, stdout, 0, mIncludeStyleInfo); } } } } } #endif } void nsWebCrawler::LoadNextURLCallback(nsITimer *aTimer, void *aClosure) { nsWebCrawler* self = (nsWebCrawler*) aClosure; // if we are doing printing regression tests, check to see // if we can print (a previous job is not printing) if (self->mPrinterTestType > 0) { nsCOMPtr docShell; self->mBrowser->GetDocShell(*getter_AddRefs(docShell)); if (docShell){ nsCOMPtr viewer; docShell->GetContentViewer(getter_AddRefs(viewer)); if (viewer){ nsCOMPtr viewerFile = do_QueryInterface(viewer); if (viewerFile) { PRBool printable; viewerFile->GetPrintable(&printable); if (PR_TRUE !=printable){ self->mTimer = do_CreateInstance("@mozilla.org/timer;1"); self->mTimer->InitWithFuncCallback(LoadNextURLCallback, self, self->mDelay, nsITimer::TYPE_ONE_SHOT); return; } } } } } self->DumpRegressionData(); self->LoadNextURL(PR_FALSE); } void nsWebCrawler::QueueExitCallback(nsITimer *aTimer, void *aClosure) { nsWebCrawler* self = (nsWebCrawler*) aClosure; self->DumpRegressionData(); self->QueueExit(); } // nsIWebProgressListener implementation NS_IMETHODIMP nsWebCrawler::OnStateChange(nsIWebProgress* aWebProgress, nsIRequest* aRequest, PRUint32 progressStateFlags, nsresult aStatus) { // Make sure that we're being notified for _our_ shell, and not some // subshell that's been created e.g. for an IFRAME. 
nsCOMPtr<nsIDocShell> docShell;
mBrowser->GetDocShell(*getter_AddRefs(docShell));
if (docShell) {
  nsCOMPtr<nsIWebProgress> progress = do_GetInterface(docShell);
  if (aWebProgress != progress)
    return NS_OK;
}

// Make sure that we're being notified for the whole document, not a
// sub-load.
if (! (progressStateFlags & nsIWebProgressListener::STATE_IS_DOCUMENT))
  return NS_OK;

if (progressStateFlags & nsIWebProgressListener::STATE_START) {
  // If the document load is starting, remember its URL as the last
  // URL we've loaded.
  nsCOMPtr<nsIChannel> channel(do_QueryInterface(aRequest));
  if (! channel) {
    NS_ERROR("no channel avail");
    return NS_ERROR_FAILURE;
  }
  nsCOMPtr<nsIURI> uri;
  channel->GetURI(getter_AddRefs(uri));
  mLastURL = uri;
}
//XXXwaterson are these really _not_ mutually exclusive?
// else
if ((progressStateFlags & nsIWebProgressListener::STATE_STOP) &&
    (aStatus == NS_OK)) {
  // If the document load is finishing, then wrap up and maybe load
  // some more URLs.
  nsresult rv;
  PRTime endLoadTime = PR_Now();

  nsCOMPtr<nsIURI> uri;
  nsCOMPtr<nsIChannel> channel = do_QueryInterface(aRequest);
  rv = channel->GetURI(getter_AddRefs(uri));
  if (NS_FAILED(rv)) return rv;

  // Ignore this notification unless its for the current url. That way
  // we skip over embedded webshell notifications (e.g. frame cells,
  // iframes, etc.)
nsCAutoString spec; uri->GetSpec(spec); PRTime delta, cvt, rounder; LL_I2L(cvt, 1000); LL_I2L(rounder, 499); LL_SUB(delta, endLoadTime, mStartLoad); LL_ADD(delta, delta, rounder); LL_DIV(delta, delta, cvt); printf("+++ %s: done loading (%lld msec)\n", spec.get(), delta); // Make sure the document bits make it to the screen at least once nsCOMPtr shell = GetPresShell(); if (shell) { // Force the presentation shell to update the display shell->FlushPendingNotifications(Flush_Display); if (mJiggleLayout) { nsRect r; mBrowser->GetContentBounds(r); nscoord oldWidth = r.width; while (r.width > 100) { r.width -= 10; mBrowser->SizeWindowTo(r.width, r.height, PR_FALSE, PR_FALSE); } while (r.width < oldWidth) { r.width += 10; mBrowser->SizeWindowTo(r.width, r.height, PR_FALSE, PR_FALSE); } } } if (mCrawl) { FindMoreURLs(); } mTimer = do_CreateInstance("@mozilla.org/timer;1"); if(mPrinterTestType>0){ mDelay = 5000; // printing needs more time to load, so give it plenty } else { mDelay = 200; } if ((0 < mQueuedLoadURLs) || (0 < mPendingURLs.Count())) { mTimer->InitWithFuncCallback(LoadNextURLCallback, this, mDelay, nsITimer::TYPE_ONE_SHOT); } else if (mPostExit) { mTimer->InitWithFuncCallback(QueueExitCallback, this, mDelay, nsITimer::TYPE_ONE_SHOT); } } return NS_OK; } NS_IMETHODIMP nsWebCrawler::OnProgressChange(nsIWebProgress *aWebProgress, nsIRequest *aRequest, PRInt32 aCurSelfProgress, PRInt32 aMaxSelfProgress, PRInt32 aCurTotalProgress, PRInt32 aMaxTotalProgress) { NS_NOTREACHED("notification excluded in AddProgressListener(...)"); return NS_OK; } NS_IMETHODIMP nsWebCrawler::OnLocationChange(nsIWebProgress* aWebProgress, nsIRequest* aRequest, nsIURI *location) { NS_NOTREACHED("notification excluded in AddProgressListener(...)"); return NS_OK; } NS_IMETHODIMP nsWebCrawler::OnStatusChange(nsIWebProgress* aWebProgress, nsIRequest* aRequest, nsresult aStatus, const PRUnichar* aMessage) { NS_NOTREACHED("notification excluded in AddProgressListener(...)"); return NS_OK; } 
NS_IMETHODIMP
nsWebCrawler::OnSecurityChange(nsIWebProgress *aWebProgress,
                               nsIRequest *aRequest,
                               PRUint32 state)
{
  NS_NOTREACHED("notification excluded in AddProgressListener(...)");
  return NS_OK;
}

// Open the regression-data output FILE* for aURL, returning the chosen
// file name in aOutputName.
// NOTE(review): the body of this function is visibly truncated — see the
// note inside; recover the lost text from version control.
FILE*
nsWebCrawler::GetOutputFile(nsIURI *aURL, nsString& aOutputName)
{
  static const char kDefaultOutputFileName[] = "test.txt";  // the default
  FILE *result = nsnull;
  if (nsnull!=aURL) {
    char *inputFileName;
    nsCAutoString file;
    (void)aURL->GetPath(file);
    NS_ConvertUTF8toUCS2 inputFileFullPath(file);
    PRInt32 fileNameOffset = inputFileFullPath.RFindChar('/');
    if (-1==fileNameOffset) {
      // No '/' in the path: fall back to the default output file name.
      inputFileName = new char[strlen(kDefaultOutputFileName) + 1];
      strcpy (inputFileName, kDefaultOutputFileName);
    }
    else {
      PRInt32 len = inputFileFullPath.Length() - fileNameOffset;
      inputFileName = new char[len + 1 + 20];
      char *c = inputFileName;
      for (PRInt32 i=fileNameOffset+1; i
      // NOTE(review): a large span of the original file is missing here.
      // Everything between this loop condition (which presumably read
      // "i < inputFileFullPath.Length(); ...") and the middle of
      // nsWebCrawler::Start() below appears to have been stripped, most
      // likely because text between '<' and '>' was treated as markup.
      // The lost span should contain the rest of GetOutputFile() (copying
      // the leaf name, setting aOutputName, fopen'ing 'result') plus the
      // head of Start() up to a "nsCOMPtr<nsIDocShell>" declaration whose
      // trailing "docShell" token is what follows. Do not ship as-is.
      docShell;
  mBrowser->GetDocShell(*getter_AddRefs(docShell));
  if (docShell) {
    // presumably nsCOMPtr<nsIWebProgress> — template argument stripped
    nsCOMPtr progress(do_GetInterface(docShell));
    if (progress) {
      // Only document-level STATE changes are delivered (see the
      // NS_NOTREACHED stubs above), then the first URL load is started.
      progress->AddProgressListener(this,
                                    nsIWebProgress::NOTIFY_STATE_DOCUMENT);
      LoadNextURL(PR_FALSE);
    }
  }
}

// Turn on link crawling (follow a/frame/iframe URLs in loaded pages).
void
nsWebCrawler::EnableCrawler()
{
  mCrawl = PR_TRUE;
}

// Maps ASCII 'A'-'Z' (65-90) to 'a'-'z' (97-122); every other byte maps to
// itself. Used below for ASCII-only case-insensitive comparisons.
static const unsigned char kLowerLookup[256] = {
  0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
  16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
  32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
  48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
  64,
  97,98,99,100,101,102,103,104,105,106,107,108,109,
  110,111,112,113,114,115,116,117,118,119,120,121,122,
  91, 92, 93, 94, 95, 96,
  97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
  112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
  192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
  208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
  224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
  240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
};

// Case-insensitive (ASCII-only, via kLowerLookup) test of whether aDomain
// ends with the aHostLen-character C string aHost.
static PRBool
EndsWith(const nsString& aDomain, const char* aHost, PRInt32 aHostLen)
{
  PRInt32 slen = aDomain.Length();
  if (slen < aHostLen) {
    return PR_FALSE;
  }
  // Compare the trailing aHostLen characters of aDomain against aHost.
  const PRUnichar* uc = aDomain.get();
  uc += slen - aHostLen;
  const PRUnichar* end = uc + aHostLen;
  while (uc < end) {
    unsigned char uch = (unsigned char) ((*uc++) & 0xff);
    unsigned char ch = (unsigned char) ((*aHost++) & 0xff);
    if (kLowerLookup[uch] != kLowerLookup[ch]) {
      return PR_FALSE;
    }
  }
  return PR_TRUE;
}

// Case-insensitive (ASCII-only, via kLowerLookup) test of whether s1
// starts with the C string s2.
static PRBool
StartsWith(const nsString& s1, const char* s2)
{
  PRInt32 s1len = s1.Length();
  PRInt32 s2len = strlen(s2);
  if (s1len < s2len) {
    return PR_FALSE;
  }
  const PRUnichar* uc = s1.get();
  const PRUnichar* end = uc + s2len;
  while (uc < end) {
    unsigned char uch = (unsigned char) ((*uc++) & 0xff);
    unsigned char ch = (unsigned char) ((*s2++) & 0xff);
    if (kLowerLookup[uch] != kLowerLookup[ch]) {
      return PR_FALSE;
    }
  }
  return PR_TRUE;
}

// Decide whether aURLSpec should be crawled: only http/ftp/file/resource
// schemes, never a domain in mAvoidDomains, and (if mSafeDomains is
// non-empty) only domains listed there.
PRBool
nsWebCrawler::OkToLoad(const nsString& aURLSpec)
{
  if (!StartsWith(aURLSpec, "http:") && !StartsWith(aURLSpec, "ftp:") &&
      !StartsWith(aURLSpec, "file:") &&
      !StartsWith(aURLSpec, "resource:")) {
    return PR_FALSE;
  }

  PRBool ok = PR_TRUE;
  nsIURI* url;
  nsresult rv;
  rv = NS_NewURI(&url, aURLSpec);
  if (NS_OK == rv) {
    nsCAutoString host;
    rv = url->GetHost(host);
    if (rv == NS_OK) {
      PRInt32 hostlen = host.Length();

      // Check domains to avoid
      PRInt32 i, n = mAvoidDomains.Count();
      for (i = 0; i < n; i++) {
        nsString* s = (nsString*) mAvoidDomains.ElementAt(i);
        if (s && EndsWith(*s, host.get(), hostlen)) {
          printf("Avoiding '");
          fputs(NS_LossyConvertUCS2toASCII(aURLSpec).get(), stdout);
          printf("'\n");
          // NOTE(review): early return leaks 'url' (NS_NewURI addref'd it
          // and NS_RELEASE(url) below is skipped on this path).
          return PR_FALSE;
        }
      }

      // Check domains to stay within
      n = mSafeDomains.Count();
      if (n == 0) {
        // If we don't care then all the domains that we aren't
        // avoiding are OK
        // NOTE(review): this early return also leaks 'url'.
        return PR_TRUE;
      }
      for
(i = 0; i < n; i++) { nsString* s = (nsString*) mSafeDomains.ElementAt(i); if (s && EndsWith(*s, host.get(), hostlen)) { return PR_TRUE; } } ok = PR_FALSE; } NS_RELEASE(url); } return ok; } void nsWebCrawler::RecordLoadedURL(const nsString& aURL) { if (nsnull != mRecord) { fputs(NS_LossyConvertUCS2toASCII(aURL).get(), mRecord); fputs("\n", mRecord); fflush(mRecord); } } void nsWebCrawler::FindURLsIn(nsIDocument* aDocument, nsIContent* aNode) { nsIAtom *atom = aNode->Tag(); if ((atom == mLinkTag) || (atom == mFrameTag) || (atom == mIFrameTag)) { // Get absolute url that tag targets nsAutoString base, src, absURLSpec; if (atom == mLinkTag) { aNode->GetAttr(kNameSpaceID_None, mHrefAttr, src); } else { aNode->GetAttr(kNameSpaceID_None, mSrcAttr, src); } nsresult rv; rv = NS_MakeAbsoluteURI(absURLSpec, src, aDocument->GetDocumentURI()); if (NS_OK == rv) { nsCOMPtr urlAtom = do_GetAtom(absURLSpec); if (0 == mVisited->Get(urlAtom)) { // Remember the URL as visited so that we don't go there again mVisited->Put(urlAtom, "visited"); if (OkToLoad(absURLSpec)) { mPendingURLs.AppendElement(new nsString(absURLSpec)); if (mVerbose) { printf("Adding '"); fputs(NS_LossyConvertUCS2toASCII(absURLSpec).get(), stdout); printf("'\n"); } } else { if (mVerbose) { printf("Skipping '"); fputs(NS_LossyConvertUCS2toASCII(absURLSpec).get(), stdout); printf("'\n"); } } } else { if (mVerbose) { printf("Already visited '"); fputs(NS_LossyConvertUCS2toASCII(absURLSpec).get(), stdout); printf("'\n"); } } } } PRUint32 i, n = aNode->GetChildCount(); for (i = 0; i < n; ++i) { FindURLsIn(aDocument, aNode->GetChildAt(i)); } } void nsWebCrawler::FindMoreURLs() { nsCOMPtr docShell; mBrowser->GetDocShell(*getter_AddRefs(docShell)); if (docShell) { nsCOMPtr cv; docShell->GetContentViewer(getter_AddRefs(cv)); if (cv) { nsCOMPtr docv = do_QueryInterface(cv); if (docv) { nsCOMPtr doc; docv->GetDocument(getter_AddRefs(doc)); if (doc) { nsIContent *root = doc->GetRootContent(); if (root) { FindURLsIn(doc, root); 
} } } } } } void nsWebCrawler::SetBrowserWindow(nsBrowserWindow* aWindow) { NS_IF_RELEASE(mBrowser); mBrowser = aWindow; NS_IF_ADDREF(mBrowser); } void nsWebCrawler::GetBrowserWindow(nsBrowserWindow** aWindow) { NS_IF_ADDREF(mBrowser); *aWindow = mBrowser; } void nsWebCrawler::LoadNextURL(PRBool aQueueLoad) { if ((mMaxPages < 0) || (mMaxPages > 0)) { while (0 != mPendingURLs.Count()) { nsString* url = NS_REINTERPRET_CAST(nsString*, mPendingURLs.ElementAt(0)); mPendingURLs.RemoveElementAt(0); if (nsnull != url) { if (OkToLoad(*url)) { RecordLoadedURL(*url); if (aQueueLoad) { // Call stop to cancel any pending URL Refreshes... /// webShell->Stop(); QueueLoadURL(*url); } else { mCurrentURL = *url; mStartLoad = PR_Now(); nsCOMPtr docShell; mBrowser->GetDocShell(*getter_AddRefs(docShell)); nsCOMPtr webNav(do_QueryInterface(docShell)); webNav->LoadURI(url->get(), nsIWebNavigation::LOAD_FLAGS_NONE, nsnull, nsnull, nsnull); } if (mMaxPages > 0) { --mMaxPages; } delete url; return; } delete url; } } } if (nsnull != mRecord) { fclose(mRecord); mRecord = nsnull; } } already_AddRefed nsWebCrawler::GetPresShell(nsIDocShell* aDocShell) { nsIPresShell* shell = nsnull; nsCOMPtr docShell(aDocShell); if (!docShell) { mBrowser->GetDocShell(*getter_AddRefs(docShell)); } if (docShell) { docShell->GetPresShell(&shell); } return shell; } static FILE* OpenRegressionFile(const nsString& aBaseName, const nsString& aOutputName) { nsAutoString a; a.Append(aBaseName); a.AppendLiteral("/"); a.Append(aOutputName); char* fn = ToNewCString(a); FILE* fp = fopen(fn, "r"); if (!fp) { printf("Unable to open regression data file %s\n", fn); } delete[] fn; return fp; } #define BUF_SIZE 1024 // Load up both data files (original and the one we just output) into // two independent xml content trees. Then compare them. 
void nsWebCrawler::PerformRegressionTest(const nsString& aOutputName) { // First load the trees nsIFrameUtil* fu; nsresult rv = CallCreateInstance(kFrameUtilCID, &fu); if (NS_FAILED(rv)) { printf("Can't find nsIFrameUtil implementation\n"); return; } FILE* f1 = OpenRegressionFile(mRegressionDir, aOutputName); if (!f1) { NS_RELEASE(fu); return; } FILE* f2 = OpenRegressionFile(mOutputDir, aOutputName); if (!f2) { fclose(f1); NS_RELEASE(fu); return; } rv = fu->CompareRegressionData(f1, f2,mRegressionOutputLevel); NS_RELEASE(fu); char dirName[BUF_SIZE]; char fileName[BUF_SIZE]; mOutputDir.ToCString(dirName, BUF_SIZE-1); aOutputName.ToCString(fileName, BUF_SIZE-1); printf("regression test %s%s %s\n", dirName, fileName, NS_SUCCEEDED(rv) ? "passed" : "failed"); } //---------------------------------------------------------------------- static NS_DEFINE_IID(kEventQueueServiceCID, NS_EVENTQUEUESERVICE_CID); static NS_DEFINE_IID(kIEventQueueServiceIID, NS_IEVENTQUEUESERVICE_IID); static nsresult QueueEvent(PLEvent* aEvent) { nsresult rv; nsCOMPtr eqs = do_GetService(kEventQueueServiceCID, &rv); if (NS_FAILED(rv)) { return rv; } if (eqs) { nsCOMPtr eq; rv = eqs->GetThreadEventQueue(NS_CURRENT_THREAD, getter_AddRefs(eq)); if (eq) { eq->PostEvent(aEvent); } } return rv; } //---------------------------------------------------------------------- struct ExitEvent : public PLEvent { ExitEvent(nsWebCrawler* aCrawler); ~ExitEvent(); void DoIt() { crawler->Exit(); } nsWebCrawler* crawler; static void PR_CALLBACK HandleMe(ExitEvent* e); static void PR_CALLBACK DeleteMe(ExitEvent* e); }; ExitEvent::ExitEvent(nsWebCrawler* aCrawler) : crawler(aCrawler) { PL_InitEvent(this, crawler, (PLHandleEventProc) HandleMe, (PLDestroyEventProc) DeleteMe); NS_ADDREF(aCrawler); } ExitEvent::~ExitEvent() { NS_RELEASE(crawler); } void ExitEvent::HandleMe(ExitEvent* e) { e->DoIt(); } void ExitEvent::DeleteMe(ExitEvent* e) { delete e; } void nsWebCrawler::QueueExit() { ExitEvent* event = new 
ExitEvent(this); QueueEvent(event); } void nsWebCrawler::Exit() { mViewer->Exit(); } //---------------------------------------------------------------------- struct LoadEvent : public PLEvent { LoadEvent(nsWebCrawler* aCrawler, const nsString& aURL); ~LoadEvent(); void DoIt() { crawler->GoToQueuedURL(url); } nsString url; nsWebCrawler* crawler; static void PR_CALLBACK HandleMe(LoadEvent* e); static void PR_CALLBACK DeleteMe(LoadEvent* e); }; LoadEvent::LoadEvent(nsWebCrawler* aCrawler, const nsString& aURL) : url(aURL), crawler(aCrawler) { PL_InitEvent(this, crawler, (PLHandleEventProc) HandleMe, (PLDestroyEventProc) DeleteMe); NS_ADDREF(aCrawler); } LoadEvent::~LoadEvent() { NS_RELEASE(crawler); } void LoadEvent::HandleMe(LoadEvent* e) { e->DoIt(); } void LoadEvent::DeleteMe(LoadEvent* e) { delete e; } void nsWebCrawler::GoToQueuedURL(const nsString& aURL) { nsCOMPtr docShell; mBrowser->GetDocShell(*getter_AddRefs(docShell)); nsCOMPtr webNav(do_QueryInterface(docShell)); if (webNav) { mCurrentURL = aURL; mStartLoad = PR_Now(); webNav->LoadURI(aURL.get(), nsIWebNavigation::LOAD_FLAGS_NONE, nsnull, nsnull, nsnull); } mQueuedLoadURLs--; } nsresult nsWebCrawler::QueueLoadURL(const nsString& aURL) { LoadEvent* event = new LoadEvent(this, aURL); nsresult rv = QueueEvent(event); if (NS_SUCCEEDED(rv)) { mQueuedLoadURLs++; } return rv; }