/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is Google Safe Browsing.
 *
 * The Initial Developer of the Original Code is Google Inc.
 * Portions created by the Initial Developer are Copyright (C) 2006
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Fritz Schneider <fritz@google.com> (original author)
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */


// This is the code used to interact with data encoded in the
// goog-black-enchash format. The format is basically a map from
// hashed hostnames to encrypted sequences of regular expressions
// where the encryption key is derived from the hashed
// hostname. Encoding lists like this raises the bar slightly on
// deriving complete table data from the db. This data format is NOT
// our idea; we would've raised the bar higher :)
//
// Anyway, this code is a port of the original C++ implementation by
// Garret. To ease verification, I mirrored that code as closely as
// possible.  As a result, you'll see some C++-style variable naming
// and roundabout (C++) ways of doing things. Additionally, I've
// omitted the comments.
//
// This code should not change, except to fix bugs.
//
// TODO: verify that using encodeURI() in getCanonicalHost is OK
// TODO: accommodate other kinds of perl-but-not-javascript qualifiers


/**
 * Generates lookup keys for, and decrypts values found in, a table of
 * type enchash.
 *
 * Construction creates a fresh hasher, base64 codec and stream cipher and
 * points REs_ at the regular expressions in PROT_EnchashDecrypter.REs.
 *
 * @constructor
 */
function PROT_EnchashDecrypter() {
  this.debugZone = "enchashdecrypter";
  this.REs_ = PROT_EnchashDecrypter.REs;
  this.hasher_ = new G_CryptoHasher();
  this.base64_ = new G_Base64();

  var cipherClass = Cc["@mozilla.org/security/streamcipher;1"];
  this.streamCipher_ = cipherClass.createInstance(Ci.nsIStreamCipher);
}

// Fixed salt that is prepended to the host when building both the lookup
// key and the decryption key.
PROT_EnchashDecrypter.DATABASE_SALT = "oU3q.72p";
// Number of leading characters of a decoded value that hold its per-entry
// salt; defined to be as long as the database salt.
PROT_EnchashDecrypter.SALT_LENGTH = PROT_EnchashDecrypter.DATABASE_SALT.length;

// Value meant to be passed as opt_maxDots to getCanonicalHost.
PROT_EnchashDecrypter.MAX_DOTS = 5;

// Shared table of regular expressions. Patterns used with replace() have
// a *_GLOBAL twin carrying the "g" flag; the plain ones are for test().
PROT_EnchashDecrypter.REs = {};
// Runs of characters in the ranges 0x01-0x1f and 0x7f-0xff.
PROT_EnchashDecrypter.REs.FIND_DODGY_CHARS = 
  new RegExp("[\x01-\x1f\x7f-\xff]+");
PROT_EnchashDecrypter.REs.FIND_DODGY_CHARS_GLOBAL = 
  new RegExp("[\x01-\x1f\x7f-\xff]+", "g");
// Dots at the very start or the very end of the string.
PROT_EnchashDecrypter.REs.FIND_END_DOTS = new RegExp("^\\.+|\\.+$");
PROT_EnchashDecrypter.REs.FIND_END_DOTS_GLOBAL = 
  new RegExp("^\\.+|\\.+$", "g");
// Two or more consecutive dots.
PROT_EnchashDecrypter.REs.FIND_MULTIPLE_DOTS = new RegExp("\\.{2,}");
PROT_EnchashDecrypter.REs.FIND_MULTIPLE_DOTS_GLOBAL = 
  new RegExp("\\.{2,}", "g");
// Dots at the very end of the string.
PROT_EnchashDecrypter.REs.FIND_TRAILING_DOTS = new RegExp("\\.+$");
// Global twin of FIND_TRAILING_DOTS. It has to exist under this name:
// looking up a missing entry yields undefined, and replace(undefined, "")
// removes the literal text "undefined" instead of any dots.
PROT_EnchashDecrypter.REs.FIND_TRAILING_DOTS_GLOBAL = 
  new RegExp("\\.+$", "g");
// Whole string is made of digits, dots and 0x-prefixed hex numbers.
PROT_EnchashDecrypter.REs.POSSIBLE_IP = 
  new RegExp("^((?:0x[0-9a-f]+|[0-9\\.])+)$", "i");
// A component that starts with 0 but contains an 8 or a 9.
PROT_EnchashDecrypter.REs.FIND_BAD_OCTAL = new RegExp("(^|\\.)0\\d*[89]");
// Whole string is a 0 followed by octal digits only.
PROT_EnchashDecrypter.REs.IS_OCTAL = new RegExp("^0[0-7]*$");
// Whole string is decimal digits only.
PROT_EnchashDecrypter.REs.IS_DECIMAL = new RegExp("^[0-9]+$");
// Whole string is 0x/0X followed by hex digits; group 1 holds the digits.
PROT_EnchashDecrypter.REs.IS_HEX = new RegExp("^0[xX]([0-9a-fA-F]+)$");

// Regexps are given in perl regexp format. Unfortunately, JavaScript's
// library isn't completely compatible. For example, you can't specify
// case-insensitive matching by using (?i) in the expression text :(
// So we manually set this bit with the help of this regular expression.
PROT_EnchashDecrypter.REs.CASE_INSENSITIVE = /\(\?i\)/g;

/**
 * Return the tail of a string.
 *
 * @param str String to get chars from
 * 
 * @param n Number of characters to get
 *
 * @returns String made up of the last n characters of str; all of str if
 *          it has fewer than n characters, and "" if n is zero or negative
 */ 
PROT_EnchashDecrypter.prototype.lastNChars_ = function(str, n) {
  // A start index of -0 selects the whole string, so "no characters" has
  // to be handled explicitly rather than by negating n.
  if (n <= 0)
    return "";

  return str.slice(-n);
};

/**
 * We have to have our own hex-decoder because decodeURIComponent
 * expects UTF-8 (so it will barf on invalid UTF-8 sequences).
 *
 * Every "%" that is followed by exactly two hex digits is replaced, in a
 * single left-to-right pass, by the character with that code. Anything
 * else (a "%" near the end of the string, or followed by non-hex
 * characters) is copied through unchanged.
 *
 * @param str String to decode
 * 
 * @returns The decoded string
 */
PROT_EnchashDecrypter.prototype.hexDecode_ = function(str) {
  var output = [];

  var i = 0;
  while (i < str.length) {
    var c = str.charAt(i);
  
    if (c == "%" && i + 2 < str.length) {

      var hexPair = str.substring(i + 1, i + 3);

      // Check the digits explicitly. Converting "0x" + pair with Number()
      // would ignore trailing whitespace and so accept a pair such as
      // "4 " as the value 4.
      if (/^[0-9a-fA-F]{2}$/.test(hexPair)) {
        i += 2;
        c = String.fromCharCode(parseInt(hexPair, 16));
      }
    }
    
    output[output.length] = c;
    ++i;
  }
  
  return output.join("");
};

/**
 * Translate a plaintext enchash value into regular expressions
 *
 * The value is a tab-separated list of perl-style expressions. Every
 * (?i) marker is removed from an expression, and an expression that
 * contained one is compiled with the "i" flag.
 *
 * @param data String containing a decrypted enchash db entry
 *
 * @returns An array of RegExps
 */
PROT_EnchashDecrypter.prototype.parseRegExps = function(data) {
  var res = data.split("\t");
  
  G_Debug(this, "Got " + res.length + " regular expressions");
  
  for (var i = 0; i < res.length; i++) {
    // Could have leading (?i); if so, set the flag and strip it.
    // CASE_INSENSITIVE is a shared global regexp, so test() on it would
    // start from whatever lastIndex an earlier use left behind. replace()
    // always scans from the start, so strip first and detect the marker
    // by checking whether the text changed.
    var stripped = res[i].replace(this.REs_.CASE_INSENSITIVE, "");
    var flags = (stripped != res[i]) ? "i" : "";
    res[i] = new RegExp(stripped, flags);
  }

  return res;
};

/**
 * Get the canonical version of the given URL for lookup in a table of 
 * type -url.
 *
 * The URL is first run through PROT_URLCanonicalizer.canonicalizeURL_;
 * its host is then replaced by the result of getCanonicalHost. When no
 * canonical host can be obtained the canonicalized URL is returned as is.
 *
 * @param url String to canonicalize
 *
 * @returns String containing the canonicalized url (maximally url-decoded
 *          with hostname normalized, then specially url-encoded)
 */
PROT_EnchashDecrypter.prototype.getCanonicalUrl = function(url) {
  var canonicalSpec = PROT_URLCanonicalizer.canonicalizeURL_(url);

  var normalizedHost = this.getCanonicalHost(canonicalSpec);
  if (normalizedHost) {
    // Let the URI object splice the normalized host into the escaped url.
    var uri = Cc["@mozilla.org/network/io-service;1"]
              .getService(Ci.nsIIOService)
              .newURI(canonicalSpec, null, null);
    uri.host = normalizedHost;
    return uri.asciiSpec;
  }

  // Probably an invalid url; hand back what we have so far.
  return canonicalSpec;
}

/**
 * Extract the host of a URL and normalize it: %XX escapes are decoded,
 * dodgy characters are dropped, leading and trailing dots are stripped,
 * runs of dots are collapsed to one, a host that parseIPAddress_ accepts
 * is replaced by its canonical form, and the result is passed through
 * encodeURI and lower-cased.
 *
 * @param str String URL to take the host from
 *
 * @param opt_maxDots Number maximum number of dots to include. When given
 *                    (and non-zero), a host with more dots than this is
 *                    cut down to its rightmost components.
 *
 * @returns String canonical host, or "" if no host could be read from str
 */
PROT_EnchashDecrypter.prototype.getCanonicalHost = function(str, opt_maxDots) {
  var ios = Cc["@mozilla.org/network/io-service;1"]
            .getService(Ci.nsIIOService);

  var rawHost;
  try {
    rawHost = ios.newURI(str, null, null).asciiHost;
  } catch (e) {
    G_Debug(this, "Unable to get hostname: " + str);
    return "";
  }

  var patterns = this.REs_;
  var host = this.hexDecode_(rawHost);
  host = host.replace(patterns.FIND_DODGY_CHARS_GLOBAL, "");
  host = host.replace(patterns.FIND_END_DOTS_GLOBAL, "");
  host = host.replace(patterns.FIND_MULTIPLE_DOTS_GLOBAL, ".");

  var dottedQuad = this.parseIPAddress_(host);
  if (dottedQuad)
    host = dottedQuad;

  // TODO: what exactly is encodeURI supposed to escape here? It does not
  // escape ":", "/", ";", and "?"
  var escaped = encodeURI(host);

  if (opt_maxDots) {
    // Walk backwards over the dots. Finding one more dot than allowed
    // marks the place after which the string is kept.
    var wanted = opt_maxDots + 1;
    var found = 0;
    var cut = escaped.length;
    while (found < wanted) {
      var pos = escaped.lastIndexOf(".", cut - 1);
      if (pos == -1)
        break;
      cut = pos;
      found++;
    }

    if (found == wanted)
      escaped = escaped.substring(cut + 1);
  }

  return escaped.toLowerCase();
}

/**
 * Canonicalize a host that is written like an IP address. The host may
 * have up to four dot-separated components, each in decimal, hex (0x..)
 * or octal (leading 0) notation. Every component except the last stands
 * for one byte; the last one stands for all remaining bytes.
 *
 * @param host String host to examine
 *
 * @returns String with the components joined by dots, each one replaced
 *          by its canonical form where canonicalNum_ could convert it and
 *          left as it was otherwise; "" if host does not look like an IP
 *          address or has more than four components
 */
PROT_EnchashDecrypter.prototype.parseIPAddress_ = function(host) {

  // FIND_TRAILING_DOTS is anchored at the end of the string, so it can
  // match only once and needs no global flag. (Passing a pattern that is
  // not defined would make replace() remove the text "undefined".)
  host = host.replace(this.REs_.FIND_TRAILING_DOTS, "");

  if (!this.REs_.POSSIBLE_IP.test(host))
    return "";

  var parts = host.split(".");
  if (parts.length > 4)
    return "";

  // A component such as 089 cannot be octal; if one is present, none of
  // the components are read as octal.
  var allowOctal = !this.REs_.FIND_BAD_OCTAL.test(host);

  for (var k = 0; k < parts.length; k++) {
    var canon;
    if (k == parts.length - 1) {
      // The last component fills whatever is left of the four bytes.
      canon = this.canonicalNum_(parts[k], 5 - parts.length, allowOctal);
    } else {
      canon = this.canonicalNum_(parts[k], 1, allowOctal);
    }
    if (canon != "") 
      parts[k] = canon;
  }

  return parts.join(".");
};

/**
 * Convert one component of an IP-style host into dotted decimal bytes.
 *
 * @param num String holding the component in octal (leading 0, only
 *            honoured when octal is true), decimal or hex (leading 0x)
 *            notation
 *
 * @param bytes Number of bytes to produce
 *
 * @param octal Boolean; true if a component that looks octal is to be
 *              read as octal
 *
 * @returns String with the `bytes` lowest-order bytes of the value, in
 *          decimal, most significant first and joined by dots; "" if
 *          bytes is negative or zero, or num is in none of the accepted
 *          notations
 */
PROT_EnchashDecrypter.prototype.canonicalNum_ = function(num, bytes, octal) {
  
  if (bytes < 0) 
    return "";
  var temp_num;
  var hex_match;

  if (octal && this.REs_.IS_OCTAL.test(num)) {

    num = this.lastNChars_(num, 11);

    temp_num = parseInt(num, 8);

  } else if (this.REs_.IS_DECIMAL.test(num)) {

    num = this.lastNChars_(num, 32);

    temp_num = parseInt(num, 10);

  } else if ((hex_match = this.REs_.IS_HEX.exec(num))) {

    // Work on the digits captured by IS_HEX, not on the whole literal.
    // Taking the last 8 characters of the literal itself would, for a
    // 9 character one such as 0x1234567, leave the "x" at the front and
    // make the number unparseable.
    num = this.lastNChars_(hex_match[1], 8);

    temp_num = parseInt(num, 16);

  } else {
    return "";
  }

  if (isNaN(temp_num)) 
    return "";

  // Since we mod the number, we're removing the least significant bits.  We
  // Want to push them into the front of the array to preserve the order.
  var parts = [];
  while (bytes--) {
    parts.unshift("" + (temp_num % 256));
    temp_num -= temp_num % 256;
    temp_num /= 256;
  }

  return parts.join(".");
};

/**
 * Compute the key under which the entry for a host is looked up: the
 * upper-cased hex MD5 digest of the database salt followed by the host.
 *
 * @param host String host to compute the key for
 *
 * @returns String upper-case hex digest
 */
PROT_EnchashDecrypter.prototype.getLookupKey = function(host) {
  var saltedHost = PROT_EnchashDecrypter.DATABASE_SALT + host;
  var saltedBytes = this.base64_.arrayifyString(saltedHost);

  var md5 = this.hasher_;
  md5.init(G_CryptoHasher.algorithms.MD5);
  md5.updateFromArray(saltedBytes);

  return md5.digestHex().toUpperCase();
}

/**
 * Decrypt one value of an enchash table.
 *
 * The value is base64-decoded; its first SALT_LENGTH characters are the
 * per-entry salt and the rest is the ciphertext. The RC4 key is the raw
 * MD5 digest of database salt + entry salt + host.
 *
 * @param data String base64-encoded table value
 *
 * @param host String host the value belongs to
 *
 * @returns The stream cipher's output for the ciphertext, requested
 *          without base64 encoding
 */
PROT_EnchashDecrypter.prototype.decryptData = function(data, host) {
  // XXX: base 64 decoding should be done in C++
  var decoded = this.base64_.stringifyArray(this.base64_.decodeString(data));

  var saltLength = PROT_EnchashDecrypter.SALT_LENGTH;
  var entrySalt = decoded.slice(0, saltLength);
  var cipherText = decoded.slice(saltLength);

  this.hasher_.init(G_CryptoHasher.algorithms.MD5);
  this.hasher_.updateFromString(
      PROT_EnchashDecrypter.DATABASE_SALT + entrySalt + host);

  var rc4Key = Cc["@mozilla.org/security/keyobjectfactory;1"]
               .getService(Ci.nsIKeyObjectFactory)
               .keyFromString(Ci.nsIKeyObject.RC4, this.hasher_.digestRaw());

  var cipher = this.streamCipher_;
  cipher.init(rc4Key);
  cipher.updateFromString(cipherText);

  return cipher.finish(false /* no base64 */);
}

#ifdef DEBUG
/**
 * Unit test for PROT_EnchashDecrypter. Does nothing unless G_GDEBUG is
 * set. Exercises the regular expressions, lastNChars_, canonicalNum_,
 * parseIPAddress_, getCanonicalHost, getCanonicalUrl, getLookupKey and
 * decryptData, reporting failures through G_Assert in the
 * "enchash UNITTEST" debug zone.
 */
function TEST_PROT_EnchashDecrypter() {
  if (G_GDEBUG) {
    var z = "enchash UNITTEST";
    G_debugService.enableZone(z);

    G_Debug(z, "Starting");  

    // Yes this defies our naming convention, but we copy verbatim from 
    // the C++ unittest, so lets just keep things clear.
    var no_dots = "abcd123;[]";
    var one_dot = "abc.123";
    var two_dots = "two..dots";
    var lots_o_dots = "I have a lovely .... bunch of dots";
    var multi_dots = "dots ... and ... more .... dots";
    var leading_dot = ".leading";
    var trailing_dot = "trailing.";
    var trailing_dots = "I love trailing dots....";
    var end_dots = ".dots.";

    var decimal = "1234567890";
    var hex = "0x123452FAf";
    var bad_hex = "0xFF0xGG";
    var octal = "012034056";
    var bad_octal = "012034089";
    var garbage = "lk,.:asdfa-=";
    var mixed = "1230x78034";
    var spaces = "123 0xFA 045";
    
    var longstr = "";
    for(var k = 0; k < 100; k++) {
      longstr += "a";
    }

    var shortstr = "short";

    var r = PROT_EnchashDecrypter.REs;
    var l = new PROT_EnchashDecrypter();

    // Test regular expressions. inputValPairs alternates an input string
    // with the result re.test() is expected to give for it.
    function testRE(re, inputValPairs) {
      for (var i = 0; i < inputValPairs.length; i += 2) 
        G_Assert(z, re.test(inputValPairs[i]) == inputValPairs[i + 1], 
                 "RegExp broken: " + re + " (input: " + inputValPairs[i] + ")");
    };

    // The control and high characters are written as hex escapes; they
    // are the same characters the C++ test spells as octal escapes
    // (\2, \245, \45 and \31).
    var tests = 
      ["", false, 
       "normal chars;!@#$%^&*&(", false,
       "MORE NORMAL ,./<>?;':{}", false,
       "Slightly less\x02 normal", true, 
       "\xa5 stuff \x25", true, 
       "\x19", true];
    testRE(r.FIND_DODGY_CHARS, tests);

    tests = 
      [no_dots, false, 
       one_dot, false, 
       leading_dot, true, 
       trailing_dots, true, 
       end_dots, true];
    testRE(r.FIND_END_DOTS, tests);

    tests =
      [no_dots, false,
       one_dot, false,
       two_dots, true, 
       lots_o_dots, true,
       multi_dots, true];
    testRE(r.FIND_MULTIPLE_DOTS, tests);

    tests = 
      [no_dots, false, 
       one_dot, false,
       trailing_dot, true,
       trailing_dots, true];
    testRE(r.FIND_TRAILING_DOTS, tests);

    tests = 
      ["random junk", false,
       "123.45.6-7.89", false,
       "012.12.123", true,
       "0x12.0xff.123", true,
       "225.0.0.1", true];
    testRE(r.POSSIBLE_IP, tests);

    tests = 
      [decimal, false,
       hex, false, 
       octal, false, 
       bad_octal, true];
    testRE(r.FIND_BAD_OCTAL, tests);

    tests = 
      [decimal, false, 
       hex, false, 
       bad_octal, false,
       garbage, false,
       mixed, false,
       spaces, false,
       octal, true];
    testRE(r.IS_OCTAL, tests);

    tests = 
      [hex, false,
       garbage, false, 
       mixed, false, 
       spaces, false, 
       octal, true,
       bad_octal, true,
       decimal, true];
    testRE(r.IS_DECIMAL, tests);

    tests =
      [decimal, false, 
       octal, false, 
       bad_octal, false,
       garbage, false,
       mixed, false,
       spaces, false,
       bad_hex, false,
       hex, true];
    testRE(r.IS_HEX, tests);

    // Test find last N
    var val = l.lastNChars_(longstr, 8);
    G_Assert(z, val.length == 8, "find last eight broken on long str");
    val = l.lastNChars_(shortstr, 8);
    G_Assert(z, val.length == 5, "find last eight broken on short str");

    // Test canonical num. Each case is four entries: expected result,
    // then the num, bytes and octal arguments.
    tests = 
      ["", "", 1, true, 
       "", "10", 0, true,
       "", "0x45", -1, true,
       "45", "45", 1, true,
       "16", "0x10", 1, true,
       "1.111", "367", 2, true,
       "0.20.229", "012345", 3, true,
       "123", "0173", 1, true,
       "9", "09", 1, false,
       "", "0x120x34", 2, true,
       "18.252", "0x12fc", 2, true];
    for (var i = 0; i < tests.length; i+= 4)
      G_Assert(z, tests[i] === l.canonicalNum_(tests[i + 1], 
                                               tests[i + 2], 
                                               tests[i + 3]),
               "canonicalNum broken on: " + tests[i + 1]);

    // Test parseIPAddress (these are all verifiable using ping)
    var testing = {};
    testing["123.123.0.0.1"] = "";
    testing["255.0.0.1"] = "255.0.0.1";
    testing["12.0x12.01234"] = "12.18.2.156";
    testing["012.034.01.055"] = "10.28.1.45";
    testing["0x12.0x43.0x44.0x01"] = "18.67.68.1";
    testing["0x12434401"] = "18.67.68.1";
    testing["413960661"] = "24.172.137.213";
    testing["03053104725"] = "24.172.137.213";
    testing["030.0254.0x89d5"] = "24.172.137.213";
    testing["1.234.4.0377"] = "1.234.4.255";

    var got;
    for (var key in testing) {
      got = l.parseIPAddress_(key);
      G_Assert(z, got === testing[key], 
               "parseIPAddress broken on " + key + " (got: " + got + ")");
    }

    // Test getCanonicalHost
    var testing = {};
    testing["http://completely.bogus.url.with.a.whole.lot.of.dots"] =
      "with.a.whole.lot.of.dots";
    testing["http://poseidon.marinet.gr/~elani"] = "poseidon.marinet.gr";
    testing["http://www.google.com.."] = "www.google.com";
    testing["https://www.yaho%6F.com"] = "www.yahoo.com";
    testing["http://012.034.01.0xa"] = "10.28.1.10";
    testing["ftp://wierd..chars...%0f,%fa"] = "wierd.chars.,";
    testing["http://0x18ac89d5/http.www.paypal.com/"] = "24.172.137.213";
    testing["http://413960661/http.www.paypal.com/"] = "24.172.137.213";
    testing["http://03053104725/http.www.paypal.com/"] = "24.172.137.213";
    testing["http://www.barclays.co.uk.brccontrol.assruspede.org.bz/"
                    + "detailsconfirm"] = "co.uk.brccontrol.assruspede.org.bz";
    for (var key in testing) {
      // Report the value that was actually compared, i.e. the one
      // computed with the dot limit.
      got = l.getCanonicalHost(key, PROT_EnchashDecrypter.MAX_DOTS);
      G_Assert(z, got == testing[key],
               "getCanonicalHost broken on: " + key + 
               "(got: " + got + ")");
    }

    // Test getCanonicalUrl
    testing = {};
    testing["http://0x18.0xac.0x89.0xd5/http.www.paypal.com/"] =
                                "http://24.172.137.213/http.www.paypal.com/";
    testing["http://0x18ac89d5/http.www.paypal.com/"] =
                                "http://24.172.137.213/http.www.paypal.com/";
    testing["http://413960661/http.www.paypal.com/"] =
                                "http://24.172.137.213/http.www.paypal.com/";
    testing["http://03053104725/http.www.paypal.com/"] =
                                "http://24.172.137.213/http.www.paypal.com/";
    testing["http://03053104725/%68t%74p.www.paypal.c%6fm/"] =
                                "http://24.172.137.213/http.www.paypal.com/";
    testing["http://www.barclays.co.uk.brccontrol.assruspede.org.bz/detailsconfirm"] =
      "http://www.barclays.co.uk.brccontrol.assruspede.org.bz/detailsconfirm";
    for (var key in testing) {
      got = l.getCanonicalUrl(key);
      G_Assert(z, got == testing[key], 
               "getCanonicalUrl broken on: " + key + 
               "(got: " + got + ")");
    }

    // Test getlookupkey
    var testing = {};
    testing["www.google.com"] = "AF5638A09FDDDAFF5B7A6013B1BE69A9";
    testing["poseidon.marinet.gr"] = "01844755C8143C4579BB28DD59C23747";
    testing["80.53.164.26"] = "B775DDC22DEBF8BEBFEAC24CE40A1FBF";

    for (var key in testing) {
      got = l.getLookupKey(key);
      G_Assert(z, got === testing[key], 
               "getlookupkey broken on " + key + " (got: " + 
               got + ", expected: " + 
               testing[key] + ")");
    }

    // Test decryptdata. Each case is three entries: the base64 table
    // value, the host, and the expected plaintext.
    var tests = 
      [ "bGtEQWJuMl/z2ZxSBB2hsuWI8geMAwfSh3YBfYPejQ1O+wyRAJeJ1UW3V56zm" +
        "EpUvnaEiECN1pndxW5rEMNzE+gppPeel7PvH+OuabL3NXlspcP0xnpK8rzNgB1" +
        "JT1KcajQ9K3CCl24T9r8VGb0M3w==", 
        "80.53.164.26", 
        "^(?i)http\\:\\/\\/80\\.53\\.164\\.26(?:\\:80)?\\/\\.PayPal" +
        "\\.com\\/webscr\\-id\\/secure\\-SSL\\/cmd\\-run\\=\\/login\\.htm$",

        "ZTMzZjVnb3WW1Yc2ABorgQGAwYfcaCb/BG3sMFLTMDvOQxH8LkdGGWqp2tI5SK" +
        "uNrXIHNf2cyzcVocTqUIUkt1Ud1GKieINcp4tWcU53I0VZ0ZZHCjGObDCbv9Wb" +
        "CPSx1eS8vMREDv8Jj+UVL1yaZQ==", 
        "80.53.164.26", 
        "^(?i)http\\:\\/\\/80\\.53\\.164\\.26(?:\\:80)?\\/\\.PayPal\\.com" +
        "\\/webscr\\-id\\/secure\\-SSL\\/cmd\\-run\\=\\/login\\.htm$",

        "ZTMzZjVnb3WVb6VqoJ44hVo4V77XjDRcXTxOc2Zpn4yIHcpS0AQ0nn1TVlX4MY" +
        "IeNL/6ggzCmcJSWOOkj06Mpo56LNLrbxNxTBuoy9GF+xcm", 
        "poseidon.marinet.gr", 
        "^(?i)http\\:\\/\\/poseidon\\.marinet\\.gr(?:\\:80)?\\/\\~eleni" +
        "\\/eBay\\/index\\.php$",

        "bGtEQWJuMl9FA3Kl5RiXMpgFU8nDJl9J0hXjUck9+mMUQwAN6llf0gJeY5DIPP" +
        "c2f+a8MSBFJN17ANGJZl5oZVsQfSW4i12rlScsx4tweZAE", 
        "poseidon.marinet.gr", 
        "^(?i)http\\:\\/\\/poseidon\\.marinet\\.gr(?:\\:80)?\\/\\~eleni" +
        "\\/eBay\\/index\\.php$"];

    for (var i = 0; i < tests.length; i += 3) {
      var dec = l.decryptData(tests[i], tests[i + 1]);
      G_Assert(z, dec === tests[i + 2],
               "decryptdata broken on " + tests[i] + " (got: " + dec + 
               ", expected: " + tests[i + 2] + ")");
    }

    G_Debug(z, "PASSED");
  }
}
#endif