Ignore SSL errors for web scraper

resolves #2114
timothycarambat 2024-08-14 09:11:22 -07:00
parent b541623c9e
commit 619f6b3884
2 changed files with 4 additions and 3 deletions


@@ -57,6 +57,7 @@ async function getPageContent(link) {
   const loader = new PuppeteerWebBaseLoader(link, {
     launchOptions: {
       headless: "new",
+      ignoreHTTPSErrors: true,
     },
     gotoOptions: {
       waitUntil: "domcontentloaded",

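The single added line above passes Puppeteer's ignoreHTTPSErrors launch option through PuppeteerWebBaseLoader, so pages served with expired or self-signed certificates are still scraped instead of failing the fetch. A minimal sketch of the surrounding getPageContent function follows; the import path and the handling of the loaded documents are assumptions for illustration, not part of this diff.

// Sketch only: the require path and the doc post-processing are assumed, not shown in this commit.
const { PuppeteerWebBaseLoader } = require("langchain/document_loaders/web/puppeteer");

async function getPageContent(link) {
  const loader = new PuppeteerWebBaseLoader(link, {
    launchOptions: {
      headless: "new",
      ignoreHTTPSErrors: true, // added by this commit: tolerate invalid/self-signed certs
    },
    gotoOptions: {
      waitUntil: "domcontentloaded",
    },
  });
  const docs = await loader.load();
  return docs.map((doc) => doc.pageContent).join(" ");
}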

@@ -5,11 +5,11 @@
  * and is simply to prevent the user from accidentally putting in non-valid websites, which is all this protects
  * since _all urls must be submitted by the user anyway_ and cannot be done with authentication and manager or admin roles.
  * If an attacker has those roles then the system is already vulnerable and this is not a primary concern.
- *
+ *
  * We have gotten this report many times, marked them as duplicate or information and continue to get them. We communicate
  * already that deployment (and security) of an instance is on the deployer and system admin deploying it. This would include
  * isolation, firewalls, and the general security of the instance.
- */
+ */
 const VALID_PROTOCOLS = ["https:", "http:"];
 const INVALID_OCTETS = [192, 172, 10, 127];
@@ -32,7 +32,7 @@ function validURL(url) {
     if (!VALID_PROTOCOLS.includes(destination.protocol)) return false;
     if (isInvalidIp(destination)) return false;
     return true;
-  } catch { }
+  } catch {}
   return false;
 }
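For context, the two hunks above only reformat the URL validation helper that gates which links the scraper will accept. A minimal sketch of how the pieces named in the diff (VALID_PROTOCOLS, INVALID_OCTETS, isInvalidIp, validURL) could fit together is below; the body of isInvalidIp is a guessed implementation, since this commit does not show it.

// Sketch under assumptions: only validURL's body and the two constants appear in the diff;
// isInvalidIp here is a hypothetical implementation for illustration.
const VALID_PROTOCOLS = ["https:", "http:"];
const INVALID_OCTETS = [192, 172, 10, 127];

function isInvalidIp({ hostname }) {
  // Reject hostnames that are raw IPs whose first octet falls in private/loopback space.
  const [firstOctet] = hostname.split(".").map(Number);
  return INVALID_OCTETS.includes(firstOctet);
}

function validURL(url) {
  try {
    const destination = new URL(url);
    if (!VALID_PROTOCOLS.includes(destination.protocol)) return false;
    if (isInvalidIp(destination)) return false;
    return true;
  } catch {}
  return false;
}

// Example usage:
//   validURL("https://example.com") -> true
//   validURL("ftp://example.com")   -> false (protocol not allowed)
//   validURL("http://127.0.0.1")    -> false (loopback first octet)
//   validURL("not a url")           -> false (URL constructor throws)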