diff --git a/.gitignore b/.gitignore index 0725f47c..a1d96b6e 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,4 @@ __pycache__ v-env .DS_Store aws_cf_deploy_anything_llm.json - +yarn.lock diff --git a/collector/scripts/link.py b/collector/scripts/link.py index 17a532cb..2bc604e9 100644 --- a/collector/scripts/link.py +++ b/collector/scripts/link.py @@ -80,12 +80,14 @@ def crawler(): # traverse paragraphs from soup for link in soup.find_all("a"): - data = link.get('href').strip() - if filter_value in data: - print (data) - links.append(root_site + data) - else: - print (data + " does not apply for linking...") + data = link.get('href') + if (data is not None): + if filter_value in data: + data = data.strip() + print (data) + links.append(root_site + data) + else: + print (data + " does not apply for linking...") #parse the links found parse_links(links)