dockerfile cleanup; enforce text LF line endings (#81)

This commit is contained in:
frasergr 2023-06-17 20:18:01 -07:00 committed by GitHub
parent 3945a77290
commit 4079020de0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 46 additions and 52 deletions

1
.gitattributes vendored Normal file
View File

@ -0,0 +1 @@
* text=auto eol=lf

View File

@ -1,39 +1,39 @@
import requests
import xml.etree.ElementTree as ET
from scripts.link import parse_links
import re
def parse_sitemap(url, timeout=30):
    """Download a sitemap and return the page URLs it lists.

    Every ``<loc>`` found inside a ``<url>`` entry of the sitemaps.org 0.9
    namespace is collected, except those that ``has_extension_to_ignore``
    flags, which are reported on stdout and dropped.

    Args:
        url: Address of the sitemap XML document.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of URL strings in document order.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
        xml.etree.ElementTree.ParseError: If the response is not valid XML.
    """
    namespace = '{http://www.sitemaps.org/schemas/sitemap/0.9}'

    # Without a timeout requests waits indefinitely on an unresponsive server.
    response = requests.get(url, timeout=timeout)
    # Fail with the HTTP status instead of a confusing XML error on an error page.
    response.raise_for_status()
    root = ET.fromstring(response.content)

    urls = []
    for element in root.iter(namespace + 'url'):
        for loc in element.iter(namespace + 'loc'):
            # An empty <loc/> has text None; surrounding whitespace is common
            # in pretty-printed sitemaps and is not part of the URL.
            link = (loc.text or '').strip()
            if not link:
                continue
            if has_extension_to_ignore(link):
                print(f"Skipping filetype: {link}")
            else:
                urls.append(link)
    return urls
# Example sitemap URL https://www.nerdwallet.com/blog/wp-sitemap-news-articles-1.xml
def sitemap():
    """Prompt for a sitemap URL, then hand every URL it lists to ``parse_links``.

    Exits the process with status 1 when nothing (or only whitespace) is
    entered at the prompt.
    """
    # Stray whitespace around a pasted URL is not part of the address.
    sitemap_url = input("Enter the URL of the sitemap: ").strip()
    if not sitemap_url:
        print("No valid sitemap provided!")
        # SystemExit is what exit() raises, without relying on the
        # site-provided helper being installed.
        raise SystemExit(1)
    url_array = parse_sitemap(sitemap_url)
    # parse links from array
    parse_links(url_array)
def has_extension_to_ignore(string):
    """Return True when the URL's path ends with an ignored file extension.

    Only the path component is inspected, case-insensitively, so a query
    string after the file name does not hide the extension and an extension
    that merely appears in the host name or in the middle of the path does
    not cause a page to be skipped.

    Args:
        string: The URL to test. ``None`` or an empty string yields False.

    Returns:
        True if the path ends in one of the ignored extensions, else False.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urlsplit

    ignored_extensions = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.pdf')
    if not string:
        return False

    candidate = string.strip()
    try:
        path = urlsplit(candidate).path
    except ValueError:
        # urlsplit rejects some malformed authorities; fall back to the raw text.
        path = candidate
    return path.lower().endswith(ignored_extensions)
import requests
import xml.etree.ElementTree as ET
from scripts.link import parse_links
import re
def parse_sitemap(url, timeout=30):
    """Download a sitemap and return the page URLs it lists.

    Every ``<loc>`` found inside a ``<url>`` entry of the sitemaps.org 0.9
    namespace is collected, except those that ``has_extension_to_ignore``
    flags, which are reported on stdout and dropped.

    Args:
        url: Address of the sitemap XML document.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of URL strings in document order.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
        xml.etree.ElementTree.ParseError: If the response is not valid XML.
    """
    namespace = '{http://www.sitemaps.org/schemas/sitemap/0.9}'

    # Without a timeout requests waits indefinitely on an unresponsive server.
    response = requests.get(url, timeout=timeout)
    # Fail with the HTTP status instead of a confusing XML error on an error page.
    response.raise_for_status()
    root = ET.fromstring(response.content)

    urls = []
    for element in root.iter(namespace + 'url'):
        for loc in element.iter(namespace + 'loc'):
            # An empty <loc/> has text None; surrounding whitespace is common
            # in pretty-printed sitemaps and is not part of the URL.
            link = (loc.text or '').strip()
            if not link:
                continue
            if has_extension_to_ignore(link):
                print(f"Skipping filetype: {link}")
            else:
                urls.append(link)
    return urls
# Example sitemap URL https://www.nerdwallet.com/blog/wp-sitemap-news-articles-1.xml
def sitemap():
    """Prompt for a sitemap URL, then hand every URL it lists to ``parse_links``.

    Exits the process with status 1 when nothing (or only whitespace) is
    entered at the prompt.
    """
    # Stray whitespace around a pasted URL is not part of the address.
    sitemap_url = input("Enter the URL of the sitemap: ").strip()
    if not sitemap_url:
        print("No valid sitemap provided!")
        # SystemExit is what exit() raises, without relying on the
        # site-provided helper being installed.
        raise SystemExit(1)
    url_array = parse_sitemap(sitemap_url)
    # parse links from array
    parse_links(url_array)
def has_extension_to_ignore(string):
    """Return True when the URL's path ends with an ignored file extension.

    Only the path component is inspected, case-insensitively, so a query
    string after the file name does not hide the extension and an extension
    that merely appears in the host name or in the middle of the path does
    not cause a page to be skipped.

    Args:
        string: The URL to test. ``None`` or an empty string yields False.

    Returns:
        True if the path ends in one of the ignored extensions, else False.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urlsplit

    ignored_extensions = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.pdf')
    if not string:
        return False

    candidate = string.strip()
    try:
        path = urlsplit(candidate).path
    except ValueError:
        # urlsplit rejects some malformed authorities; fall back to the raw text.
        path = candidate
    return path.lower().endswith(ignored_extensions)

View File

@ -34,12 +34,10 @@ RUN groupadd -g $ARG_GID anythingllm && \
# Copy docker helper scripts
COPY ./docker/docker-entrypoint.sh /usr/local/bin/
COPY ./docker/docker-healthcheck.sh /usr/local/bin/
COPY ./docker/dual_boot.sh /usr/local/bin/
# Ensure the scripts are executable
RUN chmod +x /usr/local/bin/docker-entrypoint.sh && \
chmod +x /usr/local/bin/docker-healthcheck.sh && \
chmod 777 /usr/local/bin/dual_boot.sh
chmod +x /usr/local/bin/docker-healthcheck.sh
USER anythingllm
@ -91,6 +89,4 @@ HEALTHCHECK --interval=1m --timeout=10s --start-period=1m \
CMD /bin/bash /usr/local/bin/docker-healthcheck.sh || exit 1
# Run the server
ENTRYPOINT ["docker-entrypoint.sh"]
CMD /bin/bash /usr/local/bin/dual_boot.sh
ENTRYPOINT ["/bin/bash", "/usr/local/bin/docker-entrypoint.sh"]

View File

@ -1,3 +1,5 @@
#!/usr/bin/env bash
exec "$@"
#!/bin/bash
# Start the Node server and the gunicorn-served collector API side by side,
# then exit as soon as either one stops, passing its status on.
node /app/server/index.js &
# The environment assignments must prefix gunicorn itself: placed in front of
# `cd` they apply only to that builtin and never reach the gunicorn process.
{ cd collector && FLASK_ENV=production FLASK_APP=wsgi.py gunicorn --workers 4 --bind 0.0.0.0:8888 wsgi:api; } &
# `wait -n` returns when the first background job finishes.
wait -n
exit $?

View File

@ -1,5 +0,0 @@
#!/bin/bash
# Start the Node server and the gunicorn-served collector API side by side,
# then exit as soon as either one stops, passing its status on.
node /app/server/index.js &
# The environment assignments must prefix gunicorn itself: placed in front of
# `cd` they apply only to that builtin and never reach the gunicorn process.
{ cd collector && FLASK_ENV=production FLASK_APP=wsgi.py gunicorn --workers 4 --bind 0.0.0.0:8888 wsgi:api; } &
# `wait -n` returns when the first background job finishes.
wait -n
exit $?