patch collector script

This commit is contained in:
timothycarambat 2023-11-16 10:25:23 -08:00
parent c5dc68633b
commit 1e3d82e184

View File

@@ -9,6 +9,7 @@ from bs4 import BeautifulSoup
# Example Channel URL https://tim.blog/2022/08/09/nft-insider-trading-policy/
def link():
totalTokens = 0
print("[NOTICE]: The first time running this process it will download supporting libraries.\n\n")
fqdn_link = input("Paste in the URL of an online article or blog: ")
if(len(fqdn_link) == 0):
@@ -33,6 +34,7 @@ def link():
link = append_meta(req, full_text, True)
if(len(full_text) > 0):
totalTokens += len(tokenize(full_text))
source = urlparse(req.url)
output_filename = f"website-{source.netloc}-{source.path.replace('/','_')}.json"
output_path = f"./outputs/website-logs"
@@ -58,7 +60,7 @@ def link():
print(f"\n\n[Success]: article or link content fetched!")
print(f"////////////////////////////")
print(f"Your estimated cost to embed this data using OpenAI's text-embedding-ada-002 model at $0.0004 / 1K tokens will cost {ada_v2_cost(tokenCount)} using {tokenCount} tokens.")
print(f"Your estimated cost to embed this data using OpenAI's text-embedding-ada-002 model at $0.0004 / 1K tokens will cost {ada_v2_cost(totalTokens)} using {totalTokens} tokens.")
print(f"////////////////////////////")
exit(0)