mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2024-11-19 20:50:09 +01:00
patch collector script
This commit is contained in:
parent
c5dc68633b
commit
1e3d82e184
@ -9,6 +9,7 @@ from bs4 import BeautifulSoup
|
|||||||
|
|
||||||
# Example Channel URL https://tim.blog/2022/08/09/nft-insider-trading-policy/
|
# Example Channel URL https://tim.blog/2022/08/09/nft-insider-trading-policy/
|
||||||
def link():
|
def link():
|
||||||
|
totalTokens = 0
|
||||||
print("[NOTICE]: The first time running this process it will download supporting libraries.\n\n")
|
print("[NOTICE]: The first time running this process it will download supporting libraries.\n\n")
|
||||||
fqdn_link = input("Paste in the URL of an online article or blog: ")
|
fqdn_link = input("Paste in the URL of an online article or blog: ")
|
||||||
if(len(fqdn_link) == 0):
|
if(len(fqdn_link) == 0):
|
||||||
@ -33,6 +34,7 @@ def link():
|
|||||||
|
|
||||||
link = append_meta(req, full_text, True)
|
link = append_meta(req, full_text, True)
|
||||||
if(len(full_text) > 0):
|
if(len(full_text) > 0):
|
||||||
|
totalTokens += len(tokenize(full_text))
|
||||||
source = urlparse(req.url)
|
source = urlparse(req.url)
|
||||||
output_filename = f"website-{source.netloc}-{source.path.replace('/','_')}.json"
|
output_filename = f"website-{source.netloc}-{source.path.replace('/','_')}.json"
|
||||||
output_path = f"./outputs/website-logs"
|
output_path = f"./outputs/website-logs"
|
||||||
@ -58,7 +60,7 @@ def link():
|
|||||||
|
|
||||||
print(f"\n\n[Success]: article or link content fetched!")
|
print(f"\n\n[Success]: article or link content fetched!")
|
||||||
print(f"////////////////////////////")
|
print(f"////////////////////////////")
|
||||||
print(f"Your estimated cost to embed this data using OpenAI's text-embedding-ada-002 model at $0.0004 / 1K tokens will cost {ada_v2_cost(tokenCount)} using {tokenCount} tokens.")
|
print(f"Your estimated cost to embed this data using OpenAI's text-embedding-ada-002 model at $0.0004 / 1K tokens will cost {ada_v2_cost(totalTokens)} using {totalTokens} tokens.")
|
||||||
print(f"////////////////////////////")
|
print(f"////////////////////////////")
|
||||||
exit(0)
|
exit(0)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user