mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2024-11-20 05:10:10 +01:00
15 lines
871 B
Python
15 lines
871 B
Python
import json
|
|
from datetime import datetime
|
|
from dotenv import load_dotenv
|
|
# Runs at import time: python-dotenv reads a .env file (if one is found) into
# the process environment. Nothing in the visible part of this module reads
# those variables directly — presumably other code relies on them; confirm.
load_dotenv()
|
|
|
|
def append_meta(request, text, metadata_only = False):
    """Build metadata for a scraped page and optionally prepend it to the text.

    Args:
        request: Response-like object exposing ``url`` and an ``html``
            attribute whose ``find(selector, first=True)`` returns the first
            matching element or ``None`` (presumably a requests-html
            response -- confirm against the caller).
        text: Extracted text content of the page.
        metadata_only: When true, return only the metadata dict.

    Returns:
        The metadata dict when ``metadata_only`` is true; otherwise a string
        consisting of a JSON metadata header followed by ``text``.
    """
    html = request.html
    # Look each element up once instead of querying a second time just to
    # check whether it exists.
    title_el = html.find('title', first=True)
    description_el = html.find('meta[name="description"]', first=True)
    published_el = html.find('meta[property="article:published_time"]', first=True)

    meta = {
        'url': request.url,
        'title': title_el.text if title_el is not None else '',
        'description': (
            description_el.attrs.get('content')
            if description_el is not None
            else ''
        ),
        # Fall back to the current local time when the page declares no
        # publication date.
        'published': (
            published_el.attrs.get('content')
            if published_el is not None
            else datetime.today().strftime('%Y-%m-%d %H:%M:%S')
        ),
        # split() without an argument splits on any run of whitespace, so
        # empty text counts as 0 words and newlines or repeated spaces do not
        # inflate the count (split(' ') reported 1 word for an empty string).
        'wordCount': len(text.split()),
    }

    if metadata_only:
        return meta
    return "Article JSON Metadata:\n" + json.dumps(meta) + "\n\n\nText Content:\n" + text
|