mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2024-11-19 20:50:09 +01:00
normalize parser struct for all file types (#321)
This commit is contained in:
parent
1c5d55c425
commit
5441717294
@ -61,7 +61,7 @@ def as_odt(**kwargs):
|
||||
'id': guid(),
|
||||
'url': "file://"+os.path.abspath(f"{parent_dir}/processed/{filename}{ext}"),
|
||||
'title': f"{filename}{ext}",
|
||||
'author': 'Unknown', # TODO: Find a better author
|
||||
'docAuthor': 'Unknown', # TODO: Find a better author
|
||||
'description': 'Unknown', # TODO: Find a better description
|
||||
'docSource': 'ODT Text file uploaded by the user.',
|
||||
'chunkSource': f"{filename}{ext}",
|
||||
|
@ -110,11 +110,8 @@ def as_mbox(**kwargs):
|
||||
"docAuthor": message["From"],
|
||||
"description": f"email from {message['From']} to {message['To']}",
|
||||
"docSource": "mbox file uploaded by the user.",
|
||||
"chunkSource": subject,
|
||||
"published": file_creation_time(fullpath),
|
||||
"sender": message["From"],
|
||||
"recipient": message["To"],
|
||||
"subject": subject,
|
||||
"date_sent": date_sent,
|
||||
"wordCount": len(content),
|
||||
"pageContent": content,
|
||||
"token_count_estimate": len(tokenize(content)),
|
||||
|
@ -23,6 +23,7 @@ def as_text(**kwargs):
|
||||
'title': f"{filename}{ext}",
|
||||
'docAuthor': 'Unknown', # TODO: Find a better author
|
||||
'description': 'Unknown', # TODO: Find a better description
|
||||
'docSource': 'a text file uploaded by the user.',
|
||||
'chunkSource': f"{filename}{ext}",
|
||||
'published': file_creation_time(fullpath),
|
||||
'wordCount': len(content),
|
||||
|
Loading…
Reference in New Issue
Block a user