diff --git a/collector/scripts/watch/convert/as_docx.py b/collector/scripts/watch/convert/as_docx.py index 33aaaaaeb..b37786179 100644 --- a/collector/scripts/watch/convert/as_docx.py +++ b/collector/scripts/watch/convert/as_docx.py @@ -61,7 +61,7 @@ def as_odt(**kwargs): 'id': guid(), 'url': "file://"+os.path.abspath(f"{parent_dir}/processed/{filename}{ext}"), 'title': f"{filename}{ext}", - 'author': 'Unknown', # TODO: Find a better author + 'docAuthor': 'Unknown', # TODO: Find a better author 'description': 'Unknown', # TODO: Find a better bescription 'docSource': 'ODT Text file uploaded by the user.', 'chunkSource': f"{filename}{ext}", diff --git a/collector/scripts/watch/convert/as_mbox.py b/collector/scripts/watch/convert/as_mbox.py index f5a645eaa..2d7c08e63 100644 --- a/collector/scripts/watch/convert/as_mbox.py +++ b/collector/scripts/watch/convert/as_mbox.py @@ -110,11 +110,8 @@ def as_mbox(**kwargs): "docAuthor": message["From"], "description": f"email from {message['From']} to {message['To']}", "docSource": "mbox file uploaded by the user.", + "chunkSource": subject, "published": file_creation_time(fullpath), - "sender": message["From"], - "recipient": message["To"], - "subject": subject, - "date_sent": date_sent, "wordCount": len(content), "pageContent": content, "token_count_estimate": len(tokenize(content)), diff --git a/collector/scripts/watch/convert/as_text.py b/collector/scripts/watch/convert/as_text.py index e6ad85140..1b897874b 100644 --- a/collector/scripts/watch/convert/as_text.py +++ b/collector/scripts/watch/convert/as_text.py @@ -23,6 +23,7 @@ def as_text(**kwargs): 'title': f"{filename}{ext}", 'docAuthor': 'Unknown', # TODO: Find a better author 'description': 'Unknown', # TODO: Find a better description + 'docSource': 'a text file uploaded by the user.', 'chunkSource': f"{filename}{ext}", 'published': file_creation_time(fullpath), 'wordCount': len(content),