Add id to all metadata to prevent errors in frontend document picker (#378)

Add id to all metadata to prevent errors in frontend document picker

Co-authored-by: timothycarambat <rambat1010@gmail.com>
This commit is contained in:
Sean Hatfield 2023-11-16 14:36:26 -08:00 committed by GitHub
parent 73f342eb19
commit f40309cfdb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 20 additions and 14 deletions

View File

@ -1,6 +1,7 @@
import os, json, requests, tempfile
from requests_html import HTMLSession
from langchain.document_loaders import UnstructuredHTMLLoader
from .watch.utils import guid
def fetch_all_publications(subdomain):
file_path = f"./outputs/substack-logs/substack-{subdomain}.json"
@ -75,6 +76,7 @@ def get_content(article_link):
def append_meta(publication, text):
meta = {
'id': guid(),
'url': publication.get('canonical_url'),
'thumbnail': publication.get('cover_image'),
'title': publication.get('title'),

View File

@ -7,6 +7,7 @@ import os, time
import pandas as pd
import json
from .utils import tokenize, ada_v2_cost
from .watch.utils import guid
def twitter():
#get user and number of tweets to read
@ -92,6 +93,7 @@ def twitter_meta(row, metadata_only = False):
url = f"http://twitter.com/anyuser/status/{row['id']}"
title = f"Tweet {row['id']}"
meta = {
'id': guid(),
'url': url,
'title': title,
'description': 'Tweet from ' + row["Screen Name"],

View File

@ -1,6 +1,7 @@
import json, requests, os, re
from slugify import slugify
from dotenv import load_dotenv
from .watch.utils import guid
load_dotenv()
def is_yt_short(videoId):
@ -34,6 +35,7 @@ def clean_text(text):
def append_meta(video, duration, text):
meta = {
'id': guid(),
'youtubeURL': f"https://youtube.com/watch?v={video.get('id')}",
'thumbnail': video.get('thumbnail'),
'description': video.get('description'),