mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2024-10-05 18:30:09 +02:00
Add id to all metadata to prevent errors in frontend document picker (#378)
Add id to all metadata to prevent errors in frontend document picker. Co-authored-by: timothycarambat <rambat1010@gmail.com>
This commit is contained in:
parent
73f342eb19
commit
f40309cfdb
@ -1,6 +1,7 @@
|
|||||||
import os, json, requests, tempfile
|
import os, json, requests, tempfile
|
||||||
from requests_html import HTMLSession
|
from requests_html import HTMLSession
|
||||||
from langchain.document_loaders import UnstructuredHTMLLoader
|
from langchain.document_loaders import UnstructuredHTMLLoader
|
||||||
|
from .watch.utils import guid
|
||||||
|
|
||||||
def fetch_all_publications(subdomain):
|
def fetch_all_publications(subdomain):
|
||||||
file_path = f"./outputs/substack-logs/substack-{subdomain}.json"
|
file_path = f"./outputs/substack-logs/substack-{subdomain}.json"
|
||||||
@ -75,6 +76,7 @@ def get_content(article_link):
|
|||||||
|
|
||||||
def append_meta(publication, text):
|
def append_meta(publication, text):
|
||||||
meta = {
|
meta = {
|
||||||
|
'id': guid(),
|
||||||
'url': publication.get('canonical_url'),
|
'url': publication.get('canonical_url'),
|
||||||
'thumbnail': publication.get('cover_image'),
|
'thumbnail': publication.get('cover_image'),
|
||||||
'title': publication.get('title'),
|
'title': publication.get('title'),
|
||||||
|
@ -7,6 +7,7 @@ import os, time
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
import json
|
import json
|
||||||
from .utils import tokenize, ada_v2_cost
|
from .utils import tokenize, ada_v2_cost
|
||||||
|
from .watch.utils import guid
|
||||||
|
|
||||||
def twitter():
|
def twitter():
|
||||||
#get user and number of tweets to read
|
#get user and number of tweets to read
|
||||||
@ -92,6 +93,7 @@ def twitter_meta(row, metadata_only = False):
|
|||||||
url = f"http://twitter.com/anyuser/status/{row['id']}"
|
url = f"http://twitter.com/anyuser/status/{row['id']}"
|
||||||
title = f"Tweet {row['id']}"
|
title = f"Tweet {row['id']}"
|
||||||
meta = {
|
meta = {
|
||||||
|
'id': guid(),
|
||||||
'url': url,
|
'url': url,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': 'Tweet from ' + row["Screen Name"],
|
'description': 'Tweet from ' + row["Screen Name"],
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
import json, requests, os, re
|
import json, requests, os, re
|
||||||
from slugify import slugify
|
from slugify import slugify
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
from .watch.utils import guid
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
def is_yt_short(videoId):
|
def is_yt_short(videoId):
|
||||||
@ -34,6 +35,7 @@ def clean_text(text):
|
|||||||
|
|
||||||
def append_meta(video, duration, text):
|
def append_meta(video, duration, text):
|
||||||
meta = {
|
meta = {
|
||||||
|
'id': guid(),
|
||||||
'youtubeURL': f"https://youtube.com/watch?v={video.get('id')}",
|
'youtubeURL': f"https://youtube.com/watch?v={video.get('id')}",
|
||||||
'thumbnail': video.get('thumbnail'),
|
'thumbnail': video.get('thumbnail'),
|
||||||
'description': video.get('description'),
|
'description': video.get('description'),
|
||||||
|
Loading…
Reference in New Issue
Block a user