mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2024-11-19 04:30:10 +01:00
689 links in citation (#715)
* Include links in citations; force the ChunkSource key to retain this information (old links will be unsupported)
* Show special icons depending on source
* Remove console log
* Reset server documents writeTo
This commit is contained in:
parent
f4b09a8c79
commit
d52f8aafd4
@ -29,7 +29,7 @@ async function scrapeGenericUrl(link) {
|
|||||||
docAuthor: "no author found",
|
docAuthor: "no author found",
|
||||||
description: "No description found.",
|
description: "No description found.",
|
||||||
docSource: "URL link uploaded by the user.",
|
docSource: "URL link uploaded by the user.",
|
||||||
chunkSource: slugify(link) + ".html",
|
chunkSource: `link://${link}`,
|
||||||
published: new Date().toLocaleString(),
|
published: new Date().toLocaleString(),
|
||||||
wordCount: content.split(" ").length,
|
wordCount: content.split(" ").length,
|
||||||
pageContent: content,
|
pageContent: content,
|
||||||
|
@ -58,7 +58,7 @@ async function asAudio({ fullFilePath = "", filename = "" }) {
|
|||||||
docAuthor: "no author found",
|
docAuthor: "no author found",
|
||||||
description: "No description found.",
|
description: "No description found.",
|
||||||
docSource: "pdf file uploaded by the user.",
|
docSource: "pdf file uploaded by the user.",
|
||||||
chunkSource: filename,
|
chunkSource: "",
|
||||||
published: createdDate(fullFilePath),
|
published: createdDate(fullFilePath),
|
||||||
wordCount: content.split(" ").length,
|
wordCount: content.split(" ").length,
|
||||||
pageContent: content,
|
pageContent: content,
|
||||||
|
@ -39,7 +39,7 @@ async function asDocX({ fullFilePath = "", filename = "" }) {
|
|||||||
docAuthor: "no author found",
|
docAuthor: "no author found",
|
||||||
description: "No description found.",
|
description: "No description found.",
|
||||||
docSource: "pdf file uploaded by the user.",
|
docSource: "pdf file uploaded by the user.",
|
||||||
chunkSource: filename,
|
chunkSource: "",
|
||||||
published: createdDate(fullFilePath),
|
published: createdDate(fullFilePath),
|
||||||
wordCount: content.split(" ").length,
|
wordCount: content.split(" ").length,
|
||||||
pageContent: content,
|
pageContent: content,
|
||||||
|
@ -49,7 +49,7 @@ async function asMbox({ fullFilePath = "", filename = "" }) {
|
|||||||
docAuthor: mail?.from?.text,
|
docAuthor: mail?.from?.text,
|
||||||
description: "No description found.",
|
description: "No description found.",
|
||||||
docSource: "Mbox message file uploaded by the user.",
|
docSource: "Mbox message file uploaded by the user.",
|
||||||
chunkSource: filename,
|
chunkSource: "",
|
||||||
published: createdDate(fullFilePath),
|
published: createdDate(fullFilePath),
|
||||||
wordCount: content.split(" ").length,
|
wordCount: content.split(" ").length,
|
||||||
pageContent: content,
|
pageContent: content,
|
||||||
|
@ -34,7 +34,7 @@ async function asOfficeMime({ fullFilePath = "", filename = "" }) {
|
|||||||
docAuthor: "no author found",
|
docAuthor: "no author found",
|
||||||
description: "No description found.",
|
description: "No description found.",
|
||||||
docSource: "Office file uploaded by the user.",
|
docSource: "Office file uploaded by the user.",
|
||||||
chunkSource: filename,
|
chunkSource: "",
|
||||||
published: createdDate(fullFilePath),
|
published: createdDate(fullFilePath),
|
||||||
wordCount: content.split(" ").length,
|
wordCount: content.split(" ").length,
|
||||||
pageContent: content,
|
pageContent: content,
|
||||||
|
@ -44,7 +44,7 @@ async function asPDF({ fullFilePath = "", filename = "" }) {
|
|||||||
docAuthor: docs[0]?.metadata?.pdf?.info?.Creator || "no author found",
|
docAuthor: docs[0]?.metadata?.pdf?.info?.Creator || "no author found",
|
||||||
description: "No description found.",
|
description: "No description found.",
|
||||||
docSource: "pdf file uploaded by the user.",
|
docSource: "pdf file uploaded by the user.",
|
||||||
chunkSource: filename,
|
chunkSource: "",
|
||||||
published: createdDate(fullFilePath),
|
published: createdDate(fullFilePath),
|
||||||
wordCount: content.split(" ").length,
|
wordCount: content.split(" ").length,
|
||||||
pageContent: content,
|
pageContent: content,
|
||||||
|
@ -34,7 +34,7 @@ async function asTxt({ fullFilePath = "", filename = "" }) {
|
|||||||
docAuthor: "Unknown", // TODO: Find a better author
|
docAuthor: "Unknown", // TODO: Find a better author
|
||||||
description: "Unknown", // TODO: Find a better description
|
description: "Unknown", // TODO: Find a better description
|
||||||
docSource: "a text file uploaded by the user.",
|
docSource: "a text file uploaded by the user.",
|
||||||
chunkSource: filename,
|
chunkSource: "",
|
||||||
published: createdDate(fullFilePath),
|
published: createdDate(fullFilePath),
|
||||||
wordCount: content.split(" ").length,
|
wordCount: content.split(" ").length,
|
||||||
pageContent: content,
|
pageContent: content,
|
||||||
|
@ -45,8 +45,8 @@ async function loadGithubRepo(args) {
|
|||||||
title: doc.metadata.source,
|
title: doc.metadata.source,
|
||||||
docAuthor: repo.author,
|
docAuthor: repo.author,
|
||||||
description: "No description found.",
|
description: "No description found.",
|
||||||
docSource: repo.repo,
|
docSource: doc.metadata.source,
|
||||||
chunkSource: doc.metadata.source,
|
chunkSource: `link://${doc.metadata.repository}/blob/${doc.metadata.branch}/${doc.metadata.source}`,
|
||||||
published: new Date().toLocaleString(),
|
published: new Date().toLocaleString(),
|
||||||
wordCount: doc.pageContent.split(" ").length,
|
wordCount: doc.pageContent.split(" ").length,
|
||||||
pageContent: doc.pageContent,
|
pageContent: doc.pageContent,
|
||||||
|
@ -68,7 +68,7 @@ async function loadYouTubeTranscript({ url }) {
|
|||||||
docAuthor: metadata.author,
|
docAuthor: metadata.author,
|
||||||
description: metadata.description,
|
description: metadata.description,
|
||||||
docSource: url,
|
docSource: url,
|
||||||
chunkSource: url,
|
chunkSource: `link://${url}`,
|
||||||
published: new Date().toLocaleString(),
|
published: new Date().toLocaleString(),
|
||||||
wordCount: content.split(" ").length,
|
wordCount: content.split(" ").length,
|
||||||
pageContent: content,
|
pageContent: content,
|
||||||
|
@ -1,22 +1,31 @@
|
|||||||
import { memo, useState } from "react";
|
import { memo, useState } from "react";
|
||||||
import { X } from "@phosphor-icons/react";
|
|
||||||
import { v4 } from "uuid";
|
import { v4 } from "uuid";
|
||||||
import { decode as HTMLDecode } from "he";
|
import { decode as HTMLDecode } from "he";
|
||||||
import { CaretRight, FileText } from "@phosphor-icons/react";
|
import { CaretRight, FileText } from "@phosphor-icons/react";
|
||||||
import truncate from "truncate";
|
import truncate from "truncate";
|
||||||
import ModalWrapper from "@/components/ModalWrapper";
|
import ModalWrapper from "@/components/ModalWrapper";
|
||||||
|
import { middleTruncate } from "@/utils/directories";
|
||||||
|
import {
|
||||||
|
ArrowSquareOut,
|
||||||
|
GithubLogo,
|
||||||
|
Link,
|
||||||
|
X,
|
||||||
|
YoutubeLogo,
|
||||||
|
} from "@phosphor-icons/react";
|
||||||
|
|
||||||
function combineLikeSources(sources) {
|
function combineLikeSources(sources) {
|
||||||
const combined = {};
|
const combined = {};
|
||||||
sources.forEach((source) => {
|
sources.forEach((source) => {
|
||||||
const { id, title, text } = source;
|
const { id, title, text, chunkSource = "" } = source;
|
||||||
if (combined.hasOwnProperty(title)) {
|
if (combined.hasOwnProperty(title)) {
|
||||||
combined[title].text += `\n\n ---- Chunk ${id || ""} ---- \n\n${text}`;
|
combined[title].text += `\n\n ---- Chunk ${id || ""} ---- \n\n${text}`;
|
||||||
combined[title].references += 1;
|
combined[title].references += 1;
|
||||||
|
combined[title].chunkSource = chunkSource;
|
||||||
} else {
|
} else {
|
||||||
combined[title] = { title, text, references: 1 };
|
combined[title] = { title, text, chunkSource, references: 1 };
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
return Object.values(combined);
|
return Object.values(combined);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -41,10 +50,10 @@ export default function Citations({ sources = [] }) {
|
|||||||
/>
|
/>
|
||||||
</button>
|
</button>
|
||||||
{open && (
|
{open && (
|
||||||
<div className="flex flex-wrap md:flex-row flex-col items-center gap-4 overflow-x-scroll mt-1 doc__source">
|
<div className="flex flex-wrap md:flex-row flex-col md:items-center gap-4 overflow-x-scroll mt-1 doc__source">
|
||||||
{combineLikeSources(sources).map((source) => (
|
{combineLikeSources(sources).map((source) => (
|
||||||
<Citation
|
<Citation
|
||||||
key={source?.id || v4()}
|
key={v4()}
|
||||||
source={source}
|
source={source}
|
||||||
onClick={() => setSelectedSource(source)}
|
onClick={() => setSelectedSource(source)}
|
||||||
/>
|
/>
|
||||||
@ -64,16 +73,18 @@ export default function Citations({ sources = [] }) {
|
|||||||
const Citation = memo(({ source, onClick }) => {
|
const Citation = memo(({ source, onClick }) => {
|
||||||
const { title } = source;
|
const { title } = source;
|
||||||
if (!title) return null;
|
if (!title) return null;
|
||||||
|
const chunkSourceInfo = parseChunkSource(source);
|
||||||
const truncatedTitle = truncateMiddle(title);
|
const truncatedTitle = chunkSourceInfo?.text ?? middleTruncate(title, 25);
|
||||||
|
const CitationIcon = ICONS.hasOwnProperty(chunkSourceInfo?.icon)
|
||||||
|
? ICONS[chunkSourceInfo.icon]
|
||||||
|
: ICONS.file;
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<div
|
<div
|
||||||
className="flex flex-row justify-center items-center cursor-pointer text-sky-400"
|
className="w-fit flex flex-row justify-center items-center cursor-pointer text-sky-400"
|
||||||
style={{ width: "24%" }}
|
|
||||||
onClick={onClick}
|
onClick={onClick}
|
||||||
>
|
>
|
||||||
<FileText className="w-6 h-6" weight="bold" />
|
<CitationIcon className="w-6 h-6" weight="bold" />
|
||||||
<p className="text-sm font-medium whitespace-nowrap">{truncatedTitle}</p>
|
<p className="text-sm font-medium whitespace-nowrap">{truncatedTitle}</p>
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
@ -99,14 +110,31 @@ function SkeletonLine() {
|
|||||||
|
|
||||||
function CitationDetailModal({ source, onClose }) {
|
function CitationDetailModal({ source, onClose }) {
|
||||||
const { references, title, text } = source;
|
const { references, title, text } = source;
|
||||||
|
const { isUrl, text: webpageUrl, href: linkTo } = parseChunkSource(source);
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<ModalWrapper isOpen={source}>
|
<ModalWrapper isOpen={source}>
|
||||||
<div className="w-full max-w-2xl bg-main-gradient rounded-lg shadow border border-white/10 overflow-hidden">
|
<div className="w-full max-w-2xl bg-main-gradient rounded-lg shadow border border-white/10 overflow-hidden">
|
||||||
<div className="relative p-6 border-b rounded-t border-gray-500/50">
|
<div className="relative p-6 border-b rounded-t border-gray-500/50">
|
||||||
<h3 className="text-xl font-semibold text-white overflow-hidden overflow-ellipsis whitespace-nowrap">
|
<div className="w-full flex gap-x-2 items-center">
|
||||||
{truncate(title, 45)}
|
{isUrl ? (
|
||||||
</h3>
|
<a
|
||||||
|
href={linkTo}
|
||||||
|
target="_blank"
|
||||||
|
rel="noreferrer"
|
||||||
|
className="text-xl font-semibold text-white overflow-hidden overflow-ellipsis whitespace-nowrap hover:underline hover:text-blue-300 flex items-center gap-x-1"
|
||||||
|
>
|
||||||
|
<h3 className="flex items-center gap-x-1">
|
||||||
|
{webpageUrl}
|
||||||
|
<ArrowSquareOut />
|
||||||
|
</h3>
|
||||||
|
</a>
|
||||||
|
) : (
|
||||||
|
<h3 className="text-xl font-semibold text-white overflow-hidden overflow-ellipsis whitespace-nowrap">
|
||||||
|
{truncate(title, 45)}
|
||||||
|
</h3>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
{references > 1 && (
|
{references > 1 && (
|
||||||
<p className="text-xs text-gray-400 mt-2">
|
<p className="text-xs text-gray-400 mt-2">
|
||||||
Referenced {references} times.
|
Referenced {references} times.
|
||||||
@ -141,11 +169,47 @@ function CitationDetailModal({ source, onClose }) {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
function truncateMiddle(title) {
|
const ICONS = {
|
||||||
if (title.length <= 18) return title;
|
file: FileText,
|
||||||
|
link: Link,
|
||||||
|
youtube: YoutubeLogo,
|
||||||
|
github: GithubLogo,
|
||||||
|
};
|
||||||
|
|
||||||
const startStr = title.substr(0, 9);
|
// Show the correct title and/or display text for citations
|
||||||
const endStr = title.substr(-9);
|
// which contain valid outbound links that can be clicked by the
|
||||||
|
// user when viewing a citation. Optionally allows various icons
|
||||||
|
// to show distinct types of sources.
|
||||||
|
function parseChunkSource({ title = "", chunkSource = "" }) {
|
||||||
|
const nullResponse = {
|
||||||
|
isUrl: false,
|
||||||
|
text: null,
|
||||||
|
href: null,
|
||||||
|
icon: "file",
|
||||||
|
};
|
||||||
|
|
||||||
return `${startStr}...${endStr}`;
|
if (!chunkSource.startsWith("link://")) return nullResponse;
|
||||||
|
try {
|
||||||
|
const url = new URL(chunkSource.split("link://")[1]);
|
||||||
|
let text = url.host + url.pathname;
|
||||||
|
let icon = "link";
|
||||||
|
|
||||||
|
if (url.host.includes("youtube.com")) {
|
||||||
|
text = title;
|
||||||
|
icon = "youtube";
|
||||||
|
}
|
||||||
|
|
||||||
|
if (url.host.includes("github.com")) {
|
||||||
|
text = title;
|
||||||
|
icon = "github";
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
isUrl: true,
|
||||||
|
href: url.toString(),
|
||||||
|
text,
|
||||||
|
icon,
|
||||||
|
};
|
||||||
|
} catch {}
|
||||||
|
return nullResponse;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user