Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

feat: add extension point for excerpt generation #6348

Merged
merged 3 commits into from
Jul 31, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions api/src/main/java/run/halo/app/content/ExcerptGenerator.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package run.halo.app.content;

import java.util.Set;
import lombok.Data;
import lombok.experimental.Accessors;
import org.pf4j.ExtensionPoint;
import reactor.core.publisher.Mono;

/**
 * Extension point for generating an excerpt from a piece of content.
 *
 * <p>Implementations receive a {@link Context} describing the content and return the excerpt
 * reactively.
 */
public interface ExcerptGenerator extends ExtensionPoint {

/**
 * Generates an excerpt for the content described by the given context.
 *
 * @param context the content and generation hints to build the excerpt from
 * @return a {@link Mono} emitting the generated excerpt
 */
Mono<String> generate(ExcerptGenerator.Context context);

/**
 * Input passed to {@link #generate(Context)}.
 *
 * <p>Lombok generates the accessors; because of {@code @Accessors(chain = true)} each setter
 * returns this instance so calls can be chained.
 */
@Data
@Accessors(chain = true)
class Context {
/**
 * Raw content.
 * NOTE(review): presumably the unrendered source as written in the editor - confirm.
 */
private String raw;
/**
 * HTML content.
 */
private String content;

/**
 * Type of {@link #raw}.
 * NOTE(review): presumably a format name such as "markdown" - confirm against callers.
 */
private String rawType;
/**
 * Keywords in the content that help make the excerpt generation more accurate.
 */
private Set<String> keywords;
/**
 * Max length of the generated excerpt.
 */
private int maxLength;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,18 @@
import java.util.Optional;
import java.util.Set;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.BooleanUtils;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.springframework.context.ApplicationEvent;
import org.springframework.context.ApplicationEventPublisher;
import org.springframework.data.domain.Sort;
import org.springframework.stereotype.Component;
import reactor.core.publisher.Mono;
import run.halo.app.content.CategoryService;
import run.halo.app.content.ContentWrapper;
import run.halo.app.content.ExcerptGenerator;
import run.halo.app.content.NotificationReasonConst;
import run.halo.app.content.PostService;
import run.halo.app.content.comment.CommentService;
Expand Down Expand Up @@ -62,6 +65,7 @@
import run.halo.app.metrics.CounterService;
import run.halo.app.metrics.MeterUtils;
import run.halo.app.notification.NotificationCenter;
import run.halo.app.plugin.extensionpoint.ExtensionGetter;

/**
* <p>Reconciler for {@link Post}.</p>
Expand All @@ -75,6 +79,7 @@
* @author guqing
* @since 2.0.0
*/
@Slf4j
@AllArgsConstructor
@Component
public class PostReconciler implements Reconciler<Reconciler.Request> {
Expand All @@ -85,6 +90,7 @@ public class PostReconciler implements Reconciler<Reconciler.Request> {
private final CounterService counterService;
private final CommentService commentService;
private final CategoryService categoryService;
private final ExtensionGetter extensionGetter;

private final ApplicationEventPublisher eventPublisher;
private final NotificationCenter notificationCenter;
Expand Down Expand Up @@ -155,14 +161,7 @@ public Result reconcile(Request request) {
}
var isAutoGenerate = defaultIfNull(excerpt.getAutoGenerate(), true);
if (isAutoGenerate) {
Optional<ContentWrapper> contentWrapper =
postService.getContent(post.getSpec().getReleaseSnapshot(),
post.getSpec().getBaseSnapshot())
.blockOptional();
if (contentWrapper.isPresent()) {
String contentRevised = contentWrapper.get().getContent();
status.setExcerpt(getExcerpt(contentRevised));
}
status.setExcerpt(getExcerpt(post));
} else {
status.setExcerpt(excerpt.getRaw());
}
Expand Down Expand Up @@ -375,11 +374,41 @@ private void cleanUpResources(Post post) {
.block();
}

private String getExcerpt(String htmlContent) {
String shortHtmlContent = StringUtils.substring(htmlContent, 0, 500);
String text = Jsoup.parse(shortHtmlContent).text();
// TODO The default capture 150 words as excerpt
return StringUtils.substring(text, 0, 150);
/**
 * Generates the excerpt for the given post.
 *
 * <p>The content is loaded through {@code postService.getContent} using the post's release
 * and base snapshots. The excerpt is then produced by the enabled {@link ExcerptGenerator}
 * extension, falling back to {@link DefaultExcerptGenerator} when none is enabled.
 *
 * @param post the post to generate the excerpt for
 * @return the generated excerpt, or an empty string when no content is available, the
 *     generator emits nothing, or generation fails (the failure is logged)
 */
private String getExcerpt(Post post) {
    Optional<ContentWrapper> contentWrapper =
        postService.getContent(post.getSpec().getReleaseSnapshot(),
                post.getSpec().getBaseSnapshot())
            .blockOptional();
    if (contentWrapper.isEmpty()) {
        return StringUtils.EMPTY;
    }
    var content = contentWrapper.get();
    var tags = post.getSpec().getTags();
    var context = new ExcerptGenerator.Context()
        .setRaw(content.getRaw())
        .setContent(content.getContent())
        // Must be setRawType: a second setRaw call would overwrite the raw content with the
        // raw type and leave rawType unset for the generator.
        .setRawType(content.getRawType())
        // Tags may be absent; hand the generator an empty set rather than null.
        .setKeywords(tags == null ? Set.of() : new HashSet<>(tags))
        .setMaxLength(160);
    return extensionGetter.getEnabledExtension(ExcerptGenerator.class)
        .defaultIfEmpty(new DefaultExcerptGenerator())
        .flatMap(generator -> generator.generate(context))
        // Best effort: a failing generator must not fail the whole reconciliation.
        .onErrorResume(Throwable.class, e -> {
            log.error("Failed to generate excerpt for post [{}]",
                post.getMetadata().getName(), e);
            return Mono.empty();
        })
        .blockOptional()
        .orElse(StringUtils.EMPTY);
}
guqing marked this conversation as resolved.
Show resolved Hide resolved

/**
 * Fallback {@link ExcerptGenerator} used when no generator extension is enabled.
 *
 * <p>It takes the first 500 characters of the HTML content, extracts the plain text with
 * Jsoup and keeps at most the first 150 characters of that text. The context's
 * {@code maxLength} is not consulted.
 */
static class DefaultExcerptGenerator implements ExcerptGenerator {
    @Override
    public Mono<String> generate(Context context) {
        // Only parse a prefix of the HTML so large documents stay cheap to process.
        var htmlPrefix = StringUtils.substring(context.getContent(), 0, 500);
        var plainText = Jsoup.parse(htmlPrefix).text();
        var excerpt = StringUtils.substring(plainText, 0, 150);
        return Mono.just(excerpt);
    }
}

List<Snapshot> listSnapshots(Ref ref) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
import org.springframework.data.domain.Sort;
import org.springframework.stereotype.Component;
import org.springframework.util.Assert;
import reactor.core.publisher.Mono;
import run.halo.app.content.ContentWrapper;
import run.halo.app.content.ExcerptGenerator;
import run.halo.app.content.NotificationReasonConst;
import run.halo.app.content.SinglePageService;
import run.halo.app.content.comment.CommentService;
Expand All @@ -43,6 +46,7 @@
import run.halo.app.metrics.CounterService;
import run.halo.app.metrics.MeterUtils;
import run.halo.app.notification.NotificationCenter;
import run.halo.app.plugin.extensionpoint.ExtensionGetter;

/**
* <p>Reconciler for {@link SinglePage}.</p>
Expand All @@ -65,6 +69,7 @@ public class SinglePageReconciler implements Reconciler<Reconciler.Request> {
private final SinglePageService singlePageService;
private final CounterService counterService;
private final CommentService commentService;
private final ExtensionGetter extensionGetter;

private final ExternalUrlSupplier externalUrlSupplier;

Expand Down Expand Up @@ -318,12 +323,7 @@ private void reconcileStatus(String name) {
}

if (excerpt.getAutoGenerate()) {
singlePageService.getContent(spec.getHeadSnapshot(), spec.getBaseSnapshot())
.blockOptional()
.ifPresent(content -> {
String contentRevised = content.getContent();
status.setExcerpt(getExcerpt(contentRevised));
});
status.setExcerpt(getExcerpt(singlePage));
} else {
status.setExcerpt(excerpt.getRaw());
}
Expand Down Expand Up @@ -363,11 +363,40 @@ private void reconcileStatus(String name) {
});
}

private String getExcerpt(String htmlContent) {
String shortHtmlContent = StringUtils.substring(htmlContent, 0, 500);
String text = Jsoup.parse(shortHtmlContent).text();
// TODO The default capture 150 words as excerpt
return StringUtils.substring(text, 0, 150);
private String getExcerpt(SinglePage singlePage) {
Optional<ContentWrapper> contentWrapper =
singlePageService.getContent(singlePage.getSpec().getReleaseSnapshot(),
singlePage.getSpec().getBaseSnapshot())
.blockOptional();
if (contentWrapper.isEmpty()) {
return StringUtils.EMPTY;
}
var content = contentWrapper.get();
var context = new ExcerptGenerator.Context()
.setRaw(content.getRaw())
guqing marked this conversation as resolved.
Show resolved Hide resolved
.setContent(content.getContent())
.setRaw(content.getRawType())
.setKeywords(Set.of())
.setMaxLength(160);
return extensionGetter.getEnabledExtension(ExcerptGenerator.class)
.defaultIfEmpty(new DefaultExcerptGenerator())
.flatMap(generator -> generator.generate(context))
.onErrorResume(Throwable.class, e -> {
log.error("Failed to generate excerpt for single page [{}]",
singlePage.getMetadata().getName(), e);
return Mono.empty();
})
.blockOptional()
.orElse(StringUtils.EMPTY);
}

/**
 * Built-in {@link ExcerptGenerator} applied when no generator extension is enabled.
 *
 * <p>The excerpt is the first 150 characters of the plain text that Jsoup extracts from the
 * first 500 characters of the HTML content. The context's {@code maxLength} is not consulted.
 */
static class DefaultExcerptGenerator implements ExcerptGenerator {
    @Override
    public Mono<String> generate(Context context) {
        // Truncate the HTML before parsing, then truncate the extracted text.
        final String truncatedHtml = StringUtils.substring(context.getContent(), 0, 500);
        final String excerpt =
            StringUtils.substring(Jsoup.parse(truncatedHtml).text(), 0, 150);
        return Mono.just(excerpt);
    }
}

private boolean isDeleted(SinglePage singlePage) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,4 +87,13 @@ spec:
displayName: 页脚标签内容处理器
type: MULTI_INSTANCE
description: "提供用于扩展 <halo:footer/> 标签内容的扩展方式。"

---
# Extension point definition that registers run.halo.app.content.ExcerptGenerator
# under the name "excerpt-generator".
apiVersion: plugin.halo.run/v1alpha1
kind: ExtensionPointDefinition
metadata:
name: excerpt-generator
spec:
className: run.halo.app.content.ExcerptGenerator
# displayName in English: "Excerpt generator"
displayName: 摘要生成器
type: SINGLETON
# description in English: "Provides a way to extend how excerpts are generated
# automatically, e.g. extracting them with an algorithm or generating them with AI."
description: "提供自动生成摘要的方式扩展,如使用算法提取或使用 AI 生成。"
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.springframework.context.ApplicationEventPublisher;
import reactor.core.publisher.Mono;
import run.halo.app.content.ContentWrapper;
import run.halo.app.content.ExcerptGenerator;
import run.halo.app.content.NotificationReasonConst;
import run.halo.app.content.PostService;
import run.halo.app.content.TestPost;
Expand All @@ -36,6 +37,7 @@
import run.halo.app.extension.ExtensionClient;
import run.halo.app.extension.controller.Reconciler;
import run.halo.app.notification.NotificationCenter;
import run.halo.app.plugin.extensionpoint.ExtensionGetter;

/**
* Tests for {@link PostReconciler}.
Expand All @@ -61,6 +63,9 @@ class PostReconcilerTest {
@Mock
private NotificationCenter notificationCenter;

@Mock
private ExtensionGetter extensionGetter;

@InjectMocks
private PostReconciler postReconciler;

Expand Down Expand Up @@ -96,7 +101,7 @@ void reconcile() {
verify(postPermalinkPolicy, times(1)).permalink(any());

Post value = captor.getValue();
assertThat(value.getStatus().getExcerpt()).isNull();
assertThat(value.getStatus().getExcerpt()).isEmpty();
assertThat(value.getStatus().getContributors()).isEqualTo(List.of("guqing", "zhangsan"));
}

Expand Down Expand Up @@ -126,6 +131,9 @@ void reconcileExcerpt() {
Snapshot snapshotV1 = TestPost.snapshotV1();
snapshotV1.getSpec().setContributors(Set.of("guqing"));

when(extensionGetter.getEnabledExtension(eq(ExcerptGenerator.class)))
.thenReturn(Mono.empty());

when(client.listAll(eq(Snapshot.class), any(), any()))
.thenReturn(List.of(snapshotV1, snapshotV2));

Expand Down Expand Up @@ -162,6 +170,9 @@ void reconcileLastModifyTimeWhenPostIsPublished() {
when(client.fetch(eq(Snapshot.class), eq(post.getSpec().getReleaseSnapshot())))
.thenReturn(Optional.of(snapshotV2));

when(extensionGetter.getEnabledExtension(eq(ExcerptGenerator.class)))
.thenReturn(Mono.empty());

when(client.listAll(eq(Snapshot.class), any(), any()))
.thenReturn(List.of());

Expand Down Expand Up @@ -191,6 +202,9 @@ void reconcileLastModifyTimeWhenPostIsNotPublished() {
.rawType("markdown")
.build()));

when(extensionGetter.getEnabledExtension(eq(ExcerptGenerator.class)))
.thenReturn(Mono.empty());

when(client.listAll(eq(Snapshot.class), any(), any()))
.thenReturn(List.of());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.springframework.context.ApplicationContext;
import reactor.core.publisher.Mono;
import run.halo.app.content.ContentWrapper;
import run.halo.app.content.ExcerptGenerator;
import run.halo.app.content.NotificationReasonConst;
import run.halo.app.content.SinglePageService;
import run.halo.app.content.TestPost;
Expand All @@ -39,6 +40,7 @@
import run.halo.app.infra.ExternalUrlSupplier;
import run.halo.app.metrics.CounterService;
import run.halo.app.notification.NotificationCenter;
import run.halo.app.plugin.extensionpoint.ExtensionGetter;

/**
* Tests for {@link SinglePageReconciler}.
Expand Down Expand Up @@ -66,6 +68,9 @@ class SinglePageReconcilerTest {
@Mock
NotificationCenter notificationCenter;

@Mock
ExtensionGetter extensionGetter;

@InjectMocks
private SinglePageReconciler singlePageReconciler;

Expand All @@ -79,9 +84,10 @@ void reconcile() {
String name = "page-A";
SinglePage page = pageV1();
page.getSpec().setHeadSnapshot("page-A-head-snapshot");
page.getSpec().setReleaseSnapshot(page.getSpec().getHeadSnapshot());
when(client.fetch(eq(SinglePage.class), eq(name)))
.thenReturn(Optional.of(page));
when(singlePageService.getContent(eq(page.getSpec().getHeadSnapshot()),
when(singlePageService.getContent(eq(page.getSpec().getReleaseSnapshot()),
eq(page.getSpec().getBaseSnapshot())))
.thenReturn(Mono.just(ContentWrapper.builder()
.snapshotName(page.getSpec().getHeadSnapshot())
Expand All @@ -99,6 +105,9 @@ void reconcile() {
.thenReturn(List.of(snapshotV1, snapshotV2));
when(externalUrlSupplier.get()).thenReturn(URI.create(""));

when(extensionGetter.getEnabledExtension(eq(ExcerptGenerator.class)))
.thenReturn(Mono.empty());

ArgumentCaptor<SinglePage> captor = ArgumentCaptor.forClass(SinglePage.class);
singlePageReconciler.reconcile(new Reconciler.Request(name));

Expand Down Expand Up @@ -141,7 +150,7 @@ void reconcileLastModifyTimeWhenPageIsPublished() {
page.getSpec().setReleaseSnapshot("page-fake-released-snapshot");
when(client.fetch(eq(SinglePage.class), eq(name)))
.thenReturn(Optional.of(page));
when(singlePageService.getContent(eq(page.getSpec().getHeadSnapshot()),
when(singlePageService.getContent(eq(page.getSpec().getReleaseSnapshot()),
eq(page.getSpec().getBaseSnapshot())))
.thenReturn(Mono.just(ContentWrapper.builder()
.snapshotName(page.getSpec().getHeadSnapshot())
Expand All @@ -156,6 +165,9 @@ void reconcileLastModifyTimeWhenPageIsPublished() {
when(client.fetch(eq(Snapshot.class), eq(page.getSpec().getReleaseSnapshot())))
.thenReturn(Optional.of(snapshotV2));

when(extensionGetter.getEnabledExtension(eq(ExcerptGenerator.class)))
.thenReturn(Mono.empty());

when(client.listAll(eq(Snapshot.class), any(), any()))
.thenReturn(List.of());

Expand All @@ -176,7 +188,7 @@ void reconcileLastModifyTimeWhenPageIsNotPublished() {
page.getSpec().setPublish(false);
when(client.fetch(eq(SinglePage.class), eq(name)))
.thenReturn(Optional.of(page));
when(singlePageService.getContent(eq(page.getSpec().getHeadSnapshot()),
when(singlePageService.getContent(eq(page.getSpec().getReleaseSnapshot()),
eq(page.getSpec().getBaseSnapshot())))
.thenReturn(Mono.just(ContentWrapper.builder()
.snapshotName(page.getSpec().getHeadSnapshot())
Expand All @@ -186,6 +198,9 @@ void reconcileLastModifyTimeWhenPageIsNotPublished() {
.build())
);

when(extensionGetter.getEnabledExtension(eq(ExcerptGenerator.class)))
.thenReturn(Mono.empty());

when(client.listAll(eq(Snapshot.class), any(), any()))
.thenReturn(List.of());

Expand Down
Loading