Skip to content

Commit

Permalink
Ensure lazy loaded images are grabbed in reader mode (#366)
Browse files Browse the repository at this point in the history
* Extract parsed item local handling

* Ensure lazy loaded images are grabbed in reader mode

- Remove extra call to Jsoup parse
  • Loading branch information
jocmp authored Sep 17, 2024
1 parent 10b7d64 commit 7ca47eb
Show file tree
Hide file tree
Showing 8 changed files with 174 additions and 104 deletions.
37 changes: 10 additions & 27 deletions capy/src/main/java/com/jocmp/capy/accounts/LocalAccountDelegate.kt
Original file line number Diff line number Diff line change
Expand Up @@ -153,20 +153,24 @@ class LocalAccountDelegate(

items.forEach { item ->
val publishedAt = published(item.pubDate, fallback = updatedAt).toEpochSecond()
val url = cleanedURL(item.link, feed.siteURL)
val parsedItem = ParsedItem(
item,
siteURL = feed.siteURL
)

val withinCutoff = cutoffDate == null || publishedAt > cutoffDate.toEpochSecond()

if (url != null && withinCutoff) {
if (parsedItem.url != null && withinCutoff) {
database.articlesQueries.create(
id = item.link!!,
feed_id = feed.id,
title = Jsoup.parse(item.title.orEmpty()).text(),
title = parsedItem.title,
author = item.author,
content_html = item.contentHTML,
url = url.toString(),
content_html = parsedItem.contentHTML,
url = parsedItem.url,
summary = item.summary,
extracted_content_url = null,
image_url = cleanedURL(item.image, siteURL = feed.siteURL)?.toString(),
image_url = parsedItem.imageURL,
published_at = publishedAt,
)

Expand All @@ -177,7 +181,6 @@ class LocalAccountDelegate(
)
}
}

}
}

Expand Down Expand Up @@ -237,23 +240,3 @@ internal val RssItem.summary: String?

return Jsoup.parse(it).text()
}

/**
 * Normalizes a feed-supplied link into an absolute [URL].
 *
 * Absolute links are converted as-is; relative links are resolved against [siteURL].
 *
 * @param inputURL raw link taken from the feed; may be null or blank.
 * @param siteURL base used to resolve relative links.
 * @return the absolute URL, or `null` when [inputURL] is null or blank, is not a
 * syntactically valid URI, or cannot be turned into an absolute URL.
 */
internal fun cleanedURL(inputURL: String?, siteURL: String): URL? {
    val url = inputURL?.takeIf { it.isNotBlank() } ?: return null

    return try {
        val uri = URI(url)

        when {
            uri.isAbsolute -> uri.toURL()
            // A blank base has no scheme, so resolving against it can never be absolute.
            siteURL.isBlank() -> null
            else -> URI(siteURL).resolve(uri).toURL()
        }
    } catch (e: java.net.URISyntaxException) {
        // Either the link or the base is not a valid URI.
        null
    } catch (e: java.net.MalformedURLException) {
        // The scheme has no registered URL protocol handler.
        null
    } catch (e: IllegalArgumentException) {
        // toURL() was called on a URI that is still not absolute after resolving.
        null
    }
}
67 changes: 56 additions & 11 deletions capy/src/main/java/com/jocmp/capy/accounts/ParsedItem.kt
Original file line number Diff line number Diff line change
@@ -1,13 +1,58 @@
package com.jocmp.capy.accounts

import java.time.OffsetDateTime

/**
 * Plain value holder for the fields of a parsed feed item.
 *
 * [id] and [publishedAt] must always be passed explicitly ([publishedAt] may be
 * null); every other property is optional and defaults to null.
 */
internal data class ParsedItem(
val id: String,
val title: String? = null,
val contentHTML: String? = null,
val url: String? = null,
val summary: String? = null,
val imageURL: String? = null,
val publishedAt: OffsetDateTime?
)
import com.prof18.rssparser.model.RssItem
import org.jsoup.Jsoup
import java.net.MalformedURLException
import java.net.URI
import java.net.URISyntaxException
import java.net.URL

/**
 * Read-only view over an [RssItem] that exposes cleaned-up values for its
 * title, content, summary and links.
 *
 * @param item feed entry being wrapped.
 * @param siteURL base used to resolve relative links. When it is null or blank,
 * relative links cannot be resolved and the URL properties are null.
 */
internal class ParsedItem(private val item: RssItem, private val siteURL: String?) {
    /** The item's content, falling back to its description; null when both are blank. */
    val contentHTML: String?
        get() = item.content.orEmpty()
            .ifBlank { item.description.orEmpty() }
            .takeIf { it.isNotBlank() }

    /** The item's description reduced to its text by Jsoup; null when missing or blank. */
    val summary: String?
        get() = item.description
            ?.takeIf { it.isNotBlank() }
            ?.let { Jsoup.parse(it).text() }

    /** The item's title reduced to its text by Jsoup; empty when the item has no title. */
    val title: String
        get() = Jsoup.parse(item.title.orEmpty()).text()

    /** Absolute form of the item's link, computed once at construction; null when unusable. */
    val url: String? = cleanedURL(item.link)?.toString()

    /** Absolute form of the item's image link; null when missing or unusable. */
    val imageURL: String?
        get() = cleanedURL(item.image)?.toString()

    /**
     * Converts [inputURL] into an absolute [URL], resolving relative values
     * against [siteURL].
     *
     * @return null when [inputURL] is null or blank, is not a valid URI, or
     * cannot be made absolute.
     */
    private fun cleanedURL(inputURL: String?): URL? {
        val url = inputURL?.takeIf { it.isNotBlank() } ?: return null

        return try {
            val uri = URI(url)

            when {
                uri.isAbsolute -> uri.toURL()
                // Without a usable base, a relative link can never become absolute.
                siteURL.isNullOrBlank() -> null
                else -> URI(siteURL).resolve(uri).toURL()
            }
        } catch (e: URISyntaxException) {
            // Either the link or the base is not a valid URI.
            null
        } catch (e: MalformedURLException) {
            // The scheme has no registered URL protocol handler.
            null
        } catch (e: IllegalArgumentException) {
            // toURL() was called on a URI that is still not absolute after resolving.
            null
        }
    }
}
17 changes: 1 addition & 16 deletions capy/src/main/java/com/jocmp/capy/articles/ArticleRenderer.kt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import com.jocmp.capy.MacroProcessor
import com.jocmp.capy.preferences.Preference
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import org.jsoup.nodes.Element
import com.jocmp.capy.R as CapyRes

class ArticleRenderer(
Expand Down Expand Up @@ -51,22 +52,6 @@ class ArticleRenderer(
return document.html()
}

/**
 * Makes image sources in [document] absolute and marks images for lazy loading.
 *
 * Every `<img>` after the first gets `loading="lazy"`; the first one gets it
 * only when it declares both `width` and `height`. Images carrying a
 * `data-src` attribute have it copied into `src`.
 *
 * `absUrl` yields an empty string when the attribute is missing or cannot be
 * resolved, so `src` is only overwritten when a non-empty URL was produced.
 */
private fun cleanLinks(document: Document) {
    document.getElementsByTag("img").forEachIndexed { index, element ->
        val absoluteSrc = element.absUrl("src")
        if (absoluteSrc.isNotEmpty()) {
            element.attr("src", absoluteSrc)
        }

        val hasSizing =
            element.attr("width").isNotBlank() && element.attr("height").isNotBlank()

        if (index > 0 || hasSizing) {
            element.attr("loading", "lazy")
        }
    }

    document.select("img[data-src]").forEach { element ->
        val deferredSrc = element.absUrl("data-src")
        if (deferredSrc.isNotEmpty()) {
            element.attr("src", deferredSrc)
        }
    }
}

private fun script(article: Article, extractedContent: ExtractedContent): String {
val content = extractedContent.value()

Expand Down
19 changes: 19 additions & 0 deletions capy/src/main/java/com/jocmp/capy/articles/CleanLinks.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package com.jocmp.capy.articles

import org.jsoup.nodes.Element

/**
 * Makes image sources under [element] absolute and marks images for lazy loading.
 *
 * Every `<img>` after the first gets `loading="lazy"`; the first one gets it
 * only when it declares both `width` and `height`. Images carrying a
 * `data-src` attribute have it copied into `src`.
 *
 * `absUrl` yields an empty string when the attribute is missing or cannot be
 * resolved, so `src` is only overwritten when a non-empty URL was produced.
 */
internal fun cleanLinks(element: Element) {
    element.getElementsByTag("img").forEachIndexed { index, child ->
        val absoluteSrc = child.absUrl("src")
        if (absoluteSrc.isNotEmpty()) {
            child.attr("src", absoluteSrc)
        }

        val hasSizing =
            child.attr("width").isNotBlank() && child.attr("height").isNotBlank()

        if (index > 0 || hasSizing) {
            child.attr("loading", "lazy")
        }
    }

    element.select("img[data-src]").forEach { child ->
        val deferredSrc = child.absUrl("data-src")
        if (deferredSrc.isNotEmpty()) {
            child.attr("src", deferredSrc)
        }
    }
}
10 changes: 5 additions & 5 deletions capy/src/main/java/com/jocmp/capy/articles/ExtractedTemplate.kt
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,15 @@ fun parseHtml(article: Article, html: String): String {
try {
val uri = (article.feedURL ?: article.url).toString()
val readability4J = Readability4J(uri, html)
val content = readability4J.parse().content ?: return ""
val content = readability4J.parse().articleContent ?: return ""

val document = Jsoup.parse(content)

document.getElementsByClass("readability-styled").forEach { element ->
content.getElementsByClass("readability-styled").forEach { element ->
element.append(" ")
}

return document.body().html()
cleanLinks(content)

return content.html()
} catch (ex: Throwable) {
return ""
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -290,50 +290,6 @@ class LocalAccountDelegateTest {
) : Feed {
override fun isValid() = true
}

/** An absolute link comes back unchanged. */
@Test
fun cleanedArticleLink_whenPresent() {
    val articleLink = "https://example.com/article"

    val cleaned = cleanedURL(inputURL = articleLink, siteURL = "")

    assertEquals(expected = articleLink, actual = cleaned.toString())
}

/** A null link yields null. */
@Test
fun cleanedArticleLink_whenNull() {
    assertEquals(
        expected = null,
        actual = cleanedURL(inputURL = null, siteURL = ""),
    )
}

/** An empty link yields null. */
@Test
fun cleanedArticleLink_whenBlank() {
    assertEquals(
        expected = null,
        actual = cleanedURL(inputURL = "", siteURL = ""),
    )
}

/** A relative link yields null when the site URL is empty. */
@Test
fun cleanedArticleLink_withRelativePathMissingSiteURL() {
    assertEquals(
        expected = null,
        actual = cleanedURL(inputURL = "/article", siteURL = ""),
    )
}

/** A relative link yields null when the site URL is not an absolute URL. */
@Test
fun cleanedArticleLink_withRelativePathAndInvalidSiteURL() {
    assertEquals(
        expected = null,
        actual = cleanedURL(inputURL = "/article", siteURL = "wrong"),
    )
}

/** A relative link is resolved against a valid site URL. */
@Test
fun cleanedArticleLink_withRelativePathAndValidSiteURL() {
    val cleaned = cleanedURL(inputURL = "/article", siteURL = "https://example.com")

    assertEquals(expected = "https://example.com/article", actual = cleaned.toString())
}
}

private suspend fun LocalAccountDelegate.addFeed(url: String) =
Expand Down
82 changes: 82 additions & 0 deletions capy/src/test/java/com/jocmp/capy/accounts/ParsedItemTest.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
package com.jocmp.capy.accounts

import com.prof18.rssparser.model.RssItem
import kotlin.test.Test
import kotlin.test.assertEquals

/** Unit tests for the title and link clean-up exposed by [ParsedItem]. */
class ParsedItemTest {
    @Test
    fun title_whenPresent() {
        val plainTitle = "My Plain Title"

        assertEquals(expected = plainTitle, actual = titleOf(plainTitle))
    }

    @Test
    fun title_whenPresentAndHTML() {
        assertEquals(expected = "My Fancy Title", actual = titleOf("My <i>Fancy</i> Title"))
    }

    @Test
    fun title_whenNull() {
        assertEquals(expected = "", actual = titleOf(null))
    }

    @Test
    fun url_whenPresent() {
        val articleLink = "https://example.com/article"

        assertEquals(expected = articleLink, actual = urlOf(articleLink))
    }

    @Test
    fun url_whenNull() {
        assertEquals(expected = null, actual = urlOf(null))
    }

    @Test
    fun url_whenBlank() {
        assertEquals(expected = null, actual = urlOf(""))
    }

    @Test
    fun url_withRelativePathMissingSiteURL() {
        assertEquals(expected = null, actual = urlOf("/article"))
    }

    @Test
    fun url_withRelativePathAndInvalidSiteURL() {
        assertEquals(expected = null, actual = urlOf("/article", siteURL = "wrong"))
    }

    @Test
    fun url_withRelativePathAndValidSiteURL() {
        assertEquals(
            expected = "https://example.com/article",
            actual = urlOf("/article", siteURL = "https://example.com"),
        )
    }

    /** Builds an item carrying only [rawTitle] and returns the parsed title. */
    private fun titleOf(rawTitle: String?): String {
        val item = RssItem.Builder().title(rawTitle).build()

        return ParsedItem(item, siteURL = "").title
    }

    /** Builds an item carrying only [link] and returns the parsed URL for [siteURL]. */
    private fun urlOf(link: String?, siteURL: String = ""): String? {
        val item = RssItem.Builder().link(link).build()

        return ParsedItem(item, siteURL = siteURL).url
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ data class RssItem(
val itunesItemData: ItunesItemData?,
val commentsUrl: String?,
) {
internal data class Builder(
data class Builder(
private var guid: String? = null,
private var title: String? = null,
private var author: String? = null,
Expand Down

0 comments on commit 7ca47eb

Please sign in to comment.