Skip to content

Commit 0fb63da

Browse files
authored
Fix: abstract limit (#268)
Patches sent by mail by @tiosgz. Applied with `git am`. It's my very first time using this kind of [git email workflow](https://git-send-email.io/). Phew! Commands used:

```sh
git am --3way --ignore-space-change v2-0001-fix-retrieving-article-description.patch
git am --3way --ignore-space-change v2-0002-docs-configuration-fix-update-abstract_-chars_cou.patch
git am --3way --ignore-space-change v2-0003-tests-add-test-cases-for-abstract_delimiter.patch
```

Supersedes #202

cc @craigbox @YDX-2147483647
2 parents a08052a + d81039d commit 0fb63da

File tree

5 files changed

+107
-41
lines changed

5 files changed

+107
-41
lines changed

docs/configuration.md

+8-10
Original file line numberDiff line numberDiff line change
@@ -240,28 +240,26 @@ Output:
240240

241241
### `abstract_chars_count`: item description length
242242

243-
To fill each [item description element](https://www.w3schools.com/xml/rss_tag_title_link_description_item.asp):
243+
Used, in combination with `abstract_delimiter`, to determine each [item description element](https://www.w3schools.com/xml/rss_tag_title_link_description_item.asp):
244244

245245
- If this value is set to `-1`, then each article's full HTML content is used as the description element.
246-
- be careful: if set to `0` and there is no description, the feed's compliance is broken (an item must have a description)
247246
- Otherwise, the plugin first tries to retrieve the value of the keyword `description` from the [page metadata].
248-
- If the value is non-negative and no `description` meta is found, then the plugin retrieves the first number of characters of the page content defined by this setting. Retrieved content is the raw markdown converted roughly into HTML.
247+
- If that fails and `abstract_delimiter` is found in the page, the article content up to (but not including) the delimiter is used.
248+
- If the above has failed, then the plugin retrieves the first characters of the page content, up to the number defined by this setting. The retrieved content is the raw Markdown roughly converted into HTML.
249+
250+
Be careful: if set to `0` and there is no description, the feed's compliance is broken (an item must have a description).
249251

250252
`abstract_chars_count`: number of characters to use as item description.
251253

252254
Default: `150`
253255

254256
----
255257

256-
#### `abstract_delimiter`: abstract delimiter
257-
258-
Used to fill each [item description element](https://www.w3schools.com/xml/rss_tag_title_link_description_item.asp):
258+
### `abstract_delimiter`: abstract delimiter
259259

260-
- If this value is set to `-1`, then the full HTML content will be filled into the description element.
261-
- Otherwise, the plugin first tries to retrieve the value of the key `description` from the page metadata.
262-
- If the value is non-negative and no `description` meta is found, then the plugin retrieves the first number of characters of the page content defined by this setting. Retrieved content is the raw markdown converted rougthly into HTML (i.e. without extension, etc.).
260+
Please see `abstract_chars_count` for how this setting is used. A value of `""` (the empty string) disables this step.
263261

264-
`abstract_delimiter`: string to mark .
262+
`abstract_delimiter`: string to mark where the description ends.
265263

266264
Default: `<!-- more -->`
267265

mkdocs_rss_plugin/util.py

+22-28
Original file line numberDiff line numberDiff line change
@@ -455,7 +455,8 @@ def get_description_or_abstract(
455455
self, in_page: Page, chars_count: int = 160, abstract_delimiter: str = None
456456
) -> str:
457457
"""Returns description from page meta. If it doesn't exist, use the \
458-
{chars_count} first characters from page content (in markdown).
458+
page content up to {abstract_delimiter} or the {chars_count} first \
459+
characters from page content (in markdown).
459460
460461
:param Page in_page: page to look at
461462
:param int chars_count: if page.meta.description is not set, number of chars \
@@ -468,22 +469,16 @@ def get_description_or_abstract(
468469

469470
description = in_page.meta.get("description")
470471

471-
# Set chars_count to None if it is set to be unlimited, for slicing.
472-
if chars_count < 0:
473-
chars_count = None
474-
475-
# If the abstract chars is not unlimited and the description exists,
476-
# return the description.
477-
if description and chars_count is not None:
472+
# If the full page is wanted (unlimited chars count)
473+
if chars_count == -1 and (in_page.content or in_page.markdown):
474+
if in_page.content:
475+
return in_page.content
476+
else:
477+
return markdown.markdown(in_page.markdown, output_format="html5")
478+
# If the description is explicitly given
479+
elif description:
478480
return description
479-
# If no description and chars_count set to 0, return empty string
480-
elif not description and chars_count == 0:
481-
logger.warning(
482-
f"No description set for page {in_page.file.src_uri} "
483-
"and 'abstract_chars_count' set to 0. The feed won't be compliant, "
484-
"because an item must have a description."
485-
)
486-
return ""
481+
# If the abstract is cut by the delimiter
487482
elif (
488483
abstract_delimiter
489484
and (
@@ -495,24 +490,23 @@ def get_description_or_abstract(
495490
in_page.markdown[:excerpt_separator_position],
496491
output_format="html5",
497492
)
498-
# If chars count is unlimited, use the html content
499-
elif in_page.content and chars_count == -1:
500-
if chars_count is None or len(in_page.content) < chars_count:
501-
return in_page.content[:chars_count]
502-
# Use markdown
503-
elif in_page.markdown:
504-
if chars_count is None or len(in_page.markdown) < chars_count:
505-
return markdown.markdown(
506-
in_page.markdown[:chars_count], output_format="html5"
507-
)
493+
# Use first chars_count from the markdown
494+
elif chars_count > 0 and in_page.markdown:
495+
if len(in_page.markdown) <= chars_count:
496+
return markdown.markdown(in_page.markdown, output_format="html5")
508497
else:
509498
return markdown.markdown(
510499
f"{in_page.markdown[: chars_count - 3]}...",
511500
output_format="html5",
512501
)
513-
# Unlimited chars_count but no content is found, then return the description.
502+
# No explicit description and no (or empty) abstract found
514503
else:
515-
return description if description else ""
504+
logger.warning(
505+
f"No description generated from metadata or content of the page {in_page.file.src_uri}, "
506+
"therefore the feed won't be compliant, "
507+
"because an item must have a description."
508+
)
509+
return ""
516510

517511
def get_image(self, in_page: Page, base_url: str) -> Optional[Tuple[str, str, int]]:
518512
"""Get page's image from page meta or social cards and returns properties.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Page without meta with early delimiter
2+
3+
<!-- more -->
4+
5+
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Project information
2+
site_name: MkDocs RSS Plugin - TEST
3+
site_description: Basic setup to test against MkDocs RSS plugin
4+
site_author: Julien Moura (Guts)
5+
site_url: https://guts.github.io/mkdocs-rss-plugin
6+
copyright: "Guts - In Geo Veritas"
7+
8+
# Repository
9+
repo_name: "guts/mkdocs-rss-plugin"
10+
repo_url: "https://github.com/guts/mkdocs-rss-plugin"
11+
12+
use_directory_urls: true
13+
14+
plugins:
15+
- rss:
16+
abstract_delimiter: ""
17+
18+
theme:
19+
name: readthedocs
20+
21+
# Extensions to enhance markdown
22+
markdown_extensions:
23+
- meta

tests/test_build.py

+49-3
Original file line numberDiff line numberDiff line change
@@ -403,9 +403,55 @@ def test_simple_build_item_length_unlimited(self):
403403
"Page without meta with short text",
404404
"Blog sample",
405405
):
406-
self.assertGreaterEqual(
407-
len(feed_item.description), 150, feed_item.title
408-
)
406+
self.assertGreater(len(feed_item.description), 150, feed_item.title)
407+
408+
def test_simple_build_item_delimiter(self):
409+
with tempfile.TemporaryDirectory() as tmpdirname:
410+
cli_result = self.build_docs_setup(
411+
testproject_path="docs",
412+
mkdocs_yml_filepath=Path("tests/fixtures/mkdocs_minimal.yml"),
413+
output_path=tmpdirname,
414+
strict=True,
415+
)
416+
if cli_result.exception is not None:
417+
e = cli_result.exception
418+
logger.debug(format_exception(type(e), e, e.__traceback__))
419+
420+
self.assertEqual(cli_result.exit_code, 0)
421+
self.assertIsNone(cli_result.exception)
422+
423+
# created items
424+
feed_parsed = feedparser.parse(Path(tmpdirname) / OUTPUT_RSS_FEED_CREATED)
425+
self.assertEqual(feed_parsed.bozo, 0)
426+
427+
for feed_item in feed_parsed.entries:
428+
if feed_item.title in ("Page without meta with early delimiter",):
429+
self.assertLess(len(feed_item.description), 50, feed_item.title)
430+
431+
def test_simple_build_item_delimiter_empty(self):
432+
with tempfile.TemporaryDirectory() as tmpdirname:
433+
cli_result = self.build_docs_setup(
434+
testproject_path="docs",
435+
mkdocs_yml_filepath=Path(
436+
"tests/fixtures/mkdocs_item_delimiter_empty.yml"
437+
),
438+
output_path=tmpdirname,
439+
strict=True,
440+
)
441+
if cli_result.exception is not None:
442+
e = cli_result.exception
443+
logger.debug(format_exception(type(e), e, e.__traceback__))
444+
445+
self.assertEqual(cli_result.exit_code, 0)
446+
self.assertIsNone(cli_result.exception)
447+
448+
# created items
449+
feed_parsed = feedparser.parse(Path(tmpdirname) / OUTPUT_RSS_FEED_CREATED)
450+
self.assertEqual(feed_parsed.bozo, 0)
451+
452+
for feed_item in feed_parsed.entries:
453+
if feed_item.title in ("Page without meta with early delimiter",):
454+
self.assertGreater(len(feed_item.description), 150, feed_item.title)
409455

410456
def test_simple_build_locale_with_territory(self):
411457
with tempfile.TemporaryDirectory() as tmpdirname:

0 commit comments

Comments
 (0)