Skip to content

Commit d40d8b7

Browse files
committed
fix: #583
1 parent ead48d3 commit d40d8b7

File tree

1 file changed

+11
-2
lines changed

1 file changed

+11
-2
lines changed

crawl4ai/html2text/__init__.py

+11 -2
Original file line number | Diff line number | Diff line change
@@ -510,6 +510,7 @@ def link_url(self: HTML2Text, link: str, title: str = "") -> None:
510510

511511
if tag == "a" and not self.ignore_links:
512512
if start:
513+
self.inside_link = True
513514
if (
514515
"href" in attrs
515516
and attrs["href"] is not None
@@ -526,6 +527,7 @@ def link_url(self: HTML2Text, link: str, title: str = "") -> None:
526527
else:
527528
self.astack.append(None)
528529
else:
530+
self.inside_link = False
529531
if self.astack:
530532
a = self.astack.pop()
531533
if self.maybe_automatic_link and not self.empty_link:
@@ -1035,6 +1037,7 @@ def __init__(self, *args, handle_code_in_pre=False, **kwargs):
10351037
super().__init__(*args, **kwargs)
10361038
self.inside_pre = False
10371039
self.inside_code = False
1040+
self.inside_link = False
10381041
self.preserve_tags = set() # Set of tags to preserve
10391042
self.current_preserved_tag = None
10401043
self.preserved_content = []
@@ -1114,11 +1117,17 @@ def handle_tag(self, tag, attrs, start):
11141117
# Ignore code tags inside pre blocks if handle_code_in_pre is False
11151118
return
11161119
if start:
1117-
self.o("`") # Markdown inline code start
1120+
if not self.inside_link:
1121+
self.o("`") # Only output backtick if not inside a link
11181122
self.inside_code = True
11191123
else:
1120-
self.o("`") # Markdown inline code end
1124+
if not self.inside_link:
1125+
self.o("`") # Only output backtick if not inside a link
11211126
self.inside_code = False
1127+
1128+
# If inside a link, let the parent class handle the content
1129+
if self.inside_link:
1130+
super().handle_tag(tag, attrs, start)
11221131
else:
11231132
super().handle_tag(tag, attrs, start)
11241133

0 commit comments

Comments
 (0)