From 2bd07726854e184a0f2ed0a79871901b16206fcb Mon Sep 17 00:00:00 2001
From: Joseph Myers <jsm@polyomino.org.uk>
Date: Thu, 4 Apr 2024 18:55:54 +0000
Subject: [PATCH 01/11] Avoid inline styles inside `<code>` / `<pre>`
 conversion (#117)

* Avoid inline styles inside `<code>` / `<pre>` conversion

The check used for this is analogous to that used to avoid escaping
potential markup characters inside such tags.

Fixes #103

---------

Co-authored-by: AlexVonB <AlexVonB@users.noreply.github.com>
---
 markdownify/__init__.py   |  2 ++
 tests/test_conversions.py | 21 +++++++++++++++++++++
 2 files changed, 23 insertions(+)
diff --git a/markdownify/__init__.py b/markdownify/__init__.py
index 86226d2..0945916 100644
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -48,6 +48,8 @@ def abstract_inline_conversion(markup_fn):
     """
     def implementation(self, el, text, convert_as_inline):
         markup = markup_fn(self)
+        if el.find_parent(['pre', 'code', 'kbd', 'samp']):
+            return text
         prefix, suffix, text = chomp(text)
         if not text:
             return ''
diff --git a/tests/test_conversions.py b/tests/test_conversions.py
index 1e685f3..9652143 100644
--- a/tests/test_conversions.py
+++ b/tests/test_conversions.py
@@ -87,6 +87,16 @@ def test_code():
     assert md('<code><span>*this_should_not_escape*</span></code>') == '`*this_should_not_escape*`'
     assert md('<code>this  should\t\tnormalize</code>') == '`this should normalize`'
     assert md('<code><span>this  should\t\tnormalize</span></code>') == '`this should normalize`'
+    assert md('<code>foo<b>bar</b>baz</code>') == '`foobarbaz`'
+    assert md('<kbd>foo<i>bar</i>baz</kbd>') == '`foobarbaz`'
+    assert md('<samp>foo<del> bar </del>baz</samp>') == '`foo bar baz`'
+    assert md('<samp>foo <del>bar</del> baz</samp>') == '`foo bar baz`'
+    assert md('<code>foo<em> bar </em>baz</code>') == '`foo bar baz`'
+    assert md('<code>foo<code> bar </code>baz</code>') == '`foo bar baz`'
+    assert md('<code>foo<strong> bar </strong>baz</code>') == '`foo bar baz`'
+    assert md('<code>foo<s> bar </s>baz</code>') == '`foo bar baz`'
+    assert md('<code>foo<sup>bar</sup>baz</code>', sup_symbol='^') == '`foobarbaz`'
+    assert md('<code>foo<sub>bar</sub>baz</code>', sub_symbol='^') == '`foobarbaz`'
 
 
 def test_del():
@@ -215,6 +225,17 @@ def test_pre():
     assert md('<pre><span>*this_should_not_escape*</span></pre>') == '\n```\n*this_should_not_escape*\n```\n'
     assert md('<pre>\t\tthis  should\t\tnot  normalize</pre>') == '\n```\n\t\tthis  should\t\tnot  normalize\n```\n'
     assert md('<pre><span>\t\tthis  should\t\tnot  normalize</span></pre>') == '\n```\n\t\tthis  should\t\tnot  normalize\n```\n'
+    assert md('<pre>foo<b>\nbar\n</b>baz</pre>') == '\n```\nfoo\nbar\nbaz\n```\n'
+    assert md('<pre>foo<i>\nbar\n</i>baz</pre>') == '\n```\nfoo\nbar\nbaz\n```\n'
+    assert md('<pre>foo\n<i>bar</i>\nbaz</pre>') == '\n```\nfoo\nbar\nbaz\n```\n'
+    assert md('<pre>foo<i>\n</i>baz</pre>') == '\n```\nfoo\nbaz\n```\n'
+    assert md('<pre>foo<del>\nbar\n</del>baz</pre>') == '\n```\nfoo\nbar\nbaz\n```\n'
+    assert md('<pre>foo<em>\nbar\n</em>baz</pre>') == '\n```\nfoo\nbar\nbaz\n```\n'
+    assert md('<pre>foo<code>\nbar\n</code>baz</pre>') == '\n```\nfoo\nbar\nbaz\n```\n'
+    assert md('<pre>foo<strong>\nbar\n</strong>baz</pre>') == '\n```\nfoo\nbar\nbaz\n```\n'
+    assert md('<pre>foo<s>\nbar\n</s>baz</pre>') == '\n```\nfoo\nbar\nbaz\n```\n'
+    assert md('<pre>foo<sup>\nbar\n</sup>baz</pre>', sup_symbol='^') == '\n```\nfoo\nbar\nbaz\n```\n'
+    assert md('<pre>foo<sub>\nbar\n</sub>baz</pre>', sub_symbol='^') == '\n```\nfoo\nbar\nbaz\n```\n'
 
 
 def test_script():

From 46af45bb3c392180c254a3f97f6bcb8bfecb8116 Mon Sep 17 00:00:00 2001
From: Joseph Myers <jsm@polyomino.org.uk>
Date: Thu, 4 Apr 2024 19:42:58 +0000
Subject: [PATCH 02/11] Escape all characters with Markdown significance (#118)

* Escape all characters with Markdown significance

There are many punctuation characters that sometimes have significance
in Markdown; more systematically escape them all (based on a new
escape_misc configuration option).

The escaping of '.' and ')' is restricted, on a best-effort basis, to
the context where they might have Markdown significance (after a number,
where they can indicate an ordered list item); no such restriction is
attempted for the other characters. Even the restriction for '.' and ')'
may not be entirely safe in all cases, as it's possible the HTML could
have the number outside the block being escaped in one go,
e.g. `<span>1</span>.`.

---------

Co-authored-by: AlexVonB <AlexVonB@users.noreply.github.com>
---
 README.rst              |  5 +++++
 markdownify/__init__.py |  4 ++++
 tests/test_escaping.py  | 23 +++++++++++++++++++++--
 3 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/README.rst b/README.rst
index 51888ea..a0cd678 100644
--- a/README.rst
+++ b/README.rst
@@ -123,6 +123,11 @@ escape_underscores
   If set to ``False``, do not escape ``_`` to ``\_`` in text.
   Defaults to ``True``.
 
+escape_misc
+  If set to ``False``, do not escape miscellaneous punctuation characters
+  that sometimes have Markdown significance in text.
+  Defaults to ``True``.
+
 keep_inline_images_in
   Images are converted to their alt-text when the images are located inside
   headlines or table cells. If some inline images should be converted to
diff --git a/markdownify/__init__.py b/markdownify/__init__.py
index 0945916..eaa6ded 100644
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -71,6 +71,7 @@ class DefaultOptions:
         default_title = False
         escape_asterisks = True
         escape_underscores = True
+        escape_misc = True
         heading_style = UNDERLINED
         keep_inline_images_in = []
         newline_style = SPACES
@@ -201,6 +202,9 @@ def should_convert_tag(self, tag):
     def escape(self, text):
         if not text:
             return ''
+        if self.options['escape_misc']:
+            text = re.sub(r'([\\&<`[>~#=+|-])', r'\\\1', text)
+            text = re.sub(r'([0-9])([.)])', r'\1\\\2', text)
         if self.options['escape_asterisks']:
             text = text.replace('*', r'\*')
         if self.options['escape_underscores']:
diff --git a/tests/test_escaping.py b/tests/test_escaping.py
index 2f3a83e..eaef77d 100644
--- a/tests/test_escaping.py
+++ b/tests/test_escaping.py
@@ -12,7 +12,7 @@ def test_underscore():
 
 
 def test_xml_entities():
-    assert md('&amp;') == '&'
+    assert md('&amp;') == r'\&'
 
 
 def test_named_entities():
@@ -25,4 +25,23 @@ def test_hexadecimal_entities():
 
 
 def test_single_escaping_entities():
-    assert md('&amp;amp;') == '&amp;'
+    assert md('&amp;amp;') == r'\&amp;'
+
+
+def test_misc():  # must be named test_* or pytest never collects these assertions
+    assert md('\\*') == r'\\\*'
+    assert md('&lt;foo>') == r'\<foo\>'  # entity-encoded: a raw '<foo>' is parsed as a tag, not text
+    assert md('# foo') == r'\# foo'
+    assert md('> foo') == r'\> foo'
+    assert md('~~foo~~') == r'\~\~foo\~\~'
+    assert md('foo\n===\n') == 'foo\n\\=\\=\\=\n'
+    assert md('---\n') == '\\-\\-\\-\n'
+    assert md('+ x\n+ y\n') == '\\+ x\n\\+ y\n'
+    assert md('`x`') == r'\`x\`'
+    assert md('[text](link)') == r'\[text](link)'
+    assert md('1. x') == r'1\. x'
+    assert md('not a number. x') == r'not a number. x'
+    assert md('1) x') == r'1\) x'
+    assert md('not a number) x') == r'not a number) x'
+    assert md('|not table|') == r'\|not table\|'
+    assert md(r'\ &lt;foo> &amp;amp; | ` `', escape_misc=False) == r'\ <foo> &amp; | ` `'

From 43dbe20aaf9d11c38c9dab7d0e8f30dfdedf19e7 Mon Sep 17 00:00:00 2001
From: AlexVonB <AlexVonB@users.noreply.github.com>
Date: Thu, 4 Apr 2024 21:49:45 +0200
Subject: [PATCH 03/11] fixed github action badges

see https://github.com/badges/shields/issues/8671
---
 README.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.rst b/README.rst
index a0cd678..35d58fd 100644
--- a/README.rst
+++ b/README.rst
@@ -1,8 +1,8 @@
 |build| |version| |license| |downloads|
 
-.. |build| image:: https://img.shields.io/github/workflow/status/matthewwithanm/python-markdownify/Python%20application/develop
+.. |build| image:: https://img.shields.io/github/actions/workflow/status/matthewwithanm/python-markdownify/python-app.yml?branch=develop
     :alt: GitHub Workflow Status
-    :target: https://github.com/matthewwithanm/python-markdownify/actions?query=workflow%3A%22Python+application%22
+    :target: https://github.com/matthewwithanm/python-markdownify/actions/workflows/python-app.yml?query=workflow%3A%22Python+application%22
 
 .. |version| image:: https://img.shields.io/pypi/v/markdownify
     :alt: Pypi version

From c1672aee444d4fa8c76a7be37b0746ce769d2631 Mon Sep 17 00:00:00 2001
From: samypr100 <3933065+samypr100@users.noreply.github.com>
Date: Sun, 23 Jun 2024 06:59:14 -0400
Subject: [PATCH 04/11] Update MANIFEST.in to exclude tests during packaging
 (#125)

---
 MANIFEST.in | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MANIFEST.in b/MANIFEST.in
index 9561fb1..70656c8 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1 +1,2 @@
 include README.rst
+prune tests

From 2ec33384de85d0906b4b40a59f1a3650846150cb Mon Sep 17 00:00:00 2001
From: AlexVonB <AlexVonB@users.noreply.github.com>
Date: Sun, 23 Jun 2024 13:17:20 +0200
Subject: [PATCH 05/11] handle un-parsable colspan values

fixes #126
---
 markdownify/__init__.py |  6 +++---
 tests/test_tables.py    | 14 +++++++++++++-
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/markdownify/__init__.py b/markdownify/__init__.py
index eaa6ded..6a983d9 100644
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -383,13 +383,13 @@ def convert_figcaption(self, el, text, convert_as_inline):
 
     def convert_td(self, el, text, convert_as_inline):
         colspan = 1
-        if 'colspan' in el.attrs:
+        if 'colspan' in el.attrs and el['colspan'].isdigit():
             colspan = int(el['colspan'])
         return ' ' + text.strip().replace("\n", " ") + ' |' * colspan
 
     def convert_th(self, el, text, convert_as_inline):
         colspan = 1
-        if 'colspan' in el.attrs:
+        if 'colspan' in el.attrs and el['colspan'].isdigit():
             colspan = int(el['colspan'])
         return ' ' + text.strip().replace("\n", " ") + ' |' * colspan
 
@@ -406,7 +406,7 @@ def convert_tr(self, el, text, convert_as_inline):
             # first row and is headline: print headline underline
             full_colspan = 0
             for cell in cells:
-                if "colspan" in cell.attrs:
+                if 'colspan' in cell.attrs and cell['colspan'].isdigit():
                     full_colspan += int(cell["colspan"])
                 else:
                     full_colspan += 1
diff --git a/tests/test_tables.py b/tests/test_tables.py
index 9120c29..594e5bf 100644
--- a/tests/test_tables.py
+++ b/tests/test_tables.py
@@ -215,7 +215,7 @@
         <th>Age</th>
     </tr>
     <tr>
-        <td>Jill</td>
+        <td colspan="1">Jill</td>
         <td>Smith</td>
         <td>50</td>
     </tr>
@@ -226,6 +226,17 @@
     </tr>
 </table>"""
 
+table_with_undefined_colspan = """<table>
+    <tr>
+        <th colspan="undefined">Name</th>
+        <th>Age</th>
+    </tr>
+    <tr>
+        <td colspan="-1">Jill</td>
+        <td>Smith</td>
+    </tr>
+</table>"""
+
 
 def test_table():
     assert md(table) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
@@ -240,3 +251,4 @@ def test_table():
     assert md(table_body) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
     assert md(table_with_caption) == 'TEXT\n\nCaption\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n\n'
     assert md(table_with_colspan) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+    assert md(table_with_undefined_colspan) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n'

From 7861b330cd05c0c19fc496530f02922d5493c568 Mon Sep 17 00:00:00 2001
From: Joseph Myers <josmyers@redhat.com>
Date: Sun, 23 Jun 2024 11:28:05 +0000
Subject: [PATCH 06/11] Special-case use of HTML tags for converting `<sub>` /
 `<sup>` (#119)

Allow different strings before / after `<sub>` / `<sup>` content

In particular, this allows setting `sub_symbol='<sub>'`,
`sup_symbol='<sup>'`, to use raw HTML in the output when
converting subscripts and superscripts.
---
 README.rst                | 6 +++++-
 markdownify/__init__.py   | 9 +++++++--
 tests/test_conversions.py | 2 ++
 3 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/README.rst b/README.rst
index 35d58fd..55ea7cf 100644
--- a/README.rst
+++ b/README.rst
@@ -87,7 +87,11 @@ strong_em_symbol
 sub_symbol, sup_symbol
   Define the chars that surround ``<sub>`` and ``<sup>`` text. Defaults to an
   empty string, because this is non-standard behavior. Could be something like
-  ``~`` and ``^`` to result in ``~sub~`` and ``^sup^``.
+  ``~`` and ``^`` to result in ``~sub~`` and ``^sup^``.  If the value starts
+  with ``<`` and ends with ``>``, it is treated as an HTML tag and a ``/`` is
+  inserted after the ``<`` in the string used after the text; this allows
+  specifying ``<sub>`` to use raw HTML in the output for subscripts, for
+  example.
 
 newline_style
   Defines the style of marking linebreaks (``<br>``) in markdown. The default
diff --git a/markdownify/__init__.py b/markdownify/__init__.py
index 6a983d9..d7bd780 100644
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -43,17 +43,22 @@ def abstract_inline_conversion(markup_fn):
     """
     This abstracts all simple inline tags like b, em, del, ...
     Returns a function that wraps the chomped text in a pair of the string
-    that is returned by markup_fn. markup_fn is necessary to allow for
+    that is returned by markup_fn, with '/' inserted in the string used after
+    the text if it looks like an HTML tag. markup_fn is necessary to allow for
     references to self.strong_em_symbol etc.
     """
     def implementation(self, el, text, convert_as_inline):
         markup = markup_fn(self)
+        if markup.startswith('<') and markup.endswith('>'):
+            markup_after = '</' + markup[1:]
+        else:
+            markup_after = markup
         if el.find_parent(['pre', 'code', 'kbd', 'samp']):
             return text
         prefix, suffix, text = chomp(text)
         if not text:
             return ''
-        return '%s%s%s%s%s' % (prefix, markup, text, markup, suffix)
+        return '%s%s%s%s%s' % (prefix, markup, text, markup_after, suffix)
     return implementation
 
 
diff --git a/tests/test_conversions.py b/tests/test_conversions.py
index 9652143..a35b982 100644
--- a/tests/test_conversions.py
+++ b/tests/test_conversions.py
@@ -268,11 +268,13 @@ def test_strong_em_symbol():
 def test_sub():
     assert md('<sub>foo</sub>') == 'foo'
     assert md('<sub>foo</sub>', sub_symbol='~') == '~foo~'
+    assert md('<sub>foo</sub>', sub_symbol='<sub>') == '<sub>foo</sub>'
 
 
 def test_sup():
     assert md('<sup>foo</sup>') == 'foo'
     assert md('<sup>foo</sup>', sup_symbol='^') == '^foo^'
+    assert md('<sup>foo</sup>', sup_symbol='<sup>') == '<sup>foo</sup>'
 
 
 def test_lang():

From 50b4640db2d7f88b44c20f947e705ba59f1b9fe0 Mon Sep 17 00:00:00 2001
From: AlexVonB <AlexVonB@users.noreply.github.com>
Date: Sun, 23 Jun 2024 13:30:08 +0200
Subject: [PATCH 07/11] better naming for markup variables

---
 markdownify/__init__.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/markdownify/__init__.py b/markdownify/__init__.py
index d7bd780..2f71cad 100644
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -48,17 +48,17 @@ def abstract_inline_conversion(markup_fn):
     references to self.strong_em_symbol etc.
     """
     def implementation(self, el, text, convert_as_inline):
-        markup = markup_fn(self)
-        if markup.startswith('<') and markup.endswith('>'):
-            markup_after = '</' + markup[1:]
+        markup_prefix = markup_fn(self)
+        if markup_prefix.startswith('<') and markup_prefix.endswith('>'):
+            markup_suffix = '</' + markup_prefix[1:]
         else:
-            markup_after = markup
+            markup_suffix = markup_prefix
         if el.find_parent(['pre', 'code', 'kbd', 'samp']):
             return text
         prefix, suffix, text = chomp(text)
         if not text:
             return ''
-        return '%s%s%s%s%s' % (prefix, markup, text, markup_after, suffix)
+        return '%s%s%s%s%s' % (prefix, markup_prefix, text, markup_suffix, suffix)
     return implementation
 
 

From 51390d738982e20267fd387ab356e995923951e6 Mon Sep 17 00:00:00 2001
From: microdnd <dnd5544@gmail.com>
Date: Sun, 23 Jun 2024 20:28:53 +0800
Subject: [PATCH 08/11] handle non-numeric ol start value (#127)

Co-authored-by: Mico <mico_wu@trendmicro.com>
---
 markdownify/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/markdownify/__init__.py b/markdownify/__init__.py
index 2f71cad..cd66a39 100644
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -326,7 +326,7 @@ def convert_list(self, el, text, convert_as_inline):
     def convert_li(self, el, text, convert_as_inline):
         parent = el.parent
         if parent is not None and parent.name == 'ol':
-            if parent.get("start"):
+            if parent.get("start") and str(parent.get("start")).isnumeric():
                 start = int(parent.get("start"))
             else:
                 start = 1

From 0a5c89aa493ae0cdc090305ba14ef7fa1c6f13c4 Mon Sep 17 00:00:00 2001
From: AlexVonB <AlexVonB@users.noreply.github.com>
Date: Sun, 23 Jun 2024 14:30:07 +0200
Subject: [PATCH 09/11] added test for ol start check

---
 tests/test_lists.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/test_lists.py b/tests/test_lists.py
index 5a04430..35eee13 100644
--- a/tests/test_lists.py
+++ b/tests/test_lists.py
@@ -43,6 +43,9 @@
 def test_ol():
     assert md('<ol><li>a</li><li>b</li></ol>') == '1. a\n2. b\n'
     assert md('<ol start="3"><li>a</li><li>b</li></ol>') == '3. a\n4. b\n'
+    assert md('<ol start="-1"><li>a</li><li>b</li></ol>') == '1. a\n2. b\n'
+    assert md('<ol start="foo"><li>a</li><li>b</li></ol>') == '1. a\n2. b\n'
+    assert md('<ol start="1.5"><li>a</li><li>b</li></ol>') == '1. a\n2. b\n'
 
 
 def test_nested_ols():

From 75a678dab9d7cec2c18b58489ea4a66b6f794908 Mon Sep 17 00:00:00 2001
From: AlexVonB <AlexVonB@users.noreply.github.com>
Date: Sun, 14 Jul 2024 21:02:49 +0200
Subject: [PATCH 10/11] fix pytest version to 8

---
 tox.ini | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tox.ini b/tox.ini
index 9eb8750..54ba143 100644
--- a/tox.ini
+++ b/tox.ini
@@ -4,7 +4,7 @@ envlist = py38
 [testenv]
 passenv = PYTHONPATH
 deps =
-	pytest
+	pytest==8
 	flake8
 	restructuredtext_lint
 	Pygments

From f6c8daf8a58948c88256a09a60085e28e628564e Mon Sep 17 00:00:00 2001
From: AlexVonB <AlexVonB@users.noreply.github.com>
Date: Sun, 14 Jul 2024 21:19:23 +0200
Subject: [PATCH 11/11] bump to v0.13.0

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 9a26468..9a703d0 100644
--- a/setup.py
+++ b/setup.py
@@ -9,7 +9,7 @@
 pkgmeta = {
     '__title__': 'markdownify',
     '__author__': 'Matthew Tretter',
-    '__version__': '0.12.1',
+    '__version__': '0.13.0',
 }
 
 read = lambda filepath: codecs.open(filepath, 'r', 'utf-8').read()