Markdown notes fix (#3946)

* Improve 'remove_non_printable_characters' method in helpers.py - Requires fancy 'negative-lookahead' regex to negate existing unicode regex * Adds extra test for markdown fields - Ensure that newline characters are retained - Very necessary for markdown formatting! - Update field cleaning to allow newlines in some very specific cases * Bug fix * Better edge case handling
2024-08-30 18:33:04 +00:00 · 2022-11-18 13:36:38 +11:00 · 2022-11-18 13:36:38 +11:00 · 2a148a2f46
commit 2a148a2f46
parent 73c1c50d01
3 changed files with 91 additions and 6 deletions
--- a/InvenTree/InvenTree/helpers.py
+++ b/InvenTree/InvenTree/helpers.py
@ -950,16 +950,27 @@ def strip_html_tags(value: str, raise_error=True, field_name=None):
    return cleaned
-def remove_non_printable_characters(value: str, remove_ascii=True, remove_unicode=True):
+def remove_non_printable_characters(value: str, remove_newline=True, remove_ascii=True, remove_unicode=True):
    """Remove non-printable / control characters from the provided string"""
    cleaned = value
    if remove_ascii:
        # Remove ASCII control characters
-        cleaned = regex.sub(u'[\x01-\x1F]+', '', value)
+        # Note that we do not sub out 0x0A (\n) here, it is done separately below
        cleaned = regex.sub(u'[\x01-\x09]+', '', cleaned)
        cleaned = regex.sub(u'[\x0b-\x1F]+', '', cleaned)
    if remove_newline:
        cleaned = regex.sub(u'[\x0a]+', '', cleaned)
    if remove_unicode:
        # Remove Unicode control characters
-        cleaned = regex.sub(u'[^\P{C}]+', '', value)
+        if remove_newline:
            cleaned = regex.sub(u'[^\P{C}]+', '', cleaned)
        else:
            # Use 'negative-lookahead' to exclude newline character
            cleaned = regex.sub(u'(?![\x0A])[^\P{C}]+', '', cleaned)
    return cleaned
--- a/InvenTree/InvenTree/mixins.py
+++ b/InvenTree/InvenTree/mixins.py
@ -1,8 +1,11 @@
 """Mixins for (API) views in the whole project."""
 from django.core.exceptions import FieldDoesNotExist
 from rest_framework import generics, mixins, status
 from rest_framework.response import Response
 from InvenTree.fields import InvenTreeNotesField
 from InvenTree.helpers import remove_non_printable_characters, strip_html_tags
@ -44,10 +47,35 @@ class CleanMixin():
        Nominally, the only thing that will be "cleaned" will be HTML tags
        Ref: https://github.com/mozilla/bleach/issues/192
        """
        cleaned = strip_html_tags(data, field_name=field)
-        cleaned = remove_non_printable_characters(cleaned)
+
        # By default, newline characters are removed
        remove_newline = True
        try:
            if hasattr(self, 'serializer_class'):
                model = self.serializer_class.Meta.model
                field = model._meta.get_field(field)
                # The following field types allow newline characters
                allow_newline = [
                    InvenTreeNotesField,
                ]
                for field_type in allow_newline:
                    if issubclass(type(field), field_type):
                        remove_newline = False
                        break
        except AttributeError:
            pass
        except FieldDoesNotExist:
            pass
        cleaned = remove_non_printable_characters(cleaned, remove_newline=remove_newline)
        return cleaned
--- a/InvenTree/part/test_api.py
+++ b/InvenTree/part/test_api.py
@ -1598,8 +1598,24 @@ class PartDetailTests(InvenTreeAPITestCase):
        self.assertFalse('hello' in part.metadata)
        self.assertEqual(part.metadata['x'], 'y')
-    def test_part_notes(self):
+
-        """Unit tests for the part 'notes' field"""
+class PartNotesTests(InvenTreeAPITestCase):
    """Tests for the 'notes' field (markdown field)"""
    fixtures = [
        'category',
        'part',
        'location',
        'company',
    ]
    roles = [
        'part.change',
        'part.add',
    ]
    def test_long_notes(self):
        """Test that very long notes field is rejected"""
        # Ensure that we cannot upload a very long piece of text
        url = reverse('api-part-detail', kwargs={'pk': 1})
@ -1614,6 +1630,36 @@ class PartDetailTests(InvenTreeAPITestCase):
        self.assertIn('Ensure this field has no more than 50000 characters', str(response.data['notes']))
    def test_multiline_formatting(self):
        """Ensure that markdown formatting is retained"""
        url = reverse('api-part-detail', kwargs={'pk': 1})
        notes = """
        ### Title
        1. Numbered list
        2. Another item
        3. Another item again
        [A link](http://link.com.go)
        """
        response = self.patch(
            url,
            {
                'notes': notes,
            },
            expected_code=200
        )
        # Ensure that newline chars have not been removed
        self.assertIn('\n', response.data['notes'])
        # Entire notes field should match original value
        self.assertEqual(response.data['notes'], notes.strip())
 class PartPricingDetailTests(InvenTreeAPITestCase):
    """Tests for the part pricing API endpoint"""