Markdown notes fix (#3946)

* Improve 'remove_non_printable_characters' method in helpers.py

- Requires fancy 'negative-lookahead' regex to negate existing unicode regex

* Adds  extra test for markdown fields

- Ensure that newline characters are retained
- Very necessary for markdown formatting!
- Update field cleaning to allow newlines in some very specific cases

* Bug fix

* Better edge case handling
This commit is contained in:
Oliver 2022-11-18 13:36:38 +11:00 committed by GitHub
parent 73c1c50d01
commit 2a148a2f46
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 91 additions and 6 deletions

View File

@ -950,16 +950,27 @@ def strip_html_tags(value: str, raise_error=True, field_name=None):
return cleaned return cleaned
def remove_non_printable_characters(value: str, remove_ascii=True, remove_unicode=True): def remove_non_printable_characters(value: str, remove_newline=True, remove_ascii=True, remove_unicode=True):
"""Remove non-printable / control characters from the provided string""" """Remove non-printable / control characters from the provided string"""
cleaned = value
if remove_ascii: if remove_ascii:
# Remove ASCII control characters # Remove ASCII control characters
cleaned = regex.sub(u'[\x01-\x1F]+', '', value) # Note that we do not sub out 0x0A (\n) here, it is done separately below
cleaned = regex.sub(u'[\x01-\x09]+', '', cleaned)
cleaned = regex.sub(u'[\x0b-\x1F]+', '', cleaned)
if remove_newline:
cleaned = regex.sub(u'[\x0a]+', '', cleaned)
if remove_unicode: if remove_unicode:
# Remove Unicode control characters # Remove Unicode control characters
cleaned = regex.sub(u'[^\P{C}]+', '', value) if remove_newline:
cleaned = regex.sub(u'[^\P{C}]+', '', cleaned)
else:
# Use 'negative-lookahead' to exclude newline character
cleaned = regex.sub(u'(?![\x0A])[^\P{C}]+', '', cleaned)
return cleaned return cleaned

View File

@ -1,8 +1,11 @@
"""Mixins for (API) views in the whole project.""" """Mixins for (API) views in the whole project."""
from django.core.exceptions import FieldDoesNotExist
from rest_framework import generics, mixins, status from rest_framework import generics, mixins, status
from rest_framework.response import Response from rest_framework.response import Response
from InvenTree.fields import InvenTreeNotesField
from InvenTree.helpers import remove_non_printable_characters, strip_html_tags from InvenTree.helpers import remove_non_printable_characters, strip_html_tags
@ -44,10 +47,35 @@ class CleanMixin():
Nominally, the only thing that will be "cleaned" will be HTML tags Nominally, the only thing that will be "cleaned" will be HTML tags
Ref: https://github.com/mozilla/bleach/issues/192 Ref: https://github.com/mozilla/bleach/issues/192
""" """
cleaned = strip_html_tags(data, field_name=field) cleaned = strip_html_tags(data, field_name=field)
cleaned = remove_non_printable_characters(cleaned)
# By default, newline characters are removed
remove_newline = True
try:
if hasattr(self, 'serializer_class'):
model = self.serializer_class.Meta.model
field = model._meta.get_field(field)
# The following field types allow newline characters
allow_newline = [
InvenTreeNotesField,
]
for field_type in allow_newline:
if issubclass(type(field), field_type):
remove_newline = False
break
except AttributeError:
pass
except FieldDoesNotExist:
pass
cleaned = remove_non_printable_characters(cleaned, remove_newline=remove_newline)
return cleaned return cleaned

View File

@ -1598,8 +1598,24 @@ class PartDetailTests(InvenTreeAPITestCase):
self.assertFalse('hello' in part.metadata) self.assertFalse('hello' in part.metadata)
self.assertEqual(part.metadata['x'], 'y') self.assertEqual(part.metadata['x'], 'y')
def test_part_notes(self):
"""Unit tests for the part 'notes' field""" class PartNotesTests(InvenTreeAPITestCase):
"""Tests for the 'notes' field (markdown field)"""
fixtures = [
'category',
'part',
'location',
'company',
]
roles = [
'part.change',
'part.add',
]
def test_long_notes(self):
"""Test that very long notes field is rejected"""
# Ensure that we cannot upload a very long piece of text # Ensure that we cannot upload a very long piece of text
url = reverse('api-part-detail', kwargs={'pk': 1}) url = reverse('api-part-detail', kwargs={'pk': 1})
@ -1614,6 +1630,36 @@ class PartDetailTests(InvenTreeAPITestCase):
self.assertIn('Ensure this field has no more than 50000 characters', str(response.data['notes'])) self.assertIn('Ensure this field has no more than 50000 characters', str(response.data['notes']))
def test_multiline_formatting(self):
"""Ensure that markdown formatting is retained"""
url = reverse('api-part-detail', kwargs={'pk': 1})
notes = """
### Title
1. Numbered list
2. Another item
3. Another item again
[A link](http://link.com.go)
"""
response = self.patch(
url,
{
'notes': notes,
},
expected_code=200
)
# Ensure that newline chars have not been removed
self.assertIn('\n', response.data['notes'])
# Entire notes field should match original value
self.assertEqual(response.data['notes'], notes.strip())
class PartPricingDetailTests(InvenTreeAPITestCase): class PartPricingDetailTests(InvenTreeAPITestCase):
"""Tests for the part pricing API endpoint""" """Tests for the part pricing API endpoint"""