def clean_string(self, field: str, data: str) -> str:
    """Clean / sanitize a single input string.

    The value is passed through bleach with no tags and no attributes
    allowed, and with ``strip=True``.

    Note that this function will *allow* orphaned <>& characters,
    which would normally be escaped by bleach: the escaped entities are
    converted back to their raw characters after cleaning.

    Nominally, the only thing that will be "cleaned" will be HTML tags.

    Ref: https://github.com/mozilla/bleach/issues/192

    Args:
        field (str): Name of the field being cleaned; used as the key of
            the validation error.
        data (str): Raw input value.

    Returns:
        str: The cleaned value.

    Raises:
        ValidationError: If the length of the cleaned value differs from
            the length of the input value.
    """

    cleaned = clean(
        data,
        strip=True,
        tags=[],
        attributes=[],
    )

    # Add escaped characters back in.
    # The keys must be the HTML entities (not the raw characters), otherwise
    # this step is a no-op and any raw <, > or & would fail the length check.
    # '&amp;' is handled last so that an escaped ampersand in front of
    # 'gt;' / 'lt;' is only un-escaped by one level.
    replacements = {
        '&gt;': '>',
        '&lt;': '<',
        '&amp;': '&',
    }

    for o, r in replacements.items():
        cleaned = cleaned.replace(o, r)

    # If the length changed, it means that HTML tags were removed!
    if len(cleaned) != len(data):
        raise ValidationError({
            field: [_("Remove HTML tags from this value")]
        })

    return cleaned
+ dict: Provided data sanatized; still in the same order. """ + clean_data = {} + for k, v in data.items(): - if isinstance(v, str): - ret = clean(v) + + if k in self.SAFE_FIELDS: + ret = v + elif isinstance(v, str): + ret = self.clean_string(k, v) elif isinstance(v, dict): ret = self.clean_data(v) else: ret = v + clean_data[k] = ret + return clean_data diff --git a/InvenTree/part/test_api.py b/InvenTree/part/test_api.py index c591c9405d..85c7982b7e 100644 --- a/InvenTree/part/test_api.py +++ b/InvenTree/part/test_api.py @@ -227,31 +227,40 @@ class PartCategoryAPITest(InvenTreeAPITestCase): url = reverse('api-part-category-detail', kwargs={'pk': 1}) - self.patch( - url, - { - 'description': '', - }, - expected_code=200 - ) + # Invalid values containing tags + invalid_values = [ + '', + 'Link', + "Link", + '', + ] - cat = PartCategory.objects.get(pk=1) + for v in invalid_values: + response = self.patch( + url, + { + 'description': v + }, + expected_code=400 + ) - # Image tags have been stripped - self.assertEqual(cat.description, '<img src=# onerror=alert("pwned")>') + # Raw characters should be allowed + allowed = [ + '<< hello', + 'Alpha & Omega', + 'A > B > C', + ] - self.patch( - url, - { - 'description': 'LINK', - }, - expected_code=200, - ) + for val in allowed: + response = self.patch( + url, + { + 'description': val, + }, + expected_code=200, + ) - # Tags must have been bleached out - cat.refresh_from_db() - - self.assertEqual(cat.description, 'LINK<script>alert("h4x0r")</script>') + self.assertEqual(response.data['description'], val) class PartOptionsAPITest(InvenTreeAPITestCase):