diff --git a/InvenTree/InvenTree/mixins.py b/InvenTree/InvenTree/mixins.py
index 584b3ac5ed..dfdde392ce 100644
--- a/InvenTree/InvenTree/mixins.py
+++ b/InvenTree/InvenTree/mixins.py
@@ -1,15 +1,18 @@
"""Mixins for (API) views in the whole project."""
+from django.utils.translation import gettext_lazy as _
+
from bleach import clean
from rest_framework import generics, status
+from rest_framework.exceptions import ValidationError
from rest_framework.response import Response
class CleanMixin():
- """Model mixin class which cleans inputs."""
+ """Model mixin class which cleans inputs using the Mozilla bleach tools."""
- # Define a map of fields avaialble for import
- SAFE_FIELDS = {}
+ # Define a list of field names which will *not* be cleaned
+ SAFE_FIELDS = []
def create(self, request, *args, **kwargs):
"""Override to clean data before processing it."""
@@ -34,6 +37,42 @@ class CleanMixin():
return Response(serializer.data)
+    def clean_string(self, field: str, data: str) -> str:
+        """Clean / sanitize a single input string.
+
+        Note that this function will *allow* orphaned <>& characters,
+        which would normally be escaped by bleach.
+
+        Nominally, the only thing that will be "cleaned" will be HTML tags
+
+        Ref: https://github.com/mozilla/bleach/issues/192
+
+        Args:
+            field (str): Name of the field; used as the key of the error.
+            data (str): Raw input value to sanitize.
+
+        Returns:
+            str: The bleached value, with escaped <>& entities restored.
+
+        Raises:
+            ValidationError: If the bleached value differs in length from data.
+        """
+        cleaned = clean(data, strip=True, tags=[], attributes=[])
+
+        # bleach escapes orphaned <>& into HTML entities: restore the raw
+        # characters ('&amp;' goes last so restored text is not decoded twice)
+        replacements = {'&gt;': '>', '&lt;': '<', '&amp;': '&'}
+        for o, r in replacements.items():
+            cleaned = cleaned.replace(o, r)
+
+        # If the length changed, it means that HTML tags were removed!
+        if len(cleaned) != len(data):
+            raise ValidationError({
+                field: [_("Remove HTML tags from this value")]
+            })
+
+        return cleaned
+
def clean_data(self, data: dict) -> dict:
"""Clean / sanitize data.
@@ -46,17 +85,24 @@ class CleanMixin():
data (dict): Data that should be sanatized.
Returns:
- dict: Profided data sanatized; still in the same order.
+ dict: Provided data sanitized; still in the same order.
"""
+
clean_data = {}
+
for k, v in data.items():
- if isinstance(v, str):
- ret = clean(v)
+
+ if k in self.SAFE_FIELDS:
+ ret = v
+ elif isinstance(v, str):
+ ret = self.clean_string(k, v)
elif isinstance(v, dict):
ret = self.clean_data(v)
else:
ret = v
+
clean_data[k] = ret
+
return clean_data
diff --git a/InvenTree/part/test_api.py b/InvenTree/part/test_api.py
index c591c9405d..85c7982b7e 100644
--- a/InvenTree/part/test_api.py
+++ b/InvenTree/part/test_api.py
@@ -227,31 +227,40 @@ class PartCategoryAPITest(InvenTreeAPITestCase):
url = reverse('api-part-category-detail', kwargs={'pk': 1})
- self.patch(
- url,
- {
- 'description': '
',
- },
- expected_code=200
- )
+ # Invalid values containing tags
+ invalid_values = [
+ '
',
+ 'Link',
+ "Link",
+ '',
+ ]
- cat = PartCategory.objects.get(pk=1)
+ for v in invalid_values:
+ response = self.patch(
+ url,
+ {
+ 'description': v
+ },
+ expected_code=400
+ )
- # Image tags have been stripped
- self.assertEqual(cat.description, '<img src=# onerror=alert("pwned")>')
+ # Raw characters should be allowed
+ allowed = [
+ '<< hello',
+ 'Alpha & Omega',
+ 'A > B > C',
+ ]
- self.patch(
- url,
- {
- 'description': 'LINK',
- },
- expected_code=200,
- )
+ for val in allowed:
+ response = self.patch(
+ url,
+ {
+ 'description': val,
+ },
+ expected_code=200,
+ )
- # Tags must have been bleached out
- cat.refresh_from_db()
-
- self.assertEqual(cat.description, 'LINK<script>alert("h4x0r")</script>')
+ self.assertEqual(response.data['description'], val)
class PartOptionsAPITest(InvenTreeAPITestCase):