Adds generic API endpoint for extracting data from a tabulated file

2024-08-30 18:33:04 +00:00 · 2022-02-16 11:32:40 +11:00 · 2022-02-16 11:32:40 +11:00 · 24f1491b8e
commit 24f1491b8e
parent 6c32f5c60d
2 changed files with 177 additions and 2 deletions
--- a/InvenTree/InvenTree/serializers.py
+++ b/InvenTree/InvenTree/serializers.py
@ -5,8 +5,8 @@ Serializers used in various InvenTree apps
 # -*- coding: utf-8 -*-
 from __future__ import unicode_literals

-
 import os
+import tablib

 from decimal import Decimal

@ -332,3 +332,175 @@ class InvenTreeDecimalField(serializers.FloatField):
            return Decimal(str(data))
        except:
            raise serializers.ValidationError(_("Invalid value"))
+
+
+class DataFileUploadSerializer(serializers.Serializer):
+    """
+    Generic serializer for uploading a data file, and extracting a dataset.
+
+    - Validates uploaded file
+    - Extracts column names
+    - Extracts data rows
+    """
+
+    class Meta:
+        fields = [
+            'bom_file',
+        ]
+
+    data_file = serializers.FileField(
+        label=_("Data File"),
+        help_text=_("Select data file for upload"),
+        required=True,
+        allow_empty_file=False,
+    )
+
+    def validate_data_file(self, data_file):
+        """
+        Perform validation checks on the uploaded data file.
+        """
+
+        self.filename = data_file.name
+
+        name, ext = os.path.splitext(data_file.name)
+
+        # Remove the leading . from the extension
+        ext = ext[1:]
+
+        accepted_file_types = [
+            'xls', 'xlsx',
+            'csv', 'tsv',
+            'xml',
+        ]
+
+        if ext not in accepted_file_types:
+            raise serializers.ValidationError(_("Unsupported file type"))
+
+        # Impose a 50MB limit on uploaded BOM files
+        max_upload_file_size = 50 * 1024 * 1024
+
+        if data_file.size > max_upload_file_size:
+            raise serializers.ValidationError(_("File is too large"))
+
+        # Read file data into memory (bytes object)
+        try:
+            data = data_file.read()
+        except Exception as e:
+            raise serializers.ValidationError(str(e))
+
+        if ext in ['csv', 'tsv', 'xml']:
+            try:
+                data = data.decode()
+            except Exception as e:
+                raise serializers.ValidationError(str(e))
+
+        # Convert to a tablib dataset (we expect headers)
+        try:
+            self.dataset = tablib.Dataset().load(data, ext, headers=True)
+        except Exception as e:
+            raise serializers.ValidationError(str(e))
+
+        if len(self.dataset) == 0:
+            raise serializers.ValidationError(_("No data rows found in file"))
+
+        return data_file
+
+    def extract_data(self):
+        """
+        Returns dataset extracted from the file
+        """
+
+        return {
+            'headers': self.dataset.headers,
+            'rows': [row.values() for row in self.dataset.dict],
+            'filename': self.filename,
+        }
+
+
+class DataFileExtractSerializer(serializers.Serializer):
+    """
+    Generic serializer for extracting data from an imported dataset.
+
+    - User provides an array of matched headers
+    - User provides an array of raw data rows 
+    """
+
+    # Provide a dict of expected columns for this importer
+    EXPECTED_COLUMNS = {}
+
+    # Provide a list of required columns for this importer
+    REQUIRED_COLUMNS = []
+
+    class Meta:
+        fields = [
+            'raw_headers',
+            'mapped_headers',
+            'rows',
+        ]
+
+    raw_headers = serializers.ListField(
+        child=serializers.CharField(),
+    )
+
+    mapped_headers = serializers.ListField(
+        child=serializers.CharField(),
+    )
+
+    rows = serializers.ListField(
+        child=serializers.ListField(
+            child=serializers.CharField(
+                allow_blank=True,
+            ),
+        )
+    )
+
+    def validate(self, data):
+
+        data = super().validate(data)
+
+        self.raw_headers = data.get('raw_headers', [])
+        self.mapped_headers = data.get('mapped_headers', [])
+        self.rows = data.get('rows', [])
+
+        if len(self.rows) == 0:
+            raise serializers.ValidationError(_("No data rows provided"))
+
+        if len(self.raw_headers) == 0:
+            raise serializers.ValidationError(_("File headers not supplied"))
+
+        if len(self.mapped_headers) == 0:
+            raise serializers.ValidationError(_("Mapped headers not supplied"))
+
+        if len(self.raw_headers) != len(self.mapped_headers):
+            raise serializers.ValidationError(_("Supplied header list has incorrect length"))
+
+        self.validate_headers()
+
+        return self.extract_data(data)
+
+    def extract_data(self, data):
+        """
+        Extract row data based on the provided fields.
+        Returns an array of mapped column:value values
+        """
+
+        return data
+
+    def validate_headers(self):
+        """
+        Perform custom validation of header mapping.
+        """
+
+        print("validate_headers()")
+        
+        for col in self.REQUIRED_COLUMNS:
+            print("checking col:", col)
+            if col not in self.mapped_headers:
+                raise serializers.ValidationError(_("Missing required column") + f": {col}")
+
+
+    def save(self):
+        """
+        No "save" action for this serializer
+        """
+        ...
--- a/InvenTree/InvenTree/version.py
+++ b/InvenTree/InvenTree/version.py
@ -12,11 +12,14 @@ import common.models
 INVENTREE_SW_VERSION = "0.6.0 dev"

 # InvenTree API version
-INVENTREE_API_VERSION = 24
+INVENTREE_API_VERSION = 25

 """
 Increment this API version number whenever there is a significant change to the API that any clients need to know about

+v25 -> 2022-02-16
+    - Adds API endpoint for uploading a BOM file and extracting data
+
 v24 -> 2022-02-10
    - Adds API endpoint for deleting (cancelling) build order outputs