Adds functionality to map file columns to model fields

Oliver 2022-02-16 14:57:13 +11:00
parent 58aa2adde7
commit d7adb6959d
3 changed files with 143 additions and 150 deletions


@@ -343,6 +343,9 @@ class DataFileUploadSerializer(serializers.Serializer):
     - Extracts data rows
     """
 
+    # Implementing class should register a target model (database model) to be used for import
+    TARGET_MODEL = None
+
     class Meta:
         fields = [
             'bom_file',
@@ -400,18 +403,81 @@ class DataFileUploadSerializer(serializers.Serializer):
         except Exception as e:
             raise serializers.ValidationError(str(e))
 
+        if len(self.dataset.headers) == 0:
+            raise serializers.ValidationError(_("No columns found in file"))
+
         if len(self.dataset) == 0:
             raise serializers.ValidationError(_("No data rows found in file"))
 
         return data_file
 
+    def match_column(self, column_name, field_names):
+        """
+        Attempt to match a column name (from the file) to a field (defined in the model)
+
+        Order of matching is:
+        - Direct match
+        - Case insensitive match
+        - Fuzzy match
+        """
+
+        column_name = column_name.strip()
+        column_name_lower = column_name.lower()
+
+        if column_name in field_names:
+            return column_name
+
+        for field_name in field_names:
+            if field_name.lower() == column_name_lower:
+                return field_name
+
+        # TODO: Fuzzy pattern matching
+
+        # No matches found
+        return None
+
     def extract_data(self):
         """
         Returns dataset extracted from the file
         """
 
+        # Provide a dict of available import fields for the model
+        model_fields = {}
+
+        # Keep track of columns we have already extracted
+        matched_columns = set()
+
+        if self.TARGET_MODEL:
+            try:
+                model_fields = self.TARGET_MODEL.get_import_fields()
+            except:
+                pass
+
+        # Extract a list of valid model field names
+        model_field_names = [key for key in model_fields.keys()]
+
+        # Provide a dict of available columns from the dataset
+        file_columns = {}
+
+        for header in self.dataset.headers:
+            column = {}
+
+            # Attempt to "match" file columns to model fields
+            match = self.match_column(header, model_field_names)
+
+            if match is not None and match not in matched_columns:
+                matched_columns.add(match)
+                column['value'] = match
+            else:
+                column['value'] = None
+
+            file_columns[header] = column
+
         return {
-            'headers': self.dataset.headers,
+            'file_fields': file_columns,
+            'model_fields': model_fields,
             'rows': [row.values() for row in self.dataset.dict],
             'filename': self.filename,
         }
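Both extract_data() above and validate_extracted_columns() further down rely on the registered TARGET_MODEL exposing a get_import_fields() classmethod that maps importable field names to per-field metadata. That method is not part of this diff; a minimal sketch of the expected interface (hypothetical class and field names) might look like:

# Hypothetical sketch only - not part of this commit. In InvenTree the target would be
# a Django model (e.g. BomItem); only the classmethod interface matters here.
class ExampleTarget:

    @classmethod
    def get_import_fields(cls):
        # Map importable field names to per-field metadata. The serializers above
        # read the 'required' flag via field.get('required', False).
        return {
            'quantity': {'required': True},
            'reference': {'required': False},
            'note': {'required': False},
        }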
@@ -425,25 +491,20 @@ class DataFileExtractSerializer(serializers.Serializer):
     - User provides an array of raw data rows
     """
 
-    # Provide a dict of expected columns for this importer
-    EXPECTED_COLUMNS = {}
-
-    # Provide a list of required columns for this importer
-    REQUIRED_COLUMNS = []
+    # Implementing class should register a target model (database model) to be used for import
+    TARGET_MODEL = None
 
     class Meta:
         fields = [
-            'raw_headers',
-            'mapped_headers',
+            'columns',
             'rows',
         ]
 
-    raw_headers = serializers.ListField(
-        child=serializers.CharField(),
-    )
-
-    mapped_headers = serializers.ListField(
-        child=serializers.CharField(),
+    # Mapping of columns
+    columns = serializers.ListField(
+        child=serializers.CharField(
+            allow_blank=True,
+        ),
     )
 
     rows = serializers.ListField(
@@ -458,23 +519,16 @@ class DataFileExtractSerializer(serializers.Serializer):
         data = super().validate(data)
 
-        self.raw_headers = data.get('raw_headers', [])
-        self.mapped_headers = data.get('mapped_headers', [])
+        self.columns = data.get('columns', [])
         self.rows = data.get('rows', [])
 
         if len(self.rows) == 0:
             raise serializers.ValidationError(_("No data rows provided"))
 
-        if len(self.raw_headers) == 0:
-            raise serializers.ValidationError(_("File headers not supplied"))
-
-        if len(self.mapped_headers) == 0:
-            raise serializers.ValidationError(_("Mapped headers not supplied"))
-
-        if len(self.raw_headers) != len(self.mapped_headers):
-            raise serializers.ValidationError(_("Supplied header list has incorrect length"))
-
-        self.validate_headers()
+        if len(self.columns) == 0:
+            raise serializers.ValidationError(_("No data columns supplied"))
+
+        self.validate_extracted_columns()
 
         return self.extract_data(data)
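With this change the extract serializer no longer accepts separate raw_headers and mapped_headers lists; the client posts a single columns list (one entry per file column, blank for unmapped columns, as permitted by allow_blank=True) alongside the raw data rows. An illustrative payload shape (values are made up):

# Illustrative payload for DataFileExtractSerializer - field values are examples only.
payload = {
    'columns': ['quantity', 'reference', ''],   # third file column deliberately left unmapped
    'rows': [
        ['10', 'R1', 'some ignored value'],
        ['25', 'C3', 'another ignored value'],
    ],
}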
@@ -486,18 +540,38 @@ class DataFileExtractSerializer(serializers.Serializer):
         return data
 
-    def validate_headers(self):
+    def validate_extracted_columns(self):
         """
         Perform custom validation of header mapping.
         """
 
-        print("validate_headers()")
-
-        for col in self.REQUIRED_COLUMNS:
-            print("checking col:", col)
-            if col not in self.mapped_headers:
-                raise serializers.ValidationError(_("Missing required column") + f": {col}")
+        if self.TARGET_MODEL:
+            try:
+                model_fields = self.TARGET_MODEL.get_import_fields()
+            except:
+                model_fields = {}
+
+        cols_seen = set()
+
+        for name, field in model_fields.items():
+
+            required = field.get('required', False)
+
+            # Check for missing required columns
+            if required:
+                if name not in self.columns:
+                    raise serializers.ValidationError(_("Missing required column") + f": '{name}'")
+
+        for col in self.columns:
+
+            if not col:
+                continue
+
+            # Check for duplicated columns
+            if col in cols_seen:
+                raise serializers.ValidationError(_("Duplicate column") + f": '{col}'")
+
+            cols_seen.add(col)
 
     def save(self):
         """


@@ -1539,7 +1539,18 @@ class BomExtract(generics.CreateAPIView):
     """
 
     queryset = Part.objects.none()
-    serializer_class = part_serializers.BomExtractSerializer
+    serializer_class = part_serializers.BomFileExtractSerializer
+
+
+class BomUpload(generics.CreateAPIView):
+    """
+    API endpoint for uploading a complete Bill of Materials.
+
+    It is assumed that the BOM has been extracted from a file using the BomExtract endpoint.
+    """
+
+    queryset = Part.objects.all()
+    serializer_class = part_serializers.BomFileUploadSerializer
 
     def create(self, request, *args, **kwargs):
         """
@@ -1556,16 +1567,6 @@ class BomExtract(generics.CreateAPIView):
         return Response(data, status=status.HTTP_201_CREATED, headers=headers)
 
 
-class BomUpload(generics.CreateAPIView):
-    """
-    API endpoint for uploading a complete Bill of Materials.
-
-    It is assumed that the BOM has been extracted from a file using the BomExtract endpoint.
-    """
-
-    queryset = Part.objects.all()
-    serializer_class = part_serializers.BomUploadSerializer
-
-
 class BomDetail(generics.RetrieveUpdateDestroyAPIView):
     """ API endpoint for detail view of a single BomItem object """
@@ -1719,9 +1720,9 @@ bom_api_urls = [
         url(r'^.*$', BomDetail.as_view(), name='api-bom-item-detail'),
     ])),
 
-    url(r'^upload/', BomUpload.as_view(), name='api-bom-upload'),
     url(r'^extract/', BomExtract.as_view(), name='api-bom-extract'),
+    url(r'^upload/', BomUpload.as_view(), name='api-bom-upload'),
 
     # Catch-all
     url(r'^.*$', BomList.as_view(), name='api-bom-list'),
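Per the serializer_class assignments above, the upload endpoint now accepts the raw file (BomFileUploadSerializer) while the extract endpoint accepts the mapped columns and rows (BomFileExtractSerializer). A rough client-side sketch of the resulting two-step flow; the base URL, auth header, and example column names are assumptions, and the file field name follows the 'bom_file' entry shown in the serializer Meta above:

# Rough sketch of the two-step BOM import flow - URLs, auth and column names are illustrative.
import requests

API = "http://localhost:8000/api"
HEADERS = {"Authorization": "Token <api-token>"}

# Step 1: post the raw file; the response proposes a column-to-field mapping
with open("bom.csv", "rb") as f:
    r1 = requests.post(f"{API}/bom/upload/", headers=HEADERS, files={"bom_file": f})
proposed = r1.json()   # expected keys: 'file_fields', 'model_fields', 'rows', 'filename'

# Step 2: post the (possibly user-adjusted) mapping plus the raw rows for extraction
r2 = requests.post(
    f"{API}/bom/extract/",
    headers=HEADERS,
    json={
        "columns": ["quantity", "reference", ""],
        "rows": proposed["rows"],
    },
)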


@@ -17,7 +17,9 @@ from rest_framework import serializers
 from sql_util.utils import SubqueryCount, SubquerySum
 from djmoney.contrib.django_rest_framework import MoneyField
 
-from InvenTree.serializers import (InvenTreeAttachmentSerializerField,
+from InvenTree.serializers import (DataFileUploadSerializer,
+                                   DataFileExtractSerializer,
+                                   InvenTreeAttachmentSerializerField,
                                    InvenTreeDecimalField,
                                    InvenTreeImageSerializerField,
                                    InvenTreeModelSerializer,
@@ -709,7 +711,7 @@ class PartCopyBOMSerializer(serializers.Serializer):
     )
 
 
-class BomExtractSerializer(serializers.Serializer):
+class BomFileUploadSerializer(DataFileUploadSerializer):
     """
     Serializer for uploading a file and extracting data from it.
@@ -729,50 +731,7 @@ class BomExtractSerializer(serializers.Serializer):
     """
 
-    class Meta:
-        fields = [
-            'bom_file',
-            'part',
-            'clear_existing',
-        ]
-
-    # These columns must be present
-    REQUIRED_COLUMNS = [
-        'quantity',
-    ]
-
-    # We need at least one column to specify a "part"
-    PART_COLUMNS = [
-        'part',
-        'part_id',
-        'part_name',
-        'part_ipn',
-    ]
-
-    # These columns are "optional"
-    OPTIONAL_COLUMNS = [
-        'allow_variants',
-        'inherited',
-        'optional',
-        'overage',
-        'note',
-        'reference',
-    ]
-
-    def find_matching_column(self, col_name, columns):
-
-        # Direct match
-        if col_name in columns:
-            return col_name
-
-        col_name = col_name.lower().strip()
-
-        for col in columns:
-            if col.lower().strip() == col_name:
-                return col
-
-        # No match
-        return None
+    TARGET_MODEL = BomItem
 
     def find_matching_data(self, row, col_name, columns):
         """
@@ -783,58 +742,7 @@ class BomExtractSerializer(serializers.Serializer):
         return row.get(col_name, None)
 
-    bom_file = serializers.FileField(
-        label=_("BOM File"),
-        help_text=_("Select Bill of Materials file"),
-        required=True,
-        allow_empty_file=False,
-    )
-
-    def validate_bom_file(self, bom_file):
-        """
-        Perform validation checks on the uploaded BOM file
-        """
-
-        self.filename = bom_file.name
-
-        name, ext = os.path.splitext(bom_file.name)
-
-        # Remove the leading . from the extension
-        ext = ext[1:]
-
-        accepted_file_types = [
-            'xls', 'xlsx',
-            'csv', 'tsv',
-            'xml',
-        ]
-
-        if ext not in accepted_file_types:
-            raise serializers.ValidationError(_("Unsupported file type"))
-
-        # Impose a 50MB limit on uploaded BOM files
-        max_upload_file_size = 50 * 1024 * 1024
-
-        if bom_file.size > max_upload_file_size:
-            raise serializers.ValidationError(_("File is too large"))
-
-        # Read file data into memory (bytes object)
-        try:
-            data = bom_file.read()
-        except Exception as e:
-            raise serializers.ValidationError(str(e))
-
-        if ext in ['csv', 'tsv', 'xml']:
-            try:
-                data = data.decode()
-            except Exception as e:
-                raise serializers.ValidationError(str(e))
-
-        # Convert to a tablib dataset (we expect headers)
-        try:
-            self.dataset = tablib.Dataset().load(data, ext, headers=True)
-        except Exception as e:
-            raise serializers.ValidationError(str(e))
+    """
 
         for header in self.REQUIRED_COLUMNS:
             match = self.find_matching_column(header, self.dataset.headers)
@@ -861,11 +769,9 @@ class BomExtractSerializer(serializers.Serializer):
             raise serializers.ValidationError(_("No data rows found"))
 
         return bom_file
+    """
 
-    def extract_data(self):
-        """
-        Read individual rows out of the BOM file
-        """
+    def dextract_data(self):
 
         rows = []
         errors = []
@@ -880,9 +786,9 @@ class BomExtractSerializer(serializers.Serializer):
             row_error = {}
 
-            """
-            If the "level" column is specified, and this is not a top-level BOM item, ignore the row!
-            """
+            # If the "level" column is specified, and this is not a top-level BOM item, ignore the row!
+
             if level_column is not None:
                 level = row.get('level', None)
@@ -989,15 +895,19 @@ class BomExtractSerializer(serializers.Serializer):
             'filename': self.filename,
         }
+    """
 
     part = serializers.PrimaryKeyRelatedField(queryset=Part.objects.filter(assembly=True), required=True)
 
     clear_existing = serializers.BooleanField(
         label=_("Clear Existing BOM"),
         help_text=_("Delete existing BOM data first"),
     )
+    """
 
     def save(self):
+        ...
+        """
 
         data = self.validated_data
 
         master_part = data['part']
@@ -1006,7 +916,15 @@ class BomExtractSerializer(serializers.Serializer):
         if clear_existing:
             # Remove all existing BOM items
             master_part.bom_items.all().delete()
+        """
+
+
+class BomFileExtractSerializer(DataFileExtractSerializer):
+    """
+    """
+
+    TARGET_MODEL = BomItem
 
 
 class BomUploadSerializer(serializers.Serializer):
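The match_column() helper added in the first file leaves fuzzy matching as a TODO. One possible fallback (an assumption, not part of this commit) is difflib from the standard library:

# Possible fuzzy-match fallback for match_column() - illustrative only, not in this commit.
import difflib


def fuzzy_match(column_name, field_names, cutoff=0.8):
    """
    Return the field name closest to column_name, or None if nothing is close enough.

    e.g. fuzzy_match("Referenc", ["quantity", "reference"]) -> "reference"
    """
    lowered = [f.lower() for f in field_names]
    matches = difflib.get_close_matches(column_name.strip().lower(), lowered, n=1, cutoff=cutoff)

    if not matches:
        return None

    # Map the lower-cased winner back to the original field name
    return field_names[lowered.index(matches[0])]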