From e1f0f4f0cf55363a69e7174ea99a40be39dcb522 Mon Sep 17 00:00:00 2001 From: Justin Swanson Date: Tue, 11 Aug 2020 08:41:30 -0500 Subject: [PATCH] Refactor BSAReader to a lazier overlay style --- Compression.BSA/BSA/Reader/BSAReader.cs | 55 ++++-- Compression.BSA/BSA/Reader/FileNameBlock.cs | 42 +++++ Compression.BSA/BSA/Reader/FileRecord.cs | 183 +++++++++++--------- Compression.BSA/BSA/Reader/FolderRecord.cs | 89 +++++++--- Compression.BSA/Utils.cs | 16 +- Wabbajack.Common/Extensions/StreamExt.cs | 15 ++ Wabbajack.Common/Paths/RelativePath.cs | 7 +- Wabbajack.sln | 4 + 8 files changed, 291 insertions(+), 120 deletions(-) create mode 100644 Compression.BSA/BSA/Reader/FileNameBlock.cs create mode 100644 Wabbajack.Common/Extensions/StreamExt.cs diff --git a/Compression.BSA/BSA/Reader/BSAReader.cs b/Compression.BSA/BSA/Reader/BSAReader.cs index b52393cb..f96e0ffd 100644 --- a/Compression.BSA/BSA/Reader/BSAReader.cs +++ b/Compression.BSA/BSA/Reader/BSAReader.cs @@ -11,11 +11,13 @@ namespace Compression.BSA { public class BSAReader : IBSAReader { + public const int HeaderLength = 0x24; + internal uint _fileCount; internal AbsolutePath _fileName; internal uint _folderCount; internal uint _folderRecordOffset; - private List _folders; + private Lazy _folders; internal string _magic; internal uint _totalFileNameLength; internal uint _totalFolderNameLength; @@ -30,8 +32,8 @@ namespace Compression.BSA { get { - foreach (var folder in _folders) - foreach (var file in folder._files) + foreach (var folder in _folders.Value) + foreach (var file in folder._files.Value) yield return file; } } @@ -79,13 +81,17 @@ namespace Compression.BSA public static BSAReader Load(AbsolutePath filename) { - using var stream = File.Open(filename.ToString(), FileMode.Open, FileAccess.Read, FileShare.Read); - using var br = new BinaryReader(stream); var bsa = new BSAReader { _fileName = filename }; - bsa.LoadHeaders(br); + using var rdr = bsa.GetStream(); + bsa.LoadHeaders(rdr); return bsa; } + internal BinaryReader GetStream() + { + return new BinaryReader(File.Open(_fileName.ToString(), FileMode.Open, FileAccess.Read, FileShare.Read)); + } + private void LoadHeaders(BinaryReader rdr) { var fourcc = Encoding.ASCII.GetString(rdr.ReadBytes(4)); @@ -103,21 +109,40 @@ namespace Compression.BSA _totalFileNameLength = rdr.ReadUInt32(); FileFlags = (FileFlags)rdr.ReadUInt32(); - LoadFolderRecords(rdr); + _folders = new Lazy( + isThreadSafe: true, + valueFactory: () => LoadFolderRecords()); } - private void LoadFolderRecords(BinaryReader rdr) + private FolderRecord[] LoadFolderRecords() { - _folders = new List(); + using var rdr = GetStream(); + rdr.BaseStream.Position = _folderRecordOffset; + var folderHeaderLength = FolderRecord.HeaderLength(HeaderType); + ReadOnlyMemorySlice folderHeaderData = rdr.ReadBytes(checked((int)(folderHeaderLength * _folderCount))); + + var ret = new FolderRecord[_folderCount]; for (var idx = 0; idx < _folderCount; idx += 1) - _folders.Add(new FolderRecord(this, rdr)); + ret[idx] = new FolderRecord(this, folderHeaderData.Slice(idx * folderHeaderLength, folderHeaderLength), idx); - foreach (var folder in _folders) - folder.LoadFileRecordBlock(this, rdr); + // Slice off appropriate file header data per folder + int fileCountTally = 0; + foreach (var folder in ret) + { + folder.ProcessFileRecordHeadersBlock(rdr, fileCountTally); + fileCountTally = checked((int)(fileCountTally + folder.FileCount)); + } - foreach (var folder in _folders) - foreach (var file in folder._files) - file.LoadFileRecord(this, folder, file, rdr); + if (HasFileNames) + { + var filenameBlock = new FileNameBlock(this, rdr.BaseStream.Position); + foreach (var folder in ret) + { + folder.FileNameBlock = filenameBlock; + } + } + + return ret; } } } diff --git a/Compression.BSA/BSA/Reader/FileNameBlock.cs b/Compression.BSA/BSA/Reader/FileNameBlock.cs new file mode 100644 index 00000000..c5d6f0e6 --- /dev/null +++ b/Compression.BSA/BSA/Reader/FileNameBlock.cs @@ -0,0 +1,42 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Text; +using Wabbajack.Common; + +namespace Compression.BSA +{ + internal class FileNameBlock + { + public readonly Lazy[]> Names; + + public FileNameBlock(BSAReader bsa, long position) + { + Names = new Lazy[]>( + mode: System.Threading.LazyThreadSafetyMode.ExecutionAndPublication, + valueFactory: () => + { + using var stream = bsa.GetStream(); + stream.BaseStream.Position = position; + ReadOnlyMemorySlice data = stream.ReadBytes(checked((int)bsa._totalFileNameLength)); + ReadOnlyMemorySlice[] names = new ReadOnlyMemorySlice[bsa._fileCount]; + for (int i = 0; i < bsa._fileCount; i++) + { + var index = data.Span.IndexOf(default(byte)); + if (index == -1) + { + throw new InvalidDataException("Did not end all of its strings in null bytes"); + } + names[i] = data.Slice(0, index + 1); + var str = names[i].ReadStringTerm(bsa.HeaderType); + data = data.Slice(index + 1); + } + if (data.Length > 0) + { + throw new InvalidDataException("File name block did not parse all of its data"); + } + return names; + }); + } + } +} diff --git a/Compression.BSA/BSA/Reader/FileRecord.cs b/Compression.BSA/BSA/Reader/FileRecord.cs index 1e5050ac..d6e2b7bf 100644 --- a/Compression.BSA/BSA/Reader/FileRecord.cs +++ b/Compression.BSA/BSA/Reader/FileRecord.cs @@ -1,6 +1,8 @@ using System; +using System.Buffers.Binary; using System.Collections.Generic; using System.IO; +using System.Runtime.Versioning; using System.Text; using System.Threading.Tasks; using ICSharpCode.SharpZipLib.Zip.Compression.Streams; @@ -12,111 +14,86 @@ namespace Compression.BSA { public class FileRecord : IFile { - private readonly BSAReader _bsa; - private readonly long _dataOffset; - private string _name; - private readonly string _nameBlob; - private readonly uint _offset; - private readonly uint _onDiskSize; - private readonly uint _originalSize; - private readonly uint _size; + public const int HeaderLength = 0x10; + + private readonly ReadOnlyMemorySlice _headerData; internal readonly int _index; + internal readonly int _overallIndex; + internal readonly FileNameBlock _nameBlock; + internal readonly Lazy _name; + internal Lazy<(uint Size, uint OnDisk, uint Original)> _size; - public uint Size { get; } + public ulong Hash => BinaryPrimitives.ReadUInt64LittleEndian(_headerData); + protected uint RawSize => BinaryPrimitives.ReadUInt32LittleEndian(_headerData.Slice(0x8)); + public uint Offset => BinaryPrimitives.ReadUInt32LittleEndian(_headerData.Slice(0xC)); + public string Name => _name.Value; + public uint Size => _size.Value.Size; - public ulong Hash { get; } + public bool FlipCompression => (RawSize & (0x1 << 30)) > 0; - public FolderRecord Folder { get; } + internal FolderRecord Folder { get; } + internal BSAReader BSA => Folder.BSA; - public bool FlipCompression { get; } - - public FileRecord(BSAReader bsa, FolderRecord folderRecord, BinaryReader src, int index) + internal FileRecord( + FolderRecord folderRecord, + ReadOnlyMemorySlice data, + int index, + int overallIndex, + FileNameBlock nameBlock) { _index = index; - _bsa = bsa; - Hash = src.ReadUInt64(); - var size = src.ReadUInt32(); - FlipCompression = (size & (0x1 << 30)) > 0; - - if (FlipCompression) - _size = size ^ (0x1 << 30); - else - _size = size; - - if (Compressed) - _size -= 4; - - _offset = src.ReadUInt32(); + _overallIndex = overallIndex; + _headerData = data; + _nameBlock = nameBlock; Folder = folderRecord; + _name = new Lazy(GetName, System.Threading.LazyThreadSafetyMode.PublicationOnly); - var old_pos = src.BaseStream.Position; - - src.BaseStream.Position = _offset; - - if (bsa.HasNameBlobs) - _nameBlob = src.ReadStringLenNoTerm(bsa.HeaderType); - - - if (Compressed) - _originalSize = src.ReadUInt32(); - - _onDiskSize = (uint)(_size - (_nameBlob == null ? 0 : _nameBlob.Length + 1)); - - if (Compressed) - { - Size = _originalSize; - _onDiskSize -= 4; - } - else - { - Size = _onDiskSize; - } - - _dataOffset = src.BaseStream.Position; - - src.BaseStream.Position = old_pos; + // Will be replaced if CopyDataTo is called before value is created + _size = new Lazy<(uint Size, uint OnDisk, uint Original)>( + mode: System.Threading.LazyThreadSafetyMode.ExecutionAndPublication, + valueFactory: () => + { + using var rdr = BSA.GetStream(); + rdr.BaseStream.Position = Offset; + return ReadSize(rdr); + }); } - public RelativePath Path - { - get - { - return string.IsNullOrEmpty(Folder.Name) ? new RelativePath(_name) : new RelativePath(Folder.Name + "\\" + _name); - } - } + public RelativePath Path => new RelativePath(string.IsNullOrEmpty(Folder.Name) ? Name : Folder.Name + "\\" + Name, skipValidation: true); public bool Compressed { get { - if (FlipCompression) return !_bsa.CompressedByDefault; - return _bsa.CompressedByDefault; + if (FlipCompression) return !BSA.CompressedByDefault; + return BSA.CompressedByDefault; } } public FileStateObject State => new BSAFileStateObject(this); - internal void LoadFileRecord(BSAReader bsaReader, FolderRecord folder, FileRecord file, BinaryReader rdr) - { - _name = rdr.ReadStringTerm(_bsa.HeaderType); - } - public async ValueTask CopyDataTo(Stream output) { - await using var in_file = await _bsa._fileName.OpenRead().ConfigureAwait(false); + await using var in_file = await BSA._fileName.OpenRead().ConfigureAwait(false); using var rdr = new BinaryReader(in_file); - rdr.BaseStream.Position = _dataOffset; + rdr.BaseStream.Position = Offset; - if (_bsa.HeaderType == VersionType.SSE) + (uint Size, uint OnDisk, uint Original) size = ReadSize(rdr); + if (!_size.IsValueCreated) + { + _size = new Lazy<(uint Size, uint OnDisk, uint Original)>(value: size); + } + + if (BSA.HeaderType == VersionType.SSE) { if (Compressed) { using var r = LZ4Stream.Decode(rdr.BaseStream); - await r.CopyToLimitAsync(output, (int)_originalSize).ConfigureAwait(false); + await r.CopyToLimitAsync(output, size.Original).ConfigureAwait(false); } else { - await rdr.BaseStream.CopyToLimitAsync(output, (int)_onDiskSize).ConfigureAwait(false); + await rdr.BaseStream.CopyToLimitAsync(output, size.OnDisk).ConfigureAwait(false); } } else @@ -124,20 +101,66 @@ namespace Compression.BSA if (Compressed) { await using var z = new InflaterInputStream(rdr.BaseStream); - await z.CopyToLimitAsync(output, (int)_originalSize).ConfigureAwait(false); + await z.CopyToLimitAsync(output, size.Original).ConfigureAwait(false); } else - await rdr.BaseStream.CopyToLimitAsync(output, (int)_onDiskSize).ConfigureAwait(false); + await rdr.BaseStream.CopyToLimitAsync(output, size.OnDisk).ConfigureAwait(false); + } + } + + private string GetName() + { + var names = _nameBlock.Names.Value; + return names[_overallIndex].ReadStringTerm(BSA.HeaderType); + } + + private (uint Size, uint OnDisk, uint Original) ReadSize(BinaryReader rdr) + { + uint size = RawSize; + if (FlipCompression) + size = size ^ (0x1 << 30); + + if (Compressed) + size -= 4; + + byte nameBlobOffset; + if (BSA.HasNameBlobs) + { + nameBlobOffset = rdr.ReadByte(); + // Just skip, not using + rdr.BaseStream.Position += nameBlobOffset; + } + else + { + nameBlobOffset = 0; + } + + uint originalSize; + if (Compressed) + { + originalSize = rdr.ReadUInt32(); + } + else + { + originalSize = 0; + } + + uint onDiskSize = size - nameBlobOffset; + if (Compressed) + { + return (Size: originalSize, OnDisk: onDiskSize, Original: originalSize); + } + else + { + return (Size: onDiskSize, OnDisk: onDiskSize, Original: originalSize); } } public void Dump(Action print) { - print($"Name: {_name}"); - print($"Offset: {_offset}"); - print($"On Disk Size: {_onDiskSize}"); - print($"Original Size: {_originalSize}"); - print($"Size: {_size}"); + print($"Name: {Name}"); + print($"Offset: {Offset}"); + print($"Raw Size: {RawSize}"); print($"Index: {_index}"); } } diff --git a/Compression.BSA/BSA/Reader/FolderRecord.cs b/Compression.BSA/BSA/Reader/FolderRecord.cs index c98681a4..1cc64010 100644 --- a/Compression.BSA/BSA/Reader/FolderRecord.cs +++ b/Compression.BSA/BSA/Reader/FolderRecord.cs @@ -1,44 +1,93 @@ using System; +using System.Buffers.Binary; using System.Collections.Generic; using System.IO; using System.Text; +using NativeImport; +using Wabbajack.Common; using File = Alphaleonis.Win32.Filesystem.File; namespace Compression.BSA { public class FolderRecord { - private readonly uint _fileCount; - internal List _files; - private ulong _offset; - private uint _unk; + internal readonly BSAReader BSA; + private readonly ReadOnlyMemorySlice _data; + internal Lazy _files; + private ReadOnlyMemorySlice? _nameData; + private int _prevFileCount; + internal FileNameBlock FileNameBlock; + private readonly Lazy _name; - public string Name { get; private set; } + public int Index { get; } + public string Name => _name.Value; - public ulong Hash { get; } - - internal FolderRecord(BSAReader bsa, BinaryReader src) + internal FolderRecord(BSAReader bsa, ReadOnlyMemorySlice data, int index) { - Hash = src.ReadUInt64(); - _fileCount = src.ReadUInt32(); - if (bsa.HeaderType == VersionType.SSE) + BSA = bsa; + _data = data; + Index = index; + _name = new Lazy( + () => _nameData.HasValue ? _nameData.Value.ReadStringTerm(BSA.HeaderType) : string.Empty, + isThreadSafe: true); + } + + private bool IsLongform => BSA.HeaderType == VersionType.SSE; + + public ulong Hash => BinaryPrimitives.ReadUInt64LittleEndian(_data); + + public uint FileCount => BinaryPrimitives.ReadUInt32LittleEndian(_data.Slice(0x8)); + + public uint Unknown => IsLongform ? + BinaryPrimitives.ReadUInt32LittleEndian(_data.Slice(0xC)) : + 0; + + public ulong Offset => IsLongform ? + BinaryPrimitives.ReadUInt64LittleEndian(_data.Slice(0x10)) : + BinaryPrimitives.ReadUInt32LittleEndian(_data.Slice(0xC)); + + public static int HeaderLength(VersionType version) + { + return version switch { - _unk = src.ReadUInt32(); - _offset = src.ReadUInt64(); + VersionType.SSE => 0x18, + _ => 0x10, + }; + } + + internal void ProcessFileRecordHeadersBlock(BinaryReader rdr, int fileCountTally) + { + _prevFileCount = fileCountTally; + var totalFileLen = checked((int)(FileCount * FileRecord.HeaderLength)); + + ReadOnlyMemorySlice data; + if (BSA.HasFolderNames) + { + var len = rdr.ReadByte(); + data = rdr.ReadBytes(len + totalFileLen); + _nameData = data.Slice(0, len); + data = data.Slice(len); } else { - _offset = src.ReadUInt32(); + data = rdr.ReadBytes(totalFileLen); } + + _files = new Lazy( + isThreadSafe: true, + valueFactory: () => ParseFileRecords(data)); } - internal void LoadFileRecordBlock(BSAReader bsa, BinaryReader src) + private FileRecord[] ParseFileRecords(ReadOnlyMemorySlice data) { - if (bsa.HasFolderNames) Name = src.ReadStringLen(bsa.HeaderType); - - _files = new List(); - for (var idx = 0; idx < _fileCount; idx += 1) - _files.Add(new FileRecord(bsa, this, src, idx)); + var fileCount = FileCount; + var ret = new FileRecord[fileCount]; + for (var idx = 0; idx < fileCount; idx += 1) + { + var fileData = data.Slice(idx * FileRecord.HeaderLength, FileRecord.HeaderLength); + ret[idx] = new FileRecord(this, fileData, idx, idx + _prevFileCount, FileNameBlock); + } + return ret; } } } diff --git a/Compression.BSA/Utils.cs b/Compression.BSA/Utils.cs index 32893261..a718ff16 100644 --- a/Compression.BSA/Utils.cs +++ b/Compression.BSA/Utils.cs @@ -30,9 +30,7 @@ namespace Compression.BSA public static string ReadStringLen(this BinaryReader rdr, VersionType version) { var len = rdr.ReadByte(); - if (len == 0) - //rdr.ReadByte(); - return ""; + if (len == 0) return string.Empty; var bytes = rdr.ReadBytes(len - 1); rdr.ReadByte(); @@ -61,6 +59,18 @@ namespace Compression.BSA return GetEncoding(version).GetString(acc.ToArray()); } + public static string ReadStringLenTerm(this ReadOnlyMemorySlice bytes, VersionType version) + { + if (bytes.Length <= 1) return string.Empty; + return GetEncoding(version).GetString(bytes.Slice(1, bytes[0])); + } + + public static string ReadStringTerm(this ReadOnlyMemorySlice bytes, VersionType version) + { + if (bytes.Length <= 1) return string.Empty; + return GetEncoding(version).GetString(bytes[0..^1]); + } + /// /// Returns bytes for a \0 terminated string /// diff --git a/Wabbajack.Common/Extensions/StreamExt.cs b/Wabbajack.Common/Extensions/StreamExt.cs new file mode 100644 index 00000000..dbb1b612 --- /dev/null +++ b/Wabbajack.Common/Extensions/StreamExt.cs @@ -0,0 +1,15 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Text; + +namespace Wabbajack.Common +{ + public static class StreamExt + { + public static long Remaining(this Stream stream) + { + return stream.Length - stream.Position; + } + } +} diff --git a/Wabbajack.Common/Paths/RelativePath.cs b/Wabbajack.Common/Paths/RelativePath.cs index d4c9ba6f..21aa1238 100644 --- a/Wabbajack.Common/Paths/RelativePath.cs +++ b/Wabbajack.Common/Paths/RelativePath.cs @@ -13,7 +13,7 @@ namespace Wabbajack.Common private readonly string? _nullable_path; private string _path => _nullable_path ?? string.Empty; - public RelativePath(string path) + public RelativePath(string path, bool skipValidation = false) { if (string.IsNullOrWhiteSpace(path)) { @@ -28,7 +28,10 @@ namespace Wabbajack.Common } _nullable_path = trimmed; - Validate(); + if (!skipValidation) + { + Validate(); + } } public override string ToString() diff --git a/Wabbajack.sln b/Wabbajack.sln index 745caef4..bcd18b18 100644 --- a/Wabbajack.sln +++ b/Wabbajack.sln @@ -57,6 +57,7 @@ Global {B3F3FB6E-B9EB-4F49-9875-D78578BC7AE5}.Debug|x64.ActiveCfg = Debug|x64 {B3F3FB6E-B9EB-4F49-9875-D78578BC7AE5}.Debug|x64.Build.0 = Debug|x64 {B3F3FB6E-B9EB-4F49-9875-D78578BC7AE5}.Release|Any CPU.ActiveCfg = Release|x64 + {B3F3FB6E-B9EB-4F49-9875-D78578BC7AE5}.Release|Any CPU.Build.0 = Release|x64 {B3F3FB6E-B9EB-4F49-9875-D78578BC7AE5}.Release|x64.ActiveCfg = Release|x64 {B3F3FB6E-B9EB-4F49-9875-D78578BC7AE5}.Release|x64.Build.0 = Release|x64 {FF5D892F-8FF4-44FC-8F7F-CD58F307AD1B}.Debug|Any CPU.ActiveCfg = Debug|x64 @@ -64,6 +65,7 @@ Global {FF5D892F-8FF4-44FC-8F7F-CD58F307AD1B}.Debug|x64.ActiveCfg = Debug|x64 {FF5D892F-8FF4-44FC-8F7F-CD58F307AD1B}.Debug|x64.Build.0 = Debug|x64 {FF5D892F-8FF4-44FC-8F7F-CD58F307AD1B}.Release|Any CPU.ActiveCfg = Release|x64 + {FF5D892F-8FF4-44FC-8F7F-CD58F307AD1B}.Release|Any CPU.Build.0 = Release|x64 {FF5D892F-8FF4-44FC-8F7F-CD58F307AD1B}.Release|x64.ActiveCfg = Release|x64 {FF5D892F-8FF4-44FC-8F7F-CD58F307AD1B}.Release|x64.Build.0 = Release|x64 {0A820830-A298-497D-85E0-E9A89EFEF5FE}.Debug|Any CPU.ActiveCfg = Debug|x64 @@ -99,6 +101,7 @@ Global {89281BA1-67C8-48D2-9D6E-0F5CC85AD8C9}.Debug|x64.ActiveCfg = Debug|x64 {89281BA1-67C8-48D2-9D6E-0F5CC85AD8C9}.Debug|x64.Build.0 = Debug|x64 {89281BA1-67C8-48D2-9D6E-0F5CC85AD8C9}.Release|Any CPU.ActiveCfg = Release|x64 + {89281BA1-67C8-48D2-9D6E-0F5CC85AD8C9}.Release|Any CPU.Build.0 = Release|x64 {89281BA1-67C8-48D2-9D6E-0F5CC85AD8C9}.Release|x64.ActiveCfg = Release|x64 {89281BA1-67C8-48D2-9D6E-0F5CC85AD8C9}.Release|x64.Build.0 = Release|x64 {F72C17EC-0881-4455-8B0E-E1CC4FFD642E}.Debug|Any CPU.ActiveCfg = Debug|x64 @@ -120,6 +123,7 @@ Global {685D8BB1-D178-4D2C-85C7-C54A36FB7454}.Debug|x64.ActiveCfg = Debug|x64 {685D8BB1-D178-4D2C-85C7-C54A36FB7454}.Debug|x64.Build.0 = Debug|x64 {685D8BB1-D178-4D2C-85C7-C54A36FB7454}.Release|Any CPU.ActiveCfg = Release|x64 + {685D8BB1-D178-4D2C-85C7-C54A36FB7454}.Release|Any CPU.Build.0 = Release|x64 {685D8BB1-D178-4D2C-85C7-C54A36FB7454}.Release|x64.ActiveCfg = Release|x64 {685D8BB1-D178-4D2C-85C7-C54A36FB7454}.Release|x64.Build.0 = Release|x64 {D6856DBF-C959-4867-A8A8-343DA2D2715E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU