Rework performance profiles of WJ

This commit is contained in:
Timothy Baldridge
2020-09-12 14:23:03 -06:00
parent 8f4a090a03
commit b6dbcc2368
17 changed files with 299 additions and 209 deletions

View File

@ -42,6 +42,8 @@ namespace Wabbajack.VirtualFileSystem
public WorkQueue Queue { get; }
public bool UseExtendedHashes { get; set; }
public bool FavorPerfOverRAM { get; set; }
public Context(WorkQueue queue, bool extendedHashes = false)
{
@ -234,6 +236,7 @@ namespace Wabbajack.VirtualFileSystem
private List<HashRelativePath> _knownFiles = new List<HashRelativePath>();
private Dictionary<Hash, AbsolutePath> _knownArchives = new Dictionary<Hash, AbsolutePath>();
public void AddKnown(IEnumerable<HashRelativePath> known, Dictionary<Hash, AbsolutePath> archives)
{
_knownFiles.AddRange(known);

View File

@ -26,6 +26,12 @@ namespace Wabbajack.VirtualFileSystem
Definitions.FileType._7Z);
private static Extension OMODExtension = new Extension(".omod");
/// <summary>
/// When true, will allow 7z to use multiple threads and cache more data in memory, potentially
/// using many GB of RAM during extraction but vastly reducing extraction times in the process.
/// When false, ArchiveFile.Open asks 7z to switch multithreading off by setting the
/// "mt" property to "off" on the archive handler before any data is read.
/// </summary>
/// <remarks>
/// This is a static property, so a single value is shared by every caller of this class.
/// </remarks>
public static bool FavorPerfOverRAM { get; set; }
public static async Task<Dictionary<RelativePath, T>> GatheringExtract<T>(IStreamFactory sFn,

View File

@ -18,10 +18,10 @@ namespace Wabbajack.VirtualFileSystem
private Predicate<RelativePath> _shouldExtract;
private Func<RelativePath, IStreamFactory, ValueTask<T>> _mapFn;
private Dictionary<RelativePath, T> _results;
private Dictionary<uint, (RelativePath, ulong)> _indexes;
private Stream _stream;
private Definitions.FileType _sig;
private Exception _killException;
private uint _itemsCount;
public GatheringExtractor(Stream stream, Definitions.FileType sig, Predicate<RelativePath> shouldExtract, Func<RelativePath,IStreamFactory, ValueTask<T>> mapfn)
{
@ -41,14 +41,9 @@ namespace Wabbajack.VirtualFileSystem
try
{
_archive = ArchiveFile.Open(_stream, _sig).Result;
_indexes = _archive.Entries
.Select((entry, idx) => (entry, (uint)idx))
.Where(f => !f.entry.IsFolder)
.Select(t => ((RelativePath)t.entry.FileName, t.Item2, t.entry.Size))
.Where(t => _shouldExtract(t.Item1))
.ToDictionary(t => t.Item2, t => (t.Item1, t.Size));
ulong checkPos = 1024 * 32;
_archive._archive.Open(_archive._archiveStream, ref checkPos, null);
_itemsCount = _archive._archive.GetNumberOfItems();
_archive._archive.Extract(null, 0xFFFFFFFF, 0, this);
_archive.Dispose();
if (_killException != null)
@ -86,21 +81,22 @@ namespace Wabbajack.VirtualFileSystem
public int GetStream(uint index, out ISequentialOutStream outStream, AskMode askExtractMode)
{
if (_indexes.ContainsKey(index))
var entry = _archive.GetEntry(index);
var path = (RelativePath)entry.FileName;
if (!_shouldExtract(path))
{
var path = _indexes[index].Item1;
Utils.Status($"Extracting {path}", Percent.FactoryPutInRange(_results.Count, _indexes.Count));
// Empty files are never extracted via a write call, so we have to fake that now
if (_indexes[index].Item2 == 0)
{
var result = _mapFn(path, new MemoryStreamFactory(new MemoryStream(), path)).Result;
_results.Add(path, result);
}
outStream = new GatheringExtractorStream<T>(this, index);
outStream = null;
return 0;
}
outStream = null;
Utils.Status($"Extracting {path}", Percent.FactoryPutInRange(_results.Count, _itemsCount));
// Empty files are never extracted via a write call, so we have to fake that now
if (entry.Size == 0)
{
var result = _mapFn(path, new MemoryStreamFactory(new MemoryStream(), path)).Result;
_results.Add(path, result);
}
outStream = new GatheringExtractorStream<T>(this, entry, path);
return 0;
}
@ -117,25 +113,23 @@ namespace Wabbajack.VirtualFileSystem
private class GatheringExtractorStream<T> : ISequentialOutStream, IOutStream
{
private GatheringExtractor<T> _extractor;
private uint _index;
private bool _written;
private ulong _totalSize;
private Stream _tmpStream;
private TempFile _tmpFile;
private IStreamFactory _factory;
private bool _diskCached;
private RelativePath _path;
public GatheringExtractorStream(GatheringExtractor<T> extractor, uint index)
public GatheringExtractorStream(GatheringExtractor<T> extractor, Entry entry, RelativePath path)
{
_path = path;
_extractor = extractor;
_index = index;
_totalSize = extractor._indexes[index].Item2;
_diskCached = _totalSize >= 500_000_000;
_totalSize = entry.Size;
_diskCached = _totalSize >= int.MaxValue - 1024;
}
private IPath GetPath()
{
return _extractor._indexes[_index].Item1;
return _path;
}
public int Write(byte[] data, uint size, IntPtr processedSize)
@ -167,7 +161,7 @@ namespace Wabbajack.VirtualFileSystem
private void WriteSingleCall(byte[] data, in uint size)
{
var result = _extractor._mapFn(_extractor._indexes[_index].Item1, new MemoryBufferFactory(data, (int)size, GetPath())).Result;
var result = _extractor._mapFn(_path, new MemoryBufferFactory(data, (int)size, GetPath())).Result;
AddResult(result);
Cleanup();
}
@ -180,7 +174,7 @@ namespace Wabbajack.VirtualFileSystem
private void AddResult(T result)
{
_extractor._results.Add(_extractor._indexes[_index].Item1, result);
_extractor._results.Add(_path, result);
}
private void WriteMemoryCached(byte[] data, in uint size)
@ -193,7 +187,7 @@ namespace Wabbajack.VirtualFileSystem
_tmpStream.Flush();
_tmpStream.Position = 0;
var result = _extractor._mapFn(_extractor._indexes[_index].Item1, new MemoryStreamFactory((MemoryStream)_tmpStream, GetPath())).Result;
var result = _extractor._mapFn(_path, new MemoryStreamFactory((MemoryStream)_tmpStream, GetPath())).Result;
AddResult(result);
Cleanup();
}
@ -213,7 +207,7 @@ namespace Wabbajack.VirtualFileSystem
_tmpStream.Flush();
_tmpStream.Close();
var result = _extractor._mapFn(_extractor._indexes[_index].Item1, new NativeFileStreamFactory(_tmpFile.Path, GetPath())).Result;
var result = _extractor._mapFn(_path, new NativeFileStreamFactory(_tmpFile.Path, GetPath())).Result;
AddResult(result);
Cleanup();
}

View File

@ -1,5 +1,6 @@
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Runtime.InteropServices;
@ -13,21 +14,123 @@ namespace Wabbajack.VirtualFileSystem.SevenZipExtractor
{
private SevenZipHandle _sevenZipHandle;
internal IInArchive _archive;
private InStreamWrapper _archiveStream;
public InStreamWrapper _archiveStream;
private IList<Entry> _entries;
private static readonly AbsolutePath LibraryFilePath = @"Extractors\7z.dll".RelativeTo(AbsolutePath.EntryPoint);
private static SignatureChecker _checker = new SignatureChecker(Formats.FileTypeGuidMapping.Keys.ToArray());
/// <summary>
/// Creates an <see cref="ArchiveFile"/> for <paramref name="archiveStream"/>, using the 7z
/// handler registered for <paramref name="format"/>.
/// </summary>
/// <param name="archiveStream">Stream holding the archive data; it is wrapped, not copied.</param>
/// <param name="format">Archive type, used to look up the handler GUID.</param>
/// <returns>The initialized archive wrapper.</returns>
public static async Task<ArchiveFile> Open(Stream archiveStream, Definitions.FileType format)
{
    var archive = new ArchiveFile();
    archive.InitializeAndValidateLibrary();

    var handlerGuid = Formats.FileTypeGuidMapping[format];
    archive._archive = archive._sevenZipHandle.CreateInArchive(handlerGuid);

    // Unless the caller opted into trading RAM for speed, ask 7z to disable multithreading.
    var limitThreads = !FileExtractor2.FavorPerfOverRAM;
    if (limitThreads)
    {
        var properties = new Dictionary<string, string> { ["mt"] = "off" };
        archive.SetCompressionProperties(properties);
    }

    archive._archiveStream = new InStreamWrapper(archiveStream);
    return archive;
}
/// <summary>
/// Sets the compression properties
/// </summary>
/// <remarks>
/// Passes a leading "x" = 0 (compression level) property followed by every entry of
/// <paramref name="CustomParameters"/> to the archive handler's ISetProperties interface.
/// Handlers that do not expose ISetProperties are silently left untouched. The result code
/// of the native call is ignored, so this is a best-effort operation.
/// </remarks>
/// <param name="CustomParameters">
/// Property name/value pairs. Values of the known integer parameters are parsed with the
/// invariant culture and sent as VT_UI4; everything else is sent as a BSTR.
/// </param>
/// <exception cref="FormatException">An integer parameter value is not a valid number.</exception>
/// <exception cref="OverflowException">An integer parameter value does not fit in a UInt32.</exception>
private void SetCompressionProperties(Dictionary<string, string> CustomParameters)
{
    ISetProperties setter;
    try
    {
        setter = (ISetProperties)_archive;
    }
    catch (InvalidCastException)
    {
        // This archive handler does not support setting properties.
        return;
    }

    if (setter == null)
    {
        // Nothing to call; bail out before allocating any unmanaged memory.
        return;
    }

    // Parameters whose values must be marshalled as unsigned integers rather than strings.
    var integerParameters = new HashSet<string>
    {
        "fb",
        "pass",
        "o",
        "yx",
        "a",
        "mc",
        "lc",
        "lp",
        "pb",
        "cp"
    };

    // Every BSTR allocated below is recorded here so it can be released once the native call
    // has returned. The strings are only passed as inputs to SetProperties, so the caller is
    // expected to free them (standard COM in-parameter ownership).
    var allocatedStrings = new List<IntPtr>(2 + 2 * CustomParameters.Count);
    IntPtr AllocBstr(string text)
    {
        var ptr = Marshal.StringToBSTR(text);
        allocatedStrings.Add(ptr);
        return ptr;
    }

    var names = new List<IntPtr>(1 + CustomParameters.Count);
    var values = new List<PropVariant>(1 + CustomParameters.Count);

    try
    {
        // The first property is always the compression level, fixed at 0.
        names.Add(AllocBstr("x"));
        values.Add(new PropVariant { VarType = VarEnum.VT_UI4, UInt32Value = 0 });

        foreach (var pair in CustomParameters)
        {
            names.Add(AllocBstr(pair.Key));

            var pv = new PropVariant();
            if (integerParameters.Contains(pair.Key))
            {
                pv.VarType = VarEnum.VT_UI4;
                pv.UInt32Value = Convert.ToUInt32(pair.Value, CultureInfo.InvariantCulture);
            }
            else
            {
                pv.VarType = VarEnum.VT_BSTR;
                pv.pointerValue = AllocBstr(pair.Value);
            }

            values.Add(pv);
        }

        // Pin both arrays so the native side sees stable addresses for the duration of the
        // call. The handles are nested so that a failure to pin the second array still
        // releases the first.
        var namesHandle = GCHandle.Alloc(names.ToArray(), GCHandleType.Pinned);
        try
        {
            var valuesHandle = GCHandle.Alloc(values.ToArray(), GCHandleType.Pinned);
            try
            {
                setter.SetProperties(namesHandle.AddrOfPinnedObject(), valuesHandle.AddrOfPinnedObject(),
                    names.Count);
            }
            finally
            {
                valuesHandle.Free();
            }
        }
        finally
        {
            namesHandle.Free();
        }
    }
    finally
    {
        foreach (var ptr in allocatedStrings)
        {
            Marshal.FreeBSTR(ptr);
        }
    }
}
public IList<Entry> Entries
{
get
@ -51,47 +154,54 @@ namespace Wabbajack.VirtualFileSystem.SevenZipExtractor
for (uint fileIndex = 0; fileIndex < itemsCount; fileIndex++)
{
string fileName = this.GetProperty<string>(fileIndex, ItemPropId.kpidPath);
bool isFolder = this.GetProperty<bool>(fileIndex, ItemPropId.kpidIsFolder);
bool isEncrypted = this.GetProperty<bool>(fileIndex, ItemPropId.kpidEncrypted);
ulong size = this.GetProperty<ulong>(fileIndex, ItemPropId.kpidSize);
ulong packedSize = this.GetProperty<ulong>(fileIndex, ItemPropId.kpidPackedSize);
DateTime creationTime = this.GetPropertySafe<DateTime>(fileIndex, ItemPropId.kpidCreationTime);
DateTime lastWriteTime = this.GetPropertySafe<DateTime>(fileIndex, ItemPropId.kpidLastWriteTime);
DateTime lastAccessTime = this.GetPropertySafe<DateTime>(fileIndex, ItemPropId.kpidLastAccessTime);
uint crc = this.GetPropertySafe<uint>(fileIndex, ItemPropId.kpidCRC);
uint attributes = this.GetPropertySafe<uint>(fileIndex, ItemPropId.kpidAttributes);
string comment = this.GetPropertySafe<string>(fileIndex, ItemPropId.kpidComment);
string hostOS = this.GetPropertySafe<string>(fileIndex, ItemPropId.kpidHostOS);
string method = this.GetPropertySafe<string>(fileIndex, ItemPropId.kpidMethod);
bool isSplitBefore = this.GetPropertySafe<bool>(fileIndex, ItemPropId.kpidSplitBefore);
bool isSplitAfter = this.GetPropertySafe<bool>(fileIndex, ItemPropId.kpidSplitAfter);
this._entries.Add(new Entry(this._archive, fileIndex)
{
FileName = fileName,
IsFolder = isFolder,
IsEncrypted = isEncrypted,
Size = size,
PackedSize = packedSize,
CreationTime = creationTime,
LastWriteTime = lastWriteTime,
LastAccessTime = lastAccessTime,
CRC = crc,
Attributes = attributes,
Comment = comment,
HostOS = hostOS,
Method = method,
IsSplitBefore = isSplitBefore,
IsSplitAfter = isSplitAfter
});
var entry = GetEntry(fileIndex);
this._entries.Add(entry);
}
return this._entries;
}
}
/// <summary>
/// Reads the properties of the archive item at <paramref name="fileIndex"/> and packages
/// them into a new <see cref="Entry"/>.
/// </summary>
/// <param name="fileIndex">Index of the item inside the opened archive.</param>
/// <returns>A freshly created entry describing that item.</returns>
internal Entry GetEntry(uint fileIndex)
{
    // Path, folder/encryption flags and sizes are read through GetProperty.
    var path = GetProperty<string>(fileIndex, ItemPropId.kpidPath);
    var folder = GetProperty<bool>(fileIndex, ItemPropId.kpidIsFolder);
    var encrypted = GetProperty<bool>(fileIndex, ItemPropId.kpidEncrypted);
    var unpackedSize = GetProperty<ulong>(fileIndex, ItemPropId.kpidSize);
    var compressedSize = GetProperty<ulong>(fileIndex, ItemPropId.kpidPackedSize);

    // The remaining metadata is read through GetPropertySafe.
    var created = GetPropertySafe<DateTime>(fileIndex, ItemPropId.kpidCreationTime);
    var written = GetPropertySafe<DateTime>(fileIndex, ItemPropId.kpidLastWriteTime);
    var accessed = GetPropertySafe<DateTime>(fileIndex, ItemPropId.kpidLastAccessTime);
    var checksum = GetPropertySafe<uint>(fileIndex, ItemPropId.kpidCRC);
    var attrs = GetPropertySafe<uint>(fileIndex, ItemPropId.kpidAttributes);
    var note = GetPropertySafe<string>(fileIndex, ItemPropId.kpidComment);
    var os = GetPropertySafe<string>(fileIndex, ItemPropId.kpidHostOS);
    var codec = GetPropertySafe<string>(fileIndex, ItemPropId.kpidMethod);
    var splitBefore = GetPropertySafe<bool>(fileIndex, ItemPropId.kpidSplitBefore);
    var splitAfter = GetPropertySafe<bool>(fileIndex, ItemPropId.kpidSplitAfter);

    return new Entry(_archive, fileIndex)
    {
        FileName = path,
        IsFolder = folder,
        IsEncrypted = encrypted,
        Size = unpackedSize,
        PackedSize = compressedSize,
        CreationTime = created,
        LastWriteTime = written,
        LastAccessTime = accessed,
        CRC = checksum,
        Attributes = attrs,
        Comment = note,
        HostOS = os,
        Method = codec,
        IsSplitBefore = splitBefore,
        IsSplitAfter = splitAfter
    };
}
private T GetPropertySafe<T>(uint fileIndex, ItemPropId name)
{
try

View File

@ -26,6 +26,7 @@ namespace Wabbajack.VirtualFileSystem.SevenZipExtractor
[FieldOffset(8)] public IntPtr pointerValue;
[FieldOffset(8)] public byte byteValue;
[FieldOffset(8)] public long longValue;
[FieldOffset(8)] public UInt32 UInt32Value;
[FieldOffset(8)] public System.Runtime.InteropServices.ComTypes.FILETIME filetime;
[FieldOffset(8)] public PropArray propArray;
@ -35,6 +36,10 @@ namespace Wabbajack.VirtualFileSystem.SevenZipExtractor
{
return (VarEnum) this.vt;
}
set
{
vt = (ushort)value;
}
}
public void Clear()
@ -304,6 +309,24 @@ namespace Wabbajack.VirtualFileSystem.SevenZipExtractor
kpidUserDefined = 0x10000
}
/// <summary>
/// 7-zip ISetProperties interface for setting various archive properties.
/// Imported as an IUnknown-based COM interface; the method order and signature must match
/// the native declaration, so do not reorder or change members.
/// </summary>
[ComImport]
[Guid("23170F69-40C1-278A-0000-000600030000")]
[InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
internal interface ISetProperties
{
/// <summary>
/// Sets the archive properties
/// </summary>
/// <param name="names">Pointer to the array of property names (the visible caller passes the address of a pinned array of BSTR pointers)</param>
/// <param name="values">Pointer to the array of property values (the visible caller passes the address of a pinned PropVariant array, one entry per name)</param>
/// <param name="numProperties">The properties count, i.e. the number of entries in both arrays</param>
/// <returns>Result code of the native call; presumably a COM HRESULT where 0 means success (TODO confirm against the 7-Zip headers)</returns>
int SetProperties(IntPtr names, IntPtr values, int numProperties);
}
[ComImport]
@ -395,7 +418,7 @@ namespace Wabbajack.VirtualFileSystem.SevenZipExtractor
ArchivePropId propID,
ref PropVariant value); // PROPVARIANT
internal class StreamWrapper : IDisposable
public class StreamWrapper : IDisposable
{
protected Stream BaseStream;
@ -420,7 +443,7 @@ namespace Wabbajack.VirtualFileSystem.SevenZipExtractor
}
}
internal class InStreamWrapper : StreamWrapper, ISequentialInStream, IInStream
public class InStreamWrapper : StreamWrapper, ISequentialInStream, IInStream
{
public InStreamWrapper(Stream baseStream) : base(baseStream)
{