Rework file extraction to combine the old and new methods

This commit is contained in:
Timothy Baldridge
2020-10-09 21:02:58 -06:00
parent e17b577e5a
commit e557e46556
24 changed files with 267 additions and 1343 deletions

View File

@ -0,0 +1,33 @@
using System;
using System.IO;
using System.Threading.Tasks;
using Wabbajack.Common;
namespace Wabbajack.VirtualFileSystem.ExtractedFiles
{
/// <summary>
/// An <see cref="IExtractedFile"/> that delegates its name, timestamp and stream access to a
/// wrapped <see cref="IStreamFactory"/>.
/// </summary>
public class ExtractedMemoryFile : IExtractedFile
{
    private IStreamFactory _factory;

    /// <summary>Wraps <paramref name="factory"/> so it can be handed out as an extracted file.</summary>
    /// <param name="factory">stream factory every member of this class forwards to</param>
    public ExtractedMemoryFile(IStreamFactory factory) => _factory = factory;

    /// <summary>Defaults to true. This class's own <see cref="Move"/> does not read it.</summary>
    public bool CanMove { get; set; } = true;

    /// <summary>Name reported by the wrapped factory.</summary>
    public IPath Name
    {
        get { return _factory.Name; }
    }

    /// <summary>Last-modified timestamp reported by the wrapped factory.</summary>
    public DateTime LastModifiedUtc
    {
        get { return _factory.LastModifiedUtc; }
    }

    /// <summary>Requests a stream from the wrapped factory.</summary>
    public ValueTask<Stream> GetStream() => _factory.GetStream();

    /// <summary>
    /// Writes the wrapped factory's stream contents to <paramref name="newPath"/>; the stream
    /// is disposed once the write completes.
    /// </summary>
    /// <param name="newPath">destination the contents are written to</param>
    public async ValueTask Move(AbsolutePath newPath)
    {
        await using (var contents = await _factory.GetStream())
        {
            await newPath.WriteAllAsync(contents);
        }
    }
}
}

View File

@ -0,0 +1,26 @@
using System.Threading.Tasks;
using Wabbajack.Common;
namespace Wabbajack.VirtualFileSystem.ExtractedFiles
{
/// <summary>
/// An <see cref="IExtractedFile"/> built on <see cref="NativeFileStreamFactory"/>; it relocates
/// the base class's <c>_file</c> either by moving or by copying it.
/// </summary>
public class ExtractedNativeFile : NativeFileStreamFactory, IExtractedFile
{
    /// <summary>Creates the entry from a file path only.</summary>
    public ExtractedNativeFile(AbsolutePath file) : base(file)
    {
    }

    /// <summary>Creates the entry from a file path plus the path passed through to the base class.</summary>
    public ExtractedNativeFile(AbsolutePath file, IPath path) : base(file, path)
    {
    }

    /// <summary>
    /// When true (the default) <see cref="Move"/> moves the file; when false it copies it.
    /// </summary>
    public bool CanMove { get; set; } = true;

    /// <summary>
    /// Places the file at <paramref name="newPath"/>, moving (with overwrite) when
    /// <see cref="CanMove"/> is set and copying otherwise.
    /// </summary>
    /// <param name="newPath">destination for the file</param>
    public async ValueTask Move(AbsolutePath newPath)
    {
        if (!CanMove)
        {
            await _file.CopyToAsync(newPath);
            return;
        }

        await _file.MoveToAsync(newPath, overwrite: true);
    }
}
}

View File

@ -0,0 +1,19 @@
using System.IO;
using System.Threading.Tasks;
using Wabbajack.Common;
namespace Wabbajack.VirtualFileSystem.ExtractedFiles
{
/// <summary>
/// A stream factory for an extracted archive entry that can additionally be relocated to a
/// destination path.
/// </summary>
public interface IExtractedFile : IStreamFactory
{
    /// <summary>
    /// Flag implementations may consult to decide whether <see cref="Move"/> is allowed to
    /// consume the underlying file instead of copying it.
    /// </summary>
    bool CanMove { get; set; }

    /// <summary>
    /// Relocates the entry to <paramref name="newPath"/>. The operation may be destructive to
    /// the source: it is intended to be much cheaper than a copy when the entry already lives
    /// on the same disk as the destination, and falls back to copying when a move is not possible.
    /// </summary>
    /// <param name="newPath">destination to move the entry to</param>
    /// <returns>a task that completes once the entry is at the destination</returns>
    ValueTask Move(AbsolutePath newPath);
}
}

View File

@ -1,15 +1,16 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Reactive.Linq;
using System.Threading.Tasks;
using Compression.BSA;
using ICSharpCode.SharpZipLib.Zip.Compression.Streams;
using OMODFramework;
using SharpCompress.Archives.SevenZip;
using SharpCompress.Readers;
using Wabbajack.Common;
using Wabbajack.Common.FileSignatures;
using Wabbajack.VirtualFileSystem.SevenZipExtractor;
using Wabbajack.Common.StatusFeed;
using Wabbajack.Common.StatusFeed.Errors;
using Wabbajack.VirtualFileSystem.ExtractedFiles;
using Utils = Wabbajack.Common.Utils;
namespace Wabbajack.VirtualFileSystem
@ -47,9 +48,14 @@ namespace Wabbajack.VirtualFileSystem
public static bool FavorPerfOverRAM { get; set; }
public static async Task<Dictionary<RelativePath, T>> GatheringExtract<T>(IStreamFactory sFn,
Predicate<RelativePath> shouldExtract, Func<RelativePath, IStreamFactory, ValueTask<T>> mapfn)
public static async Task<Dictionary<RelativePath, T>> GatheringExtract<T>(WorkQueue queue, IStreamFactory sFn,
Predicate<RelativePath> shouldExtract, Func<RelativePath, IExtractedFile, ValueTask<T>> mapfn,
AbsolutePath? tempFolder = null,
HashSet<RelativePath> onlyFiles = null)
{
if (tempFolder == null)
tempFolder = TempFolder.BaseFolder;
if (sFn is NativeFileStreamFactory)
{
Utils.Log($"Extracting {sFn.Name}");
@ -58,6 +64,8 @@ namespace Wabbajack.VirtualFileSystem
var sig = await ArchiveSigs.MatchesAsync(archive);
archive.Position = 0;
Dictionary<RelativePath, T> results = new Dictionary<RelativePath, T>();
switch (sig)
{
case Definitions.FileType.RAR_OLD:
@ -67,33 +75,42 @@ namespace Wabbajack.VirtualFileSystem
{
if (sFn.Name.FileName.Extension == OMODExtension)
{
return await GatheringExtractWithOMOD(archive, shouldExtract, mapfn);
results = await GatheringExtractWithOMOD(archive, shouldExtract, mapfn);
}
else
{
return await GatheringExtractWith7Zip<T>(sFn, (Definitions.FileType)sig, shouldExtract,
mapfn);
results = await GatheringExtractWith7Zip<T>(queue, sFn, (Definitions.FileType)sig, shouldExtract,
mapfn, tempFolder.Value, onlyFiles);
}
break;
}
case Definitions.FileType.BSA:
case Definitions.FileType.BA2:
return await GatheringExtractWithBSA(sFn, (Definitions.FileType)sig, shouldExtract, mapfn);
results = await GatheringExtractWithBSA(sFn, (Definitions.FileType)sig, shouldExtract, mapfn);
break;
case Definitions.FileType.TES3:
if (sFn.Name.FileName.Extension == BSAExtension)
return await GatheringExtractWithBSA(sFn, (Definitions.FileType)sig, shouldExtract, mapfn);
results = await GatheringExtractWithBSA(sFn, (Definitions.FileType)sig, shouldExtract, mapfn);
else
throw new Exception($"Invalid file format {sFn.Name}");
break;
default:
throw new Exception($"Invalid file format {sFn.Name}");
}
if (onlyFiles != null && onlyFiles.Count != results.Count)
{
throw new Exception(
$"Sanity check error extracting {sFn.Name} - {results.Count} results, expected {onlyFiles.Count}");
}
return results;
}
private static async Task<Dictionary<RelativePath,T>> GatheringExtractWithOMOD<T>(Stream archive, Predicate<RelativePath> shouldExtract, Func<RelativePath,IStreamFactory,ValueTask<T>> mapfn)
private static async Task<Dictionary<RelativePath,T>> GatheringExtractWithOMOD<T>(Stream archive, Predicate<RelativePath> shouldExtract, Func<RelativePath,IExtractedFile,ValueTask<T>> mapfn)
{
var tmpFile = new TempFile();
await tmpFile.Path.WriteAllAsync(archive);
@ -113,7 +130,7 @@ namespace Wabbajack.VirtualFileSystem
var path = file.RelativeTo(dest.Dir);
if (!shouldExtract(path)) continue;
var result = await mapfn(path, new NativeFileStreamFactory(file, path));
var result = await mapfn(path, new ExtractedNativeFile(file, path));
results.Add(path, result);
}
@ -141,7 +158,7 @@ namespace Wabbajack.VirtualFileSystem
}
private static async Task<Dictionary<RelativePath,T>> GatheringExtractWithBSA<T>(IStreamFactory sFn, Definitions.FileType sig, Predicate<RelativePath> shouldExtract, Func<RelativePath,IStreamFactory,ValueTask<T>> mapfn)
private static async Task<Dictionary<RelativePath,T>> GatheringExtractWithBSA<T>(IStreamFactory sFn, Definitions.FileType sig, Predicate<RelativePath> shouldExtract, Func<RelativePath,IExtractedFile,ValueTask<T>> mapfn)
{
var archive = await BSADispatch.OpenRead(sFn, sig);
var results = new Dictionary<RelativePath, T>();
@ -150,21 +167,123 @@ namespace Wabbajack.VirtualFileSystem
if (!shouldExtract(entry.Path))
continue;
var result = await mapfn(entry.Path, await entry.GetStreamFactory());
var result = await mapfn(entry.Path, new ExtractedMemoryFile(await entry.GetStreamFactory()));
results.Add(entry.Path, result);
}
return results;
}
private static async Task<Dictionary<RelativePath,T>> GatheringExtractWith7Zip<T>(IStreamFactory sf, Definitions.FileType sig, Predicate<RelativePath> shouldExtract, Func<RelativePath,IStreamFactory,ValueTask<T>> mapfn)
/// <summary>
/// Extracts an archive by running the bundled <c>Extractors\7z.exe</c> into a fresh
/// sub-directory of <paramref name="tempPath"/>, then passes every extracted file accepted by
/// <paramref name="shouldExtract"/> to <paramref name="mapfn"/> and collects the results.
/// </summary>
/// <param name="queue">work queue handed to <c>PMap</c> when processing the extracted files</param>
/// <param name="sf">source of the archive; spooled to a temporary file when its name is not an <see cref="AbsolutePath"/></param>
/// <param name="sig">detected archive signature (not used by this extraction path)</param>
/// <param name="shouldExtract">filter applied to each extracted file's path relative to the extraction directory</param>
/// <param name="mapfn">callback invoked for each accepted file; the file is deleted after it returns</param>
/// <param name="tempPath">folder under which the extraction directory and spool file are created</param>
/// <param name="onlyFiles">when non-null, 7z is given a list file restricting extraction to these paths</param>
/// <returns>map from relative path to the value produced by <paramref name="mapfn"/></returns>
private static async Task<Dictionary<RelativePath,T>> GatheringExtractWith7Zip<T>(WorkQueue queue, IStreamFactory sf, Definitions.FileType sig, Predicate<RelativePath> shouldExtract, Func<RelativePath,IExtractedFile,ValueTask<T>> mapfn,
    AbsolutePath tempPath, HashSet<RelativePath> onlyFiles)
{
    TempFile tmpFile = null;
    TempFile spoolFile = null;

    var dest = tempPath.Combine(Guid.NewGuid().ToString());
    dest.CreateDirectory();

    try
    {
        AbsolutePath source;
        if (sf.Name is AbsolutePath abs)
        {
            source = abs;
        }
        else
        {
            // The archive is not a file on disk: write it out so 7z.exe can open it. The
            // extension comes from the factory's own name (previously it was read from the
            // not-yet-assigned 'source'), and 'source' must point at the spooled copy.
            spoolFile = new TempFile(tempPath.Combine(Guid.NewGuid().ToString())
                .WithExtension(sf.Name.FileName.Extension));
            await using var s = await sf.GetStream();
            await spoolFile.Path.WriteAllAsync(s);
            source = spoolFile.Path;
        }

        Utils.Log(new GenericInfo($"Extracting {(string)source.FileName}",
            $"The contents of {(string)source.FileName} are being extracted to {dest} using 7zip.exe"));

        var process = new ProcessHelper {Path = @"Extractors\7z.exe".RelativeTo(AbsolutePath.EntryPoint),};

        if (onlyFiles != null)
        {
            //It's stupid that we have to do this, but 7zip's file pattern matching isn't very fuzzy
            IEnumerable<string> AllVariants(string input)
            {
                yield return $"\"{input}\"";
                yield return $"\"\\{input}\"";
            }

            // Each wanted path is written twice (with and without a leading backslash) into
            // a list file that is passed to 7z with the @listfile syntax.
            tmpFile = new TempFile();
            await tmpFile.Path.WriteAllLinesAsync(onlyFiles.SelectMany(f => AllVariants((string)f)).ToArray());
            process.Arguments = new object[]
            {
                "x", "-bsp1", "-y", $"-o\"{dest}\"", source, $"@\"{tmpFile.Path}\"", "-mmt=off"
            };
        }
        else
        {
            process.Arguments = new object[] {"x", "-bsp1", "-y", $"-o\"{dest}\"", source, "-mmt=off"};
        }

        // Fire-and-forget subscription that turns 7z progress lines into status updates.
        _ = process.Output.Where(d => d.Type == ProcessHelper.StreamType.Output)
            .ForEachAsync(p =>
            {
                var (_, line) = p;
                if (line == null)
                    return;

                // Only lines whose fourth character is '%' are treated as progress; the
                // first three characters are parsed as the percentage (0 if unparsable).
                if (line.Length <= 4 || line[3] != '%') return;

                int.TryParse(line.Substring(0, 3), out var percentInt);
                Utils.Status($"Extracting {(string)source.FileName} - {line.Trim()}",
                    Percent.FactoryPutInRange(percentInt / 100d));
            });

        var exitCode = await process.Start();

        if (exitCode != 0)
        {
            Utils.ErrorThrow(new _7zipReturnError(exitCode, source, dest, ""));
        }
        else
        {
            Utils.Status($"Extracting {source.FileName} - done", Percent.One, alsoLog: true);
        }

        var results = await dest.EnumerateFiles()
            .PMap(queue, async f =>
            {
                var path = f.RelativeTo(dest);
                // Rejected files yield a default tuple that is filtered out below.
                if (!shouldExtract(path)) return ((RelativePath, T))default;
                var file = new ExtractedNativeFile(f);
                var result = await mapfn(path, file);
                await f.DeleteAsync();
                return (path, result);
            });

        return results.Where(d => d.Item1 != default)
            .ToDictionary(d => d.Item1, d => d.Item2);
    }
    finally
    {
        // Always remove the extraction directory and any temporary files created above.
        await dest.DeleteDirectory();

        if (tmpFile != null)
        {
            await tmpFile.DisposeAsync();
        }

        if (spoolFile != null)
        {
            await spoolFile.DisposeAsync();
        }
    }
}
public static async Task ExtractAll(AbsolutePath src, AbsolutePath dest)
public static async Task ExtractAll(WorkQueue queue, AbsolutePath src, AbsolutePath dest)
{
await GatheringExtract(new NativeFileStreamFactory(src), _ => true, async (path, factory) =>
await GatheringExtract(queue, new NativeFileStreamFactory(src), _ => true, async (path, factory) =>
{
var abs = path.RelativeTo(dest);
abs.Parent.CreateDirectory();

View File

@ -1,329 +0,0 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Reactive.Linq;
using System.Runtime.InteropServices;
using System.Threading;
using System.Threading.Tasks;
using Compression.BSA;
using Wabbajack.Common;
using Wabbajack.Common.FileSignatures;
using Wabbajack.Common.StatusFeed.Errors;
using Wabbajack.VirtualFileSystem.SevenZipExtractor;
namespace Wabbajack.VirtualFileSystem
{
/// <summary>
/// Extracts an archive through the 7-zip COM-style callback interface on a dedicated thread,
/// passing each entry accepted by the predicate to a mapping function and gathering the
/// results by relative path. If the archive cannot be opened through that interface, it
/// falls back to running <c>Extractors\7z.exe</c> into a temporary folder.
/// </summary>
/// <typeparam name="T">type of value produced by the mapping function for each entry</typeparam>
public class GatheringExtractor<T> : IArchiveExtractCallback
{
    private ArchiveFile _archive;
    private readonly Predicate<RelativePath> _shouldExtract;
    private readonly Func<RelativePath, IStreamFactory, ValueTask<T>> _mapFn;
    private readonly Dictionary<RelativePath, T> _results;
    private readonly Definitions.FileType _sig;
    // First exception raised inside a stream callback; surfaced once extraction returns.
    private Exception _killException;
    private uint _itemsCount;
    private readonly IStreamFactory _streamFactory;

    /// <summary>Creates an extractor for one archive.</summary>
    /// <param name="sF">factory that supplies the archive stream</param>
    /// <param name="sig">archive signature passed to <c>ArchiveFile.Open</c></param>
    /// <param name="shouldExtract">filter deciding which entries are mapped</param>
    /// <param name="mapfn">callback invoked once per accepted entry</param>
    public GatheringExtractor(IStreamFactory sF, Definitions.FileType sig, Predicate<RelativePath> shouldExtract, Func<RelativePath,IStreamFactory, ValueTask<T>> mapfn)
    {
        _shouldExtract = shouldExtract;
        _mapFn = mapfn;
        _results = new Dictionary<RelativePath, T>();
        _streamFactory = sF;
        _sig = sig;
    }

    /// <summary>
    /// Runs the extraction on a below-normal-priority worker thread and returns the gathered
    /// results once it completes.
    /// </summary>
    /// <returns>map from entry path to the value produced by the mapping function</returns>
    public async Task<Dictionary<RelativePath, T>> Extract()
    {
        // RunContinuationsAsynchronously keeps the caller's continuation from running inline
        // on the low-priority worker thread that completes the task.
        var source = new TaskCompletionSource<bool>(TaskCreationOptions.RunContinuationsAsynchronously);

        var th = new Thread(() =>
        {
            try
            {
                // Blocking on .Result is deliberate here: this is a dedicated thread and the
                // extraction callbacks below are synchronous.
                using var stream = _streamFactory.GetStream().Result;
                _archive = ArchiveFile.Open(stream, _sig).Result;
                ulong checkPos = (ulong)stream.Length;
                var oresult = _archive._archive.Open(_archive._archiveStream, ref checkPos, new ArchiveCallback());
                // Can't read this with the COM interface for some reason
                if (oresult != 0)
                {
                    // The fallback completes 'source' itself.
                    var _ = ExtractSlow(source, _streamFactory);
                    return;
                }

                _itemsCount = _archive._archive.GetNumberOfItems();
                try
                {
                    _archive._archive.Extract(null, 0xFFFFFFFF, 0, this);
                }
                finally
                {
                    // Release the archive even when the extract call throws.
                    _archive.Dispose();
                }

                if (_killException != null)
                {
                    source.SetException(_killException);
                }
                else
                {
                    source.SetResult(true);
                }
            }
            catch (Exception ex)
            {
                source.SetException(ex);
            }
        }) {Priority = ThreadPriority.BelowNormal, Name = "7Zip Extraction Worker Thread"};

        th.Start();

        await source.Task;
        return _results;
    }

    /// <summary>
    /// Fallback path: extracts the whole archive with 7z.exe into a temporary folder, maps
    /// every accepted file, and completes <paramref name="tcs"/> with the outcome.
    /// </summary>
    /// <param name="tcs">completion source awaited by <see cref="Extract"/></param>
    /// <param name="streamFactory">source of the archive</param>
    private async Task ExtractSlow(TaskCompletionSource<bool> tcs, IStreamFactory streamFactory)
    {
        try
        {
            var dest = await TempFolder.Create();
            TempFile tempFile = null;
            try
            {
                AbsolutePath source;
                if (streamFactory is NativeFileStreamFactory nsf)
                {
                    source = (AbsolutePath)nsf.Name;
                }
                else
                {
                    // Not a file on disk: spool it and point 7z at the spooled copy
                    // (previously 'source' was left unassigned on this branch).
                    await using var stream = await streamFactory.GetStream();
                    tempFile = new TempFile();
                    await tempFile.Path.WriteAllAsync(stream);
                    source = tempFile.Path;
                }

                Utils.Log(
                    $"The contents of {(string)source.FileName} are being extracted to {dest.Dir} using 7zip.exe");

                var process = new ProcessHelper {Path = @"Extractors\7z.exe".RelativeTo(AbsolutePath.EntryPoint),};

                process.Arguments = new object[] {"x", "-bsp1", "-y", $"-o\"{dest.Dir}\"", source, "-mmt=off"};

                // Fire-and-forget subscription that turns 7z progress lines into status updates.
                var _ = process.Output.Where(d => d.Type == ProcessHelper.StreamType.Output)
                    .ForEachAsync(p =>
                    {
                        var (_, line) = p;
                        if (line == null)
                            return;

                        // Only lines whose fourth character is '%' are treated as progress.
                        if (line.Length <= 4 || line[3] != '%') return;

                        int.TryParse(line.Substring(0, 3), out var percentInt);
                        Utils.Status($"Extracting {(string)source.FileName} - {line.Trim()}",
                            Percent.FactoryPutInRange(percentInt / 100d));
                    });

                var exitCode = await process.Start();

                if (exitCode != 0)
                {
                    Utils.ErrorThrow(new _7zipReturnError(exitCode, source, dest.Dir, ""));
                }
                else
                {
                    Utils.Status($"Extracting {source.FileName} - done", Percent.One, alsoLog: true);
                }
            }
            finally
            {
                // The spooled archive is no longer needed once 7z has run, whether or not it succeeded.
                if (tempFile != null)
                {
                    await tempFile.DisposeAsync();
                }
            }

            foreach (var file in dest.Dir.EnumerateFiles())
            {
                var relPath = file.RelativeTo(dest.Dir);
                if (!_shouldExtract(relPath)) continue;

                var result = await _mapFn(relPath, new NativeFileStreamFactory(file));
                _results[relPath] = result;
                await file.DeleteAsync();
            }

            tcs.SetResult(true);
        }
        catch (Exception ex)
        {
            tcs.SetException(ex);
        }
    }

    /// <summary>Progress callback; intentionally ignored.</summary>
    public void SetTotal(ulong total)
    {
    }

    /// <summary>Progress callback; intentionally ignored.</summary>
    public void SetCompleted(ref ulong completeValue)
    {
    }

    /// <summary>
    /// Callback asking for the output stream of entry <paramref name="index"/>. Folders and
    /// entries rejected by the predicate get a null stream; everything else gets a stream
    /// that buffers the entry and hands it to the mapping function.
    /// </summary>
    /// <returns>always 0</returns>
    public int GetStream(uint index, out ISequentialOutStream outStream, AskMode askExtractMode)
    {
        var entry = _archive.GetEntry(index);
        var path = (RelativePath)entry.FileName;
        if (entry.IsFolder || !_shouldExtract(path))
        {
            outStream = null;
            return 0;
        }

        Utils.Status($"Extracting {path}", Percent.FactoryPutInRange(_results.Count, _itemsCount));
        // Empty files are never extracted via a write call, so we have to fake that now
        if (entry.Size == 0)
        {
            var result = _mapFn(path, new MemoryStreamFactory(new MemoryStream(), path)).Result;
            _results.Add(path, result);
        }
        outStream = new GatheringExtractorStream(this, entry, path);
        return 0;
    }

    /// <summary>Callback before each operation; intentionally ignored.</summary>
    public void PrepareOperation(AskMode askExtractMode)
    {
    }

    /// <summary>Callback with each operation's result; intentionally ignored.</summary>
    public void SetOperationResult(OperationResult resultEOperationResult)
    {
    }

    /// <summary>
    /// Output stream for a single entry. Accumulates the written bytes and, once the full
    /// entry size has been received, invokes the owner's mapping function and records the result.
    /// </summary>
    private class GatheringExtractorStream : ISequentialOutStream, IOutStream
    {
        private readonly GatheringExtractor<T> _extractor;
        private readonly ulong _totalSize;
        private Stream _tmpStream;
        private TempFile _tmpFile;
        // True when the entry is too large to buffer in a MemoryStream.
        private readonly bool _diskCached;
        private readonly RelativePath _path;

        public GatheringExtractorStream(GatheringExtractor<T> extractor, Entry entry, RelativePath path)
        {
            _path = path;
            _extractor = extractor;
            _totalSize = entry.Size;
            // Entries at or near int.MaxValue bytes are spooled to a temp file instead of memory.
            _diskCached = _totalSize >= int.MaxValue - 1024;
        }

        private IPath GetPath()
        {
            return _path;
        }

        /// <summary>
        /// Receives one chunk of the entry. Reports the whole chunk as processed.
        /// </summary>
        /// <returns>0 on success; 1 after recording the exception on the owning extractor</returns>
        public int Write(byte[] data, uint size, IntPtr processedSize)
        {
            try
            {
                // A chunk covering the whole entry can be mapped directly from the buffer.
                if (size == _totalSize)
                    WriteSingleCall(data, size);
                else if (_diskCached)
                    WriteDiskCached(data, size);
                else
                    WriteMemoryCached(data, size);

                if (processedSize != IntPtr.Zero)
                {
                    Marshal.WriteInt32(processedSize, (int)size);
                }

                return 0;
            }
            catch (Exception ex)
            {
                Utils.Log($"Error during extraction {ex}");
                _extractor.Kill(ex);
                return 1;
            }
        }

        // Maps the entry straight from the caller's buffer.
        private void WriteSingleCall(byte[] data, in uint size)
        {
            var result = _extractor._mapFn(_path, new MemoryBufferFactory(data, (int)size, GetPath())).Result;
            AddResult(result);
            Cleanup();
        }

        // Releases whatever buffer (memory stream or temp file) was used for this entry.
        private void Cleanup()
        {
            _tmpStream?.Dispose();
            _tmpFile?.DisposeAsync().AsTask().Wait();
        }

        private void AddResult(T result)
        {
            _extractor._results.Add(_path, result);
        }

        // Buffers chunks in memory and maps the entry once all bytes have arrived.
        private void WriteMemoryCached(byte[] data, in uint size)
        {
            if (_tmpStream == null)
                _tmpStream = new MemoryStream();
            _tmpStream.Write(data, 0, (int)size);

            if (_tmpStream.Length != (long)_totalSize) return;

            _tmpStream.Flush();
            _tmpStream.Position = 0;
            var result = _extractor._mapFn(_path, new MemoryStreamFactory((MemoryStream)_tmpStream, GetPath())).Result;
            AddResult(result);
            Cleanup();
        }

        // Buffers chunks in a temp file and maps the entry once all bytes have arrived.
        private void WriteDiskCached(byte[] data, in uint size)
        {
            if (_tmpFile == null)
            {
                _tmpFile = new TempFile();
                _tmpStream = _tmpFile.Path.Create().Result;
            }

            _tmpStream.Write(data, 0, (int)size);

            if (_tmpStream.Length != (long)_totalSize) return;

            _tmpStream.Flush();
            // Close before mapping so the file can be reopened through the factory.
            _tmpStream.Close();

            var result = _extractor._mapFn(_path, new NativeFileStreamFactory(_tmpFile.Path, GetPath())).Result;
            AddResult(result);
            Cleanup();
        }

        /// <summary>Seeking is not supported; the call is ignored.</summary>
        public void Seek(long offset, uint seekOrigin, IntPtr newPosition)
        {
        }

        /// <summary>Size hints are ignored.</summary>
        /// <returns>always 0</returns>
        public int SetSize(long newSize)
        {
            return 0;
        }
    }

    // Records a failure from a stream callback so Extract() can fault the returned task.
    private void Kill(Exception ex)
    {
        _killException = ex;
    }

    // No-op open callback required by the archive Open call.
    class ArchiveCallback : IArchiveOpenCallback
    {
        public void SetTotal(IntPtr files, IntPtr bytes)
        {
        }

        public void SetCompleted(IntPtr files, IntPtr bytes)
        {
        }
    }
}
}