Rework file extraction to combine the old and new methods

This commit is contained in:
Timothy Baldridge
2020-10-09 21:02:58 -06:00
parent e17b577e5a
commit e557e46556
24 changed files with 267 additions and 1343 deletions

View File

@ -1,15 +1,16 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Reactive.Linq;
using System.Threading.Tasks;
using Compression.BSA;
using ICSharpCode.SharpZipLib.Zip.Compression.Streams;
using OMODFramework;
using SharpCompress.Archives.SevenZip;
using SharpCompress.Readers;
using Wabbajack.Common;
using Wabbajack.Common.FileSignatures;
using Wabbajack.VirtualFileSystem.SevenZipExtractor;
using Wabbajack.Common.StatusFeed;
using Wabbajack.Common.StatusFeed.Errors;
using Wabbajack.VirtualFileSystem.ExtractedFiles;
using Utils = Wabbajack.Common.Utils;
namespace Wabbajack.VirtualFileSystem
@ -47,9 +48,14 @@ namespace Wabbajack.VirtualFileSystem
public static bool FavorPerfOverRAM { get; set; }
public static async Task<Dictionary<RelativePath, T>> GatheringExtract<T>(IStreamFactory sFn,
Predicate<RelativePath> shouldExtract, Func<RelativePath, IStreamFactory, ValueTask<T>> mapfn)
public static async Task<Dictionary<RelativePath, T>> GatheringExtract<T>(WorkQueue queue, IStreamFactory sFn,
Predicate<RelativePath> shouldExtract, Func<RelativePath, IExtractedFile, ValueTask<T>> mapfn,
AbsolutePath? tempFolder = null,
HashSet<RelativePath> onlyFiles = null)
{
if (tempFolder == null)
tempFolder = TempFolder.BaseFolder;
if (sFn is NativeFileStreamFactory)
{
Utils.Log($"Extracting {sFn.Name}");
@ -58,6 +64,8 @@ namespace Wabbajack.VirtualFileSystem
var sig = await ArchiveSigs.MatchesAsync(archive);
archive.Position = 0;
Dictionary<RelativePath, T> results = new Dictionary<RelativePath, T>();
switch (sig)
{
case Definitions.FileType.RAR_OLD:
@ -67,33 +75,42 @@ namespace Wabbajack.VirtualFileSystem
{
if (sFn.Name.FileName.Extension == OMODExtension)
{
return await GatheringExtractWithOMOD(archive, shouldExtract, mapfn);
results = await GatheringExtractWithOMOD(archive, shouldExtract, mapfn);
}
else
{
return await GatheringExtractWith7Zip<T>(sFn, (Definitions.FileType)sig, shouldExtract,
mapfn);
results = await GatheringExtractWith7Zip<T>(queue, sFn, (Definitions.FileType)sig, shouldExtract,
mapfn, tempFolder.Value, onlyFiles);
}
break;
}
case Definitions.FileType.BSA:
case Definitions.FileType.BA2:
return await GatheringExtractWithBSA(sFn, (Definitions.FileType)sig, shouldExtract, mapfn);
results = await GatheringExtractWithBSA(sFn, (Definitions.FileType)sig, shouldExtract, mapfn);
break;
case Definitions.FileType.TES3:
if (sFn.Name.FileName.Extension == BSAExtension)
return await GatheringExtractWithBSA(sFn, (Definitions.FileType)sig, shouldExtract, mapfn);
results = await GatheringExtractWithBSA(sFn, (Definitions.FileType)sig, shouldExtract, mapfn);
else
throw new Exception($"Invalid file format {sFn.Name}");
break;
default:
throw new Exception($"Invalid file format {sFn.Name}");
}
if (onlyFiles != null && onlyFiles.Count != results.Count)
{
throw new Exception(
$"Sanity check error extracting {sFn.Name} - {results.Count} results, expected {onlyFiles.Count}");
}
return results;
}
private static async Task<Dictionary<RelativePath,T>> GatheringExtractWithOMOD<T>(Stream archive, Predicate<RelativePath> shouldExtract, Func<RelativePath,IStreamFactory,ValueTask<T>> mapfn)
private static async Task<Dictionary<RelativePath,T>> GatheringExtractWithOMOD<T>(Stream archive, Predicate<RelativePath> shouldExtract, Func<RelativePath,IExtractedFile,ValueTask<T>> mapfn)
{
var tmpFile = new TempFile();
await tmpFile.Path.WriteAllAsync(archive);
@ -113,7 +130,7 @@ namespace Wabbajack.VirtualFileSystem
var path = file.RelativeTo(dest.Dir);
if (!shouldExtract(path)) continue;
var result = await mapfn(path, new NativeFileStreamFactory(file, path));
var result = await mapfn(path, new ExtractedNativeFile(file, path));
results.Add(path, result);
}
@ -141,7 +158,7 @@ namespace Wabbajack.VirtualFileSystem
}
private static async Task<Dictionary<RelativePath,T>> GatheringExtractWithBSA<T>(IStreamFactory sFn, Definitions.FileType sig, Predicate<RelativePath> shouldExtract, Func<RelativePath,IStreamFactory,ValueTask<T>> mapfn)
private static async Task<Dictionary<RelativePath,T>> GatheringExtractWithBSA<T>(IStreamFactory sFn, Definitions.FileType sig, Predicate<RelativePath> shouldExtract, Func<RelativePath,IExtractedFile,ValueTask<T>> mapfn)
{
var archive = await BSADispatch.OpenRead(sFn, sig);
var results = new Dictionary<RelativePath, T>();
@ -150,21 +167,123 @@ namespace Wabbajack.VirtualFileSystem
if (!shouldExtract(entry.Path))
continue;
var result = await mapfn(entry.Path, await entry.GetStreamFactory());
var result = await mapfn(entry.Path, new ExtractedMemoryFile(await entry.GetStreamFactory()));
results.Add(entry.Path, result);
}
return results;
}
private static async Task<Dictionary<RelativePath,T>> GatheringExtractWith7Zip<T>(IStreamFactory sf, Definitions.FileType sig, Predicate<RelativePath> shouldExtract, Func<RelativePath,IStreamFactory,ValueTask<T>> mapfn)
private static async Task<Dictionary<RelativePath,T>> GatheringExtractWith7Zip<T>(WorkQueue queue, IStreamFactory sf, Definitions.FileType sig, Predicate<RelativePath> shouldExtract, Func<RelativePath,IExtractedFile,ValueTask<T>> mapfn,
AbsolutePath tempPath, HashSet<RelativePath> onlyFiles)
{
return await new GatheringExtractor<T>(sf, sig, shouldExtract, mapfn).Extract();
TempFile tmpFile = null;
var dest = tempPath.Combine(Guid.NewGuid().ToString());
dest.CreateDirectory();
TempFile spoolFile = null;
AbsolutePath source;
try
{
if (sf.Name is AbsolutePath abs)
{
source = abs;
}
else
{
spoolFile = new TempFile(tempPath.Combine(Guid.NewGuid().ToString())
.WithExtension(source.Extension));
await using var s = await sf.GetStream();
await spoolFile.Path.WriteAllAsync(s);
}
Utils.Log(new GenericInfo($"Extracting {(string)source.FileName}",
$"The contents of {(string)source.FileName} are being extracted to {(string)source.FileName} using 7zip.exe"));
var process = new ProcessHelper {Path = @"Extractors\7z.exe".RelativeTo(AbsolutePath.EntryPoint),};
if (onlyFiles != null)
{
//It's stupid that we have to do this, but 7zip's file pattern matching isn't very fuzzy
IEnumerable<string> AllVariants(string input)
{
yield return $"\"{input}\"";
yield return $"\"\\{input}\"";
}
tmpFile = new TempFile();
await tmpFile.Path.WriteAllLinesAsync(onlyFiles.SelectMany(f => AllVariants((string)f)).ToArray());
process.Arguments = new object[]
{
"x", "-bsp1", "-y", $"-o\"{dest}\"", source, $"@\"{tmpFile.Path}\"", "-mmt=off"
};
}
else
{
process.Arguments = new object[] {"x", "-bsp1", "-y", $"-o\"{dest}\"", source, "-mmt=off"};
}
var result = process.Output.Where(d => d.Type == ProcessHelper.StreamType.Output)
.ForEachAsync(p =>
{
var (_, line) = p;
if (line == null)
return;
if (line.Length <= 4 || line[3] != '%') return;
int.TryParse(line.Substring(0, 3), out var percentInt);
Utils.Status($"Extracting {(string)source.FileName} - {line.Trim()}",
Percent.FactoryPutInRange(percentInt / 100d));
});
var exitCode = await process.Start();
if (exitCode != 0)
{
Utils.ErrorThrow(new _7zipReturnError(exitCode, source, dest, ""));
}
else
{
Utils.Status($"Extracting {source.FileName} - done", Percent.One, alsoLog: true);
}
var results = await dest.EnumerateFiles()
.PMap(queue, async f =>
{
var path = f.RelativeTo(dest);
if (!shouldExtract(path)) return ((RelativePath, T))default;
var file = new ExtractedNativeFile(f);
var result = await mapfn(path, file);
await f.DeleteAsync();
return (path, result);
});
return results.Where(d => d.Item1 != default)
.ToDictionary(d => d.Item1, d => d.Item2);
}
finally
{
await dest.DeleteDirectory();
if (tmpFile != null)
{
await tmpFile.DisposeAsync();
}
if (spoolFile != null)
{
await spoolFile.DisposeAsync();
}
}
}
public static async Task ExtractAll(AbsolutePath src, AbsolutePath dest)
public static async Task ExtractAll(WorkQueue queue, AbsolutePath src, AbsolutePath dest)
{
await GatheringExtract(new NativeFileStreamFactory(src), _ => true, async (path, factory) =>
await GatheringExtract(queue, new NativeFileStreamFactory(src), _ => true, async (path, factory) =>
{
var abs = path.RelativeTo(dest);
abs.Parent.CreateDirectory();