wabbajack/Wabbajack.VirtualFileSystem/FileExtractor2/FileExtractor.cs

285 lines
11 KiB
C#
Raw Normal View History

2020-09-04 21:00:37 +00:00
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Reactive.Linq;
2020-09-04 21:00:37 +00:00
using System.Threading.Tasks;
using Compression.BSA;
2020-09-08 22:15:33 +00:00
using OMODFramework;
2020-09-04 21:00:37 +00:00
using Wabbajack.Common;
using Wabbajack.Common.FileSignatures;
using Wabbajack.Common.StatusFeed;
using Wabbajack.Common.StatusFeed.Errors;
using Wabbajack.VirtualFileSystem.ExtractedFiles;
2020-09-08 22:15:33 +00:00
using Utils = Wabbajack.Common.Utils;
2020-09-04 21:00:37 +00:00
namespace Wabbajack.VirtualFileSystem
{
public static class FileExtractor2
{
/// <summary>
/// Signature checker covering the archive file types that GatheringExtract switches on.
/// </summary>
public static readonly SignatureChecker ArchiveSigs = new SignatureChecker(
    Definitions.FileType.TES3,
    Definitions.FileType.BSA,
    Definitions.FileType.BA2,
    Definitions.FileType.ZIP,
    //Definitions.FileType.EXE,
    Definitions.FileType.RAR_OLD,
    Definitions.FileType.RAR_NEW,
    Definitions.FileType._7Z);
2020-09-18 03:27:59 +00:00
2021-01-29 13:14:19 +00:00
// Extensions referenced by the extraction code in this class. They are never
// reassigned, so they are declared readonly to prevent accidental mutation.
private static readonly Extension OMODExtension = new(".omod");
private static readonly Extension FOMODExtension = new(".fomod");
private static readonly Extension BSAExtension = new(".bsa");
2020-09-18 03:27:59 +00:00
/// <summary>
/// The set of file extensions this class lists as extractable.
/// </summary>
public static readonly HashSet<Extension> ExtractableExtensions = new()
{
    new Extension(".bsa"),
    new Extension(".ba2"),
    new Extension(".7z"),
    new Extension(".7zip"),
    new Extension(".rar"),
    new Extension(".zip"),
    OMODExtension,
    FOMODExtension,
};
2020-09-12 20:23:03 +00:00
/// <summary>
/// When true, will allow 7z to use multiple threads and cache more data in memory, potentially
/// using many GB of RAM during extraction but vastly reducing extraction times in the process.
/// </summary>
/// <remarks>
/// NOTE(review): nothing visible in this file reads this flag; the 7z command line built in
/// GatheringExtractWith7Zip always passes "-mmt=off". Confirm whether other code consumes it.
/// </remarks>
public static bool FavorPerfOverRAM { get; set; }
2020-09-04 21:00:37 +00:00
/// <summary>
/// Detects the archive type of <paramref name="sFn"/> from its signature, hands it to the
/// matching extractor, and returns the values produced by <paramref name="mapfn"/> keyed by
/// the relative path of each extracted entry.
/// </summary>
/// <param name="queue">Work queue forwarded to the 7-Zip based extractor.</param>
/// <param name="sFn">Stream factory for the archive to extract.</param>
/// <param name="shouldExtract">Filter deciding which entries are passed to <paramref name="mapfn"/>.</param>
/// <param name="mapfn">Callback invoked for every accepted entry; its result is stored in the returned dictionary.</param>
/// <param name="tempFolder">Scratch folder for 7-Zip extraction; falls back to <c>TempFolder.BaseFolder</c> when null.</param>
/// <param name="onlyFiles">Optional list of entries to extract; when given, the result count must equal its count.</param>
/// <returns>The mapped results keyed by relative path.</returns>
/// <exception cref="Exception">The signature is not a supported archive type, or the result count does not match <paramref name="onlyFiles"/>.</exception>
public static async Task<Dictionary<RelativePath, T>> GatheringExtract<T>(WorkQueue queue, IStreamFactory sFn,
    Predicate<RelativePath> shouldExtract, Func<RelativePath, IExtractedFile, ValueTask<T>> mapfn,
    AbsolutePath? tempFolder = null,
    HashSet<RelativePath> onlyFiles = null)
{
    tempFolder ??= TempFolder.BaseFolder;

    if (sFn is NativeFileStreamFactory)
    {
        Utils.Log($"Extracting {sFn.Name}");
    }

    await using var archive = await sFn.GetStream();
    var sig = await ArchiveSigs.MatchesAsync(archive);
    // Signature matching consumed part of the stream; rewind before handing it on.
    archive.Position = 0;

    Dictionary<RelativePath, T> results;
    switch (sig)
    {
        case Definitions.FileType.RAR_OLD:
        case Definitions.FileType.RAR_NEW:
        case Definitions.FileType._7Z:
        case Definitions.FileType.ZIP:
            // OMOD files carry one of the signatures above but need the dedicated OMOD reader.
            results = sFn.Name.FileName.Extension == OMODExtension
                ? await GatheringExtractWithOMOD<T>(archive, shouldExtract, mapfn)
                : await GatheringExtractWith7Zip<T>(queue, sFn, (Definitions.FileType)sig, shouldExtract,
                    mapfn, tempFolder.Value, onlyFiles);
            break;

        case Definitions.FileType.BSA:
        case Definitions.FileType.BA2:
            results = await GatheringExtractWithBSA(sFn, (Definitions.FileType)sig, shouldExtract, mapfn);
            break;

        // A TES3 signature is only accepted when the file is named *.bsa.
        case Definitions.FileType.TES3 when sFn.Name.FileName.Extension == BSAExtension:
            results = await GatheringExtractWithBSA(sFn, (Definitions.FileType)sig, shouldExtract, mapfn);
            break;

        default:
            throw new Exception($"Invalid file format {sFn.Name}");
    }

    var countMismatch = onlyFiles != null && onlyFiles.Count != results.Count;
    if (countMismatch)
    {
        throw new Exception(
            $"Sanity check error extracting {sFn.Name} - {results.Count} results, expected {onlyFiles.Count}");
    }

    return results;
}
/// <summary>
/// Extracts an OMOD archive by spooling <paramref name="archive"/> to a temporary file,
/// unpacking it into a temporary folder, and running <paramref name="mapfn"/> over every
/// entry accepted by <paramref name="shouldExtract"/>.
/// </summary>
/// <param name="archive">Stream positioned at the start of the OMOD data.</param>
/// <param name="shouldExtract">Filter deciding which entries are mapped.</param>
/// <param name="mapfn">Callback invoked for each accepted entry; its result is stored in the returned dictionary.</param>
/// <returns>The mapped results keyed by the entry's path relative to the extraction folder.</returns>
/// <remarks>
/// The temporary file and folder are disposed when this method returns, so
/// <paramref name="mapfn"/> must finish using the extracted file before it completes.
/// </remarks>
private static async Task<Dictionary<RelativePath,T>> GatheringExtractWithOMOD<T>(Stream archive, Predicate<RelativePath> shouldExtract, Func<RelativePath,IExtractedFile,ValueTask<T>> mapfn)
{
    // The OMOD reader is constructed from a file path, so the stream is written to disk first.
    // Both temporaries are disposed on exit (after omod, which is declared later) so they
    // do not accumulate on disk.
    await using var tmpFile = new TempFile();
    await tmpFile.Path.WriteAllAsync(archive);
    await using var dest = await TempFolder.Create();
    Utils.Log($"Extracting {(string)tmpFile.Path}");

    using var omod = new OMOD((string) tmpFile.Path);

    var results = new Dictionary<RelativePath, T>();

    omod.ExtractFilesParallel((string) dest.Dir, 4);

    // Plugins are extracted and listed separately, and only when the archive has a plugin CRC entry.
    var hasPlugins = omod.HasEntryFile(OMODEntryFileType.PluginsCRC);
    if (hasPlugins)
    {
        omod.ExtractFiles(false, (string) dest.Dir);
    }

    var files = omod.GetDataFiles();
    if (hasPlugins)
    {
        files.UnionWith(omod.GetPluginFiles());
    }

    foreach (var compressedFile in files)
    {
        var abs = compressedFile.Name.RelativeTo(dest.Dir);
        var rel = abs.RelativeTo(dest.Dir);
        if (!shouldExtract(rel))
        {
            continue;
        }

        var result = await mapfn(rel, new ExtractedNativeFile(abs));
        results.Add(rel, result);
    }

    return results;
}
/// <summary>
/// Opens the archive through <c>BSADispatch.OpenRead</c> and maps every entry accepted by
/// <paramref name="shouldExtract"/> through <paramref name="mapfn"/>.
/// </summary>
/// <param name="sFn">Stream factory for the archive.</param>
/// <param name="sig">Detected file type, forwarded to the BSA dispatcher.</param>
/// <param name="shouldExtract">Filter deciding which entries are mapped.</param>
/// <param name="mapfn">Callback invoked for each accepted entry.</param>
/// <returns>The mapped results keyed by entry path.</returns>
private static async Task<Dictionary<RelativePath,T>> GatheringExtractWithBSA<T>(IStreamFactory sFn, Definitions.FileType sig, Predicate<RelativePath> shouldExtract, Func<RelativePath,IExtractedFile,ValueTask<T>> mapfn)
{
    var reader = await BSADispatch.OpenRead(sFn, sig);
    var mapped = new Dictionary<RelativePath, T>();

    // The filter is evaluated lazily, one entry at a time, as the loop advances.
    foreach (var bsaFile in reader.Files.Where(f => shouldExtract(f.Path)))
    {
        var memoryFile = new ExtractedMemoryFile(await bsaFile.GetStreamFactory());
        var value = await mapfn(bsaFile.Path, memoryFile);
        mapped.Add(bsaFile.Path, value);
    }

    return mapped;
}
/// <summary>
/// Extracts an archive by running <c>Extractors\7z.exe</c> into a fresh directory under
/// <paramref name="tempPath"/>, then maps every extracted file accepted by
/// <paramref name="shouldExtract"/> through <paramref name="mapfn"/>.
/// </summary>
/// <param name="queue">Work queue used to process the extracted files in parallel.</param>
/// <param name="sf">Stream factory for the archive; spooled to a temp file when its name is not an absolute path.</param>
/// <param name="sig">Detected file type (not used by this method).</param>
/// <param name="shouldExtract">Filter deciding which extracted files are mapped.</param>
/// <param name="mapfn">Callback invoked for each accepted file; the file is deleted after it completes.</param>
/// <param name="tempPath">Folder under which the extraction directory and any spool file are created.</param>
/// <param name="onlyFiles">Optional list of entries to extract; passed to 7z as a list file.</param>
/// <returns>The mapped results keyed by path relative to the extraction directory.</returns>
private static async Task<Dictionary<RelativePath,T>> GatheringExtractWith7Zip<T>(WorkQueue queue, IStreamFactory sf, Definitions.FileType sig, Predicate<RelativePath> shouldExtract, Func<RelativePath,IExtractedFile,ValueTask<T>> mapfn,
    AbsolutePath tempPath, HashSet<RelativePath> onlyFiles)
{
    TempFile tmpFile = null;
    var dest = tempPath.Combine(Guid.NewGuid().ToString());
    dest.CreateDirectory();

    TempFile spoolFile = null;
    AbsolutePath source;

    try
    {
        if (sf.Name is AbsolutePath abs)
        {
            source = abs;
        }
        else
        {
            // 7z needs a file on disk, so archives without an absolute path are spooled
            // to a temp file that keeps the original extension.
            spoolFile = new TempFile(tempPath.Combine(Guid.NewGuid().ToString())
                .WithExtension(sf.Name.FileName.Extension));
            await using var s = await sf.GetStream();
            await spoolFile.Path.WriteAllAsync(s);
            source = spoolFile.Path;
        }

        // The detail message now names the destination directory; it previously repeated
        // the source file name as the extraction target.
        Utils.Log(new GenericInfo($"Extracting {(string)source.FileName}",
            $"The contents of {(string)source.FileName} are being extracted to {(string)dest} using 7zip.exe"));

        var process = new ProcessHelper {Path = @"Extractors\7z.exe".RelativeTo(AbsolutePath.EntryPoint),};

        // Argument order matches the original command line: the optional list file comes
        // after the source and before "-mmt=off".
        var arguments = new List<object> {"x", "-bsp1", "-y", $"-o\"{dest}\"", source};

        if (onlyFiles != null)
        {
            //It's stupid that we have to do this, but 7zip's file pattern matching isn't very fuzzy
            IEnumerable<string> AllVariants(string input)
            {
                yield return $"\"{input}\"";
                yield return $"\"\\{input}\"";
            }

            tmpFile = new TempFile();
            await tmpFile.Path.WriteAllLinesAsync(onlyFiles.SelectMany(f => AllVariants((string)f)).ToArray());
            arguments.Add($"@\"{tmpFile.Path}\"");
        }

        arguments.Add("-mmt=off");
        process.Arguments = arguments.ToArray();

        // Subscribe to progress output before starting the process. The returned task is
        // intentionally not awaited, as in the original code.
        _ = process.Output.Where(d => d.Type == ProcessHelper.StreamType.Output)
            .ForEachAsync(p =>
            {
                var (_, line) = p;
                if (line == null)
                    return;

                // Only lines with '%' at index 3 are treated as progress reports.
                if (line.Length <= 4 || line[3] != '%') return;

                int.TryParse(line.Substring(0, 3), out var percentInt);
                Utils.Status($"Extracting {(string)source.FileName} - {line.Trim()}",
                    Percent.FactoryPutInRange(percentInt / 100d));
            });

        var exitCode = await process.Start();

        if (exitCode != 0)
        {
            Utils.ErrorThrow(new _7zipReturnError(exitCode, source, dest, ""));
        }
        else
        {
            Utils.Status($"Extracting {source.FileName} - done", Percent.One, alsoLog: true);
        }

        var results = await dest.EnumerateFiles()
            .PMap(queue, async f =>
            {
                var path = f.RelativeTo(dest);
                // Rejected files yield a default tuple, which is filtered out below.
                if (!shouldExtract(path)) return ((RelativePath, T))default;
                var file = new ExtractedNativeFile(f);
                var result = await mapfn(path, file);
                await f.DeleteAsync();
                return (path, result);
            });

        return results.Where(d => d.Item1 != default)
            .ToDictionary(d => d.Item1, d => d.Item2);
    }
    finally
    {
        await dest.DeleteDirectory();

        if (tmpFile != null)
        {
            await tmpFile.DisposeAsync();
        }

        if (spoolFile != null)
        {
            await spoolFile.DisposeAsync();
        }
    }
}
/// <summary>
/// Extracts every entry of the archive at <paramref name="src"/> and writes each one
/// beneath <paramref name="dest"/>, creating parent directories as needed.
/// </summary>
/// <param name="queue">Work queue forwarded to GatheringExtract.</param>
/// <param name="src">Path of the archive to extract.</param>
/// <param name="dest">Root folder the entries are written under.</param>
public static async Task ExtractAll(WorkQueue queue, AbsolutePath src, AbsolutePath dest)
{
    // Copies one extracted entry to its place under dest. The int result is a
    // placeholder because GatheringExtract requires a mapped value.
    async ValueTask<int> WriteEntry(RelativePath path, IExtractedFile factory)
    {
        var target = path.RelativeTo(dest);
        target.Parent.CreateDirectory();
        await using var input = await factory.GetStream();
        await target.WriteAllAsync(input);
        return 0;
    }

    var archiveFactory = new NativeFileStreamFactory(src);
    await GatheringExtract<int>(queue, archiveFactory, _ => true, WriteEntry);
}
}
}