wabbajack/Wabbajack.FileExtractor/FileExtractor.cs

326 lines
12 KiB
C#
Raw Normal View History

2021-09-27 12:42:46 +00:00
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Reactive.Linq;
using System.Runtime.InteropServices;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using OMODFramework;
using Wabbajack.Common;
using Wabbajack.Common.FileSignatures;
using Wabbajack.Compression.BSA;
using Wabbajack.DTOs.Streams;
using Wabbajack.FileExtractor.ExtractedFiles;
using Wabbajack.Paths;
using Wabbajack.Paths.IO;
using Wabbajack.RateLimiter;
2021-10-23 16:51:17 +00:00
namespace Wabbajack.FileExtractor;
public class FileExtractor
2021-09-27 12:42:46 +00:00
{
2021-10-23 16:51:17 +00:00
/// <summary>
/// Signature checker built from the file types this class dispatches on in
/// <c>GatheringExtract</c>: TES3, BSA, BA2, ZIP, both RAR variants and 7z.
/// EXE is intentionally left out (commented out below).
/// </summary>
public static readonly SignatureChecker ArchiveSigs = new(FileType.TES3,
FileType.BSA,
FileType.BA2,
FileType.ZIP,
//FileType.EXE,
FileType.RAR_OLD,
FileType.RAR_NEW,
FileType._7Z);
2021-09-27 12:42:46 +00:00
2021-10-23 16:51:17 +00:00
private static readonly Extension OMODExtension = new(".omod");
private static readonly Extension FOMODExtension = new(".fomod");
2021-09-27 12:42:46 +00:00
2021-10-23 16:51:17 +00:00
private static readonly Extension BSAExtension = new(".bsa");
2021-09-27 12:42:46 +00:00
2021-10-23 16:51:17 +00:00
/// <summary>
/// File extensions listed as extractable: Bethesda archives (.bsa, .ba2),
/// 7-Zip/RAR/ZIP archives, and the OMOD/FOMOD extensions declared above.
/// </summary>
public static readonly HashSet<Extension> ExtractableExtensions = new()
{
new Extension(".bsa"),
new Extension(".ba2"),
new Extension(".7z"),
new Extension(".7zip"),
new Extension(".rar"),
new Extension(".zip"),
OMODExtension,
FOMODExtension
};
private readonly IResource<FileExtractor> _limiter;
private readonly ILogger<FileExtractor> _logger;
private readonly TemporaryFileManager _manager;
private readonly ParallelOptions _parallelOptions;
/// <summary>
/// Creates an extractor and stores the supplied collaborators for later use.
/// </summary>
/// <param name="logger">Logger used for extraction messages.</param>
/// <param name="parallelOptions">Parallel options kept on the instance.</param>
/// <param name="manager">Source of temporary files and folders.</param>
/// <param name="limiter">Resource limiter from which extraction jobs are started.</param>
public FileExtractor(ILogger<FileExtractor> logger, ParallelOptions parallelOptions, TemporaryFileManager manager,
    IResource<FileExtractor> limiter)
{
    // Single deconstructing assignment instead of four separate statements.
    (_logger, _parallelOptions, _manager, _limiter) = (logger, parallelOptions, manager, limiter);
}
2021-09-27 12:42:46 +00:00
2021-10-23 16:51:17 +00:00
/// <summary>
/// Opens the archive provided by <paramref name="sFn"/>, detects its format from the stream
/// signature and dispatches to the matching extractor, collecting one mapped value per
/// extracted entry.
/// </summary>
/// <param name="sFn">Factory for the archive stream; its name is used for extension checks and messages.</param>
/// <param name="shouldExtract">Filter deciding which entries are handed to <paramref name="mapfn"/>.</param>
/// <param name="mapfn">Maps an extracted entry to the value stored in the result.</param>
/// <param name="token">Cancellation token forwarded to the extractors.</param>
/// <param name="onlyFiles">
/// Optional set of entries forwarded to the 7-Zip extractor. When supplied, the number of
/// results must equal its count or the call fails the sanity check.
/// </param>
/// <param name="progressFunction">Optional progress callback; only the 7-Zip extractor receives it.</param>
/// <returns>The mapped values keyed by entry path.</returns>
/// <exception cref="Exception">
/// The signature is not a supported archive type, a TES3-signed file does not have the .bsa
/// extension, or the result count does not match <paramref name="onlyFiles"/>.
/// </exception>
public async Task<IDictionary<RelativePath, T>> GatheringExtract<T>(
    IStreamFactory sFn,
    Predicate<RelativePath> shouldExtract,
    Func<RelativePath, IExtractedFile, ValueTask<T>> mapfn,
    CancellationToken token,
    HashSet<RelativePath>? onlyFiles = null,
    Action<Percent>? progressFunction = null)
{
    if (sFn is NativeFileStreamFactory) _logger.LogInformation("Extracting {file}", sFn.Name);
    await using var archive = await sFn.GetStream();
    var sig = await ArchiveSigs.MatchesAsync(archive);
    // Rewind after signature sniffing so the OMOD path reads the archive from the start.
    archive.Position = 0;

    IDictionary<RelativePath, T> results;

    switch (sig)
    {
        case FileType.RAR_OLD:
        case FileType.RAR_NEW:
        case FileType._7Z:
        case FileType.ZIP:
        {
            if (sFn.Name.FileName.Extension == OMODExtension)
            {
                results = await GatheringExtractWithOMOD(archive, shouldExtract, mapfn, token);
            }
            else
            {
                // The 7-Zip extractor allocates its own destination folder, so no
                // temporary folder is created here.
                results = await GatheringExtractWith7Zip(sFn, shouldExtract,
                    mapfn, onlyFiles, token, progressFunction);
            }

            break;
        }
        case FileType.BSA:
        case FileType.BA2:
            results = await GatheringExtractWithBSA(sFn, (FileType) sig, shouldExtract, mapfn, token);
            break;
        case FileType.TES3:
            // A TES3 signature is only accepted for files named *.bsa.
            if (sFn.Name.FileName.Extension == BSAExtension)
                results = await GatheringExtractWithBSA(sFn, (FileType) sig, shouldExtract, mapfn, token);
            else
                throw new Exception($"Invalid file format {sFn.Name}");
            break;
        default:
            throw new Exception($"Invalid file format {sFn.Name}");
    }

    if (onlyFiles != null && onlyFiles.Count != results.Count)
        throw new Exception(
            $"Sanity check error extracting {sFn.Name} - {results.Count} results, expected {onlyFiles.Count}");
    return results;
}
2021-09-27 12:42:46 +00:00
2021-10-23 16:51:17 +00:00
/// <summary>
/// Extracts an OMOD archive: the stream is spooled to a temporary file, unpacked into a
/// temporary folder, and every data file (plus plugin files when the OMOD has a plugins
/// entry) that passes <paramref name="shouldExtract"/> is handed to <paramref name="mapfn"/>.
/// </summary>
/// <param name="archive">Archive stream, positioned at the start of the OMOD.</param>
/// <param name="shouldExtract">Filter deciding which entries are mapped.</param>
/// <param name="mapfn">Maps an extracted entry to the value stored in the result.</param>
/// <param name="token">Cancellation token used for spooling and parallel extraction.</param>
/// <returns>The mapped values keyed by entry path.</returns>
/// <remarks>
/// The temporary file and folder are removed when this method returns, so the extracted files
/// are only available while <paramref name="mapfn"/> runs.
/// </remarks>
private async Task<Dictionary<RelativePath, T>> GatheringExtractWithOMOD<T>
(Stream archive, Predicate<RelativePath> shouldExtract, Func<RelativePath, IExtractedFile, ValueTask<T>> mapfn,
    CancellationToken token)
{
    // Both temporaries are disposed on exit (previously they were leaked). Using
    // declarations dispose in reverse order, so the OMOD reader is closed before the
    // folder and the spooled file are removed.
    await using var tmpFile = _manager.CreateFile();
    await tmpFile.Path.WriteAllAsync(archive, token);
    await using var dest = _manager.CreateFolder();

    using var omod = new OMOD(tmpFile.Path.ToString());

    var results = new Dictionary<RelativePath, T>();
    var hasPlugins = omod.HasEntryFile(OMODEntryFileType.PluginsCRC);

    omod.ExtractFilesParallel(dest.Path.ToString(), 4, cancellationToken: token);
    if (hasPlugins)
        omod.ExtractFiles(false, dest.Path.ToString());

    var files = omod.GetDataFiles();
    if (hasPlugins)
        files.UnionWith(omod.GetPluginFiles());

    foreach (var compressedFile in files)
    {
        var abs = compressedFile.Name.ToRelativePath().RelativeTo(dest.Path);
        var rel = abs.RelativeTo(dest.Path);
        if (!shouldExtract(rel)) continue;

        var result = await mapfn(rel, new ExtractedNativeFile(abs));
        results.Add(rel, result);
    }

    return results;
}
2021-09-27 12:42:46 +00:00
2021-10-23 18:36:35 +00:00
/// <summary>
/// Opens a BSA/BA2 style archive through <c>BSADispatch</c> and maps every entry accepted by
/// <paramref name="shouldExtract"/>, handing each one to <paramref name="mapFn"/> as an
/// <c>ExtractedMemoryFile</c>.
/// </summary>
/// <param name="sFn">Factory for the archive stream.</param>
/// <param name="sig">Detected file type passed to the dispatcher.</param>
/// <param name="shouldExtract">Filter deciding which entries are mapped.</param>
/// <param name="mapFn">Maps an entry to the value stored in the result.</param>
/// <param name="token">
/// Checked before each entry; once cancellation is requested the loop stops and the entries
/// gathered so far are returned.
/// </param>
/// <returns>The mapped values keyed by entry path.</returns>
public async Task<Dictionary<RelativePath, T>> GatheringExtractWithBSA<T>(IStreamFactory sFn,
    FileType sig,
    Predicate<RelativePath> shouldExtract,
    Func<RelativePath, IExtractedFile, ValueTask<T>> mapFn,
    CancellationToken token)
{
    var reader = await BSADispatch.Open(sFn, sig);
    var gathered = new Dictionary<RelativePath, T>();

    foreach (var file in reader.Files)
    {
        if (token.IsCancellationRequested) break;

        var entryPath = file.Path;
        if (shouldExtract(entryPath))
        {
            var streamFactory = await file.GetStreamFactory(token);
            var mapped = await mapFn(entryPath, new ExtractedMemoryFile(streamFactory));
            gathered.Add(entryPath, mapped);
        }
    }

    _logger.LogInformation("Finished extracting {Name}", sFn.Name);
    return gathered;
}
2021-09-27 12:42:46 +00:00
2021-10-23 16:51:17 +00:00
/// <summary>
/// Extracts an archive by running the bundled 7-Zip executable into a temporary folder, then
/// maps every extracted file accepted by <paramref name="shouldExtract"/> through
/// <paramref name="mapfn"/>.
/// </summary>
/// <param name="sf">
/// Archive source. When its name is an <c>AbsolutePath</c> that file is used directly;
/// otherwise the stream is first spooled to a temporary file.
/// </param>
/// <param name="shouldExtract">Filter deciding which extracted files are mapped.</param>
/// <param name="mapfn">Maps an extracted file to the value stored in the result.</param>
/// <param name="onlyFiles">Optional list of entries to extract; passed to 7-Zip as a list file.</param>
/// <param name="token">Cancellation token for the limiter job, spooling and output processing.</param>
/// <param name="progressFunction">Optional callback receiving the percentage parsed from 7-Zip's output.</param>
/// <returns>The mapped values keyed by path relative to the extraction folder.</returns>
/// <remarks>
/// Each extracted file is deleted right after it has been mapped, and the temporary folder is
/// removed when the method returns. A non-zero 7-Zip exit code is logged as a warning but does
/// not abort the call.
/// </remarks>
public async Task<IDictionary<RelativePath, T>> GatheringExtractWith7Zip<T>(IStreamFactory sf,
    Predicate<RelativePath> shouldExtract,
    Func<RelativePath, IExtractedFile, ValueTask<T>> mapfn,
    IReadOnlyCollection<RelativePath>? onlyFiles,
    CancellationToken token,
    Action<Percent>? progressFunction = null)
{
    TemporaryPath? tmpFile = null;
    await using var dest = _manager.CreateFolder();

    TemporaryPath? spoolFile = null;
    AbsolutePath source;

    var job = await _limiter.Begin($"Extracting {sf.Name}", 0, token);
    try
    {
        if (sf.Name is AbsolutePath abs)
        {
            source = abs;
        }
        else
        {
            // 7-Zip needs a file on disk, so non-file sources are spooled first.
            spoolFile = _manager.CreateFile(sf.Name.FileName.Extension);
            await using var s = await sf.GetStream();
            await spoolFile.Value.Path.WriteAllAsync(s, token);
            source = spoolFile.Value.Path;
        }

        _logger.LogInformation("Extracting {Source}", source.FileName);

        var initialPath = "";
        if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
            initialPath = @"Extractors\windows-x64\7z.exe";
        else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
            initialPath = @"Extractors\linux-x64\7zz";
        else if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
            initialPath = @"Extractors\mac\7zz";

        var process = new ProcessHelper
            {Path = initialPath.ToRelativePath().RelativeTo(KnownFolders.EntryPoint)};

        if (onlyFiles != null)
        {
            //It's stupid that we have to do this, but 7zip's file pattern matching isn't very fuzzy
            IEnumerable<string> AllVariants(string input)
            {
                var forward = input.Replace("\\", "/");
                yield return $"\"{input}\"";
                yield return $"\"\\{input}\"";
                yield return $"\"{forward}\"";
                yield return $"\"/{forward}\"";
            }

            tmpFile = _manager.CreateFile();
            await tmpFile.Value.Path.WriteAllLinesAsync(onlyFiles.SelectMany(f => AllVariants((string) f)),
                token);
            process.Arguments = new object[]
            {
                "x", "-bsp1", "-y", $"-o\"{dest}\"", source, $"@\"{tmpFile.Value.ToString()}\"", "-mmt=off"
            };
        }
        else
        {
            process.Arguments = new object[] {"x", "-bsp1", "-y", $"-o\"{dest}\"", source, "-mmt=off"};
        }

        _logger.LogInformation("{prog} {args}", process.Path, process.Arguments);

        var totalSize = source.Size();
        var lastPercent = 0;
        job.Size = totalSize;

        // The subscription is intentionally not awaited here; it only observes progress lines.
        // NOTE(review): confirm that process.Output completes/cleans up once the process exits.
        _ = process.Output.Where(d => d.Type == ProcessHelper.StreamType.Output)
            .ForEachAsync(p =>
            {
                var (_, line) = p;
                if (line == null)
                    return;

                // Progress lines carry a 3-character right-aligned number followed by '%'.
                // A bare "100%" is exactly four characters long and must be accepted.
                if (line.Length < 4 || line[3] != '%') return;

                if (!int.TryParse(line[..3], out var percentInt)) return;

                // Convert percentages to byte positions (size * percent / 100) and report the delta.
                var oldPosition = totalSize * lastPercent / 100;
                var newPosition = totalSize * percentInt / 100;
                var throughput = newPosition - oldPosition;
                job.ReportNoWait((int) throughput);

                // Report the percentage that was just parsed, not the previous one.
                progressFunction?.Invoke(Percent.FactoryPutInRange(percentInt, 100));

                lastPercent = percentInt;
            }, token);

        var exitCode = await process.Start();

        if (exitCode != 0)
            _logger.LogWarning("7zip exited with code {ExitCode} while extracting {Source}", exitCode,
                source.FileName);

        // Release the limiter job before mapping the extracted files.
        // NOTE(review): presumably so mapfn can start further extractions through the same
        // limiter without waiting on this job — confirm. The finally block disposes again to
        // cover the failure paths.
        job.Dispose();
        var results = await dest.Path.EnumerateFiles()
            .PMapAll(async f =>
            {
                var path = f.RelativeTo(dest.Path);
                if (!shouldExtract(path)) return ((RelativePath, T)) default;
                var file = new ExtractedNativeFile(f);
                var mapResult = await mapfn(path, file);
                f.Delete();
                return (path, mapResult);
            })
            .Where(d => d.Item1 != default)
            .ToDictionary(d => d.Item1, d => d.Item2);

        return results;
    }
    finally
    {
        job.Dispose();

        if (tmpFile != null) await tmpFile.Value.DisposeAsync();
        if (spoolFile != null) await spoolFile.Value.DisposeAsync();
    }
}
2021-10-23 16:51:17 +00:00
/// <summary>
/// Extracts the archive at <paramref name="src"/> and writes every entry accepted by
/// <paramref name="filterFn"/> below <paramref name="dest"/>, creating parent directories as needed.
/// </summary>
/// <param name="src">Archive file to extract.</param>
/// <param name="dest">Folder the entries are written under.</param>
/// <param name="token">Cancellation token for extraction and file writes.</param>
/// <param name="filterFn">Optional entry filter; when omitted every entry is extracted.</param>
/// <param name="updateProgress">Optional progress callback forwarded to the extraction.</param>
public async Task ExtractAll(AbsolutePath src, AbsolutePath dest, CancellationToken token,
    Predicate<RelativePath>? filterFn = null, Action<Percent>? updateProgress = null)
{
    Predicate<RelativePath> acceptEntry = filterFn ?? (_ => true);

    // Copies one extracted entry to its place under dest; the returned value is a placeholder.
    async ValueTask<int> WriteEntry(RelativePath relative, IExtractedFile extracted)
    {
        var target = relative.RelativeTo(dest);
        target.Parent.CreateDirectory();
        await using var input = await extracted.GetStream();
        await target.WriteAllAsync(input, token);
        return 0;
    }

    await GatheringExtract<int>(new NativeFileStreamFactory(src), acceptEntry, WriteEntry, token,
        progressFunction: updateProgress);
}
2021-09-27 12:42:46 +00:00
}