diff --git a/Wabbajack.Common/Data.cs b/Wabbajack.Common/Data.cs
index 850a77f4..0548d128 100644
--- a/Wabbajack.Common/Data.cs
+++ b/Wabbajack.Common/Data.cs
@@ -92,11 +92,26 @@ namespace Wabbajack.Common
         /// <summary>
         /// MurMur3 hash of the archive this file comes from
         /// </summary>
-        public string ArchiveHash;
+        public string[] ArchiveHashPath;

         /// <summary>
         /// The relative path of the file in the archive
         /// </summary>
         public string From;
+
+        private string _fullPath = null;
+        [JsonIgnore]
+        public string FullPath
+        {
+            get
+            {
+                if (_fullPath == null) {
+                    var path = ArchiveHashPath.ToList();
+                    path.Add(From);
+                    _fullPath = String.Join("|", path);
+                }
+                return _fullPath;
+            }
+        }
     }

     public class CreateBSA : Directive
@@ -207,6 +222,7 @@ namespace Wabbajack.Common
         public string Name;
         public string Meta;
         public string AbsolutePath;
+        public List<string> HashPath;
     }

     /// <summary>
@@ -228,6 +244,11 @@ namespace Wabbajack.Common
         public long Size;
     }

+    public class IndexedArchiveEntry : IndexedEntry
+    {
+        public string[] HashPath;
+    }
+
     /// <summary>
     /// Data found inside a BSA file in an archive
     /// </summary>
diff --git a/Wabbajack.Common/FileExtractor.cs b/Wabbajack.Common/FileExtractor.cs
new file mode 100644
index 00000000..ea80cdd1
--- /dev/null
+++ b/Wabbajack.Common/FileExtractor.cs
@@ -0,0 +1,153 @@
+using Compression.BSA;
+using ICSharpCode.SharpZipLib.Zip;
+using SevenZipExtractor;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Wabbajack.Common
+{
+    public class FileExtractor
+    {
+        public class Entry
+        {
+            public string Name;
+            public ulong Size;
+        }
+
+        public static void Extract(string file, Func<Entry, Stream> f, bool leave_open = false)
+        {
+            if (Path.GetExtension(file) == ".bsa")
+            {
+                ExtractAsBSA(file, f, leave_open);
+            }
+            else if (Path.GetExtension(file) == ".zip")
+            {
+                ExtractViaNetZip(file, f, leave_open);
+            }
+            else
+            {
+                ExtractVia7Zip(file, f, leave_open);
+            }
+        }
+
+        private static void ExtractAsBSA(string file, Func<Entry, Stream> f, bool leave_open)
+        {
+            using (var ar = new BSAReader(file))
+            {
+                foreach (var entry in ar.Files)
+                {
+                    var stream = f(new Entry()
+                    {
+                        Name = entry.Path,
+                        Size = (ulong)entry.Size
+                    });
+                    if (stream == null) continue;
+
+                    var data = entry.GetData();
+                    stream.Write(data, 0, data.Length);
+
+                    if (!leave_open)
+                        stream.Dispose();
+                }
+            }
+        }
+
+        private static void ExtractVia7Zip(string file, Func<Entry, Stream> f, bool leave_open)
+        {
+            using (var af = new ArchiveFile(file))
+            {
+                af.Extract(entry =>
+                {
+                    if (entry.IsFolder) return null;
+                    return f(new Entry()
+                    {
+                        Name = entry.FileName,
+                        Size = entry.Size
+                    });
+                }, leave_open);
+            }
+        }
+
+        private const int ZIP_BUFFER_SIZE = 1024 * 8;
+        private static void ExtractViaNetZip(string file, Func<Entry, Stream> f, bool leave_open)
+        {
+            using (var s = new ZipFile(File.OpenRead(file)))
+            {
+                s.IsStreamOwner = true;
+                s.UseZip64 = UseZip64.On;
+
+                if (s.OfType<ZipEntry>().FirstOrDefault(e => !e.CanDecompress) != null)
+                {
+                    ExtractVia7Zip(file, f, leave_open);
+                    return;
+                }
+
+                foreach (ZipEntry entry in s)
+                {
+                    if (!entry.IsFile) continue;
+                    var stream = f(new Entry()
+                    {
+                        Name = entry.Name.Replace('/', '\\'),
+                        Size = (ulong)entry.Size
+                    });
+
+                    if (stream == null) continue;
+
+                    using (var instr = s.GetInputStream(entry))
+                    {
+                        instr.CopyTo(stream);
+                    }
+
+                    if (!leave_open) stream.Dispose();
+
+                }
+            }
+        }
+
+
+        public static void DeepExtract(string file, IEnumerable<FromArchive> files, Func<FromArchive, Entry, Stream> fnc, bool leave_open = false, int depth = 1)
+        {
+            // Files we need to extract at this level
+            var files_for_level = files.Where(f => f.ArchiveHashPath.Length == depth)
+                                       .ToDictionary(e => e.From);
+            // Archives we need to extract at this level
+            var archives_for_level = files.Where(f => f.ArchiveHashPath.Length > depth)
+                                          .GroupBy(f => f.ArchiveHashPath[depth])
+                                          .ToDictionary(f => f.Key);
+
+            var disk_archives = new Dictionary<string, string>();
+
+            Extract(file, e =>
+            {
+                Stream a = Stream.Null;
+                Stream b = Stream.Null;
+
+                if (files_for_level.TryGetValue(e.Name, out var fe))
+                {
+                    a = fnc(fe, e);
+                }
+
+                if (archives_for_level.TryGetValue(e.Name, out var archive))
+                {
+                    var name = Path.GetTempFileName() + Path.GetExtension(e.Name);
+                    disk_archives.Add(e.Name, name);
+                    b = File.OpenWrite(name);
+                }
+
+                if (a == null && b == null) return null;
+
+                return new SplittingStream(a, leave_open, b, false);
+
+            });
+
+            foreach (var archive in disk_archives)
+            {
+                DeepExtract(archive.Value, archives_for_level[archive.Key], fnc, leave_open, depth + 1);
+                File.Delete(archive.Value);
+            }
+        }
+    }
+}
diff --git a/Wabbajack.Common/SplittingStream.cs b/Wabbajack.Common/SplittingStream.cs
new file mode 100644
index 00000000..858988b8
--- /dev/null
+++ b/Wabbajack.Common/SplittingStream.cs
@@ -0,0 +1,71 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Wabbajack.Common
+{
+    public class SplittingStream : Stream
+    {
+        private Stream _a;
+        private Stream _b;
+        private bool _leave_a_open;
+        private bool _leave_b_open;
+
+        public override bool CanRead => false;
+
+        public override bool CanSeek => false;
+
+        public override bool CanWrite => true;
+
+        public override long Length => throw new NotImplementedException();
+
+        public override long Position { get => throw new NotImplementedException(); set => throw new NotImplementedException(); }
+
+        public SplittingStream(Stream a, bool leave_a_open, Stream b, bool leave_b_open)
+        {
+            _a = a;
+            _b = b;
+            _leave_a_open = leave_a_open;
+            _leave_b_open = leave_b_open;
+        }
+
+        public override void Flush()
+        {
+            _a.Flush();
+            _b.Flush();
+        }
+
+        public override int Read(byte[] buffer, int offset, int count)
+        {
+            throw new NotImplementedException();
+        }
+
+        public override long Seek(long offset, SeekOrigin origin)
+        {
+            throw new NotImplementedException();
+        }
+
+        public override void SetLength(long value)
+        {
+            throw new NotImplementedException();
+        }
+
+        public override void Write(byte[] buffer, int offset, int count)
+        {
+            _a.Write(buffer, offset, count);
+            _b.Write(buffer, offset, count);
+        }
+
+        protected override void Dispose(bool disposing)
+        {
+            if (disposing)
+            {
+                if (!_leave_a_open) _a.Dispose();
+                if (!_leave_b_open) _b.Dispose();
+            }
+        }
+    }
+}
diff --git a/Wabbajack.Common/Wabbajack.Common.csproj b/Wabbajack.Common/Wabbajack.Common.csproj
index 3ec0e215..5083f1d0 100644
--- a/Wabbajack.Common/Wabbajack.Common.csproj
+++ b/Wabbajack.Common/Wabbajack.Common.csproj
@@ -79,6 +79,7 @@
+
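Illustrative sketch (not part of the changeset): one way a caller might drive the new FileExtractor.DeepExtract API. The DeepExtractExample class and the archivePath, directives, and outputFolder names are hypothetical; only FileExtractor, FromArchive, and the callback shape come from the code above.

// Hypothetical caller, for illustration only.
using System.IO;
using Wabbajack.Common;

public static class DeepExtractExample
{
    public static void ExtractAll(string archivePath, FromArchive[] directives, string outputFolder)
    {
        // DeepExtract matches each entry against the directives whose ArchiveHashPath has the
        // current depth, tees nested archives to temp files via SplittingStream, and recurses.
        // With leave_open left at its default (false) it disposes each returned stream for us.
        FileExtractor.DeepExtract(archivePath, directives, (directive, entry) =>
        {
            var dest = Path.Combine(outputFolder, directive.To);
            Directory.CreateDirectory(Path.GetDirectoryName(dest));
            return File.OpenWrite(dest);
        });
    }
}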
diff --git a/Wabbajack.Common/XXHashOutputStream.cs b/Wabbajack.Common/XXHashOutputStream.cs
new file mode 100644
index 00000000..564c92d7
--- /dev/null
+++ b/Wabbajack.Common/XXHashOutputStream.cs
@@ -0,0 +1,62 @@
+using NeoSmart.Hashing.XXHash;
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Wabbajack.Common
+{
+    public class XXHashOutputStream : Stream
+    {
+        private XXHash64 _hasher;
+
+        public XXHashOutputStream()
+        {
+            _hasher = new XXHash64();
+        }
+
+        public override bool CanRead => false;
+
+        public override bool CanSeek => false;
+
+        public override bool CanWrite => true;
+
+        public override long Length => throw new NotImplementedException();
+
+        public override long Position { get => throw new NotImplementedException(); set => throw new NotImplementedException(); }
+
+        public override void Flush()
+        {
+        }
+
+        public override int Read(byte[] buffer, int offset, int count)
+        {
+            throw new NotImplementedException();
+        }
+
+        public override long Seek(long offset, SeekOrigin origin)
+        {
+            throw new NotImplementedException();
+        }
+
+        public override void SetLength(long value)
+        {
+            throw new NotImplementedException();
+        }
+
+        public override void Write(byte[] buffer, int offset, int count)
+        {
+            _hasher.Update(buffer, offset, count);
+        }
+
+        public string Result
+        {
+            get
+            {
+                return BitConverter.GetBytes(_hasher.Result).ToBase64();
+            }
+        }
+    }
+}
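Illustrative sketch (not part of the changeset): XXHashOutputStream and SplittingStream are both write-only, so they compose naturally to hash data while it is written to disk. The helper name and paths below are hypothetical.

// Hypothetical helper, for illustration only.
using System.IO;
using Wabbajack.Common;

public static class HashWhileWritingExample
{
    // Copies source to destPath and returns the base64 xxHash64 of the bytes written.
    public static string CopyAndHash(Stream source, string destPath)
    {
        var hasher = new XXHashOutputStream();
        using (var file = File.OpenWrite(destPath))
        using (var split = new SplittingStream(file, true, hasher, true))
        {
            source.CopyTo(split); // every Write() goes to both the file and the hasher
        }
        return hasher.Result; // base64 of the 64-bit digest
    }
}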
diff --git a/Wabbajack/Compiler.cs b/Wabbajack/Compiler.cs
index b13eaa26..1f0441d6 100644
--- a/Wabbajack/Compiler.cs
+++ b/Wabbajack/Compiler.cs
@@ -60,6 +60,8 @@ namespace Wabbajack

         public List<IndexedArchive> IndexedArchives;

+        public List<IndexedArchiveEntry> IndexedFiles { get; private set; }
+
         public void Info(string msg, params object[] args)
         {
             if (args.Length > 0)
@@ -98,6 +100,43 @@ namespace Wabbajack
             IndexedArchives = Directory.EnumerateFiles(MO2DownloadsFolder)
                                        .Where(file => Consts.SupportedArchives.Contains(Path.GetExtension(file)))
                                        .PMap(file => LoadArchive(file));
+            IndexedFiles = FlattenFiles(IndexedArchives);
+            Info($"Found {IndexedFiles.Count} files in archives");
+        }
+
+        private List<IndexedArchiveEntry> FlattenFiles(IEnumerable<IndexedArchive> archives)
+        {
+            return archives.PMap(e => FlattenArchiveEntries(e, null, new string[0]))
+                           .SelectMany(e => e)
+                           .ToList();
+        }
+
+        private IEnumerable<IndexedArchiveEntry> FlattenArchiveEntries(IndexedArchiveCache archive, string name, string[] path)
+        {
+            var new_path = new string[path.Length + 1];
+            Array.Copy(path, 0, new_path, 0, path.Length);
+            new_path[path.Length] = path.Length == 0 ? archive.Hash : name;
+
+            foreach (var e in archive.Entries)
+            {
+                yield return new IndexedArchiveEntry()
+                {
+                    Path = e.Path,
+                    Size = e.Size,
+                    Hash = e.Hash,
+                    HashPath = new_path
+                };
+            }
+            if (archive.InnerArchives != null) {
+                foreach (var inner in archive.InnerArchives)
+                {
+                    foreach (var entry in FlattenArchiveEntries(inner.Value, inner.Key, new_path))
+                    {
+                        yield return entry;
+                    }
+                }
+            }
+        }
@@ -285,7 +324,7 @@ namespace Wabbajack
         private void BuildPatches()
         {
             var groups = InstallDirectives.OfType<PatchedFromArchive>()
-                .GroupBy(p => p.ArchiveHash)
+                .GroupBy(p => p.ArchiveHashPath[0])
                 .ToList();

             Info("Patching building patches from {0} archives", groups.Count);
@@ -302,17 +341,19 @@ namespace Wabbajack
         private void BuildArchivePatches(string archive_sha, IEnumerable<PatchedFromArchive> group, Dictionary<string, string> absolute_paths)
         {
             var archive = IndexedArchives.First(a => a.Hash == archive_sha);
-            var paths = group.Select(g => g.From).ToHashSet();
+            var paths = group.Select(g => g.FullPath).ToHashSet();
             var streams = new Dictionary<string, MemoryStream>();
             Status($"Extracting {paths.Count} patch files from {archive.Name}");
             // First we fetch the source files from the input archive
-            FileExtractor.Extract(archive.AbsolutePath, entry =>
+
+            FileExtractor.DeepExtract(archive.AbsolutePath, group, (fe, entry) =>
             {
-                if (!paths.Contains(entry.Name)) return null;
+                if (!paths.Contains(fe.FullPath)) return null;
                 var result = new MemoryStream();
-                streams.Add(entry.Name, result);
+                streams.Add(fe.FullPath, result);
                 return result;
-            });
+            }, false);

             var extracted = streams.ToDictionary(k => k.Key, v => v.Value.ToArray());
@@ -322,7 +363,7 @@ namespace Wabbajack
             group.PMap(entry =>
             {
                 Info("Patching {0}", entry.To);
-                var ss = extracted[entry.From];
+                var ss = extracted[entry.FullPath];
                 using (var origin = new MemoryStream(ss))
                 using (var output = new MemoryStream())
                 {
@@ -358,10 +399,11 @@ namespace Wabbajack
         private void GatherArchives()
         {
+            Info($"Building a list of archives based on the files required");
             var archives = IndexedArchives.GroupBy(a => a.Hash).ToDictionary(k => k.Key, k => k.First());

             var shas = InstallDirectives.OfType<FromArchive>()
-                .Select(a => a.ArchiveHash)
+                .Select(a => a.ArchiveHashPath[0])
                 .Distinct();

             SelectedArchives = shas.PMap(sha => ResolveArchive(sha, archives));
@@ -639,9 +681,11 @@ namespace Wabbajack

         private Func<RawSourceFile, Directive> IncludePatches()
         {
-            var indexed = (from archive in IndexedArchives
-                           from entry in archive.Entries
-                           select new { archive = archive, entry = entry })
+            var archive_shas = IndexedArchives.GroupBy(e => e.Hash)
+                                              .ToDictionary(e => e.Key);
+            var indexed = (from entry in IndexedFiles
+                           select new { archive = archive_shas[entry.HashPath[0]].First(),
+                                        entry = entry })
                            .GroupBy(e => Path.GetFileName(e.entry.Path).ToLower())
                            .ToDictionary(e => e.Key);
@@ -653,7 +697,7 @@ namespace Wabbajack
                     var e = source.EvolveTo<PatchedFromArchive>();
                     e.From = found.entry.Path;
-                    e.ArchiveHash = found.archive.Hash;
+                    e.ArchiveHashPath = found.entry.HashPath;
                     e.To = source.Path;
                     return e;
                 }
@@ -778,9 +822,12 @@ namespace Wabbajack

         private Func<RawSourceFile, Directive> DirectMatch()
        {
-            var indexed = (from archive in IndexedArchives
-                           from entry in archive.Entries
-                           select new { archive = archive, entry = entry })
+            var archive_shas = IndexedArchives.GroupBy(e => e.Hash)
+                                              .ToDictionary(e => e.Key);
+
+            var indexed = (from entry in IndexedFiles
+                           select new { archive = archive_shas[entry.HashPath[0]].First(),
+                                        entry = entry })
                            .GroupBy(e => e.entry.Hash)
                            .ToDictionary(e => e.Key);
@@ -810,7 +857,7 @@ namespace Wabbajack
                     match = found.OrderByDescending(f => new FileInfo(f.archive.AbsolutePath).LastWriteTime)
                                  .FirstOrDefault();

-                    result.ArchiveHash = match.archive.Hash;
+                    result.ArchiveHashPath = match.entry.HashPath;
                     result.From = match.entry.Path;
                     return result;
                 }
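Illustrative sketch (not part of the changeset): what the new ArchiveHashPath / FullPath convention looks like for a file that lives one archive deep. All hash and path values below are invented.

// Hypothetical values, for illustration only.
using System;
using Wabbajack.Common;

public static class HashPathExample
{
    public static void Show()
    {
        // FlattenArchiveEntries records the top-level archive's hash first, then the name of
        // each nested archive; directives copy that array from IndexedArchiveEntry.HashPath.
        var directive = new FromArchive
        {
            ArchiveHashPath = new[] { "kF2QmDkK5d0=", "textures.bsa" },
            From = "textures\\armor\\iron.dds",
            To = "mods\\Example Mod\\textures\\armor\\iron.dds"
        };

        // FullPath joins the hash path and the member path with '|':
        // kF2QmDkK5d0=|textures.bsa|textures\armor\iron.dds
        Console.WriteLine(directive.FullPath);
    }
}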
diff --git a/Wabbajack/Installer.cs b/Wabbajack/Installer.cs
index 8f457d08..c627388b 100644
--- a/Wabbajack/Installer.cs
+++ b/Wabbajack/Installer.cs
@@ -166,7 +166,7 @@ namespace Wabbajack
             Info("Grouping Install Files");
             var grouped = ModList.Directives
                                  .OfType<FromArchive>()
-                                 .GroupBy(e => e.ArchiveHash)
+                                 .GroupBy(e => e.ArchiveHashPath[0])
                                  .ToDictionary(k => k.Key);
             var archives = ModList.Archives
                                   .Select(a => new { Archive = a, AbsolutePath = HashedArchives.GetOrDefault(a.Hash) })
@@ -181,12 +181,13 @@ namespace Wabbajack
         private void InstallArchive(Archive archive, string absolutePath, IGrouping<string, FromArchive> grouping)
         {
             Status("Extracting {0}", archive.Name);
-            var files = grouping.GroupBy(e => e.From)
+            var files = grouping.GroupBy(e => e.FullPath)
                                 .ToDictionary(e => e.Key);

-            FileExtractor.Extract(absolutePath, entry =>
+            FileExtractor.DeepExtract(absolutePath, files.Select(f => f.Value.First()),
+                (fe, entry) =>
             {
-                if (files.TryGetValue(entry.Name, out var directives))
+                if (files.TryGetValue(fe.FullPath, out var directives))
                 {
                     var directive = directives.First();
                     var absolute = Path.Combine(Outputfolder, directive.To);
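Illustrative sketch (not part of the changeset): on the install side only the first element of ArchiveHashPath identifies a downloaded archive; deeper elements name archives nested inside it. The reporting helper and the ModList shape assumed below are hypothetical.

// Hypothetical reporting helper, for illustration only; assumes a ModList with a Directives list.
using System;
using System.Linq;
using Wabbajack.Common;

public static class ArchiveGroupingExample
{
    public static void Report(ModList modlist)
    {
        var byArchive = modlist.Directives
                               .OfType<FromArchive>()
                               .GroupBy(d => d.ArchiveHashPath[0]); // top-level archive hash
        foreach (var group in byArchive)
            Console.WriteLine($"{group.Key}: {group.Count()} files to install");
    }
}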