Merge pull request #7 from halgari/issue-4

Add support for nested archives
Timothy Baldridge 2019-08-09 14:51:49 -06:00 committed by GitHub
commit 671d633ea1
7 changed files with 401 additions and 26 deletions
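In short, directives no longer carry a single ArchiveHash; they carry an ArchiveHashPath, the hash of the downloaded archive followed by the names of any nested archives, while From still names the file inside the innermost archive. A minimal sketch of the addressing scheme, assuming the FromArchive directive type used throughout the diff and purely hypothetical hash and file names:

    // Hypothetical values: a texture stored in "textures.bsa", which itself
    // sits inside a downloaded archive whose hash is "OUTERHASH".
    var nested = new FromArchive
    {
        ArchiveHashPath = new[] { "OUTERHASH", "textures.bsa" },
        From = "armor\\iron.dds"
    };
    // FullPath joins the parts with '|': "OUTERHASH|textures.bsa|armor\iron.dds"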


@@ -251,7 +251,7 @@ namespace Compression.BSA
private int _offset;
private FolderRecord _folder;
private string _name;
private uint _originalSize;
private uint? _originalSize;
public FileRecord(BSAReader bsa, FolderRecord folderRecord, BinaryReader src)
{
@@ -296,11 +296,36 @@ namespace Compression.BSA
{
get
{
if (Compressed) return (int)_originalSize;
if (Compressed)
{
if (_originalSize == null)
LoadOriginalSize();
return (int)_originalSize;
}
return _size;
}
}
private void LoadOriginalSize()
{
using (var in_file = File.OpenRead(_bsa._fileName))
using (var rdr = new BinaryReader(in_file))
{
rdr.BaseStream.Position = _offset;
string _name;
int file_size = _size;
if (_bsa.HasNameBlobs)
{
var name_size = rdr.ReadByte();
file_size -= name_size + 1;
rdr.BaseStream.Position = _offset + 1 + name_size;
}
_originalSize = rdr.ReadUInt32();
}
}
public ulong Hash {
get
{


@@ -92,11 +92,26 @@ namespace Wabbajack.Common
/// <summary>
/// MurMur3 hash of the archive this file comes from
/// </summary>
public string ArchiveHash;
public string[] ArchiveHashPath;
/// <summary>
/// The relative path of the file in the archive
/// </summary>
public string From;
private string _fullPath = null;
[JsonIgnore]
public string FullPath
{
get
{
if (_fullPath == null) {
var path = ArchiveHashPath.ToList();
path.Add(From);
_fullPath = String.Join("|", path);
}
return _fullPath;
}
}
}
public class CreateBSA : Directive
@@ -194,7 +209,11 @@ namespace Wabbajack.Common
public class IndexedArchiveCache
{
public string Hash;
public int Version;
public List<IndexedEntry> Entries;
[JsonProperty(NullValueHandling = NullValueHandling.Ignore)]
public Dictionary<string, IndexedArchiveCache> InnerArchives;
}
public class IndexedArchive : IndexedArchiveCache
@@ -203,6 +222,7 @@ namespace Wabbajack.Common
public string Name;
public string Meta;
public string AbsolutePath;
public List<string> HashPath;
}
/// <summary>
@@ -224,6 +244,11 @@ namespace Wabbajack.Common
public long Size;
}
public class IndexedArchiveEntry : IndexedEntry
{
public string[] HashPath;
}
/// <summary>
/// Data found inside a BSA file in an archive
/// </summary>


@@ -0,0 +1,153 @@
using Compression.BSA;
using ICSharpCode.SharpZipLib.Zip;
using SevenZipExtractor;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Wabbajack.Common
{
public class FileExtractor
{
public class Entry
{
public string Name;
public ulong Size;
}
public static void Extract(string file, Func<Entry, Stream> f, bool leave_open = false)
{
if (Path.GetExtension(file) == ".bsa")
{
ExtractAsBSA(file, f, leave_open);
}
else if (Path.GetExtension(file) == ".zip")
{
ExtractViaNetZip(file, f, leave_open);
}
else
{
ExtractVia7Zip(file, f, leave_open);
}
}
private static void ExtractAsBSA(string file, Func<Entry, Stream> f, bool leave_open)
{
using (var ar = new BSAReader(file))
{
foreach (var entry in ar.Files)
{
var stream = f(new Entry()
{
Name = entry.Path,
Size = (ulong)entry.Size
});
if (stream == null) continue;
var data = entry.GetData();
stream.Write(data, 0, data.Length);
if (!leave_open)
stream.Dispose();
}
}
}
private static void ExtractVia7Zip(string file, Func<Entry, Stream> f, bool leave_open)
{
using (var af = new ArchiveFile(file))
{
af.Extract(entry =>
{
if (entry.IsFolder) return null;
return f(new Entry()
{
Name = entry.FileName,
Size = entry.Size
});
}, leave_open);
}
}
private const int ZIP_BUFFER_SIZE = 1024 * 8;
private static void ExtractViaNetZip(string file, Func<Entry, Stream> f, bool leave_open)
{
using (var s = new ZipFile(File.OpenRead(file)))
{
s.IsStreamOwner = true;
s.UseZip64 = UseZip64.On;
if (s.OfType<ZipEntry>().FirstOrDefault(e => !e.CanDecompress) != null)
{
ExtractVia7Zip(file, f, leave_open);
return;
}
foreach (ZipEntry entry in s)
{
if (!entry.IsFile) continue;
var stream = f(new Entry()
{
Name = entry.Name.Replace('/', '\\'),
Size = (ulong)entry.Size
});
if (stream == null) continue;
using (var instr = s.GetInputStream(entry))
{
instr.CopyTo(stream);
}
if (!leave_open) stream.Dispose();
}
}
}
public static void DeepExtract(string file, IEnumerable<FromArchive> files, Func<FromArchive, Entry, Stream> fnc, bool leave_open = false, int depth = 1)
{
// Files we need to extract at this level
var files_for_level = files.Where(f => f.ArchiveHashPath.Length == depth).ToDictionary(e => e.From);
// Archives we need to extract at this level
var archives_for_level = files.Where(f => f.ArchiveHashPath.Length > depth)
.GroupBy(f => f.ArchiveHashPath[depth])
.ToDictionary(f => f.Key);
var disk_archives = new Dictionary<string, string>();
Extract(file, e =>
{
Stream a = Stream.Null;
Stream b = Stream.Null;
if (files_for_level.TryGetValue(e.Name, out var fe))
{
a = fnc(fe, e);
}
if (archives_for_level.TryGetValue(e.Name, out var archive))
{
var name = Path.GetTempFileName() + Path.GetExtension(e.Name);
disk_archives.Add(e.Name, name);
b = File.OpenWrite(name);
}
if (a == null && b == null) return null;
return new SplittingStream(a, leave_open, b, false);
});
foreach (var archive in disk_archives)
{
DeepExtract(archive.Value, archives_for_level[archive.Key], fnc, leave_open, depth + 1);
File.Delete(archive.Value);
}
}
}
}
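A minimal usage sketch of DeepExtract, assuming the FromArchive type from Wabbajack.Common and purely hypothetical archive, hash, and path names:

    var wanted = new List<FromArchive>
    {
        // A file at the top level of the downloaded archive (ArchiveHashPath length 1).
        new FromArchive { ArchiveHashPath = new[] { "OUTERHASH" }, From = "readme.txt" },
        // A file inside a BSA nested in that archive (ArchiveHashPath length 2).
        new FromArchive { ArchiveHashPath = new[] { "OUTERHASH", "textures.bsa" }, From = "armor\\iron.dds" }
    };
    FileExtractor.DeepExtract("downloads\\mod.7z", wanted, (fe, entry) =>
    {
        // Return a stream that should receive this file's bytes, or null to skip it;
        // the stream is disposed for us unless leave_open is passed as true.
        return File.OpenWrite(Path.Combine("output", Path.GetFileName(fe.From)));
    });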


@@ -0,0 +1,71 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Wabbajack.Common
{
public class SplittingStream : Stream
{
private Stream _a;
private Stream _b;
private bool _leave_a_open;
private bool _leave_b_open;
public override bool CanRead => false;
public override bool CanSeek => false;
public override bool CanWrite => true;
public override long Length => throw new NotImplementedException();
public override long Position { get => throw new NotImplementedException(); set => throw new NotImplementedException(); }
public SplittingStream(Stream a, bool leave_a_open, Stream b, bool leave_b_open)
{
_a = a;
_b = b;
_leave_a_open = leave_a_open;
_leave_b_open = leave_b_open;
}
public override void Flush()
{
_a.Flush();
_b.Flush();
}
public override int Read(byte[] buffer, int offset, int count)
{
throw new NotImplementedException();
}
public override long Seek(long offset, SeekOrigin origin)
{
throw new NotImplementedException();
}
public override void SetLength(long value)
{
throw new NotImplementedException();
}
public override void Write(byte[] buffer, int offset, int count)
{
_a.Write(buffer, offset, count);
_b.Write(buffer, offset, count);
}
protected override void Dispose(bool disposing)
{
if (disposing)
{
if (!_leave_a_open) _a.Dispose();
if (!_leave_b_open) _b.Dispose();
}
}
}
}
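On its own the stream simply duplicates every write into both targets; a short sketch with hypothetical targets:

    var copy = new MemoryStream();
    using (var file = File.OpenWrite(Path.GetTempFileName()))
    using (var split = new SplittingStream(copy, true, file, true))
    {
        var bytes = Encoding.UTF8.GetBytes("example payload");
        split.Write(bytes, 0, bytes.Length);
        split.Flush();
    }
    // copy now holds the same bytes as the temp file and is still open,
    // because leave_a_open (and leave_b_open) were passed as true here.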


@@ -79,6 +79,7 @@
<Compile Include="DynamicIniData.cs" />
<Compile Include="FileExtractor.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="SplittingStream.cs" />
<Compile Include="Utils.cs" />
<Compile Include="WorkQueue.cs" />
</ItemGroup>
@@ -86,6 +87,10 @@
<None Include="packages.config" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\Compression.BSA\Compression.BSA.csproj">
<Project>{ff5d892f-8ff4-44fc-8f7f-cd58f307ad1b}</Project>
<Name>Compression.BSA</Name>
</ProjectReference>
<ProjectReference Include="..\SevenZipExtractor\SevenZipExtractor.csproj">
<Project>{8aa97f58-5044-4bba-b8d9-a74b6947a660}</Project>
<Name>SevenZipExtractor</Name>


@@ -60,6 +60,8 @@ namespace Wabbajack
public List<IndexedArchive> IndexedArchives;
public List<IndexedArchiveEntry> IndexedFiles { get; private set; }
public void Info(string msg, params object[] args)
{
if (args.Length > 0)
@@ -98,19 +100,66 @@ namespace Wabbajack
IndexedArchives = Directory.EnumerateFiles(MO2DownloadsFolder)
.Where(file => Consts.SupportedArchives.Contains(Path.GetExtension(file)))
.PMap(file => LoadArchive(file));
IndexedFiles = FlattenFiles(IndexedArchives);
Info($"Found {IndexedFiles.Count} files in archives");
}
private List<IndexedArchiveEntry> FlattenFiles(IEnumerable<IndexedArchive> archives)
{
return archives.PMap(e => FlattenArchiveEntries(e, null, new string[0]))
.SelectMany(e => e)
.ToList();
}
private IEnumerable<IndexedArchiveEntry> FlattenArchiveEntries(IndexedArchiveCache archive, string name, string[] path)
{
var new_path = new string[path.Length + 1];
Array.Copy(path, 0, new_path, 0, path.Length);
new_path[path.Length] = path.Length == 0 ? archive.Hash : name;
foreach (var e in archive.Entries)
{
yield return new IndexedArchiveEntry()
{
Path = e.Path,
Size = e.Size,
Hash = e.Hash,
HashPath = new_path
};
}
if (archive.InnerArchives != null) {
foreach (var inner in archive.InnerArchives)
{
foreach (var entry in FlattenArchiveEntries(inner.Value, inner.Key, new_path))
{
yield return entry;
}
}
}
}
private const int ARCHIVE_CONTENTS_VERSION = 1;
private IndexedArchive LoadArchive(string file)
{
TOP:
string metaname = file + ".archive_contents";
if (metaname.FileExists() && new FileInfo(metaname).LastWriteTime >= new FileInfo(file).LastWriteTime)
{
Status("Loading Archive Index for {0}", Path.GetFileName(file));
var info = metaname.FromJSON<IndexedArchive>();
if (info.Version != ARCHIVE_CONTENTS_VERSION)
{
File.Delete(metaname);
goto TOP;
}
info.Name = Path.GetFileName(file);
info.AbsolutePath = file;
var ini_name = file + ".meta";
if (ini_name.FileExists())
{
@@ -121,16 +170,44 @@ namespace Wabbajack
return info;
}
IndexArchive(file).ToJSON(metaname);
goto TOP;
}
private bool IsArchiveFile(string name)
{
var ext = Path.GetExtension(name);
if (ext == ".bsa" || Consts.SupportedArchives.Contains(ext))
return true;
return false;
}
private IndexedArchiveCache IndexArchive(string file)
{
Status("Indexing {0}", Path.GetFileName(file));
var streams = new Dictionary<string, (SHA256Managed, long)>();
FileExtractor.Extract(file, entry => {
var inner_archives = new Dictionary<string, string>();
FileExtractor.Extract(file, entry =>
{
Stream inner;
if (IsArchiveFile(entry.Name))
{
var name = Path.GetTempFileName() + Path.GetExtension(entry.Name);
inner_archives.Add(entry.Name, name);
inner = File.OpenWrite(name);
}
else
{
inner = Stream.Null;
}
var sha = new SHA256Managed();
var os = new CryptoStream(Stream.Null, sha, CryptoStreamMode.Write);
var os = new CryptoStream(inner, sha, CryptoStreamMode.Write);
streams.Add(entry.Name, (sha, (long)entry.Size));
return os;
});
var indexed = new IndexedArchiveCache();
indexed.Version = ARCHIVE_CONTENTS_VERSION;
indexed.Hash = file.FileSHA256();
indexed.Entries = streams.Select(entry =>
{
@@ -144,8 +221,18 @@ namespace Wabbajack
streams.Do(e => e.Value.Item1.Dispose());
indexed.ToJSON(metaname);
return LoadArchive(file);
if (inner_archives.Count > 0)
{
var result = inner_archives.Select(archive =>
{
return (archive.Key, IndexArchive(archive.Value));
}).ToDictionary(e => e.Key, e => e.Item2);
indexed.InnerArchives = result;
inner_archives.Do(e => File.Delete(e.Value));
}
return indexed;
}
public void Compile()
@@ -237,7 +324,7 @@ namespace Wabbajack
private void BuildPatches()
{
var groups = InstallDirectives.OfType<PatchedFromArchive>()
.GroupBy(p => p.ArchiveHash)
.GroupBy(p => p.ArchiveHashPath[0])
.ToList();
Info("Patching building patches from {0} archives", groups.Count);
@@ -254,17 +341,19 @@ namespace Wabbajack
private void BuildArchivePatches(string archive_sha, IEnumerable<PatchedFromArchive> group, Dictionary<string, string> absolute_paths)
{
var archive = IndexedArchives.First(a => a.Hash == archive_sha);
var paths = group.Select(g => g.From).ToHashSet();
var paths = group.Select(g => g.FullPath).ToHashSet();
var streams = new Dictionary<string, MemoryStream>();
Status($"Extracting {paths.Count} patch files from {archive.Name}");
// First we fetch the source files from the input archive
FileExtractor.Extract(archive.AbsolutePath, entry =>
FileExtractor.DeepExtract(archive.AbsolutePath, group, (fe, entry) =>
{
if (!paths.Contains(entry.Name)) return null;
if (!paths.Contains(fe.FullPath)) return null;
var result = new MemoryStream();
streams.Add(entry.Name, result);
streams.Add(fe.FullPath, result);
return result;
}, false);
var extracted = streams.ToDictionary(k => k.Key, v => v.Value.ToArray());
@@ -274,7 +363,7 @@ namespace Wabbajack
group.PMap(entry =>
{
Info("Patching {0}", entry.To);
var ss = extracted[entry.From];
var ss = extracted[entry.FullPath];
using (var origin = new MemoryStream(ss))
using (var output = new MemoryStream())
{
@@ -310,10 +399,11 @@ namespace Wabbajack
private void GatherArchives()
{
Info($"Building a list of archives based on the files required");
var archives = IndexedArchives.GroupBy(a => a.Hash).ToDictionary(k => k.Key, k => k.First());
var shas = InstallDirectives.OfType<FromArchive>()
.Select(a => a.ArchiveHash)
.Select(a => a.ArchiveHashPath[0])
.Distinct();
SelectedArchives = shas.PMap(sha => ResolveArchive(sha, archives));
@@ -591,9 +681,11 @@ namespace Wabbajack
private Func<RawSourceFile, Directive> IncludePatches()
{
var indexed = (from archive in IndexedArchives
from entry in archive.Entries
select new { archive = archive, entry = entry })
var archive_shas = IndexedArchives.GroupBy(e => e.Hash)
.ToDictionary(e => e.Key);
var indexed = (from entry in IndexedFiles
select new { archive = archive_shas[entry.HashPath[0]].First(),
entry = entry })
.GroupBy(e => Path.GetFileName(e.entry.Path).ToLower())
.ToDictionary(e => e.Key);
@@ -605,7 +697,7 @@ namespace Wabbajack
var e = source.EvolveTo<PatchedFromArchive>();
e.From = found.entry.Path;
e.ArchiveHash = found.archive.Hash;
e.ArchiveHashPath = found.entry.HashPath;
e.To = source.Path;
return e;
}
@@ -730,9 +822,12 @@ namespace Wabbajack
private Func<RawSourceFile, Directive> DirectMatch()
{
var indexed = (from archive in IndexedArchives
from entry in archive.Entries
select new { archive = archive, entry = entry })
var archive_shas = IndexedArchives.GroupBy(e => e.Hash)
.ToDictionary(e => e.Key);
var indexed = (from entry in IndexedFiles
select new { archive = archive_shas[entry.HashPath[0]].First(),
entry = entry })
.GroupBy(e => e.entry.Hash)
.ToDictionary(e => e.Key);
@@ -762,7 +857,7 @@ namespace Wabbajack
match = found.OrderByDescending(f => new FileInfo(f.archive.AbsolutePath).LastWriteTime)
.FirstOrDefault();
result.ArchiveHash = match.archive.Hash;
result.ArchiveHashPath = match.entry.HashPath;
result.From = match.entry.Path;
return result;
}
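For reference, a sketch of the nested index the compiler caches in the .archive_contents file, built from the IndexedArchiveCache type in Wabbajack.Common with purely hypothetical hashes, sizes, and names:

    var cache = new IndexedArchiveCache
    {
        Version = 1, // ARCHIVE_CONTENTS_VERSION
        Hash = "OUTER_SHA256",
        Entries = new List<IndexedEntry>
        {
            new IndexedEntry { Path = "readme.txt", Size = 10, Hash = "SHA_A" }
        },
        InnerArchives = new Dictionary<string, IndexedArchiveCache>
        {
            ["textures.bsa"] = new IndexedArchiveCache
            {
                Version = 1,
                Hash = "INNER_SHA256",
                Entries = new List<IndexedEntry>
                {
                    new IndexedEntry { Path = "armor\\iron.dds", Size = 42, Hash = "SHA_B" }
                }
            }
        }
    };
    // FlattenArchiveEntries above then yields HashPath = ["OUTER_SHA256"] for
    // readme.txt and HashPath = ["OUTER_SHA256", "textures.bsa"] for armor\iron.dds.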


@@ -166,7 +166,7 @@ namespace Wabbajack
Info("Grouping Install Files");
var grouped = ModList.Directives
.OfType<FromArchive>()
.GroupBy(e => e.ArchiveHash)
.GroupBy(e => e.ArchiveHashPath[0])
.ToDictionary(k => k.Key);
var archives = ModList.Archives
.Select(a => new { Archive = a, AbsolutePath = HashedArchives.GetOrDefault(a.Hash) })
@@ -181,12 +181,13 @@ namespace Wabbajack
private void InstallArchive(Archive archive, string absolutePath, IGrouping<string, FromArchive> grouping)
{
Status("Extracting {0}", archive.Name);
var files = grouping.GroupBy(e => e.From)
var files = grouping.GroupBy(e => e.FullPath)
.ToDictionary(e => e.Key);
FileExtractor.Extract(absolutePath, entry =>
FileExtractor.DeepExtract(absolutePath, files.Select(f => f.Value.First()),
(fe, entry) =>
{
if (files.TryGetValue(entry.Name, out var directives))
if (files.TryGetValue(fe.FullPath, out var directives))
{
var directive = directives.First();
var absolute = Path.Combine(Outputfolder, directive.To);