wabbajack/Wabbajack.VirtualFileSystem/Context.cs

433 lines
16 KiB
C#
Raw Normal View History

using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.IO;
using System.Linq;
using System.Reactive.Linq;
using System.Reactive.Subjects;
using System.Text;
using System.Threading.Tasks;
using Alphaleonis.Win32.Filesystem;
2020-11-02 04:35:44 +00:00
using ICSharpCode.SharpZipLib.Zip.Compression.Streams;
using Wabbajack.Common;
2020-11-02 04:35:44 +00:00
using Wabbajack.Common.StatusFeed.Errors;
using Wabbajack.VirtualFileSystem.ExtractedFiles;
using Directory = Alphaleonis.Win32.Filesystem.Directory;
using File = System.IO.File;
using FileInfo = Alphaleonis.Win32.Filesystem.FileInfo;
using Path = Alphaleonis.Win32.Filesystem.Path;
namespace Wabbajack.VirtualFileSystem
{
public class Context
{
/// <summary>
/// The staging-folder deletion started by the static constructor. <see cref="AddRoot"/> and
/// <see cref="AddRoots"/> await it before they read the index.
/// </summary>
private static Task _cleanupTask;

/// <summary>
/// Logs a notice and starts deleting <see cref="StagingFolder"/>. The deletion is not awaited here;
/// its task is kept in <see cref="_cleanupTask"/> so later callers can wait for it.
/// </summary>
static Context()
{
    Utils.Log("Cleaning VFS, this may take a bit of time");

    // Only start the work; awaiting is left to the methods that need the folder gone.
    _cleanupTask = Utils.DeleteDirectory(StagingFolder);
}
/// <summary>Format version written into the cache file header and required when reading one back.</summary>
public const ulong FileVersion = 0x03;

/// <summary>Marker written (as ASCII bytes) at the very start of a cache file.</summary>
public const string Magic = "WABBAJACK VFS FILE";

/// <summary>
/// The "vfs_staging" path produced by <c>RelativeToWorkingDirectory()</c>. It is deleted by the static
/// constructor and is the parent of every folder returned by <see cref="GetTemporaryFolder"/>.
/// </summary>
private static readonly AbsolutePath StagingFolder = ((RelativePath)"vfs_staging").RelativeToWorkingDirectory();

/// <summary>
/// The current index. It starts out as <see cref="IndexRoot.Empty"/> and is replaced as a whole
/// whenever one of the integrating methods of this class finishes.
/// </summary>
public IndexRoot Index { get; private set; } = IndexRoot.Empty;

// Backing subject for ProgressUpdates.
private readonly Subject<(string, float)> _progressUpdates = new Subject<(string, float)>();

/// <summary>
/// A stream of tuples such as ("Update Title", 0.25): the name of the current task
/// together with its current progress.
/// </summary>
public IObservable<(string, float)> ProgressUpdates => _progressUpdates;

/// <summary>Status tracker for this context; defaults to <c>new StatusUpdateTracker(1)</c>.</summary>
public StatusUpdateTracker UpdateTracker { get; set; } = new StatusUpdateTracker(1);

/// <summary>The work queue handed to the constructor; used for the parallel maps in this class.</summary>
public WorkQueue Queue { get; }

/// <summary>Set from the constructor's <c>extendedHashes</c> argument. Not read inside this class.</summary>
public bool UseExtendedHashes { get; set; }

/// <summary>Not read inside this class; NOTE(review): presumably consumed by the extraction code - confirm.</summary>
public bool FavorPerfOverRAM { get; set; }
/// <summary>
/// Creates a context with an empty index.
/// </summary>
/// <param name="queue">Work queue stored in <see cref="Queue"/>.</param>
/// <param name="extendedHashes">Initial value of <see cref="UseExtendedHashes"/>.</param>
public Context(WorkQueue queue, bool extendedHashes = false)
{
    UseExtendedHashes = extendedHashes;
    Queue = queue;
}
2020-03-23 12:57:18 +00:00
/// <summary>
/// Creates a <see cref="TemporaryDirectory"/> whose path is a freshly generated GUID placed under
/// <see cref="StagingFolder"/>.
/// </summary>
/// <returns>The new temporary directory wrapper.</returns>
public static TemporaryDirectory GetTemporaryFolder()
{
    var uniqueName = (RelativePath)Guid.NewGuid().ToString();
    var location = uniqueName.RelativeTo(StagingFolder);
    return new TemporaryDirectory(location);
}
2020-03-23 12:57:18 +00:00
/// <summary>
/// Indexes every file enumerated under <paramref name="root"/> and rebuilds <see cref="Index"/> from the
/// still-existing native entries plus the results. An existing entry is reused when its recorded
/// last-modified time and size both match the file on disk; otherwise the file is analyzed again.
/// </summary>
/// <param name="root">Folder whose files are enumerated and indexed.</param>
/// <returns>The newly built index, which is also stored in <see cref="Index"/>.</returns>
public async Task<IndexRoot> AddRoot(AbsolutePath root)
{
    // The staging folder cleanup started by the static constructor must be finished first.
    await _cleanupTask;

    // Native entries of the current index whose file is still present.
    var surviving = Index.AllFiles
        .Where(file => file.IsNative && ((AbsolutePath) file.Name).Exists)
        .ToList();
    var knownByName = surviving.ToImmutableDictionary(f => f.Name);

    var onDisk = root.EnumerateFiles().Distinct().ToList();
    var indexed = await onDisk.PMap(Queue, async path =>
    {
        var unchanged = knownByName.TryGetValue(path, out var cached)
                        && cached.LastModified == path.LastModifiedUtc.AsUnixTime()
                        && cached.Size == path.Size;
        if (unchanged)
            return cached;

        return await VirtualFile.Analyze(this, null, new NativeFileStreamFactory(path), path, 0);
    });

    // Start from an empty root so entries that were filtered out above are dropped.
    var rebuilt = await IndexRoot.Empty.Integrate(surviving.Concat(indexed).ToList());
    lock (this)
    {
        Index = rebuilt;
    }

    return rebuilt;
}
2020-03-23 12:57:18 +00:00
/// <summary>
/// Indexes every file enumerated under each of <paramref name="roots"/> and rebuilds <see cref="Index"/>
/// from the still-existing entries plus the results. A native entry is reused when its recorded
/// last-modified time and size both match the file on disk; otherwise the file is analyzed again.
/// </summary>
/// <param name="roots">Folders whose files are enumerated and indexed.</param>
/// <returns>The newly built index, which is also stored in <see cref="Index"/>.</returns>
public async Task<IndexRoot> AddRoots(List<AbsolutePath> roots)
{
    // The staging folder cleanup started by the static constructor must be finished first.
    await _cleanupTask;

    // Lookup of native entries, keyed by the base of their full path.
    var nativeByPath = Index.AllFiles
        .Where(file => file.IsNative)
        .ToDictionary(file => file.FullPath.Base);

    // Entries of the current index whose file is still present.
    var surviving = Index.AllFiles
        .Where(file => ((AbsolutePath)file.Name).Exists)
        .ToList();

    var onDisk = roots.SelectMany(root => root.EnumerateFiles()).ToList();
    var indexed = await onDisk.PMap(Queue, async path =>
    {
        Utils.Status($"Indexing {Path.GetFileName((string)path)}");

        var unchanged = nativeByPath.TryGetValue(path, out var cached)
                        && cached.LastModified == path.LastModifiedUtc.AsUnixTime()
                        && cached.Size == path.Size;
        if (unchanged)
            return cached;

        return await VirtualFile.Analyze(this, null, new NativeFileStreamFactory(path), path, 0);
    });

    // Start from an empty root so entries that were filtered out above are dropped.
    var rebuilt = await IndexRoot.Empty.Integrate(surviving.Concat(indexed).ToList());
    lock (this)
    {
        Index = rebuilt;
    }

    return rebuilt;
}
/// <summary>
/// A mutable reference-type holder for a single value, so that a lambda can update a value
/// through a captured reference.
/// </summary>
class Box<T>
{
    private T _value;

    /// <summary>The held value; starts out as <c>default(T)</c>.</summary>
    public T Value
    {
        get => _value;
        set => _value = value;
    }
}
/// <summary>
/// Builds an observable transform that passes items through unchanged and, for each item, publishes
/// (<paramref name="s"/>, items seen so far / <paramref name="totalCount"/>) on the progress subject.
/// </summary>
/// <param name="s">Title reported with each progress update.</param>
/// <param name="totalCount">Expected number of items; a value of 0 is treated as 1 to avoid dividing by zero.</param>
/// <returns>A function that wraps an observable with the progress-reporting projection.</returns>
private Func<IObservable<T>, IObservable<T>> ProgressUpdater<T>(string s, float totalCount)
{
    var denominator = totalCount == 0 ? 1 : totalCount;

    // One counter per returned transform, shared by every subscription made through it.
    var seen = new Box<float>();

    IObservable<T> Attach(IObservable<T> source)
    {
        return source.Select(item =>
        {
            seen.Value += 1;
            _progressUpdates.OnNext((s, seen.Value / denominator));
            return item;
        });
    }

    return Attach;
}
/// <summary>
/// Writes the current index to <paramref name="filename"/> as a cache file: the ASCII bytes of
/// <see cref="Magic"/>, <see cref="FileVersion"/>, the number of top-level files, and then one
/// length-prefixed record per file.
/// </summary>
/// <param name="filename">Destination file; it is created and truncated to zero length before writing.</param>
public async Task WriteToFile(AbsolutePath filename)
{
    await using var fs = await filename.Create();
    await using var bw = new BinaryWriter(fs, Encoding.UTF8, true);
    fs.SetLength(0);

    // Take one snapshot of the file list. Reading Index twice could pair a header count from one
    // index with the records of another if Index is replaced while this method runs.
    var files = Index.AllFiles;

    bw.Write(Encoding.ASCII.GetBytes(Magic));
    bw.Write(FileVersion);
    bw.Write((ulong) files.Count);

    await (await files
            .PMap(Queue, f =>
            {
                // Serialize each file into its own buffer so the records can be produced via PMap.
                var ms = new MemoryStream();
                using var ibw = new BinaryWriter(ms, Encoding.UTF8, true);
                f.Write(ibw);
                return ms;
            }))
        .DoAsync(async ms =>
        {
            // Each buffer is owned here; release it once its bytes are in the file.
            using (ms)
            {
                // Position sits just past the last byte written, i.e. the record length.
                var size = ms.Position;
                ms.Position = 0;
                bw.Write((ulong) size);
                await ms.CopyToAsync(fs);
            }
        });

    Utils.Log($"Wrote {fs.Position.ToFileSizeString()} file as vfs cache file {filename}");
}
/// <summary>
/// Reads a cache file in the layout produced by <see cref="WriteToFile"/> and integrates its files into
/// <see cref="Index"/>.
/// </summary>
/// <param name="filename">The cache file to read.</param>
/// <exception cref="InvalidDataException">
/// The magic marker or version does not match, or a record length cannot be represented as an
/// <see cref="int"/>. This exception type does not derive from <see cref="IOException"/>, so it
/// propagates to the caller and the file is left in place.
/// </exception>
/// <remarks>
/// Any <see cref="IOException"/> (including a truncated file) is swallowed and the cache file is deleted.
/// </remarks>
public async Task IntegrateFromFile(AbsolutePath filename)
{
    try
    {
        await using var fs = await filename.OpenRead();
        using var br = new BinaryReader(fs, Encoding.UTF8, true);

        var magic = Encoding.ASCII.GetString(br.ReadBytes(Encoding.ASCII.GetBytes(Magic).Length));
        var fileVersion = br.ReadUInt64();
        if (fileVersion != FileVersion || magic != Magic)
            throw new InvalidDataException("Bad Data Format");

        var numFiles = br.ReadUInt64();
        var files = Enumerable.Range(0, (int) numFiles)
            .Select(idx =>
            {
                var size = br.ReadUInt64();
                if (size > int.MaxValue)
                    throw new InvalidDataException($"VFS cache record {idx} declares an invalid size of {size} bytes");

                // Stream.Read may return fewer bytes than requested. ReadBytes keeps reading until the
                // count is satisfied or the stream ends, so a short result here means truncation.
                var bytes = br.ReadBytes((int) size);
                if ((ulong) bytes.Length != size)
                    throw new EndOfStreamException($"VFS cache record {idx} is truncated");

                return VirtualFile.Read(this, bytes);
            }).ToList();

        var newIndex = await Index.Integrate(files);
        lock (this)
        {
            Index = newIndex;
        }
    }
    catch (IOException)
    {
        // The stream opened above has been disposed by the time this handler runs.
        await filename.DeleteAsync();
    }
}
2020-09-04 21:00:29 +00:00
/// <summary>
/// Extracts the requested files and invokes <paramref name="callback"/> for each of them, handing it the
/// virtual file and an <see cref="IExtractedFile"/> for its contents. A file that contains other requested
/// files is unpacked with <c>FileExtractor2.GatheringExtract</c>, recursing as deep as the requested paths go.
/// Top-level (native) files are dispatched through <c>PMap</c> on <paramref name="queue"/>. The files on the
/// paths to the requested files are de-duplicated before they are grouped by parent.
/// </summary>
/// <param name="queue">Work queue used for the top-level map and passed on to the extractor.</param>
/// <param name="files">The files to extract. An empty set makes the method return without doing anything.</param>
/// <param name="callback">Invoked for every file in <paramref name="files"/> once its contents are available.</param>
/// <param name="tempFolder">Optional folder passed through to the extractor.</param>
/// <param name="updateTracker">Tracker passed to the top-level map; a new <c>StatusUpdateTracker(1)</c> is used when null.</param>
/// <returns>A task that completes when every top-level file has been handled.</returns>
public async Task Extract(WorkQueue queue, HashSet<VirtualFile> files, Func<VirtualFile, IExtractedFile, ValueTask> callback, AbsolutePath? tempFolder = null, StatusUpdateTracker updateTracker = null)
{
    // With nothing requested there is no top-level group, and the lookup at the end would throw.
    if (files.Count == 0)
        return;

    // Stand-in parent so files without a parent can be grouped under a non-null key.
    var top = new VirtualFile();

    var filesByParent = files.SelectMany(f => f.FilesInFullPath)
        .Distinct()
        .GroupBy(f => f.Parent ?? top)
        .ToDictionary(f => f.Key);

    async Task HandleFile(VirtualFile file, IExtractedFile sfn)
    {
        var hasChildren = filesByParent.TryGetValue(file, out var children);

        // This file still has to be unpacked below, so the callback must not move it away.
        if (hasChildren)
            sfn.CanMove = false;

        if (files.Contains(file)) await callback(file, sfn);

        if (!hasChildren)
            return;

        var fileNames = children.ToDictionary(c => c.RelativeName);
        try
        {
            await FileExtractor2.GatheringExtract(queue, sfn,
                r => fileNames.ContainsKey(r),
                async (rel, csf) =>
                {
                    await HandleFile(fileNames[rel], csf);
                    return 0;
                },
                tempFolder: tempFolder,
                onlyFiles: fileNames.Keys.ToHashSet());
        }
        catch (_7zipReturnError)
        {
            // Re-hash the archive to tell a corrupt file apart from any other extractor failure.
            await using var stream = await sfn.GetStream();
            var hash = await stream.xxHashAsync();
            if (hash != file.Hash)
            {
                throw new Exception($"File {file.FullPath} is corrupt, please delete it and retry the installation");
            }

            throw;
        }
    }

    updateTracker ??= new StatusUpdateTracker(1);
    await filesByParent[top].PMap(queue, updateTracker, async file => await HandleFile(file, new ExtractedNativeFile(file.AbsoluteName) {CanMove = false}));
}
2019-11-15 13:37:04 +00:00
#region KnownFiles
// Paths registered through AddKnown; consumed and reset by BackfillMissing.
private List<HashRelativePath> _knownFiles = new List<HashRelativePath>();

// Archive hash -> location, registered through AddKnown. The first location seen for a hash is kept.
private Dictionary<Hash, AbsolutePath> _knownArchives = new Dictionary<Hash, AbsolutePath>();

/// <summary>
/// Registers files and archives for a later <see cref="BackfillMissing"/> call.
/// </summary>
/// <param name="known">Hash-relative paths to remember; appended to the pending list.</param>
/// <param name="archives">Archive locations by hash; hashes that are already registered are left untouched.</param>
public void AddKnown(IEnumerable<HashRelativePath> known, Dictionary<Hash, AbsolutePath> archives)
{
    _knownFiles.AddRange(known);

    foreach (var entry in archives)
    {
        _knownArchives.TryAdd(entry.Key, entry.Value);
    }
}
2019-12-07 02:54:27 +00:00
/// <summary>
/// Builds virtual files for everything registered through <see cref="AddKnown"/> and integrates them into
/// <see cref="Index"/>: one top-level file per known archive, plus a chain of child files for each path
/// segment of every known file. The pending list of known files is reset afterwards.
/// </summary>
public async Task BackfillMissing()
{
    // One top-level virtual file per known archive, keyed by the archive hash.
    var archiveRoots = _knownArchives.ToDictionary(
        pair => pair.Key,
        pair => new VirtualFile
        {
            Name = pair.Value,
            Size = pair.Value.Size,
            Hash = pair.Key,
        });
    foreach (var archive in archiveRoots.Values)
    {
        archive.FillFullPath(0);
    }

    // Children created so far, keyed by (parent, name), so shared path prefixes reuse the same node.
    var linked = new Dictionary<(VirtualFile, RelativePath), VirtualFile>();

    void BackFillOne(HashRelativePath file)
    {
        var cursor = archiveRoots[file.BaseHash];
        foreach (var segment in file.Paths)
        {
            if (!linked.TryGetValue((cursor, segment), out var node))
            {
                node = new VirtualFile {Name = segment, Parent = cursor};
                node.FillFullPath();
                cursor.Children = cursor.Children.Add(node);
                linked.Add((cursor, segment), node);
            }

            // Descend, whether the node was just created or already existed.
            cursor = node;
        }
    }

    foreach (var known in _knownFiles)
    {
        // Entries without path segments refer to the archive itself; nothing to add below it.
        if (known.Paths.Length > 0)
            BackFillOne(known);
    }

    var merged = await Index.Integrate(archiveRoots.Values.ToList());

    lock (this)
    {
        Index = merged;
    }

    _knownFiles = new List<HashRelativePath>();
}
2019-11-15 13:37:04 +00:00
#endregion
2019-11-15 13:06:34 +00:00
}
/// <summary>
/// A <see cref="List{T}"/> that runs a caller-supplied action every time it is disposed.
/// </summary>
public class DisposableList<T> : List<T>, IDisposable
{
    // Invoked on each Dispose call; no "already disposed" state is tracked.
    private Action _unstage;

    /// <summary>Copies <paramref name="files"/> into the list and remembers <paramref name="unstage"/>.</summary>
    /// <param name="unstage">Action to run on <see cref="Dispose"/>.</param>
    /// <param name="files">Initial contents of the list.</param>
    public DisposableList(Action unstage, IEnumerable<T> files) : base(files) => _unstage = unstage;

    /// <summary>Runs the action supplied to the constructor.</summary>
    public void Dispose() => _unstage();
}
/// <summary>
/// A <see cref="List{T}"/> that awaits a caller-supplied asynchronous callback every time it is disposed.
/// </summary>
public class AsyncDisposableList<T> : List<T>, IAsyncDisposable
{
    // Invoked and awaited on each DisposeAsync call; no "already disposed" state is tracked.
    private Func<Task> _unstage;

    /// <summary>Copies <paramref name="files"/> into the list and remembers <paramref name="unstage"/>.</summary>
    /// <param name="unstage">Callback to await on <see cref="DisposeAsync"/>.</param>
    /// <param name="files">Initial contents of the list.</param>
    public AsyncDisposableList(Func<Task> unstage, IEnumerable<T> files) : base(files) => _unstage = unstage;

    /// <summary>Invokes the callback supplied to the constructor and awaits its task.</summary>
    public async ValueTask DisposeAsync() => await _unstage();
}
/// <summary>
/// A snapshot of the virtual file system: the list of top-level files plus lookup tables over those
/// files and all of their children. <see cref="Integrate"/> builds a new snapshot instead of modifying
/// this one.
/// </summary>
public class IndexRoot
{
    /// <summary>The shared empty snapshot. Read-only so that no caller can replace it for everyone else.</summary>
    public static readonly IndexRoot Empty = new IndexRoot();

    /// <summary>Creates a snapshot from an already computed file list and lookup tables.</summary>
    public IndexRoot(ImmutableList<VirtualFile> aFiles,
        Dictionary<FullPath, VirtualFile> byFullPath,
        ImmutableDictionary<Hash, ImmutableStack<VirtualFile>> byHash,
        ImmutableDictionary<AbsolutePath, VirtualFile> byRoot,
        ImmutableDictionary<IPath, ImmutableStack<VirtualFile>> byName)
    {
        AllFiles = aFiles;
        ByFullPath = byFullPath;
        ByHash = byHash;
        ByRootPath = byRoot;
        ByName = byName;
    }

    /// <summary>Creates a snapshot with no files and empty lookup tables.</summary>
    public IndexRoot()
    {
        AllFiles = ImmutableList<VirtualFile>.Empty;
        ByFullPath = new Dictionary<FullPath, VirtualFile>();
        ByHash = ImmutableDictionary<Hash, ImmutableStack<VirtualFile>>.Empty;
        ByRootPath = ImmutableDictionary<AbsolutePath, VirtualFile>.Empty;
        ByName = ImmutableDictionary<IPath, ImmutableStack<VirtualFile>>.Empty;
    }

    /// <summary>The top-level files of the snapshot, one per distinct full path.</summary>
    public ImmutableList<VirtualFile> AllFiles { get; }

    /// <summary>Every file (top-level and nested) by its full path.</summary>
    public Dictionary<FullPath, VirtualFile> ByFullPath { get; }

    /// <summary>Every file with a non-empty hash, grouped by that hash.</summary>
    public ImmutableDictionary<Hash, ImmutableStack<VirtualFile>> ByHash { get; }

    /// <summary>Every file (top-level and nested), grouped by name.</summary>
    public ImmutableDictionary<IPath, ImmutableStack<VirtualFile>> ByName { get; set; }

    /// <summary>The top-level files by their absolute name.</summary>
    public ImmutableDictionary<AbsolutePath, VirtualFile> ByRootPath { get; }

    /// <summary>
    /// Builds a new snapshot from this snapshot's top-level files plus <paramref name="files"/>.
    /// When several entries share a full path, the one with the greatest <c>LastModified</c> is kept;
    /// among equal timestamps the entry that comes later in the combined input wins.
    /// </summary>
    /// <param name="files">Top-level files to merge in.</param>
    /// <returns>The merged snapshot; this instance is not modified.</returns>
    public async Task<IndexRoot> Integrate(ICollection<VirtualFile> files)
    {
        Utils.Log($"Integrating {files.Count} files");

        // The descending sort is kept because it fixes the order of the resulting list. The original
        // then took g.Last(), which on a descending sequence is the OLDEST duplicate, so a freshly
        // re-analyzed file lost against its stale predecessor. The inner stable ascending sort picks
        // the newest entry instead and keeps "later input wins" for equal timestamps.
        var allFiles = AllFiles.Concat(files)
            .OrderByDescending(f => f.LastModified)
            .GroupBy(f => f.FullPath)
            .Select(g => g.OrderBy(f => f.LastModified).Last())
            .ToImmutableList();

        // The four lookup tables are independent of each other, so they are built concurrently.
        var byFullPath = Task.Run(() => allFiles.SelectMany(f => f.ThisAndAllChildren)
            .ToDictionary(f => f.FullPath));

        var byHash = Task.Run(() => allFiles.SelectMany(f => f.ThisAndAllChildren)
            .Where(f => f.Hash != Hash.Empty)
            .ToGroupedImmutableDictionary(f => f.Hash));

        var byName = Task.Run(() => allFiles.SelectMany(f => f.ThisAndAllChildren)
            .ToGroupedImmutableDictionary(f => f.Name));

        var byRootPath = Task.Run(() => allFiles.ToImmutableDictionary(f => f.AbsoluteName));

        var result = new IndexRoot(allFiles,
            await byFullPath,
            await byHash,
            await byRootPath,
            await byName);
        Utils.Log($"Done integrating");
        return result;
    }

    /// <summary>
    /// Resolves a hash-relative path: starts at the parentless file with the path's base hash and, for each
    /// path segment, steps to the file of that name whose parent is the current file.
    /// </summary>
    /// <param name="argArchiveHashPath">Base hash plus the chain of names to follow.</param>
    /// <returns>The file at the end of the chain.</returns>
    /// <remarks>The dictionary lookups and <c>First</c> calls throw when a hash, name or link is not in the index.</remarks>
    public VirtualFile FileForArchiveHashPath(HashRelativePath argArchiveHashPath)
    {
        var cur = ByHash[argArchiveHashPath.BaseHash].First(f => f.Parent == null);
        return argArchiveHashPath.Paths.Aggregate(cur, (current, itm) => ByName[itm].First(f => f.Parent == current));
    }
}
2020-04-01 23:59:22 +00:00
/// <summary>
/// A directory that is created on construction (unless it already exists) and deleted again when the
/// instance is disposed asynchronously.
/// </summary>
public class TemporaryDirectory : IAsyncDisposable
{
    /// <summary>Wraps <paramref name="name"/>, creating the directory when it is not there yet.</summary>
    /// <param name="name">Path of the directory to manage.</param>
    public TemporaryDirectory(AbsolutePath name)
    {
        FullName = name;

        if (FullName.Exists)
            return;

        FullName.CreateDirectory();
    }

    /// <summary>The path handed to the constructor.</summary>
    public AbsolutePath FullName { get; }

    /// <summary>Deletes the directory if it still exists.</summary>
    public async ValueTask DisposeAsync()
    {
        if (!FullName.Exists)
            return;

        await Utils.DeleteDirectory(FullName);
    }
}
}