wabbajack/Wabbajack.VirtualFileSystem/VirtualFile.cs

514 lines
17 KiB
C#
Raw Normal View History

using System;
using System.Collections.Generic;
using System.Collections.Immutable;
2021-01-05 22:09:32 +00:00
using System.Data;
using System.Data.SQLite;
using System.IO;
using System.Linq;
using System.Net.Http;
using System.Threading.Tasks;
using Compression.BSA;
2020-09-06 03:19:05 +00:00
using ICSharpCode.SharpZipLib.Zip.Compression.Streams;
using K4os.Hash.Crc;
using Wabbajack.Common;
2021-06-16 05:16:25 +00:00
using Wabbajack.Common.FileSignatures;
using Wabbajack.ImageHashing;
namespace Wabbajack.VirtualFileSystem
{
public class VirtualFile
{
// File that holds the shared analysis cache, placed under Consts.LocalAppDataPath.
private static AbsolutePath DBLocation = Consts.LocalAppDataPath.Combine("GlobalVFSCache3.sqlite");

// Connection string derived from DBLocation, and the one connection every cache query in this class uses.
private static string _connectionString;
private static SQLiteConnection _conn;

/// <summary>
/// Opens the cache database and creates the <c>VFSCache</c> table (hash key, blob contents) if it is missing.
/// </summary>
static VirtualFile()
{
    var connectionString = $"URI=file:{DBLocation};Pooling=True;Max Pool Size=100; Journal Mode=Memory;";
    _connectionString = string.Intern(connectionString);

    _conn = new SQLiteConnection(_connectionString);
    _conn.Open();

    using (var createTable = new SQLiteCommand(_conn))
    {
        createTable.CommandText = @"CREATE TABLE IF NOT EXISTS VFSCache (
Hash BIGINT PRIMARY KEY,
Contents BLOB)
WITHOUT ROWID";
        createTable.ExecuteNonQuery();
    }
}
2020-03-24 12:21:19 +00:00
/// <summary>Memoized result of <see cref="ThisAndAllChildren"/>; stays null until that property is first read.</summary>
private IEnumerable<VirtualFile> _thisAndAllChildren;

/// <summary>
/// Path of this file. <see cref="RelativeName"/> and <see cref="AbsoluteName"/> cast it to a concrete path type.
/// </summary>
public IPath Name { get; internal set; }

/// <summary><see cref="Name"/> cast to a <see cref="RelativePath"/>.</summary>
public RelativePath RelativeName => (RelativePath)Name;

/// <summary><see cref="Name"/> cast to an <see cref="AbsolutePath"/>.</summary>
public AbsolutePath AbsoluteName => (AbsolutePath)Name;

/// <summary>Full path of this file; assigned by <c>FillFullPath</c> and by the binary <c>Read</c> overloads.</summary>
public FullPath FullPath { get; private set; }

/// <summary>Content hash of the file; also the key used for the SQLite cache lookup and insert.</summary>
public Hash Hash { get; internal set; }

/// <summary>Image state; <c>Analyze</c> only fills it for texture files that pass the signature check.</summary>
public ImageState ImageState { get; internal set; }

/// <summary>Additional hashes; <c>Analyze</c> only fills them when <c>Context.UseExtendedHashes</c> is set.</summary>
public ExtendedHashes ExtendedHashes { get; set; }

/// <summary>Size of the file in bytes (the stream length seen during analysis).</summary>
public long Size { get; internal set; }

/// <summary>Last-modified time of the source stream factory, converted with <c>AsUnixTime</c>.</summary>
public ulong LastModified { get; internal set; }

/// <summary>Time the file was analyzed, converted with <c>AsUnixTime</c>.</summary>
public ulong LastAnalyzed { get; internal set; }

/// <summary>File that contains this one, or null when this file is not nested in another.</summary>
public VirtualFile Parent { get; internal set; }

/// <summary>Context this file belongs to; <c>InSameFolder</c> resolves paths through its index.</summary>
public Context Context { get; set; }
/// <summary>
/// Number of files on the chain from this file up to its top-most ancestor, this file included.
/// A file without a parent reports 1; every enclosing archive adds one more.
/// </summary>
public int NestingFactor
{
    get
    {
        var levels = 0;
        for (var node = this; node != null; node = node.Parent)
        {
            levels++;
        }

        return levels;
    }
}
/// <summary>Files contained directly in this one; starts out as the empty list.</summary>
public ImmutableList<VirtualFile> Children { get; internal set; } = ImmutableList<VirtualFile>.Empty;
/// <summary>True when <see cref="Children"/> is non-null and holds at least one file.</summary>
public bool IsArchive => Children != null && Children.Count > 0;
/// <summary>True when this file has no <see cref="Parent"/>.</summary>
public bool IsNative => Parent == null;
2019-11-24 23:03:36 +00:00
/// <summary>
/// This file plus every file nested below it. Descendants come first and this file is last.
/// The list is built on first access and reused afterwards.
/// </summary>
public IEnumerable<VirtualFile> ThisAndAllChildren
{
    get
    {
        if (_thisAndAllChildren != null)
        {
            return _thisAndAllChildren;
        }

        var descendants = Children.SelectMany(child => child.ThisAndAllChildren);
        _thisAndAllChildren = descendants.Append(this).ToList();
        return _thisAndAllChildren;
    }
}
/// <summary>
/// Every file on the path to this one, enumerated from the root file down to this file.
/// </summary>
public IEnumerable<VirtualFile> FilesInFullPath
{
    get
    {
        // Pushing while walking upwards leaves the root on top of the stack,
        // so enumeration starts at the root.
        var chain = ImmutableStack<VirtualFile>.Empty;
        for (var node = this; node != null; node = node.Parent)
        {
            chain = chain.Push(node);
        }

        return chain;
    }
}
2020-03-24 12:21:19 +00:00
/// <summary>The top-most ancestor of this file: the first file on the parent chain that has no parent.</summary>
public VirtualFile TopParent
{
    get
    {
        var node = this;
        while (!node.IsNative)
        {
            node = node.Parent;
        }

        return node;
    }
}
2020-03-24 12:21:19 +00:00
/// <summary>
/// Folds <paramref name="fn"/> over this file and then, depth first, over every file nested below it.
/// </summary>
/// <param name="acc">Starting accumulator value.</param>
/// <param name="fn">Combines the running accumulator with one file and returns the new accumulator.</param>
/// <returns>The accumulator after this file and all of its descendants have been visited.</returns>
public T ThisAndAllChildrenReduced<T>(T acc, Func<T, VirtualFile, T> fn)
{
    var afterSelf = fn(acc, this);
    return Children.Aggregate(afterSelf, (running, child) =>
    {
        return child.ThisAndAllChildrenReduced(running, fn);
    });
}
/// <summary>
/// Invokes <paramref name="fn"/> on this file and then, depth first, on every file nested below it.
/// </summary>
/// <param name="fn">Callback run once per visited file.</param>
public void ThisAndAllChildrenReduced(Action<VirtualFile> fn)
{
    fn(this);
    Children.ForEach(child => child.ThisAndAllChildrenReduced(fn));
}
/// <summary>
/// Rebuilds a <see cref="VirtualFile"/> tree from a cached <see cref="IndexedVirtualFile"/>.
/// </summary>
/// <param name="context">Context assigned to every created file.</param>
/// <param name="file">Cached entry supplying size, hash, image state and children.</param>
/// <param name="path">Name given to the created file.</param>
/// <param name="vparent">Parent of the created file; may be null.</param>
/// <param name="extractedFile">Source of the last-modified time, reused for every nested child.</param>
/// <returns>The created file with its full path filled in and its children converted recursively.</returns>
private static VirtualFile ConvertFromIndexedFile(Context context, IndexedVirtualFile file, IPath path, VirtualFile vparent, IStreamFactory extractedFile)
{
    var converted = new VirtualFile
    {
        Context = context,
        Name = path,
        Parent = vparent,
        Size = file.Size,
        LastModified = extractedFile.LastModifiedUtc.AsUnixTime(),
        LastAnalyzed = DateTime.Now.AsUnixTime(),
        Hash = file.Hash,
        ImageState = file.ImageState
    };

    // The full path must exist before the children are built: each child walks up through this file.
    converted.FillFullPath();

    var convertedChildren = file.Children.Select(indexedChild =>
        ConvertFromIndexedFile(context, indexedChild, indexedChild.Name, converted, extractedFile));
    converted.Children = convertedChildren.ToImmutableList();
    return converted;
}
/// <summary>
/// Looks <paramref name="hash"/> up in the <c>VFSCache</c> table and, on a hit, rebuilds the cached file tree.
/// </summary>
/// <param name="context">Context assigned to the rebuilt files.</param>
/// <param name="parent">Parent for the rebuilt top-level file; may be null.</param>
/// <param name="path">Name for the rebuilt top-level file.</param>
/// <param name="extractedFile">Stream factory passed on to the conversion for the last-modified time.</param>
/// <param name="hash">Hash to look up; also stored on the rebuilt file.</param>
/// <param name="found">The rebuilt file on a hit, otherwise the default value.</param>
/// <returns>True when a cached row existed for the hash.</returns>
private static bool TryGetFromCache(Context context, VirtualFile parent, IPath path, IStreamFactory extractedFile, Hash hash, out VirtualFile found)
{
    using var query = new SQLiteCommand(_conn);
    query.CommandText = @"SELECT Contents FROM VFSCache WHERE Hash = @hash";
    query.Parameters.AddWithValue("@hash", (long)hash);

    using var rows = query.ExecuteReader();
    if (!rows.Read())
    {
        found = default;
        return false;
    }

    var cached = IndexedVirtualFile.Read(rows.GetStream(0));
    found = ConvertFromIndexedFile(context, cached, path, parent, extractedFile);
    found.Name = path;
    found.Hash = hash;
    return true;
}
/// <summary>
/// Converts this file and, recursively, its children into the <see cref="IndexedVirtualFile"/> form
/// that is written to the cache.
/// </summary>
private IndexedVirtualFile ToIndexedVirtualFile()
{
    var indexedChildren = Children.Select(child => child.ToIndexedVirtualFile()).ToList();

    var indexed = new IndexedVirtualFile
    {
        Hash = Hash,
        ImageState = ImageState,
        Name = Name,
        Children = indexedChildren,
        Size = Size
    };
    return indexed;
}
// Signature checker that Analyze runs on texture files before handing them to the image hasher.
// It is built from FileType.DDS to match the field name and its use on textures; the previous
// FileType.DSS looked like a transposed identifier. NOTE(review): confirm the enum member name.
private static SignatureChecker DDSSig = new(Definitions.FileType.DDS);
2020-09-04 21:00:29 +00:00
/// <summary>
/// Builds the <see cref="VirtualFile"/> for <paramref name="extractedFile"/>, recursing into it when it is an
/// extractable archive.
/// </summary>
/// <remarks>
/// The file is hashed first and the hash is looked up in the SQLite cache; a hit is returned without inspecting
/// the file further. On a miss the file is probed for an archive signature, optionally image-hashed and
/// extended-hashed, its contents are analyzed with <paramref name="depth"/> + 1, and the result is cached.
/// </remarks>
/// <param name="context">Context assigned to the created files; supplies the work queue and hashing options.</param>
/// <param name="parent">File that contains this one, or null for a top-level file.</param>
/// <param name="extractedFile">Factory for streams over the file's contents.</param>
/// <param name="relPath">Name stored on the created file.</param>
/// <param name="depth">Number of ancestors above this file; 0 for a top-level file.</param>
/// <returns>The analyzed file, with <see cref="Children"/> populated when it could be extracted.</returns>
public static async Task<VirtualFile> Analyze(Context context, VirtualFile parent, IStreamFactory extractedFile,
    IPath relPath, int depth = 0)
{
    Hash hash;
    if (extractedFile is NativeFileStreamFactory)
    {
        hash = await ((AbsolutePath)extractedFile.Name).FileHashCachedAsync() ?? Hash.Empty;
    }
    else
    {
        await using var hstream = await extractedFile.GetStream();
        hash = await hstream.xxHashAsync();
    }

    if (TryGetFromCache(context, parent, relPath, extractedFile, hash, out var vself))
    {
        return vself;
    }

    await using var stream = await extractedFile.GetStream();
    var sig = await FileExtractor2.ArchiveSigs.MatchesAsync(stream);
    stream.Position = 0;

    var self = new VirtualFile
    {
        Context = context,
        Name = relPath,
        Parent = parent,
        Size = stream.Length,
        LastModified = extractedFile.LastModifiedUtc.AsUnixTime(),
        LastAnalyzed = DateTime.Now.AsUnixTime(),
        Hash = hash,
    };

    if (Consts.TextureExtensions.Contains(relPath.FileName.Extension) && (await DDSSig.MatchesAsync(stream)) != null)
    {
        try
        {
            // Rewind after the signature probe, as is done after the archive probe above, so the
            // image reader starts at the beginning of the file.
            stream.Position = 0;
            self.ImageState = await ImageState.FromImageStream(stream, relPath.FileName.Extension, false);
            stream.Position = 0;
        }
        catch (Exception)
        {
            // parent is null for top-level files; dereferencing it here would replace the real
            // failure with a NullReferenceException.
            var container = parent?.FullPath.ToString() ?? "<top level>";
            Utils.Log($"Unable to perform perceptual hashing on {relPath.FileName} in {container}");
            throw;
        }
    }

    self.FillFullPath(depth);

    if (context.UseExtendedHashes)
    {
        self.ExtendedHashes = await ExtendedHashes.FromStream(stream);
    }

    // Can't extract, so return
    if (!sig.HasValue || !FileExtractor2.ExtractableExtensions.Contains(relPath.FileName.Extension))
    {
        await WriteToCache(self);
        return self;
    }

    try
    {
        var list = await FileExtractor2.GatheringExtract(context.Queue, extractedFile,
            _ => true,
            async (path, sfactory) => await Analyze(context, self, sfactory, path, depth + 1));

        self.Children = list.Values.ToImmutableList();
    }
    catch (EndOfStreamException)
    {
        // The result is returned as-is and is deliberately not written to the cache.
        return self;
    }
    catch (Exception)
    {
        Utils.Log($"Error while examining the contents of {relPath.FileName}");
        throw;
    }

    await WriteToCache(self);
    return self;
}
/// <summary>
/// Serializes <paramref name="self"/> in its indexed form and stores it in the cache under its hash.
/// </summary>
private static async Task WriteToCache(VirtualFile self)
{
    var indexed = self.ToIndexedVirtualFile();

    // The top-level name is replaced when the entry is read back. Storing the absolute path
    // here would make the reader fail when it converts the value, so a placeholder is written.
    indexed.Name = (RelativePath)"not/applicable";

    await using var buffer = new MemoryStream();
    indexed.Write(buffer);
    buffer.Position = 0;

    await InsertIntoVFSCache(self.Hash, buffer);
}
2021-01-05 22:09:32 +00:00
/// <summary>
/// Inserts one row into the <c>VFSCache</c> table.
/// </summary>
/// <param name="hash">Primary key of the row.</param>
/// <param name="data">Serialized entry; its full contents are stored as the blob.</param>
/// <remarks>
/// A failure whose message starts with "constraint failed" is ignored, so a hash that is already
/// cached does not fail the caller. Any other <see cref="SQLiteException"/> propagates.
/// </remarks>
private static async Task InsertIntoVFSCache(Hash hash, MemoryStream data)
{
    await using var cmd = new SQLiteCommand(_conn);
    cmd.CommandText = @"INSERT INTO VFSCache (Hash, Contents) VALUES (@hash, @contents)";
    cmd.Parameters.AddWithValue("@hash", (long)hash);
    var val = new SQLiteParameter("@contents", DbType.Binary) {Value = data.ToArray()};
    cmd.Parameters.Add(val);

    try
    {
        await cmd.ExecuteNonQueryAsync();
    }
    // Ordinal comparison: the message is a fixed provider string, not culture-dependent text.
    catch (SQLiteException ex) when (ex.Message.StartsWith("constraint failed", StringComparison.Ordinal))
    {
        // Hash is the only constrained column, so the row is already present.
    }
}
/// <summary>
/// Runs <c>VACUUM</c> on the cache database through the shared connection.
/// </summary>
public static void VacuumDatabase()
{
    using var cmd = new SQLiteCommand(_conn);
    cmd.CommandText = @"VACUUM";
    // Synchronous Prepare: this method is not async, so the task returned by PrepareAsync
    // was being discarded without being awaited.
    cmd.Prepare();
    cmd.ExecuteNonQuery();
}
2021-01-05 22:09:32 +00:00
2020-04-24 13:56:03 +00:00
/// <summary>
/// Counts the ancestors above this file and fills <see cref="FullPath"/> for that depth.
/// </summary>
internal void FillFullPath()
{
    var ancestors = 0;
    for (var node = this; node.Parent != null; node = node.Parent)
    {
        ancestors++;
    }

    FillFullPath(ancestors);
}
/// <summary>
/// Fills <see cref="FullPath"/> for a file that has <paramref name="depth"/> ancestors.
/// </summary>
/// <param name="depth">
/// 0 when <see cref="Name"/> is itself the absolute path; otherwise the number of parent links to
/// follow to reach the file that carries the absolute path.
/// </param>
internal void FillFullPath(int depth)
{
    if (depth == 0)
    {
        FullPath = new FullPath((AbsolutePath)Name);
        return;
    }

    // Walk upwards, filling the array from the back so it ends up ordered outermost first.
    var segments = new RelativePath[depth];
    var node = this;
    var slot = depth;
    while (slot != 0)
    {
        segments[slot - 1] = node.RelativeName;
        node = node.Parent;
        slot--;
    }

    FullPath = new FullPath(node.AbsoluteName, segments);
}
2020-03-22 15:50:53 +00:00
/// <summary>
/// Asks the Wabbajack cache host for the indexed contents of the file with the given hash.
/// </summary>
/// <param name="hash">Hash of the file; sent in hex form as the last URL segment.</param>
/// <returns>
/// The deserialized entry, or null when the server returns a non-success status or any exception occurs.
/// </returns>
private static async Task<IndexedVirtualFile> TryGetContentsFromServer(Hash hash)
{
    try
    {
        // The client and the response both own network resources, so each is disposed on every path.
        using var client = new HttpClient();
        using var response =
            await client.GetAsync($"http://{Consts.WabbajackCacheHostname}/indexed_files/{hash.ToHex()}");

        if (!response.IsSuccessStatusCode)
        {
            return null;
        }

        using var stream = await response.Content.ReadAsStreamAsync();
        return stream.FromJson<IndexedVirtualFile>();
    }
    catch (Exception)
    {
        // Best effort: any failure is reported to the caller as "nothing found".
        return null;
    }
}
2020-03-24 12:21:19 +00:00
/// <summary>
/// Writes this file and, recursively, its children to <paramref name="bw"/>.
/// </summary>
/// <remarks>
/// The field order must match what <c>Read</c> consumes: name, size, last-modified, last-analyzed,
/// hash, child count, then each child.
/// </remarks>
/// <param name="bw">Writer that receives the serialized form.</param>
public void Write(BinaryWriter bw)
{
    bw.Write(Name);
    bw.Write(Size);
    bw.Write(LastModified);
    // The fourth field is read back as LastAnalyzed; LastModified used to be written here a second time.
    bw.Write(LastAnalyzed);
    bw.Write(Hash);
    bw.Write(Children.Count);
    foreach (var child in Children)
    {
        child.Write(bw);
    }
}
/// <summary>
/// Deserializes a file tree from a byte array produced by <c>Write</c>.
/// </summary>
/// <param name="context">Context assigned to every file that is read.</param>
/// <param name="data">Serialized tree.</param>
/// <returns>The top-level file, which has no parent.</returns>
public static VirtualFile Read(Context context, byte[] data)
{
    using (var buffer = new MemoryStream(data))
    using (var reader = new BinaryReader(buffer))
    {
        return Read(context, null, reader);
    }
}
2020-03-24 12:21:19 +00:00
/// <summary>
/// Reads the top-level file of a serialized tree, then its children.
/// </summary>
/// <remarks>
/// Fields are consumed in the order name, size, last-modified, last-analyzed, hash, child count.
/// The file's name is treated as an absolute path and becomes the root for every nested file.
/// </remarks>
private static VirtualFile Read(Context context, VirtualFile parent, BinaryReader br)
{
    // Read the fields into locals first so the stream order is explicit.
    var name = br.ReadIPath();
    var size = br.ReadInt64();
    var lastModified = br.ReadUInt64();
    var lastAnalyzed = br.ReadUInt64();
    var hash = br.ReadHash();

    var root = new VirtualFile
    {
        Name = name,
        Size = size,
        LastModified = lastModified,
        LastAnalyzed = lastAnalyzed,
        Hash = hash,
        Context = context,
        Parent = parent,
        Children = ImmutableList<VirtualFile>.Empty
    };
    root.FullPath = new FullPath(root.AbsoluteName, new RelativePath[0]);

    var childCount = br.ReadInt32();
    for (var n = 0; n < childCount; n++)
    {
        var nested = Read(context, root, br, (AbsolutePath)root.Name, new RelativePath[0]);
        root.Children = root.Children.Add(nested);
    }

    return root;
}
/// <summary>
/// Reads one nested file of a serialized tree, then its children.
/// </summary>
/// <param name="context">Context assigned to the file.</param>
/// <param name="parent">File that contains the one being read.</param>
/// <param name="br">Reader positioned at the start of the file's record.</param>
/// <param name="top">Absolute path of the top-level file of the tree.</param>
/// <param name="subpaths">Relative names of the ancestors between <paramref name="top"/> and this file.</param>
private static VirtualFile Read(Context context, VirtualFile parent, BinaryReader br, AbsolutePath top, RelativePath[] subpaths)
{
    var name = (RelativePath)br.ReadIPath();
    subpaths = subpaths.Add(name);

    var size = br.ReadInt64();
    var lastModified = br.ReadUInt64();
    var lastAnalyzed = br.ReadUInt64();
    var hash = br.ReadHash();

    var nested = new VirtualFile
    {
        Name = name,
        Size = size,
        LastModified = lastModified,
        LastAnalyzed = lastAnalyzed,
        Hash = hash,
        Context = context,
        Parent = parent,
        Children = ImmutableList<VirtualFile>.Empty,
        FullPath = new FullPath(top, subpaths)
    };

    var childCount = br.ReadInt32();
    for (var n = 0; n < childCount; n++)
    {
        var grandchild = Read(context, nested, br, top, subpaths);
        nested.Children = nested.Children.Add(grandchild);
    }

    return nested;
}
/// <summary>
/// Builds a <see cref="HashRelativePath"/> for this file: the hash of the root file followed by the
/// relative name of every file below the root on the path to this one.
/// </summary>
public HashRelativePath MakeRelativePaths()
{
    // FilesInFullPath rebuilds the parent chain on every access, so materialize it once.
    // The chain always contains at least this file.
    var chain = FilesInFullPath.ToList();

    var paths = chain.Skip(1)
        .Select(file => (RelativePath)file.Name)
        .ToArray();

    return new HashRelativePath(chain[0].Hash, paths);
}
/// <summary>
/// Looks up the indexed file that <see cref="FullPath"/> resolves to for <paramref name="relativePath"/>
/// via <c>FullPath.InSameFolder</c>.
/// </summary>
/// <returns>The file found in the context's full-path index, or null when there is none.</returns>
public VirtualFile InSameFolder(RelativePath relativePath)
{
    var siblingPath = FullPath.InSameFolder(relativePath);
    if (Context.Index.ByFullPath.TryGetValue(siblingPath, out var match))
    {
        return match;
    }

    return null;
}
}
/// <summary>
/// Hex-encoded SHA-256, SHA-1, MD5 and CRC32 digests of one stream.
/// </summary>
public class ExtendedHashes
{
    /// <summary>SHA-256 digest as produced by <c>ToHex</c>.</summary>
    public string SHA256 { get; set; }

    /// <summary>SHA-1 digest as produced by <c>ToHex</c>.</summary>
    public string SHA1 { get; set; }

    /// <summary>MD5 digest as produced by <c>ToHex</c>.</summary>
    public string MD5 { get; set; }

    /// <summary>CRC32 digest as produced by <c>ToHex</c>.</summary>
    public string CRC { get; set; }

    /// <summary>
    /// Computes all four digests of <paramref name="stream"/>, rewinding it to position 0 before each pass.
    /// </summary>
    /// <param name="stream">Seekable stream to hash; it is left positioned at its end.</param>
    /// <returns>A new instance with every digest populated.</returns>
    public static async ValueTask<ExtendedHashes> FromStream(Stream stream)
    {
        var hashes = new ExtendedHashes();

        // The algorithm types are fully qualified because the properties above share their names.
        // Each instance is disposed as soon as its digest has been taken.
        stream.Position = 0;
        using (var sha256 = System.Security.Cryptography.SHA256.Create())
        {
            hashes.SHA256 = sha256.ComputeHash(stream).ToHex();
        }

        stream.Position = 0;
        using (var sha1 = System.Security.Cryptography.SHA1.Create())
        {
            hashes.SHA1 = sha1.ComputeHash(stream).ToHex();
        }

        stream.Position = 0;
        using (var md5 = System.Security.Cryptography.MD5.Create())
        {
            hashes.MD5 = md5.ComputeHash(stream).ToHex();
        }

        stream.Position = 0;
        var bytes = new byte[1024 * 8];
        var crc = new Crc32();
        while (true)
        {
            var read = stream.Read(bytes, 0, bytes.Length);
            if (read == 0)
            {
                break;
            }

            crc.Update(bytes, 0, read);
        }

        hashes.CRC = crc.DigestBytes().ToHex();
        return hashes;
    }
}
/// <summary>
/// Exception whose message is the text passed to the constructor.
/// NOTE(review): presumably thrown when staging is requested for a native file; no throw site is visible here.
/// </summary>
public class CannotStageNativeFile : Exception
{
    /// <param name="cannotStageANativeFile">Message forwarded unchanged to <see cref="Exception"/>.</param>
    public CannotStageNativeFile(string cannotStageANativeFile)
        : base(cannotStageANativeFile)
    {
    }
}
/// <summary>
/// Exception carrying the message "File {fullPath} is unstaged, cannot get staged name".
/// </summary>
public class UnstagedFileException : Exception
{
    // Path the exception was raised for; stored but not read anywhere in this class.
    private readonly FullPath _fullPath;

    /// <param name="fullPath">Path of the unstaged file; interpolated into the message.</param>
    public UnstagedFileException(FullPath fullPath)
        : base($"File {fullPath} is unstaged, cannot get staged name")
    {
        _fullPath = fullPath;
    }
}
2019-11-24 23:03:36 +00:00
}