Latest changes to patches/mirroring

This commit is contained in:
Timothy Baldridge 2021-12-17 16:40:45 -07:00
parent b9dc50c47c
commit 75aaec5fa2
11 changed files with 425 additions and 30 deletions

View File

@ -61,6 +61,8 @@ internal class Program
services.AddSingleton<IVerb, DownloadCef>();
services.AddSingleton<IVerb, DownloadUrl>();
services.AddSingleton<IVerb, GenerateMetricsReports>();
services.AddSingleton<IVerb, ForceHeal>();
services.AddSingleton<IVerb, MirrorFile>();
}).Build();
var service = host.Services.GetService<CommandLineBuilder>();

View File

@ -3,6 +3,7 @@ using System.CommandLine;
using System.CommandLine.Invocation;
using System.IO;
using System.Linq;
using System.Net.Http;
using System.Threading;
using System.Threading.Tasks;
using FluentFTP.Helpers;
@ -13,7 +14,9 @@ using Wabbajack.Downloaders;
using Wabbajack.DTOs;
using Wabbajack.DTOs.ModListValidation;
using Wabbajack.DTOs.ServerResponses;
using Wabbajack.Hashing.xxHash64;
using Wabbajack.Installer;
using Wabbajack.Networking.Http;
using Wabbajack.Networking.WabbajackClientApi;
using Wabbajack.Paths;
using Wabbajack.Paths.IO;
@ -21,19 +24,22 @@ using Wabbajack.VFS;
namespace Wabbajack.CLI.Verbs;
public class ForceHeal
public class ForceHeal : IVerb
{
private readonly ILogger<ForceHeal> _logger;
private readonly Client _client;
private readonly DownloadDispatcher _downloadDispatcher;
private readonly FileHashCache _fileHashCache;
private readonly HttpClient _httpClient;
public ForceHeal(ILogger<ForceHeal> logger, Client client, DownloadDispatcher downloadDispatcher, FileHashCache hashCache)
public ForceHeal(ILogger<ForceHeal> logger, Client client, DownloadDispatcher downloadDispatcher, FileHashCache hashCache,
HttpClient httpClient)
{
_logger = logger;
_client = client;
_downloadDispatcher = downloadDispatcher;
_fileHashCache = hashCache;
_httpClient = httpClient;
}
public Command MakeCommand()
@ -67,6 +73,19 @@ public class ForceHeal
};
validated = await _client.UploadPatch(validated, outData);
_logger.LogInformation("Patch Updated, validating result by downloading patch");
using var patchStream = await _httpClient.GetAsync(validated.PatchUrl);
if (!patchStream.IsSuccessStatusCode)
throw new HttpException(patchStream);
outData.Position = 0;
var originalHash = outData.HashingCopy(Stream.Null, CancellationToken.None);
var hash = await (await patchStream.Content.ReadAsStreamAsync()).HashingCopy(Stream.Null, CancellationToken.None);
if (hash != await originalHash)
{
throw new Exception($"Patch on server does not match patch hash {await originalHash} vs {hash}");
}
_logger.LogInformation("Adding patch to forced_healing.json");
await _client.AddForceHealedPatch(validated);

View File

@ -0,0 +1,38 @@
using System.CommandLine;
using System.CommandLine.Invocation;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using Wabbajack.Networking.WabbajackClientApi;
using Wabbajack.Paths;
namespace Wabbajack.CLI.Verbs;
/// <summary>
/// CLI verb <c>mirror-file</c>: generates a file definition for a local file and uploads it
/// through the Wabbajack API client.
/// </summary>
public class MirrorFile : IVerb
{
    private readonly ILogger<MirrorFile> _log;
    private readonly Client _wjClient;

    public MirrorFile(ILogger<MirrorFile> logger, Client wjClient)
    {
        _wjClient = wjClient;
        _log = logger;
    }

    /// <summary>
    /// Builds the <c>mirror-file</c> command with its single input-file option and binds it to
    /// <see cref="Run"/>.
    /// </summary>
    public Command MakeCommand()
    {
        var mirrorCommand = new Command("mirror-file")
        {
            Description = "Mirrors a file to the Wabbajack CDN",
            Handler = CommandHandler.Create(Run)
        };

        var inputOption = new Option<AbsolutePath>(new[] {"-i", "-input"}, "File to Mirror");
        mirrorCommand.Add(inputOption);

        return mirrorCommand;
    }

    /// <summary>
    /// Generates the definition for <paramref name="input"/> and uploads the file as a mirror.
    /// </summary>
    /// <param name="input">Path of the file to mirror.</param>
    /// <returns>Always <c>0</c>; failures surface as exceptions from the client calls.</returns>
    public async Task<int> Run(AbsolutePath input)
    {
        _log.LogInformation("Generating File Definition for {Name}", input.FileName);

        // The definition is produced first, then handed to the upload together with the same path.
        await _wjClient.UploadMirror(await _wjClient.GenerateFileDefinition(input), input);
        return 0;
    }
}

View File

@ -56,13 +56,16 @@ public class Client
_hashLimiter = hashLimiter;
}
private async ValueTask<HttpRequestMessage> MakeMessage(HttpMethod method, Uri uri)
private async ValueTask<HttpRequestMessage> MakeMessage(HttpMethod method, Uri uri, HttpContent? content = null)
{
var msg = new HttpRequestMessage(method, uri);
var key = (await _token.Get())!;
msg.Headers.Add(_configuration.MetricsKeyHeader, key.MetricsKey);
if (!string.IsNullOrWhiteSpace(key.AuthorKey))
msg.Headers.Add(_configuration.AuthorKeyHeader, key.AuthorKey);
if (content != null)
msg.Content = content;
return msg;
}
@ -154,21 +157,22 @@ public class Client
_dtos.Options))!;
}
/// <summary>
/// Lazily splits a payload of <paramref name="size"/> bytes into consecutive parts of at most
/// <c>UploadedFileBlockSize</c> bytes each; the last part holds whatever remains.
/// </summary>
/// <param name="size">Total number of bytes to split. A size of zero or less yields no parts.</param>
/// <returns>One <see cref="PartDefinition"/> per part, with its index, offset and size set.</returns>
IEnumerable<PartDefinition> Blocks(long size)
{
    for (long index = 0;; index++)
    {
        var offset = index * UploadedFileBlockSize;

        // Stop as soon as the next part would start at or beyond the end of the data.
        if (offset >= size)
            yield break;

        yield return new PartDefinition
        {
            Index = index,
            Size = Math.Min(UploadedFileBlockSize, size - offset),
            Offset = offset
        };
    }
}
public async Task<FileDefinition> GenerateFileDefinition(AbsolutePath path)
{
IEnumerable<PartDefinition> Blocks(AbsolutePath path)
{
var size = path.Size();
for (long block = 0; block * UploadedFileBlockSize < size; block++)
yield return new PartDefinition
{
Index = block,
Size = Math.Min(UploadedFileBlockSize, size - block * UploadedFileBlockSize),
Offset = block * UploadedFileBlockSize
};
}
var parts = Blocks(path).ToArray();
var parts = Blocks(path.Size()).ToArray();
var definition = new FileDefinition
{
OriginalFileName = path.FileName,
@ -214,9 +218,34 @@ public class Client
return new Uri($"{_configuration.PatchBaseAddress}{upgradeHash.ToHex()}_{archiveHash.ToHex()}");
}
public async Task<ValidatedArchive> UploadPatch(ValidatedArchive validated, MemoryStream outData)
public async Task<ValidatedArchive> UploadPatch(ValidatedArchive validated, Stream data)
{
throw new NotImplementedException();
_logger.LogInformation("Uploading Patch {From} {To}", validated.Original.Hash, validated.PatchedFrom!.Hash);
var name = $"{validated.Original.Hash.ToHex()}_{validated.PatchedFrom.Hash.ToHex()}";
var blocks = Blocks(data.Length).ToArray();
foreach (var block in blocks)
{
_logger.LogInformation("Uploading Block {Idx}/{Max}", block.Index, blocks.Length);
data.Position = block.Offset;
var blockData = new byte[block.Size];
await data.ReadAsync(blockData);
var hash = await blockData.Hash();
using var result = await _client.SendAsync(await MakeMessage(HttpMethod.Post,
new Uri($"{_configuration.BuildServerUrl}patches?name={name}&start={block.Offset}"),
new ByteArrayContent(blockData)));
if (!result.IsSuccessStatusCode)
throw new HttpException(result);
var resultHash = Hash.FromHex(await result.Content.ReadAsStringAsync());
if (resultHash != hash)
throw new Exception($"Result Hash does not match expected hash {hash} vs {resultHash}");
}
validated.PatchUrl = new Uri($"https://patches.wabbajack.org/{name}");
return validated;
}
public async Task AddForceHealedPatch(ValidatedArchive validated)
@ -249,4 +278,44 @@ public class Client
var sha = oldData.Headers.GetValues(_configuration.ResponseShaHeader).First();
return (sha, (await oldData.Content.ReadFromJsonAsync<T>())!);
}
/// <summary>
/// Uploads <paramref name="file"/> to the build server's <c>mirrored_files</c> endpoints in three steps:
/// a create request carrying the serialized definition, one PUT per part, and a final finish request.
/// </summary>
/// <param name="definition">Definition of the file; its hash names the upload and its parts drive the part loop.</param>
/// <param name="file">Local file whose bytes are read at each part's offset.</param>
/// <exception cref="HttpException">Any of the requests returned a non-success status code.</exception>
/// <exception cref="EndOfStreamException">The file ended before a part's declared size could be read.</exception>
public async Task UploadMirror(FileDefinition definition, AbsolutePath file)
{
    var hashAsHex = definition.Hash.ToHex();
    _logger.LogInformation("Starting upload of {Name} ({Hash})", file.FileName, hashAsHex);

    using var result = await _client.SendAsync(await MakeMessage(HttpMethod.Put,
        new Uri($"{_configuration.BuildServerUrl}mirrored_files/create/{hashAsHex}"),
        new StringContent(_dtos.Serialize(definition), Encoding.UTF8, "application/json")));
    if (!result.IsSuccessStatusCode)
        throw new HttpException(result);

    _logger.LogInformation("Uploading Parts");
    await using var dataIn = file.Open(FileMode.Open);

    foreach (var (part, idx) in definition.Parts.Select((part, idx) => (part, idx)))
    {
        _logger.LogInformation("Uploading Part {Part}/{Max}", idx, definition.Parts.Length);
        dataIn.Position = part.Offset;

        // A single ReadAsync call may return fewer bytes than requested, so keep reading until the
        // buffer is full instead of uploading a partially zero-filled part.
        var data = new byte[part.Size];
        var filled = 0;
        while (filled < data.Length)
        {
            var read = await dataIn.ReadAsync(data, filled, data.Length - filled);
            if (read == 0)
                throw new EndOfStreamException(
                    $"Unexpected end of file while reading part {idx}: expected {data.Length} bytes, got {filled}");
            filled += read;
        }

        using var partResult = await _client.SendAsync(await MakeMessage(HttpMethod.Put,
            new Uri($"{_configuration.BuildServerUrl}mirrored_files/{hashAsHex}/part/{idx}"),
            new ByteArrayContent(data)));

        // Report the response that actually failed, not the earlier (successful) create response.
        if (!partResult.IsSuccessStatusCode)
            throw new HttpException(partResult);
    }

    using var finalResult = await _client.SendAsync(await MakeMessage(HttpMethod.Put,
        new Uri($"{_configuration.BuildServerUrl}mirrored_files/{hashAsHex}/finish")));
    if (!finalResult.IsSuccessStatusCode)
        throw new HttpException(finalResult);
}
}

View File

@ -21,6 +21,7 @@ public class Configuration
public Uri UpgradedArchives { get; set; } =
new("https://raw.githubusercontent.com/wabbajack-tools/mod-lists/master/reports/upgraded.json");
public Uri BuildServerUrl { get; set; } = new("https://build.wabbajack.org/");
//public Uri BuildServerUrl { get; set; } = new("https://build.wabbajack.org/");
public Uri BuildServerUrl { get; set; } = new("http://localhost:5000/");
public string PatchBaseAddress { get; set; } = new("https://patches.wabbajack.org/");
}

View File

@ -21,6 +21,7 @@ public class AppSettings
public string AuthoredFilesFolder { get; set; }
public string PatchesFilesFolder { get; set; }
public string MirrorFilesFolder { get; set; }
public string MetricsFolder { get; set; } = "";
public string TarLogPath { get; set; }
public string GitHubKey { get; set; } = "";

View File

@ -0,0 +1,221 @@
using System.IO.Compression;
using System.Net;
using System.Security.Claims;
using Microsoft.AspNetCore.Authorization;
using Microsoft.AspNetCore.Mvc;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Primitives;
using Wabbajack.BuildServer;
using Wabbajack.Common;
using Wabbajack.DTOs.CDN;
using Wabbajack.DTOs.JsonConverters;
using Wabbajack.Hashing.xxHash64;
using Wabbajack.Paths;
using Wabbajack.Paths.IO;
using Wabbajack.Server.DataModels;
using Wabbajack.Server.DTOs;
using Wabbajack.Server.Services;
namespace Wabbajack.Server.Controllers;
/// <summary>
/// HTTP endpoints under <c>/mirrored_files</c> for creating, uploading, finishing, deleting, listing and
/// downloading mirrored files. Each mirror is stored in a directory named after the file hash (hex) below
/// <see cref="MirrorFilesLocation"/>, holding a gzipped JSON definition and a <c>parts</c> directory with
/// one file per part.
/// </summary>
[Authorize(Roles = "Author")]
[Route("/mirrored_files")]
public class MirroredFiles : ControllerBase
{
    private const string DefinitionFileName = "definition.json.gz";
    private const string NotOwnerMessage = "File Id does not match authorized user";

    private readonly DTOSerializer _dtos;
    private readonly DiscordWebHook _discord;
    private readonly ILogger<MirroredFiles> _logger;
    private readonly AppSettings _settings;

    /// <summary>Root directory for all mirrors, taken from <c>AppSettings.MirrorFilesFolder</c>.</summary>
    public AbsolutePath MirrorFilesLocation => _settings.MirrorFilesFolder.ToAbsolutePath();

    public MirroredFiles(ILogger<MirroredFiles> logger, AppSettings settings, DiscordWebHook discord,
        DTOSerializer dtos)
    {
        _logger = logger;
        _settings = settings;
        _discord = discord;
        _dtos = dtos;
    }

    /// <summary>
    /// Receives one part of a mirror, verifies its size and hash against the stored definition and
    /// writes it to the mirror's <c>parts</c> directory.
    /// </summary>
    /// <param name="token">Cancellation token for the request.</param>
    /// <param name="hashAsHex">Hex hash identifying the mirror.</param>
    /// <param name="index">Zero-based index into the definition's parts.</param>
    /// <returns>
    /// 200 with the part hash (base64) on success; 403 when the caller is not the mirror's author;
    /// 400 when the index is out of range or the body's size or hash does not match the definition.
    /// </returns>
    [HttpPut]
    [Route("{hashAsHex}/part/{index}")]
    public async Task<IActionResult> UploadFilePart(CancellationToken token, string hashAsHex, long index)
    {
        var user = User.FindFirstValue(ClaimTypes.Name);
        var definition = await ReadDefinition(hashAsHex);
        if (definition.Author != user)
            return NotOwner();

        // The index comes straight from the route; reject it before it is used to index the array.
        if (index < 0 || index >= definition.Parts.Length)
            return BadRequest($"Part index {index} is out of range (0 - {definition.Parts.Length - 1})");

        _logger.Log(LogLevel.Information, "Uploading File part {Name} - ({Index} / {Max})",
            definition.OriginalFileName, index, definition.Parts.Length);

        var part = definition.Parts[index];

        await using var ms = new MemoryStream();
        await Request.Body.CopyToLimitAsync(ms, (int) part.Size, token);
        ms.Position = 0;
        if (ms.Length != part.Size)
            return BadRequest($"Couldn't read enough data for part {part.Size} vs {ms.Length}");

        var hash = await ms.Hash(token);
        if (hash != part.Hash)
            return BadRequest(
                $"Hashes don't match for index {index}. Sizes ({ms.Length} vs {part.Size}). Hashes ({hash} vs {part.Hash}");

        // Hashing consumed the stream; rewind before persisting it.
        ms.Position = 0;
        await using var partStream = await CreatePart(hashAsHex, (int) index);
        await ms.CopyToAsync(partStream, token);
        return Ok(part.Hash.ToBase64());
    }

    /// <summary>
    /// Starts a new mirror: reads the definition from the request body, records the caller as its author,
    /// stores it and announces the upload on Discord.
    /// </summary>
    /// <param name="hashAsHex">Hex hash identifying the mirror; must match the hash inside the definition.</param>
    /// <returns>200 with the assigned id, or 400 when the route hash and definition hash differ.</returns>
    [HttpPut]
    [Route("create/{hashAsHex}")]
    public async Task<IActionResult> CreateUpload(string hashAsHex)
    {
        var user = User.FindFirstValue(ClaimTypes.Name);

        var definition = (await _dtos.DeserializeAsync<FileDefinition>(Request.Body))!;

        // WriteDefinition stores under definition.Hash while every later call looks the mirror up by the
        // route hash, so a mismatch would create a mirror that can never be found again.
        if (Hash.FromHex(hashAsHex) != definition.Hash)
            return BadRequest(
                $"Route hash {hashAsHex} does not match definition hash {definition.Hash.ToHex()}");

        _logger.Log(LogLevel.Information, "Creating File upload {Hash}", hashAsHex);

        definition.ServerAssignedUniqueId = hashAsHex;
        definition.Author = user;
        await WriteDefinition(definition);

        await _discord.Send(Channel.Ham,
            new DiscordMessage
            {
                Content =
                    $"{user} has started mirroring {definition.OriginalFileName} ({definition.Size.ToFileSizeString()})"
            });

        return Ok(definition.ServerAssignedUniqueId);
    }

    /// <summary>
    /// Marks a mirror upload as finished: announces it on Discord and returns a download URL.
    /// </summary>
    /// <param name="hashAsHex">Hex hash identifying the mirror.</param>
    /// <returns>200 with a URL, or 403 when the caller is not the mirror's author.</returns>
    [HttpPut]
    [Route("{hashAsHex}/finish")]
    public async Task<IActionResult> FinishUpload(string hashAsHex)
    {
        var user = User.FindFirstValue(ClaimTypes.Name);
        var definition = await ReadDefinition(hashAsHex);
        if (definition.Author != user)
            return NotOwner();

        _logger.Log(LogLevel.Information, "Finalizing file upload {Hash}", hashAsHex);

        await _discord.Send(Channel.Ham,
            new DiscordMessage
            {
                Content =
                    $"{user} has finished uploading {definition.OriginalFileName} ({definition.Size.ToFileSizeString()})"
            });

        // NOTE(review): this returns an authored-files/test-files URL built from MungedName even though
        // the data was stored as a mirror - confirm this is the intended public location.
        var host = _settings.TestMode ? "test-files" : "authored-files";
        return Ok($"https://{host}.wabbajack.org/{definition.MungedName}");
    }

    /// <summary>
    /// Deletes a mirror's directory after announcing the deletion on Discord.
    /// </summary>
    /// <param name="hashAsHex">Hex hash identifying the mirror.</param>
    /// <returns>200 on success, or 403 when the caller is not the mirror's author.</returns>
    [HttpDelete]
    [Route("{hashAsHex}")]
    public async Task<IActionResult> DeleteMirror(string hashAsHex)
    {
        var user = User.FindFirstValue(ClaimTypes.Name);
        var definition = await ReadDefinition(hashAsHex);

        // Same ownership rule as the other mutating endpoints: only the author may delete a mirror.
        if (definition.Author != user)
            return NotOwner();

        await _discord.Send(Channel.Ham,
            new DiscordMessage
            {
                Content =
                    $"{user} is deleting {hashAsHex}, {definition.Size.ToFileSizeString()} to be freed"
            });
        _logger.Log(LogLevel.Information, "Deleting upload {Hash}", hashAsHex);

        RootPath(hashAsHex).DeleteDirectory();
        return Ok();
    }

    /// <summary>
    /// Lists every mirrored file definition, with the per-part data stripped from the response.
    /// </summary>
    [HttpGet]
    [AllowAnonymous]
    [Route("")]
    public async Task<IActionResult> MirroredFilesGet()
    {
        var files = await AllMirroredFiles();
        foreach (var file in files)
            file.Parts = Array.Empty<PartDefinition>();
        return Ok(_dtos.Serialize(files));
    }

    /// <summary>Paths of all definition files found below <see cref="MirrorFilesLocation"/>.</summary>
    public IEnumerable<AbsolutePath> AllDefinitions => MirrorFilesLocation.EnumerateFiles(DefinitionFileName);

    /// <summary>Reads every definition returned by <see cref="AllDefinitions"/>.</summary>
    public async Task<FileDefinition[]> AllMirroredFiles()
    {
        var defs = new List<FileDefinition>();
        foreach (var file in AllDefinitions)
        {
            defs.Add(await ReadDefinition(file));
        }
        return defs.ToArray();
    }

    /// <summary>Reads the stored definition of the mirror identified by <paramref name="hashAsHex"/>.</summary>
    public async Task<FileDefinition> ReadDefinition(string hashAsHex)
    {
        return await ReadDefinition(RootPath(hashAsHex).Combine(DefinitionFileName));
    }

    /// <summary>Loads and decompresses a gzipped JSON definition file.</summary>
    private async Task<FileDefinition> ReadDefinition(AbsolutePath file)
    {
        // Dispose the decompression stream (and the buffer it wraps) once deserialization is done.
        await using var gz = new GZipStream(new MemoryStream(await file.ReadAllBytesAsync()),
            CompressionMode.Decompress);
        var definition = (await _dtos.DeserializeAsync<FileDefinition>(gz))!;
        return definition;
    }

    /// <summary>
    /// Writes <paramref name="definition"/> as gzipped JSON into the directory named after its hash,
    /// creating that directory and its <c>parts</c> subdirectory.
    /// </summary>
    public async Task WriteDefinition(FileDefinition definition)
    {
        var path = RootPath(definition.Hash.ToHex()).Combine(DefinitionFileName);
        path.Parent.CreateDirectory();
        path.Parent.Combine("parts").CreateDirectory();

        await using var ms = new MemoryStream();
        // leaveOpen: true keeps ms usable after the gzip stream is flushed and disposed.
        await using (var gz = new GZipStream(ms, CompressionLevel.Optimal, true))
        {
            await _dtos.Serialize(definition, gz);
        }

        await path.WriteAllBytesAsync(ms.ToArray());
    }

    /// <summary>Directory of the mirror identified by <paramref name="hashAsHex"/>.</summary>
    public AbsolutePath RootPath(string hashAsHex)
    {
        // Make sure it's a true hash before splicing into the path
        return MirrorFilesLocation.Combine(Hash.FromHex(hashAsHex).ToHex());
    }

    /// <summary>
    /// Streams the whole mirrored file to the response as an attachment by copying each part,
    /// in definition order, into the response body.
    /// </summary>
    [HttpGet]
    [AllowAnonymous]
    [Route("direct_link/{hashAsHex}")]
    public async Task DirectLink(string hashAsHex)
    {
        var definition = await ReadDefinition(hashAsHex);
        Response.Headers.ContentDisposition =
            new StringValues($"attachment; filename={definition.OriginalFileName}");
        Response.Headers.ContentType = new StringValues("application/octet-stream");
        foreach (var part in definition.Parts)
        {
            await using var partStream = await StreamForPart(hashAsHex, (int) part.Index);
            await partStream.CopyToAsync(Response.Body);
        }
    }

    /// <summary>Opens an existing part file of a mirror.</summary>
    public async Task<Stream> StreamForPart(string hashAsHex, int part)
    {
        return RootPath(hashAsHex).Combine("parts", part.ToString()).Open(FileMode.Open);
    }

    /// <summary>Creates (or overwrites) a part file of a mirror and opens it for exclusive writing.</summary>
    public async Task<Stream> CreatePart(string hashAsHex, int part)
    {
        return RootPath(hashAsHex).Combine("parts", part.ToString())
            .Open(FileMode.Create, FileAccess.Write, FileShare.None);
    }

    /// <summary>
    /// 403 response used when the caller is not the author recorded in the definition.
    /// <c>Forbid(string)</c> is not used because its string arguments are authentication scheme names,
    /// not a message.
    /// </summary>
    private IActionResult NotOwner()
    {
        return StatusCode((int) HttpStatusCode.Forbidden, NotOwnerMessage);
    }
}

View File

@ -5,11 +5,13 @@ using System.IO;
using System.IO.Compression;
using System.Linq;
using System.Threading.Tasks;
using FluentFTP.Helpers;
using Microsoft.Extensions.Logging;
using Wabbajack.BuildServer;
using Wabbajack.Common;
using Wabbajack.DTOs.CDN;
using Wabbajack.DTOs.JsonConverters;
using Wabbajack.Hashing.xxHash64;
using Wabbajack.Paths;
using Wabbajack.Paths.IO;
@ -50,9 +52,9 @@ public class AuthorFiles
return defs.ToArray();
}
public async Task<Stream> StreamForPart(string mungedName, int part)
public async Task<Stream> StreamForPart(string hashAsHex, int part)
{
return AuthorFilesLocation.Combine(mungedName, "parts", part.ToString()).Open(FileMode.Open);
return AuthorFilesLocation.Combine(hashAsHex, "parts", part.ToString()).Open(FileMode.Open);
}
public async Task<Stream> CreatePart(string mungedName, int part)
@ -100,11 +102,11 @@ public class AuthorFiles
folder.DeleteDirectory();
}
public async Task<FileDefinition> ReadDefinitionForServerId(string serverAssignedUniqueId)
public async Task<FileDefinition> ReadDefinitionForServerId(string hashAsHex)
{
if (_byServerId.TryGetValue(serverAssignedUniqueId, out var found))
return found;
await AllAuthoredFiles();
return _byServerId[serverAssignedUniqueId];
var data = await ReadDefinition(_settings.MirrorFilesFolder.ToAbsolutePath().Combine(hashAsHex).Combine("definition.json.gz"));
if (data.Hash != Hash.FromHex(hashAsHex))
throw new Exception($"Definition hex does not match {data.Hash.ToHex()} vs {hashAsHex}");
return data;
}
}

View File

@ -12,6 +12,7 @@
"AuthoredFilesFolder": "c:\\tmp\\server_authored_files",
"AuthorAPIKeyFile": "c:\\tmp\\author_keys.txt",
"PatchesFilesFolder": "c:\\tmp\\patches",
"MirrorFilesFolder": "c:\\tmp\\mirrors",
"GitHubKey": ""
},
"AllowedHosts": "*"

View File

@ -17,12 +17,10 @@ public static class ProtectedData
static ProtectedData()
{
_deviceKey = Task.Run(async () =>
{
var id = Encoding.UTF8.GetBytes(new DeviceIdBuilder()
.AddMacAddress()
.AddUserName()
.ToString());
var id = Encoding.UTF8.GetBytes(KnownFolders.AppDataLocal.ToString());
var hash1 = await id.Hash();
var hash2 = new Hash((ulong) hash1 ^ 42);

43
docs/ListHealing.md Normal file
View File

@ -0,0 +1,43 @@
## Overview of 3.0 "Auto-healing" or "Force Healing"
In the past with the Nexus deleting files every day, we saw a need
for rapid fully automatic healing for Wabbajack lists. This code was
brittle, quite complex, and hard to debug. However, these days with
the Nexus no longer deleting files, we have an opportunity to simplify
the process.
### Parts in play
* List Validation service - a GitHub action with some static storage, and rights to log into all our download sources
* Storage Server - the backing store behind the Wabbajack CDN consists of 3 storage spaces:
* Patches - a directory of files stored as `{from_hash_hex}_{to_hash_hex}`
* Mirrors - a directory of files in the CDN multi-parts format stored as `{file_hash_hex}`
* Authored files - a directory of files in the CDN multi-parts format
### Multi-Parts Format
The structure for CDN files in this format is:
* `./definition.json.gz` - JSON data storing the hash of the files, and the hash of each part
* `./parts/{idx}` - each part stored as `0`, `1`, etc. Each file is uncompressed and roughly 2MB
### File Validation Process
The workflow for list validation in 3.0 is as follows:
1) Load the `configs/forced_healing.json` file that contains mirrored and patch files specified by list authors
2) Download every modlist and archive it for future use; if it has already been downloaded, don't download it again
3) For each modlist, load it, and start validating the files
4) For each file that passes, return `Valid`
5) If the file fails, check the mirrors list for a match, if it matches return `Mirrored`
6) If the file fails, check the patches list for a match,
* If one is found, validate the new file in the patch, if it fails try the next patch
7) If all patches fail to match, return `Invalid`
8) Write out reports for all modlists
### List Author Interaction
List authors now have two controls they can use:
* `wabbajack-cli.exe force-heal -o <old-file> -i <new-file>`
* Creates a patch for back porting `<new-file>` to `<old-file>`
* Uploads the patch
* Adds the patch to the `config/forced_healing.json` file
* `wabbajack-cli.exe mirror-file -i <file>`
* Uploads a file as a mirror
* Adds the file to the `config/forced_healing.json` file
* Note: using this to violate author copyrights is strictly forbidden. Do not mirror files without seeking prior approval from WJ staff.