Switch CDN storage to R2/S3

This commit is contained in:
Timothy Baldridge 2023-10-12 06:04:38 -06:00
parent 0cba392b66
commit 3cc50d9ff3
9 changed files with 269 additions and 67 deletions

View File

@ -58,7 +58,14 @@ public static class IEnumerableExtensions
return data; return data;
} }
public static IEnumerable<IEnumerable<T>> Partition<T>(this IEnumerable<T> coll, int size) /// <summary>
/// Splits the collection into `count` parts
/// </summary>
/// <param name="coll"></param>
/// <param name="count"></param>
/// <typeparam name="T"></typeparam>
/// <returns></returns>
public static IEnumerable<IEnumerable<T>> Partition<T>(this IEnumerable<T> coll, int count)
{ {
var asList = coll.ToList(); var asList = coll.ToList();
@ -70,7 +77,30 @@ public static class IEnumerableExtensions
} }
} }
return Enumerable.Range(0, size).Select(offset => SkipEnumerable(asList, offset, size)); return Enumerable.Range(0, count).Select(offset => SkipEnumerable(asList, offset, count));
}
/// <summary>
/// Splits the collection into consecutive batches of at most <paramref name="size"/> items.
/// Every batch except possibly the last holds exactly <paramref name="size"/> items, and an
/// empty source produces no batches at all.
/// </summary>
/// <param name="coll">Sequence to split; it is enumerated once, lazily, as batches are requested.</param>
/// <param name="size">Maximum number of items per batch; must be at least 1.</param>
/// <typeparam name="T">Element type of the sequence.</typeparam>
/// <returns>The batches, in source order. Each batch is a freshly allocated list.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="coll"/> is null.</exception>
/// <exception cref="System.ArgumentOutOfRangeException"><paramref name="size"/> is less than 1.</exception>
public static IEnumerable<IEnumerable<T>> Batch<T>(this IEnumerable<T> coll, int size)
{
    // Validate here rather than inside the iterator: iterator bodies only run on first
    // MoveNext, which would surface a bad argument far away from the faulty call site.
    if (coll is null)
    {
        throw new System.ArgumentNullException(nameof(coll));
    }

    // A size below 1 can never be reached by the count check, so the whole input would
    // silently come back as one unbounded batch.
    if (size < 1)
    {
        throw new System.ArgumentOutOfRangeException(nameof(size), size, "Batch size must be at least 1.");
    }

    return Iterate();

    IEnumerable<IEnumerable<T>> Iterate()
    {
        var current = new List<T>();
        foreach (var itm in coll)
        {
            current.Add(itm);
            if (current.Count == size)
            {
                yield return current;
                // Start a new list instead of clearing: the caller may still hold the old one.
                current = new List<T>();
            }
        }

        // Trailing partial batch.
        if (current.Count > 0)
        {
            yield return current;
        }
    }
}

View File

@ -93,7 +93,7 @@ public class Client
{ {
_logger.LogError("HTTP Error: {Result}", result); _logger.LogError("HTTP Error: {Result}", result);
await SendMetric("rebound", "Error", false); await SendMetric("rebound", "Error", false);
Environment.Exit(0); // Environment.Exit(0);
} }
} }

View File

@ -1,4 +1,5 @@
using Microsoft.Extensions.Configuration; using Amazon.S3;
using Microsoft.Extensions.Configuration;
using Wabbajack.Paths; using Wabbajack.Paths;
namespace Wabbajack.BuildServer; namespace Wabbajack.BuildServer;
@ -26,8 +27,6 @@ public class AppSettings
public string DiscordKey { get; set; } public string DiscordKey { get; set; }
public string AuthoredFilesFolder { get; set; }
public string PatchesFilesFolder { get; set; } public string PatchesFilesFolder { get; set; }
public string MirrorFilesFolder { get; set; } public string MirrorFilesFolder { get; set; }
public string NexusCacheFolder { get; set; } public string NexusCacheFolder { get; set; }
@ -37,6 +36,19 @@ public class AppSettings
public CouchDBSetting CesiDB { get; set; } public CouchDBSetting CesiDB { get; set; }
public CouchDBSetting MetricsDB { get; set; } public CouchDBSetting MetricsDB { get; set; }
public S3Settings AuthoredFilesS3 { get; set; }
}
/// <summary>
/// Settings for the S3-compatible bucket that holds authored files; exposed on
/// <c>AppSettings</c> as <c>AuthoredFilesS3</c>.
/// </summary>
public class S3Settings
{
/// <summary>Access key passed to <c>BasicAWSCredentials</c> when the <c>IAmazonS3</c> client is registered.</summary>
public string AccessKey { get; set; }
/// <summary>Secret key passed to <c>BasicAWSCredentials</c> together with <see cref="AccessKey"/>.</summary>
public string SecretKey { get; set; }
/// <summary>Endpoint assigned to <c>AmazonS3Config.ServiceURL</c> for the S3 client.</summary>
public string ServiceURL { get; set; }
/// <summary>Bucket that <c>AuthorFiles</c> names in its list, put and delete requests.</summary>
public string BucketName { get; set; }
/// <summary>
/// Path of the local file in which <c>AuthorFiles</c> stores the bucket listing
/// (one <c>key||timestamp</c> line per object). When the file exists it is loaded instead of
/// listing the bucket again.
/// </summary>
public string BucketCacheFile { get; set; }
} }
public class CouchDBSetting public class CouchDBSetting

View File

@ -147,7 +147,7 @@ public class AuthorControls : ControllerBase
public async Task<IActionResult> HomePage() public async Task<IActionResult> HomePage()
{ {
var user = User.FindFirstValue(ClaimTypes.Name); var user = User.FindFirstValue(ClaimTypes.Name);
var files = (await _authorFiles.AllAuthoredFiles()) var files = _authorFiles.AllDefinitions
.Where(af => af.Definition.Author == user) .Where(af => af.Definition.Author == user)
.Select(af => new .Select(af => new
{ {

View File

@ -68,8 +68,7 @@ public class AuthoredFiles : ControllerBase
$"Hashes don't match for index {index}. Sizes ({ms.Length} vs {part.Size}). Hashes ({hash} vs {part.Hash}"); $"Hashes don't match for index {index}. Sizes ({ms.Length} vs {part.Size}). Hashes ({hash} vs {part.Hash}");
ms.Position = 0; ms.Position = 0;
await using var partStream = await _authoredFiles.CreatePart(definition.MungedName, (int)index); await _authoredFiles.WritePart(definition.MungedName, (int) index, ms);
await ms.CopyToAsync(partStream, token);
return Ok(part.Hash.ToBase64()); return Ok(part.Hash.ToBase64());
} }
@ -123,7 +122,7 @@ public class AuthoredFiles : ControllerBase
public async Task<IActionResult> DeleteUpload(string serverAssignedUniqueId) public async Task<IActionResult> DeleteUpload(string serverAssignedUniqueId)
{ {
var user = User.FindFirstValue(ClaimTypes.Name); var user = User.FindFirstValue(ClaimTypes.Name);
var definition = (await _authoredFiles.AllAuthoredFiles()) var definition = _authoredFiles.AllDefinitions
.First(f => f.Definition.ServerAssignedUniqueId == serverAssignedUniqueId) .First(f => f.Definition.ServerAssignedUniqueId == serverAssignedUniqueId)
.Definition; .Definition;
if (definition.Author != user) if (definition.Author != user)
@ -145,12 +144,12 @@ public class AuthoredFiles : ControllerBase
[Route("")] [Route("")]
public async Task<ContentResult> UploadedFilesGet() public async Task<ContentResult> UploadedFilesGet()
{ {
var files = await _authoredFiles.AllAuthoredFiles(); var files = _authoredFiles.AllDefinitions
.ToArray();
var response = _authoredFilesTemplate(new var response = _authoredFilesTemplate(new
{ {
Files = files.OrderByDescending(f => f.Updated).ToArray(), Files = files.OrderByDescending(f => f.Updated).ToArray(),
TotalSpace = _authoredFiles.TotalSpace.Bytes().Humanize("#.##"), UsedSpace = _authoredFiles.UsedSpace.Bytes().Humanize("#.##"),
FreeSpace = _authoredFiles.FreeSpace.Bytes().Humanize("#.##")
}); });
return new ContentResult return new ContentResult
{ {
@ -172,10 +171,13 @@ public class AuthoredFiles : ControllerBase
Response.Headers.ContentType = new StringValues("application/octet-stream"); Response.Headers.ContentType = new StringValues("application/octet-stream");
Response.Headers.ContentLength = definition.Size; Response.Headers.ContentLength = definition.Size;
Response.Headers.ETag = definition.MungedName + "_direct"; Response.Headers.ETag = definition.MungedName + "_direct";
foreach (var part in definition.Parts)
foreach (var part in definition.Parts.OrderBy(p => p.Index))
{ {
await using var partStream = await _authoredFiles.StreamForPart(mungedName, (int)part.Index); await _authoredFiles.StreamForPart(mungedName, (int)part.Index, async stream =>
await partStream.CopyToAsync(Response.Body); {
await stream.CopyToAsync(Response.Body);
});
} }
} }
} }

View File

@ -1,6 +1,11 @@
using System.Collections.Concurrent;
using System.Diagnostics;
using System.IO.Compression; using System.IO.Compression;
using System.Web; using System.Web;
using Amazon.S3;
using Amazon.S3.Model;
using Microsoft.Extensions.Logging; using Microsoft.Extensions.Logging;
using Microsoft.IO;
using Wabbajack.BuildServer; using Wabbajack.BuildServer;
using Wabbajack.Common; using Wabbajack.Common;
using Wabbajack.DTOs.CDN; using Wabbajack.DTOs.CDN;
@ -16,88 +21,207 @@ public class AuthorFiles
private readonly ILogger<AuthorFiles> _logger; private readonly ILogger<AuthorFiles> _logger;
private readonly AppSettings _settings; private readonly AppSettings _settings;
private readonly DTOSerializer _dtos; private readonly DTOSerializer _dtos;
private Dictionary<string, FileDefinition> _byServerId = new(); private ConcurrentDictionary<string, FileDefinition> _byServerId = new();
private readonly IAmazonS3 _s3;
private readonly ConcurrentDictionary<string,FileDefinitionMetadata> _fileCache;
private readonly string _bucketName;
private ConcurrentDictionary<RelativePath, long> _allObjects = new();
private HashSet<RelativePath> _mangledNames;
private readonly RecyclableMemoryStreamManager _streamPool;
private readonly HttpClient _httpClient;
public AbsolutePath AuthorFilesLocation => _settings.AuthoredFilesFolder.ToAbsolutePath(); private Uri _baseUri => new($"https://r2.wabbajack.org/");
public AuthorFiles(ILogger<AuthorFiles> logger, AppSettings settings, DTOSerializer dtos) public AuthorFiles(ILogger<AuthorFiles> logger, AppSettings settings, DTOSerializer dtos, IAmazonS3 s3, HttpClient client)
{ {
_httpClient = client;
_s3 = s3;
_logger = logger; _logger = logger;
_settings = settings; _settings = settings;
_dtos = dtos; _dtos = dtos;
_fileCache = new ConcurrentDictionary<string, FileDefinitionMetadata>();
_bucketName = settings.AuthoredFilesS3.BucketName;
_ = PrimeCache();
_streamPool = new RecyclableMemoryStreamManager();
} }
public IEnumerable<AbsolutePath> AllDefinitions => AuthorFilesLocation.EnumerateFiles("definition.json.gz"); private async Task PrimeCache()
/// <summary>
/// Total unused space available for authored files
/// </summary>
public long FreeSpace => new DriveInfo(AuthorFilesLocation.ToString()).AvailableFreeSpace;
/// <summary>
/// Total space available for authored files
/// </summary>
public long TotalSpace => new DriveInfo(AuthorFilesLocation.ToString()).TotalSize;
/// <summary>
///
/// </summary>
/// <returns></returns>
public async Task<FileDefinitionMetadata[]> AllAuthoredFiles()
{ {
var defs = new List<FileDefinitionMetadata>(); try
foreach (var file in AllDefinitions)
{ {
defs.Add(new FileDefinitionMetadata var cacheFile = _settings.AuthoredFilesS3.BucketCacheFile.ToAbsolutePath();
if (!cacheFile.FileExists())
{ {
Definition = await ReadDefinition(file), var allObjects = await AllObjects().ToArrayAsync();
Updated = file.LastModifiedUtc() foreach (var obje in allObjects)
{
_allObjects.TryAdd(obje.Key.ToRelativePath(), obje.LastModified.ToFileTimeUtc());
}
SaveBucketCacheFile(cacheFile);
}
else
{
LoadBucketCacheFile(cacheFile);
}
_mangledNames = _allObjects
.Where(f => f.Key.EndsWith("definition.json.gz"))
.Select(f => f.Key.Parent)
.ToHashSet();
await Parallel.ForEachAsync(_mangledNames, async (name, _) =>
{
if (!_allObjects.TryGetValue(name.Combine("definition.json.gz"), out var value))
return;
_logger.LogInformation("Priming {Name}", name);
var definition = await PrimeDefinition(name);
var metadata = new FileDefinitionMetadata()
{
Definition = definition,
Updated = DateTime.FromFileTimeUtc(value)
};
_fileCache.TryAdd(definition.MungedName, metadata);
_byServerId.TryAdd(definition.ServerAssignedUniqueId!, definition);
}); });
}
_byServerId = defs.ToDictionary(f => f.Definition.ServerAssignedUniqueId!, f => f.Definition); _logger.LogInformation("Finished priming cache, {Count} files {Size} GB cached", _fileCache.Count,
return defs.ToArray(); _fileCache.Sum(s => s.Value.Definition.Size) / (1024 * 1024 * 1024));
}
public async Task<Stream> StreamForPart(string mungedName, int part) }
catch (Exception ex)
{ {
return AuthorFilesLocation.Combine(mungedName, "parts", part.ToString()).Open(FileMode.Open); _logger.LogCritical(ex, "Failed to prime cache");
}
} }
public async Task<Stream> CreatePart(string mungedName, int part) private void SaveBucketCacheFile(AbsolutePath cacheFile)
{ {
return AuthorFilesLocation.Combine(mungedName, "parts", part.ToString()).Open(FileMode.Create, FileAccess.Write, FileShare.None); using var file = cacheFile.Open(FileMode.Create, FileAccess.Write);
using var sw = new StreamWriter(file);
foreach(var entry in _allObjects)
{
sw.WriteLine($"{entry.Key}||{entry.Value}");
}
}
/// <summary>
/// Fills <c>_allObjects</c> from a bucket cache file in which every line has the form
/// <c>key||timestamp</c>, the timestamp being a 64-bit integer.
/// </summary>
/// <param name="cacheFile">Cache file to read; it must already exist.</param>
/// <remarks>
/// Lines that do not match the expected form are logged and skipped, so one damaged line
/// does not make the whole load fail. The line is split on its last <c>||</c>, which means
/// only the trailing timestamp is ever parsed as a number.
/// </remarks>
private void LoadBucketCacheFile(AbsolutePath cacheFile)
{
    const string separator = "||";

    using var file = cacheFile.Open(FileMode.Open, FileAccess.Read);
    using var sr = new StreamReader(file);

    string? line;
    // ReadLine returns null once the end of the stream is reached.
    while ((line = sr.ReadLine()) is not null)
    {
        var splitAt = line.LastIndexOf(separator, StringComparison.Ordinal);

        // splitAt <= 0 covers both "no separator" and "empty key".
        // The timestamp is machine-written, so parse it culture-independently.
        if (splitAt <= 0 ||
            !long.TryParse(
                line.Substring(splitAt + separator.Length),
                System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture,
                out var timestamp))
        {
            _logger.LogWarning("Skipping malformed bucket cache line: {Line}", line);
            continue;
        }

        _allObjects.TryAdd(line.Substring(0, splitAt).ToRelativePath(), timestamp);
    }
}
/// <summary>
/// Downloads <c>{name}/definition.json.gz</c> from the base URI over HTTP and deserializes it.
/// </summary>
/// <param name="name">Folder name of the authored file whose definition should be fetched.</param>
/// <returns>The deserialized file definition.</returns>
/// <exception cref="HttpRequestException">The server answered with a non-success status code.</exception>
private async Task<FileDefinition> PrimeDefinition(RelativePath name)
{
    var uri = _baseUri + $"{name}/definition.json.gz";
    using var response = await _httpClient.GetAsync(uri);

    // Fail with the HTTP status instead of handing an error page to the gzip reader,
    // which would otherwise surface as an unrelated decompression error.
    response.EnsureSuccessStatusCode();

    await using var body = await response.Content.ReadAsStreamAsync();
    return await ReadDefinition(body);
}
/// <summary>
/// Lazily enumerates every object in the configured bucket, requesting a further page from
/// S3 for as long as the previous response reports that it was truncated.
/// </summary>
/// <returns>The objects, in the order the listing responses return them.</returns>
private async IAsyncEnumerable<S3Object> AllObjects()
{
    var timer = Stopwatch.StartNew();
    var runningTotal = 0;
    _logger.Log(LogLevel.Information, "Listing all objects in S3");

    var page = await _s3.ListObjectsV2Async(new ListObjectsV2Request()
    {
        BucketName = _bucketName,
    });

    while (true)
    {
        runningTotal += page.S3Objects.Count;
        _logger.Log(LogLevel.Information, "Got {S3ObjectsCount} objects, {Total} total", page.S3Objects.Count, runningTotal);

        foreach (var s3Object in page.S3Objects)
        {
            yield return s3Object;
        }

        // A page that is not truncated is the last one.
        if (!page.IsTruncated)
        {
            break;
        }

        page = await _s3.ListObjectsV2Async(new ListObjectsV2Request
        {
            ContinuationToken = page.NextContinuationToken,
            BucketName = _bucketName,
        });
    }

    _logger.LogInformation("Finished listing all objects in S3 in {Elapsed}", timer.Elapsed);
}
/// <summary>
/// Metadata of every authored file currently held in the in-memory file cache.
/// </summary>
public IEnumerable<FileDefinitionMetadata> AllDefinitions
{
    get { return _fileCache.Values; }
}
/// <summary>
/// Combined size, in bytes, of all file definitions held in the in-memory file cache.
/// </summary>
public long UsedSpace
{
    get { return _fileCache.Select(entry => entry.Value.Definition.Size).Sum(); }
}
/// <summary>
/// Requests one part of an authored file over HTTP and hands the response stream to
/// <paramref name="func"/>. The stream and the response are disposed when
/// <paramref name="func"/> completes.
/// </summary>
/// <param name="mungedName">Cache key of the authored file; it must be present in the file cache.</param>
/// <param name="part">Zero-based index of the part; must be smaller than the definition's part count.</param>
/// <param name="func">Consumer that reads the part's content from the supplied stream.</param>
/// <exception cref="KeyNotFoundException"><paramref name="mungedName"/> is not in the file cache.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="part"/> is negative or not a valid part index.</exception>
/// <exception cref="HttpRequestException">The server answered with a non-success status code.</exception>
public async Task StreamForPart(string mungedName, int part, Func<Stream, Task> func)
{
    var definition = _fileCache[mungedName].Definition;
    if (part < 0 || part >= definition.Parts.Length)
    {
        throw new ArgumentOutOfRangeException(nameof(part));
    }

    var uri = _baseUri + $"{mungedName}/parts/{part}";

    // ResponseHeadersRead returns as soon as the headers arrive, so the part is streamed
    // to the consumer instead of being buffered in memory first.
    using var response = await _httpClient.GetAsync(uri, HttpCompletionOption.ResponseHeadersRead);

    // Never pass an error body to the consumer as if it were file content.
    response.EnsureSuccessStatusCode();

    await using var content = await response.Content.ReadAsStreamAsync();
    await func(content);
}
/// <summary>
/// Uploads one part of an authored file to the bucket under <c>{mungedName}/parts/{part}</c>
/// and records the new object in <c>_allObjects</c>.
/// </summary>
/// <param name="mungedName">Folder name of the authored file inside the bucket.</param>
/// <param name="part">Index of the part; used as the object name below <c>parts/</c>.</param>
/// <param name="ms">Stream supplying the part's content, positioned where the upload should start.</param>
public async Task WritePart(string mungedName, int part, Stream ms)
{
    var partPath = mungedName.ToRelativePath().Combine("parts", part.ToString());

    await _s3.PutObjectAsync(new PutObjectRequest
    {
        BucketName = _bucketName,
        // Object keys always use forward slashes, whatever the path type renders.
        Key = partPath.ToString().Replace("\\", "/"),
        InputStream = ms,
        DisablePayloadSigning = true
    });

    // DeleteFile finds the objects to remove by scanning _allObjects, so a part that is not
    // recorded here would be left behind in the bucket when its file is deleted.
    // NOTE(review): assumes this path compares equal to the one produced from the listed
    // S3 key via ToRelativePath() - confirm against RelativePath's equality.
    _allObjects[partPath] = DateTime.UtcNow.ToFileTimeUtc();
}
public async Task WriteDefinition(FileDefinition definition) public async Task WriteDefinition(FileDefinition definition)
{ {
var path = AuthorFilesLocation.Combine(definition.MungedName, "definition.json.gz");
path.Parent.CreateDirectory();
path.Parent.Combine("parts").CreateDirectory();
await using var ms = new MemoryStream(); await using var ms = new MemoryStream();
await using (var gz = new GZipStream(ms, CompressionLevel.Optimal, true)) await using (var gz = new GZipStream(ms, CompressionLevel.Optimal, true))
{ {
await _dtos.Serialize(definition, gz); await _dtos.Serialize(definition, gz);
} }
ms.Position = 0;
await path.WriteAllBytesAsync(ms.ToArray()); await _s3.PutObjectAsync(new PutObjectRequest
{
BucketName = _bucketName,
Key = definition.MungedName.ToRelativePath().Combine("definition.json.gz").ToString().Replace("\\", "/"),
InputStream = ms,
DisablePayloadSigning = true
});
_fileCache.TryAdd(definition.MungedName, new FileDefinitionMetadata
{
Definition = definition,
Updated = DateTime.UtcNow
});
_byServerId.TryAdd(definition.ServerAssignedUniqueId!, definition);
} }
public async Task<FileDefinition> ReadDefinition(string mungedName) public async Task<FileDefinition> ReadDefinition(string mungedName)
{ {
return await ReadDefinition(AuthorFilesLocation.Combine(mungedName, "definition.json.gz")); return _fileCache[mungedName].Definition;
} }
public bool IsDefinition(string mungedName) public bool IsDefinition(string mungedName)
{ {
return AuthorFilesLocation.Combine(mungedName, "definition.json.gz").FileExists(); return _fileCache.ContainsKey(mungedName);
} }
private async Task<FileDefinition> ReadDefinition(AbsolutePath file)
private async Task<FileDefinition> ReadDefinition(Stream stream)
{ {
var gz = new GZipStream(new MemoryStream(await file.ReadAllBytesAsync()), CompressionMode.Decompress); var gz = new GZipStream(stream, CompressionMode.Decompress);
var definition = (await _dtos.DeserializeAsync<FileDefinition>(gz))!; var definition = (await _dtos.DeserializeAsync<FileDefinition>(gz))!;
return definition; return definition;
} }
@ -111,15 +235,33 @@ public class AuthorFiles
public async Task DeleteFile(FileDefinition definition) public async Task DeleteFile(FileDefinition definition)
{ {
var folder = AuthorFilesLocation.Combine(definition.MungedName); var allFiles = _allObjects.Where(f => f.Key.TopParent.ToString() == definition.MungedName)
folder.DeleteDirectory(); .Select(f => f.Key).ToList();
foreach (var batch in allFiles.Batch(512))
{
var batchedArray = batch.ToHashSet();
_logger.LogInformation("Deleting {Count} files for prefix {Prefix}", batchedArray.Count, definition.MungedName);
await _s3.DeleteObjectsAsync(new DeleteObjectsRequest
{
BucketName = _bucketName,
Objects = batchedArray.Select(f => new KeyVersion
{
Key = f.ToString().Replace("\\", "/")
}).ToList()
});
foreach (var key in batchedArray)
{
_allObjects.TryRemove(key, out _);
}
} }
public async Task<FileDefinition> ReadDefinitionForServerId(string serverAssignedUniqueId) _byServerId.TryRemove(definition.ServerAssignedUniqueId!, out _);
_fileCache.TryRemove(definition.MungedName, out _);
}
public async ValueTask<FileDefinition> ReadDefinitionForServerId(string serverAssignedUniqueId)
{ {
if (_byServerId.TryGetValue(serverAssignedUniqueId, out var found))
return found;
await AllAuthoredFiles();
return _byServerId[serverAssignedUniqueId]; return _byServerId[serverAssignedUniqueId];
} }

View File

@ -11,7 +11,7 @@
</head> </head>
<body> <body>
<H1>Authored Files:</H1> <H1>Authored Files:</H1>
<H3>{{$.FreeSpace}} remaining of {{$.TotalSpace}} </H3> <H3>{{$.UsedSpace}}</H3>
<table id="inlined-data" class="table table-striped table-bordered" style="width:100%" > <table id="inlined-data" class="table table-striped table-bordered" style="width:100%" >
<thead> <thead>
<tr> <tr>

View File

@ -5,6 +5,8 @@ using System.Runtime.InteropServices;
using System.Text.Json; using System.Text.Json;
using System.Text.Json.Serialization; using System.Text.Json.Serialization;
using System.Threading.Tasks; using System.Threading.Tasks;
using Amazon.Runtime;
using Amazon.S3;
using cesi.DTOs; using cesi.DTOs;
using CouchDB.Driver; using CouchDB.Driver;
using CouchDB.Driver.Options; using CouchDB.Driver.Options;
@ -39,10 +41,10 @@ using Wabbajack.Server.Services;
using Wabbajack.Services.OSIntegrated.TokenProviders; using Wabbajack.Services.OSIntegrated.TokenProviders;
using Wabbajack.Networking.WabbajackClientApi; using Wabbajack.Networking.WabbajackClientApi;
using Wabbajack.Paths.IO; using Wabbajack.Paths.IO;
using Wabbajack.Server.DTOs;
using Wabbajack.VFS; using Wabbajack.VFS;
using YamlDotNet.Serialization.NamingConventions; using YamlDotNet.Serialization.NamingConventions;
using Client = Wabbajack.Networking.GitHub.Client; using Client = Wabbajack.Networking.GitHub.Client;
using Metric = Wabbajack.Server.DTOs.Metric;
namespace Wabbajack.Server; namespace Wabbajack.Server;
@ -93,6 +95,16 @@ public class Startup
services.AddSingleton<TarLog>(); services.AddSingleton<TarLog>();
services.AddAllSingleton<IHttpDownloader, SingleThreadedDownloader>(); services.AddAllSingleton<IHttpDownloader, SingleThreadedDownloader>();
services.AddDownloadDispatcher(useLoginDownloaders:false, useProxyCache:false); services.AddDownloadDispatcher(useLoginDownloaders:false, useProxyCache:false);
// Single shared S3 client; credentials and endpoint both come from AppSettings.AuthoredFilesS3.
services.AddSingleton<IAmazonS3>(provider =>
{
    var appSettings = provider.GetRequiredService<AppSettings>();
    var credentials = new BasicAWSCredentials(
        appSettings.AuthoredFilesS3.AccessKey,
        appSettings.AuthoredFilesS3.SecretKey);
    var clientConfig = new AmazonS3Config
    {
        ServiceURL = appSettings.AuthoredFilesS3.ServiceURL,
    };
    return new AmazonS3Client(credentials, clientConfig);
});
services.AddTransient(s => services.AddTransient(s =>
{ {
var settings = s.GetRequiredService<AppSettings>(); var settings = s.GetRequiredService<AppSettings>();
@ -243,5 +255,7 @@ public class Startup
// Trigger the internal update code // Trigger the internal update code
app.ApplicationServices.GetRequiredService<NexusCacheManager>(); app.ApplicationServices.GetRequiredService<NexusCacheManager>();
app.ApplicationServices.GetRequiredService<DiscordBackend>(); app.ApplicationServices.GetRequiredService<DiscordBackend>();
app.ApplicationServices.GetRequiredService<AuthorFiles>();
} }
} }

View File

@ -12,6 +12,7 @@
</PropertyGroup> </PropertyGroup>
<ItemGroup> <ItemGroup>
<PackageReference Include="AWSSDK.S3" Version="3.7.205.9" />
<PackageReference Include="cesi.DTOs" Version="1.0.0" /> <PackageReference Include="cesi.DTOs" Version="1.0.0" />
<PackageReference Include="Chronic.Core" Version="0.4.0" /> <PackageReference Include="Chronic.Core" Version="0.4.0" />
<PackageReference Include="Dapper" Version="2.0.123" /> <PackageReference Include="Dapper" Version="2.0.123" />
@ -22,6 +23,7 @@
<PackageReference Include="Microsoft.AspNetCore.Mvc.NewtonsoftJson" Version="6.0.11" /> <PackageReference Include="Microsoft.AspNetCore.Mvc.NewtonsoftJson" Version="6.0.11" />
<PackageReference Include="Microsoft.AspNetCore.StaticFiles" Version="2.2.0" /> <PackageReference Include="Microsoft.AspNetCore.StaticFiles" Version="2.2.0" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="6.0.3" /> <PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="6.0.3" />
<PackageReference Include="Microsoft.IO.RecyclableMemoryStream" Version="2.3.2" />
<PackageReference Include="Nettle" Version="1.3.0" /> <PackageReference Include="Nettle" Version="1.3.0" />
<PackageReference Include="System.Data.SqlClient" Version="4.8.5" /> <PackageReference Include="System.Data.SqlClient" Version="4.8.5" />
</ItemGroup> </ItemGroup>