Switch CDN storage to R2/S3

This commit is contained in:
Timothy Baldridge 2023-10-12 06:04:38 -06:00
parent 0cba392b66
commit 3cc50d9ff3
9 changed files with 269 additions and 67 deletions

View File

@ -58,7 +58,14 @@ public static class IEnumerableExtensions
return data;
}
public static IEnumerable<IEnumerable<T>> Partition<T>(this IEnumerable<T> coll, int size)
/// <summary>
/// Splits the collection into `count` parts
/// </summary>
/// <param name="coll"></param>
/// <param name="count"></param>
/// <typeparam name="T"></typeparam>
/// <returns></returns>
public static IEnumerable<IEnumerable<T>> Partition<T>(this IEnumerable<T> coll, int count)
{
var asList = coll.ToList();
@ -70,7 +77,30 @@ public static class IEnumerableExtensions
}
}
return Enumerable.Range(0, size).Select(offset => SkipEnumerable(asList, offset, size));
return Enumerable.Range(0, count).Select(offset => SkipEnumerable(asList, offset, count));
}
/// <summary>
/// Splits the collection into consecutive batches of at most <paramref name="size"/> items each.
/// Every batch except possibly the last holds exactly <paramref name="size"/> items; the final
/// batch holds whatever remains and is omitted entirely when nothing is left over.
/// </summary>
/// <param name="coll">Sequence to split; it is enumerated lazily as batches are consumed.</param>
/// <param name="size">Maximum number of items per batch; must be greater than zero.</param>
/// <typeparam name="T">Element type of the sequence.</typeparam>
/// <returns>The batches, in source order. Each batch is a freshly allocated list.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="coll"/> is null.</exception>
/// <exception cref="System.ArgumentOutOfRangeException"><paramref name="size"/> is zero or negative.</exception>
public static IEnumerable<IEnumerable<T>> Batch<T>(this IEnumerable<T> coll, int size)
{
    // Validate here rather than inside the iterator: an iterator body does not run until the
    // first MoveNext, which would postpone the failure to wherever the result is enumerated.
    if (coll == null)
        throw new System.ArgumentNullException(nameof(coll));
    if (size <= 0)
        throw new System.ArgumentOutOfRangeException(nameof(size), size, "Batch size must be greater than zero.");

    return Iterate();

    IEnumerable<IEnumerable<T>> Iterate()
    {
        var current = new List<T>();
        foreach (var itm in coll)
        {
            current.Add(itm);
            if (current.Count == size)
            {
                yield return current;
                // Start a new list instead of clearing: the caller may still hold the one just yielded.
                current = new List<T>();
            }
        }

        // Emit the trailing partial batch, if any.
        if (current.Count > 0)
            yield return current;
    }
}

View File

@ -93,7 +93,7 @@ public class Client
{
_logger.LogError("HTTP Error: {Result}", result);
await SendMetric("rebound", "Error", false);
Environment.Exit(0);
// Environment.Exit(0);
}
}

View File

@ -1,4 +1,5 @@
using Microsoft.Extensions.Configuration;
using Amazon.S3;
using Microsoft.Extensions.Configuration;
using Wabbajack.Paths;
namespace Wabbajack.BuildServer;
@ -26,8 +27,6 @@ public class AppSettings
public string DiscordKey { get; set; }
public string AuthoredFilesFolder { get; set; }
public string PatchesFilesFolder { get; set; }
public string MirrorFilesFolder { get; set; }
public string NexusCacheFolder { get; set; }
@ -37,6 +36,19 @@ public class AppSettings
public CouchDBSetting CesiDB { get; set; }
public CouchDBSetting MetricsDB { get; set; }
public S3Settings AuthoredFilesS3 { get; set; }
}
/// <summary>
/// Connection settings for an S3-compatible object store. Exposed on
/// <c>AppSettings.AuthoredFilesS3</c> for the bucket that holds authored files.
/// </summary>
public class S3Settings
{
/// <summary>Access key; first argument of the credentials used to build the S3 client.</summary>
public string AccessKey { get; set; }
/// <summary>Secret key; second argument of the credentials used to build the S3 client.</summary>
public string SecretKey { get; set; }
/// <summary>Endpoint assigned to the S3 client configuration's <c>ServiceURL</c>.</summary>
public string ServiceURL { get; set; }
/// <summary>Name of the bucket used for every list, put and delete request.</summary>
public string BucketName { get; set; }
/// <summary>
/// Path of a local file caching the bucket's object listing (key and last-modified time).
/// When the file exists it is loaded instead of listing the bucket; otherwise the bucket
/// is listed and the file is written.
/// </summary>
public string BucketCacheFile { get; set; }
}
public class CouchDBSetting

View File

@ -147,7 +147,7 @@ public class AuthorControls : ControllerBase
public async Task<IActionResult> HomePage()
{
var user = User.FindFirstValue(ClaimTypes.Name);
var files = (await _authorFiles.AllAuthoredFiles())
var files = _authorFiles.AllDefinitions
.Where(af => af.Definition.Author == user)
.Select(af => new
{

View File

@ -68,8 +68,7 @@ public class AuthoredFiles : ControllerBase
$"Hashes don't match for index {index}. Sizes ({ms.Length} vs {part.Size}). Hashes ({hash} vs {part.Hash}");
ms.Position = 0;
await using var partStream = await _authoredFiles.CreatePart(definition.MungedName, (int)index);
await ms.CopyToAsync(partStream, token);
await _authoredFiles.WritePart(definition.MungedName, (int) index, ms);
return Ok(part.Hash.ToBase64());
}
@ -123,7 +122,7 @@ public class AuthoredFiles : ControllerBase
public async Task<IActionResult> DeleteUpload(string serverAssignedUniqueId)
{
var user = User.FindFirstValue(ClaimTypes.Name);
var definition = (await _authoredFiles.AllAuthoredFiles())
var definition = _authoredFiles.AllDefinitions
.First(f => f.Definition.ServerAssignedUniqueId == serverAssignedUniqueId)
.Definition;
if (definition.Author != user)
@ -145,12 +144,12 @@ public class AuthoredFiles : ControllerBase
[Route("")]
public async Task<ContentResult> UploadedFilesGet()
{
var files = await _authoredFiles.AllAuthoredFiles();
var files = _authoredFiles.AllDefinitions
.ToArray();
var response = _authoredFilesTemplate(new
{
Files = files.OrderByDescending(f => f.Updated).ToArray(),
TotalSpace = _authoredFiles.TotalSpace.Bytes().Humanize("#.##"),
FreeSpace = _authoredFiles.FreeSpace.Bytes().Humanize("#.##")
UsedSpace = _authoredFiles.UsedSpace.Bytes().Humanize("#.##"),
});
return new ContentResult
{
@ -172,10 +171,13 @@ public class AuthoredFiles : ControllerBase
Response.Headers.ContentType = new StringValues("application/octet-stream");
Response.Headers.ContentLength = definition.Size;
Response.Headers.ETag = definition.MungedName + "_direct";
foreach (var part in definition.Parts)
foreach (var part in definition.Parts.OrderBy(p => p.Index))
{
await using var partStream = await _authoredFiles.StreamForPart(mungedName, (int)part.Index);
await partStream.CopyToAsync(Response.Body);
await _authoredFiles.StreamForPart(mungedName, (int)part.Index, async stream =>
{
await stream.CopyToAsync(Response.Body);
});
}
}
}

View File

@ -1,6 +1,11 @@
using System.Collections.Concurrent;
using System.Diagnostics;
using System.IO.Compression;
using System.Web;
using Amazon.S3;
using Amazon.S3.Model;
using Microsoft.Extensions.Logging;
using Microsoft.IO;
using Wabbajack.BuildServer;
using Wabbajack.Common;
using Wabbajack.DTOs.CDN;
@ -16,88 +21,207 @@ public class AuthorFiles
private readonly ILogger<AuthorFiles> _logger;
private readonly AppSettings _settings;
private readonly DTOSerializer _dtos;
private Dictionary<string, FileDefinition> _byServerId = new();
private ConcurrentDictionary<string, FileDefinition> _byServerId = new();
private readonly IAmazonS3 _s3;
private readonly ConcurrentDictionary<string,FileDefinitionMetadata> _fileCache;
private readonly string _bucketName;
private ConcurrentDictionary<RelativePath, long> _allObjects = new();
private HashSet<RelativePath> _mangledNames;
private readonly RecyclableMemoryStreamManager _streamPool;
private readonly HttpClient _httpClient;
public AbsolutePath AuthorFilesLocation => _settings.AuthoredFilesFolder.ToAbsolutePath();
public AuthorFiles(ILogger<AuthorFiles> logger, AppSettings settings, DTOSerializer dtos)
private Uri _baseUri => new($"https://r2.wabbajack.org/");
public AuthorFiles(ILogger<AuthorFiles> logger, AppSettings settings, DTOSerializer dtos, IAmazonS3 s3, HttpClient client)
{
_httpClient = client;
_s3 = s3;
_logger = logger;
_settings = settings;
_dtos = dtos;
_fileCache = new ConcurrentDictionary<string, FileDefinitionMetadata>();
_bucketName = settings.AuthoredFilesS3.BucketName;
_ = PrimeCache();
_streamPool = new RecyclableMemoryStreamManager();
}
public IEnumerable<AbsolutePath> AllDefinitions => AuthorFilesLocation.EnumerateFiles("definition.json.gz");
/// <summary>
/// Total unused space available for authored files
/// </summary>
public long FreeSpace => new DriveInfo(AuthorFilesLocation.ToString()).AvailableFreeSpace;
/// <summary>
/// Total space available for authored files
/// </summary>
public long TotalSpace => new DriveInfo(AuthorFilesLocation.ToString()).TotalSize;
/// <summary>
///
/// </summary>
/// <returns></returns>
public async Task<FileDefinitionMetadata[]> AllAuthoredFiles()
private async Task PrimeCache()
{
var defs = new List<FileDefinitionMetadata>();
foreach (var file in AllDefinitions)
try
{
defs.Add(new FileDefinitionMetadata
var cacheFile = _settings.AuthoredFilesS3.BucketCacheFile.ToAbsolutePath();
if (!cacheFile.FileExists())
{
Definition = await ReadDefinition(file),
Updated = file.LastModifiedUtc()
var allObjects = await AllObjects().ToArrayAsync();
foreach (var obje in allObjects)
{
_allObjects.TryAdd(obje.Key.ToRelativePath(), obje.LastModified.ToFileTimeUtc());
}
SaveBucketCacheFile(cacheFile);
}
else
{
LoadBucketCacheFile(cacheFile);
}
_mangledNames = _allObjects
.Where(f => f.Key.EndsWith("definition.json.gz"))
.Select(f => f.Key.Parent)
.ToHashSet();
await Parallel.ForEachAsync(_mangledNames, async (name, _) =>
{
if (!_allObjects.TryGetValue(name.Combine("definition.json.gz"), out var value))
return;
_logger.LogInformation("Priming {Name}", name);
var definition = await PrimeDefinition(name);
var metadata = new FileDefinitionMetadata()
{
Definition = definition,
Updated = DateTime.FromFileTimeUtc(value)
};
_fileCache.TryAdd(definition.MungedName, metadata);
_byServerId.TryAdd(definition.ServerAssignedUniqueId!, definition);
});
_logger.LogInformation("Finished priming cache, {Count} files {Size} GB cached", _fileCache.Count,
_fileCache.Sum(s => s.Value.Definition.Size) / (1024 * 1024 * 1024));
}
catch (Exception ex)
{
_logger.LogCritical(ex, "Failed to prime cache");
}
}
/// <summary>
/// Dumps the in-memory object listing to <paramref name="cacheFile"/>, one entry per line
/// in the form <c>key||value</c>, where the value is the stored last-modified file time.
/// </summary>
/// <param name="cacheFile">Destination file, opened with <c>FileMode.Create</c> for writing.</param>
private void SaveBucketCacheFile(AbsolutePath cacheFile)
{
    using (var output = cacheFile.Open(FileMode.Create, FileAccess.Write))
    using (var writer = new StreamWriter(output))
    {
        foreach (var (objectKey, modified) in _allObjects)
        {
            writer.WriteLine($"{objectKey}||{modified}");
        }
    }
}
/// <summary>
/// Fills the in-memory object listing from a cache file written by
/// <see cref="SaveBucketCacheFile"/>. Each line is expected to be <c>key||value</c>, where
/// the value is a last-modified file time stored as an integer.
/// </summary>
/// <remarks>
/// Blank or malformed lines (for example a final line cut short by an interrupted write) are
/// logged and skipped, so one bad line cannot abort the whole cache load.
/// </remarks>
/// <param name="cacheFile">Cache file to read, opened with <c>FileMode.Open</c> for reading.</param>
private void LoadBucketCacheFile(AbsolutePath cacheFile)
{
    using var file = cacheFile.Open(FileMode.Open, FileAccess.Read);
    using var sr = new StreamReader(file);

    // ReadLine returns null at end of stream, so no separate EndOfStream check is needed.
    while (sr.ReadLine() is { } line)
    {
        if (line.Length == 0)
            continue;

        // Split on the LAST separator: the value is numeric, so a key containing "||" still parses.
        var separator = line.LastIndexOf("||", StringComparison.Ordinal);
        if (separator < 0 || !long.TryParse(line.Substring(separator + 2), out var lastModified))
        {
            _logger.LogWarning("Skipping malformed bucket cache line: {Line}", line);
            continue;
        }

        _allObjects.TryAdd(line.Substring(0, separator).ToRelativePath(), lastModified);
    }
}
/// <summary>
/// Downloads <c>{name}/definition.json.gz</c> from the base URI over HTTP and deserializes it.
/// </summary>
/// <param name="name">Prefix (munged name) under which the definition is stored.</param>
/// <returns>The deserialized file definition.</returns>
/// <exception cref="HttpRequestException">The server answered with a non-success status code.</exception>
private async Task<FileDefinition> PrimeDefinition(RelativePath name)
{
    var uri = _baseUri + $"{name}/definition.json.gz";
    using var response = await _httpClient.GetAsync(uri);

    // Surface HTTP failures as such instead of handing an error body to the gzip decoder.
    response.EnsureSuccessStatusCode();

    await using var body = await response.Content.ReadAsStreamAsync();
    return await ReadDefinition(body);
}
private async IAsyncEnumerable<S3Object> AllObjects()
{
var sw = Stopwatch.StartNew();
var total = 0;
_logger.Log(LogLevel.Information, "Listing all objects in S3");
var results = await _s3.ListObjectsV2Async(new ListObjectsV2Request()
{
BucketName = _bucketName,
});
TOP:
total += results.S3Objects.Count;
_logger.Log(LogLevel.Information, "Got {S3ObjectsCount} objects, {Total} total", results.S3Objects.Count, total);
foreach (var result in results.S3Objects)
{
yield return result;
}
_byServerId = defs.ToDictionary(f => f.Definition.ServerAssignedUniqueId!, f => f.Definition);
return defs.ToArray();
if (results.IsTruncated)
{
results = await _s3.ListObjectsV2Async(new ListObjectsV2Request
{
ContinuationToken = results.NextContinuationToken,
BucketName = _bucketName,
});
goto TOP;
}
_logger.LogInformation("Finished listing all objects in S3 in {Elapsed}", sw.Elapsed);
}
public async Task<Stream> StreamForPart(string mungedName, int part)
/// <summary>
/// Metadata for every file definition currently held in the in-memory cache.
/// </summary>
public IEnumerable<FileDefinitionMetadata> AllDefinitions
{
    get { return _fileCache.Values; }
}
/// <summary>
/// Combined size, in bytes, of all file definitions held in the cache
/// </summary>
public long UsedSpace
{
    get
    {
        return _fileCache
            .Select(entry => entry.Value.Definition.Size)
            .Sum();
    }
}
public async Task StreamForPart(string mungedName, int part, Func<Stream, Task> func)
{
return AuthorFilesLocation.Combine(mungedName, "parts", part.ToString()).Open(FileMode.Open);
var definition = _fileCache[mungedName].Definition;
if (part >= definition.Parts.Length)
throw new ArgumentOutOfRangeException(nameof(part));
var uri = _baseUri + $"{mungedName}/parts/{part}";
using var response = await _httpClient.GetAsync(uri);
await func(await response.Content.ReadAsStreamAsync());
}
public async Task<Stream> CreatePart(string mungedName, int part)
public async Task WritePart(string mungedName, int part, Stream ms)
{
return AuthorFilesLocation.Combine(mungedName, "parts", part.ToString()).Open(FileMode.Create, FileAccess.Write, FileShare.None);
await _s3.PutObjectAsync(new PutObjectRequest
{
BucketName = _bucketName,
Key = mungedName.ToRelativePath().Combine("parts", part.ToString()).ToString().Replace("\\", "/"),
InputStream = ms,
DisablePayloadSigning = true
});
}
public async Task WriteDefinition(FileDefinition definition)
{
var path = AuthorFilesLocation.Combine(definition.MungedName, "definition.json.gz");
path.Parent.CreateDirectory();
path.Parent.Combine("parts").CreateDirectory();
await using var ms = new MemoryStream();
await using (var gz = new GZipStream(ms, CompressionLevel.Optimal, true))
{
await _dtos.Serialize(definition, gz);
}
await path.WriteAllBytesAsync(ms.ToArray());
ms.Position = 0;
await _s3.PutObjectAsync(new PutObjectRequest
{
BucketName = _bucketName,
Key = definition.MungedName.ToRelativePath().Combine("definition.json.gz").ToString().Replace("\\", "/"),
InputStream = ms,
DisablePayloadSigning = true
});
_fileCache.TryAdd(definition.MungedName, new FileDefinitionMetadata
{
Definition = definition,
Updated = DateTime.UtcNow
});
_byServerId.TryAdd(definition.ServerAssignedUniqueId!, definition);
}
public async Task<FileDefinition> ReadDefinition(string mungedName)
{
return await ReadDefinition(AuthorFilesLocation.Combine(mungedName, "definition.json.gz"));
return _fileCache[mungedName].Definition;
}
public bool IsDefinition(string mungedName)
{
return AuthorFilesLocation.Combine(mungedName, "definition.json.gz").FileExists();
return _fileCache.ContainsKey(mungedName);
}
private async Task<FileDefinition> ReadDefinition(AbsolutePath file)
private async Task<FileDefinition> ReadDefinition(Stream stream)
{
var gz = new GZipStream(new MemoryStream(await file.ReadAllBytesAsync()), CompressionMode.Decompress);
var gz = new GZipStream(stream, CompressionMode.Decompress);
var definition = (await _dtos.DeserializeAsync<FileDefinition>(gz))!;
return definition;
}
@ -111,15 +235,33 @@ public class AuthorFiles
public async Task DeleteFile(FileDefinition definition)
{
var folder = AuthorFilesLocation.Combine(definition.MungedName);
folder.DeleteDirectory();
var allFiles = _allObjects.Where(f => f.Key.TopParent.ToString() == definition.MungedName)
.Select(f => f.Key).ToList();
foreach (var batch in allFiles.Batch(512))
{
var batchedArray = batch.ToHashSet();
_logger.LogInformation("Deleting {Count} files for prefix {Prefix}", batchedArray.Count, definition.MungedName);
await _s3.DeleteObjectsAsync(new DeleteObjectsRequest
{
BucketName = _bucketName,
Objects = batchedArray.Select(f => new KeyVersion
{
Key = f.ToString().Replace("\\", "/")
}).ToList()
});
foreach (var key in batchedArray)
{
_allObjects.TryRemove(key, out _);
}
}
_byServerId.TryRemove(definition.ServerAssignedUniqueId!, out _);
_fileCache.TryRemove(definition.MungedName, out _);
}
public async Task<FileDefinition> ReadDefinitionForServerId(string serverAssignedUniqueId)
public async ValueTask<FileDefinition> ReadDefinitionForServerId(string serverAssignedUniqueId)
{
if (_byServerId.TryGetValue(serverAssignedUniqueId, out var found))
return found;
await AllAuthoredFiles();
return _byServerId[serverAssignedUniqueId];
}

View File

@ -11,7 +11,7 @@
</head>
<body>
<H1>Authored Files:</H1>
<H3>{{$.FreeSpace}} remaining of {{$.TotalSpace}} </H3>
<H3>{{$.UsedSpace}}</H3>
<table id="inlined-data" class="table table-striped table-bordered" style="width:100%" >
<thead>
<tr>

View File

@ -5,6 +5,8 @@ using System.Runtime.InteropServices;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;
using Amazon.Runtime;
using Amazon.S3;
using cesi.DTOs;
using CouchDB.Driver;
using CouchDB.Driver.Options;
@ -39,10 +41,10 @@ using Wabbajack.Server.Services;
using Wabbajack.Services.OSIntegrated.TokenProviders;
using Wabbajack.Networking.WabbajackClientApi;
using Wabbajack.Paths.IO;
using Wabbajack.Server.DTOs;
using Wabbajack.VFS;
using YamlDotNet.Serialization.NamingConventions;
using Client = Wabbajack.Networking.GitHub.Client;
using Metric = Wabbajack.Server.DTOs.Metric;
namespace Wabbajack.Server;
@ -93,6 +95,16 @@ public class Startup
services.AddSingleton<TarLog>();
services.AddAllSingleton<IHttpDownloader, SingleThreadedDownloader>();
services.AddDownloadDispatcher(useLoginDownloaders:false, useProxyCache:false);
services.AddSingleton<IAmazonS3>(s =>
{
var appSettings = s.GetRequiredService<AppSettings>();
var settings = new BasicAWSCredentials(appSettings.AuthoredFilesS3.AccessKey,
appSettings.AuthoredFilesS3.SecretKey);
return new AmazonS3Client(settings, new AmazonS3Config
{
ServiceURL = appSettings.AuthoredFilesS3.ServiceURL,
});
});
services.AddTransient(s =>
{
var settings = s.GetRequiredService<AppSettings>();
@ -243,5 +255,7 @@ public class Startup
// Trigger the internal update code
app.ApplicationServices.GetRequiredService<NexusCacheManager>();
app.ApplicationServices.GetRequiredService<DiscordBackend>();
app.ApplicationServices.GetRequiredService<AuthorFiles>();
}
}

View File

@ -12,6 +12,7 @@
</PropertyGroup>
<ItemGroup>
<PackageReference Include="AWSSDK.S3" Version="3.7.205.9" />
<PackageReference Include="cesi.DTOs" Version="1.0.0" />
<PackageReference Include="Chronic.Core" Version="0.4.0" />
<PackageReference Include="Dapper" Version="2.0.123" />
@ -22,6 +23,7 @@
<PackageReference Include="Microsoft.AspNetCore.Mvc.NewtonsoftJson" Version="6.0.11" />
<PackageReference Include="Microsoft.AspNetCore.StaticFiles" Version="2.2.0" />
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="6.0.3" />
<PackageReference Include="Microsoft.IO.RecyclableMemoryStream" Version="2.3.2" />
<PackageReference Include="Nettle" Version="1.3.0" />
<PackageReference Include="System.Data.SqlClient" Version="4.8.5" />
</ItemGroup>