WIP List Validation code

This commit is contained in:
Timothy Baldridge 2020-04-11 22:18:21 -06:00
parent e04ca9b411
commit f9f2c349c0
12 changed files with 305 additions and 32 deletions

View File

@ -6,6 +6,7 @@ using System.Security.Policy;
using System.Text;
using System.Threading.Tasks;
using HtmlAgilityPack;
using Wabbajack.BuildServer.BackendServices;
using Wabbajack.BuildServer.Model.Models;
using Wabbajack.BuildServer.Models;
using Wabbajack.BuildServer.Models.JobQueue;
@ -36,6 +37,21 @@ namespace Wabbajack.BuildServer.Test
Assert.Equal("test_list", data.First().Links.MachineURL);
}
/// <summary>
/// Runs <see cref="ListIngest"/> against the generated test metadata and checks that every
/// list in <see cref="ModListMetaData"/> is reported as indexed afterwards.
/// </summary>
[Fact]
public async Task CanIngestModLists()
{
    // Point the ingest service at the metadata produced for this test.
    var metadataUri = await MakeModList();
    Consts.ModlistMetadataURL = metadataUri.ToString();

    var sql = Fixture.GetService<SqlService>();
    var settings = Fixture.GetService<AppSettings>();
    var ingest = new ListIngest(sql, settings);
    await ingest.Execute();

    // Each list must now be indexed under its machine URL and download hash.
    foreach (var metadata in ModListMetaData)
    {
        var indexed = await sql.HaveIndexedModlist(metadata.Links.MachineURL, metadata.DownloadMetadata.Hash);
        Assert.True(indexed);
    }
}
[Fact]
public async Task CanValidateModLists()
{
@ -146,7 +162,7 @@ namespace Wabbajack.BuildServer.Test
modListData.ToJson(es);
}
var modListMetaData = new List<ModlistMetadata>
ModListMetaData = new List<ModlistMetadata>
{
new ModlistMetadata
{
@ -168,9 +184,11 @@ namespace Wabbajack.BuildServer.Test
var metadataPath = "test_mod_list_metadata.json".RelativeTo(Fixture.ServerPublicFolder);
modListMetaData.ToJson(metadataPath);
ModListMetaData.ToJson(metadataPath);
return new Uri(MakeURL("test_mod_list_metadata.json"));
}
/// <summary>
/// Modlist metadata entries that are serialized to the test metadata JSON file;
/// tests iterate this list to check what the server ingested.
/// </summary>
public List<ModlistMetadata> ModListMetaData { get; set; }
}
}

View File

@ -320,9 +320,9 @@ GO
/****** Object: Table [dbo].[ModLists] Script Date: 4/2/2020 3:59:19 PM ******/
CREATE TABLE [dbo].[ModLists](
[MachineURL] [nvarchar](50) NOT NULL,
[Summary] [nvarchar](max) NOT NULL,
[Hash] [bigint] NOT NULL,
[Metadata] [nvarchar](max) NOT NULL,
[DetailedStatus] [nvarchar](max) NOT NULL,
[Modlist] [nvarchar](max) NOT NULL,
CONSTRAINT [PK_ModLists] PRIMARY KEY CLUSTERED
(
[MachineURL] ASC
@ -330,6 +330,38 @@ CREATE TABLE [dbo].[ModLists](
) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]
GO
/****** Object: Table [dbo].[ModListArchives] Script Date: 4/11/2020 10:33:20 AM ******/
-- One row per archive of an ingested modlist, keyed by (MachineUrl, Hash).
-- The server writes [State] as the JSON form of the archive's download state and
-- [PrimaryKeyString] as that state's primary-key string.
CREATE TABLE [dbo].[ModListArchives](
[MachineUrl] [nvarchar](50) NOT NULL,
[Hash] [bigint] NOT NULL,
[PrimaryKeyString] [nvarchar](max) NOT NULL,
[Size] [bigint] NOT NULL,
[State] [nvarchar](max) NOT NULL,
CONSTRAINT [PK_ModListArchive] PRIMARY KEY CLUSTERED
(
[MachineUrl] ASC,
[Hash] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY]
GO
/****** Object: Table [dbo].[ModListArchiveStatus] Script Date: 4/11/2020 9:44:25 PM ******/
-- Validation result per archive, keyed by (PrimaryKeyStringHash, Hash).
-- [PrimaryKeyString] is nvarchar(max), so the key uses a fixed-size hash of it instead:
-- the server inserts HASHBYTES('SHA2_256', PrimaryKeyString), which is 32 bytes.
-- [IsValid] is written from a boolean validation result.
CREATE TABLE [dbo].[ModListArchiveStatus](
[PrimaryKeyStringHash] [binary](32) NOT NULL,
[Hash] [bigint] NOT NULL,
[PrimaryKeyString] [nvarchar](max) NOT NULL,
[IsValid] [tinyint] NOT NULL,
CONSTRAINT [PK_ModListArchiveStatus] PRIMARY KEY CLUSTERED
(
[PrimaryKeyStringHash] ASC,
[Hash] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]
GO
/****** Object: Table [dbo].[Metrics] Script Date: 3/28/2020 4:58:59 PM ******/
SET ANSI_NULLS ON
GO

View File

@ -0,0 +1,45 @@
using System;
using System.Threading;
using System.Threading.Tasks;
using Wabbajack.BuildServer.Model.Models;
using Wabbajack.Common;
namespace Wabbajack.BuildServer.BackendServices
{
    /// <summary>
    /// Base class for build-server background services. A service implements
    /// <see cref="Execute"/>; <see cref="RunLoop"/> calls it repeatedly, waiting
    /// <see cref="PollRate"/> between runs.
    /// </summary>
    public abstract class ABackendService
    {
        /// <param name="sql">SQL access object exposed to subclasses as <see cref="Sql"/>.</param>
        /// <param name="settings">Settings exposed to subclasses as <see cref="Settings"/>.</param>
        /// <param name="pollRate">Delay between two consecutive runs of <see cref="Execute"/>.</param>
        protected ABackendService(SqlService sql, AppSettings settings, TimeSpan pollRate)
        {
            Sql = sql;
            Settings = settings;
            PollRate = pollRate;
        }

        /// <summary>Delay between two consecutive runs of <see cref="Execute"/>.</summary>
        public TimeSpan PollRate { get; }

        /// <summary>
        /// Runs <see cref="Execute"/> in a loop until <paramref name="token"/> is cancelled.
        /// Exceptions thrown by a run are logged and do not stop the loop.
        /// </summary>
        /// <param name="token">Cancels the loop; also interrupts the wait between runs.</param>
        public async Task RunLoop(CancellationToken token)
        {
            while (!token.IsCancellationRequested)
            {
                try
                {
                    await Execute();
                }
                catch (Exception ex)
                {
                    Utils.Log($"Error executing {this}");
                    Utils.Log(ex.ToString());
                }

                try
                {
                    // Pass the token so shutdown is not held up for a full poll interval.
                    await Task.Delay(PollRate, token);
                }
                catch (OperationCanceledException)
                {
                    // Cancellation during the wait is the normal way out; return without throwing.
                    break;
                }
            }
        }

        /// <summary>Performs one run of the service's work.</summary>
        public abstract Task Execute();

        protected AppSettings Settings { get; set; }

        protected SqlService Sql { get; set; }
    }
}

View File

@ -0,0 +1,67 @@
using System;
using System.Collections.Generic;
using System.IO.Compression;
using System.Threading.Tasks;
using Wabbajack.BuildServer.Model.Models;
using Wabbajack.Common;
using Wabbajack.Lib;
using Wabbajack.Lib.Downloaders;
using Wabbajack.Lib.ModListRegistry;
namespace Wabbajack.BuildServer.BackendServices
{
    /// <summary>
    /// Backend service that reads the modlist metadata feed and ingests every list that is
    /// not yet indexed under its current hash. Polls once a minute.
    /// </summary>
    public class ListIngest : ABackendService
    {
        public ListIngest(SqlService sql, AppSettings settings) : base(sql, settings, TimeSpan.FromMinutes(1))
        {
        }

        /// <summary>
        /// Fetches the metadata feed, downloads each un-indexed modlist when needed, stores it
        /// through SQL, and runs archive validation if at least one list was ingested.
        /// </summary>
        public override async Task Execute()
        {
            var http = new Common.Http.Client();
            var feed = await http.GetJsonAsync<List<ModlistMetadata>>(Consts.ModlistMetadataURL);

            var ingestedAny = false;
            foreach (var metadata in feed)
            {
                var machineUrl = metadata.Links.MachineURL;

                // Lists already indexed under this exact hash need no work.
                var alreadyIndexed = await Sql.HaveIndexedModlist(machineUrl, metadata.DownloadMetadata.Hash);
                if (alreadyIndexed)
                {
                    continue;
                }

                var localPath = Consts.ModListDownloadFolder.Combine(machineUrl + Consts.ModListExtension);
                if (!metadata.NeedsDownload(localPath))
                {
                    Utils.Log($"No changes detected from downloaded modlist");
                }
                else
                {
                    localPath.Delete();
                    var downloadState = DownloadDispatcher.ResolveArchive(metadata.Links.Download);
                    Utils.Log($"Downloading {machineUrl} - {metadata.Title}");
                    await downloadState.Download(localPath);
                }

                // The modlist file is a zip archive; the list itself is the "modlist.json" entry.
                ModList modlist;
                await using (var fileStream = localPath.OpenRead())
                using (var archive = new ZipArchive(fileStream, ZipArchiveMode.Read))
                await using (var jsonStream = archive.GetEntry("modlist.json")?.Open())
                {
                    if (jsonStream == null)
                    {
                        Utils.Log($"Bad Modlist {machineUrl}");
                        continue;
                    }

                    modlist = jsonStream.FromJson<ModList>();
                }

                ingestedAny = true;
                await Sql.IngestModList(metadata.DownloadMetadata.Hash, metadata, modlist);
            }

            if (!ingestedAny)
            {
                return;
            }

            // New archive rows were written, so refresh their validation status right away.
            var validator = new ValidateNonNexusArchives(Sql, Settings);
            await validator.Execute();
        }
    }
}

View File

@ -0,0 +1,27 @@
using System;
using System.Threading.Tasks;
using Wabbajack.BuildServer.Model.Models;
using Wabbajack.Common;
namespace Wabbajack.BuildServer.BackendServices
{
    /// <summary>
    /// Backend service that verifies every non-Nexus modlist archive known to SQL and
    /// stores the per-archive result. Polls every two hours.
    /// </summary>
    public class ValidateNonNexusArchives : ABackendService
    {
        public ValidateNonNexusArchives(SqlService sql, AppSettings settings) : base(sql, settings, TimeSpan.FromHours(2))
        {
        }

        /// <summary>
        /// Loads the archives, verifies each one through its download state on a work queue,
        /// and writes the (archive, validity) pairs back through SQL.
        /// </summary>
        public override async Task Execute()
        {
            var pending = await Sql.GetNonNexusModlistArchives();

            using (var workQueue = new WorkQueue())
            {
                var verified = await pending.PMap(workQueue, async item =>
                    (Archive: item, IsValid: await item.State.Verify(item)));

                await Sql.UpdateNonNexusModlistArchivesStatus(verified);
            }
        }
    }
}

View File

@ -598,20 +598,89 @@ namespace Wabbajack.BuildServer.Model.Models
/// <summary>
/// Upserts the dbo.ModLists row for the list's machine URL with the JSON forms of the
/// metadata, summary and detailed status carried by <paramref name="dto"/>.
/// </summary>
/// <param name="dto">Status whose Metadata.Links.MachineURL identifies the row to merge.</param>
public async Task UpdateModListStatus(ModListStatus dto)
{
    await using var conn = await Open();

    // MERGE updates the row when the machine URL already exists and inserts it otherwise.
    const string mergeSql = @"MERGE dbo.ModLists AS Target
USING (SELECT @MachineUrl MachineUrl, @Metadata Metadata, @Summary Summary, @DetailedStatus DetailedStatus) AS Source
ON Target.MachineUrl = Source.MachineUrl
WHEN MATCHED THEN UPDATE SET Target.Summary = Source.Summary, Target.Metadata = Source.Metadata, Target.DetailedStatus = Source.DetailedStatus
WHEN NOT MATCHED THEN INSERT (MachineUrl, Summary, Metadata, DetailedStatus) VALUES (@MachineUrl, @Summary, @Metadata, @DetailedStatus);";

    var args = new
    {
        MachineUrl = dto.Metadata.Links.MachineURL,
        Metadata = dto.Metadata.ToJson(),
        Summary = dto.Summary.ToJson(),
        DetailedStatus = dto.DetailedStatus.ToJson()
    };

    await conn.ExecuteAsync(mergeSql, args);
}
/// <summary>
/// Replaces the stored copy of a modlist and all of its archive rows inside one transaction.
/// </summary>
/// <param name="hash">Hash written to dbo.ModLists.Hash for this list.</param>
/// <param name="metadata">List metadata; its Links.MachineURL keys every row written here.</param>
/// <param name="modlist">Modlist whose JSON form and archives are stored.</param>
/// <remarks>
/// Column names follow the casing of the schema script and every placeholder matches its
/// parameter property exactly, so the statements do not rely on a case-insensitive collation.
/// </remarks>
public async Task IngestModList(Hash hash, ModlistMetadata metadata, ModList modlist)
{
    await using var conn = await Open();
    await using var tran = await conn.BeginTransactionAsync();

    var machineUrl = metadata.Links.MachineURL;

    // Delete-then-insert: the row is keyed by machine URL, so any older version is replaced.
    await conn.ExecuteAsync(@"DELETE FROM dbo.ModLists WHERE MachineURL = @MachineUrl",
        new {MachineUrl = machineUrl}, tran);

    await conn.ExecuteAsync(
        @"INSERT INTO dbo.ModLists (MachineURL, Hash, Metadata, Modlist) VALUES (@MachineUrl, @Hash, @Metadata, @Modlist)",
        new
        {
            MachineUrl = machineUrl,
            Hash = hash,
            Metadata = metadata.ToJson(),
            Modlist = modlist.ToJson()
        }, tran);

    var entries = modlist.Archives.Select(a =>
        new
        {
            MachineUrl = machineUrl,
            Hash = a.Hash,
            Size = a.Size,
            State = a.State.ToJson(),
            PrimaryKeyString = a.State.PrimaryKeyString
        }).ToArray();

    // Same replace strategy for the list's archive rows.
    await conn.ExecuteAsync(@"DELETE FROM dbo.ModListArchives WHERE MachineUrl = @MachineUrl",
        new {MachineUrl = machineUrl}, tran);

    foreach (var entry in entries)
    {
        await conn.ExecuteAsync(
            "INSERT INTO dbo.ModListArchives (MachineUrl, Hash, Size, PrimaryKeyString, State) VALUES (@MachineUrl, @Hash, @Size, @PrimaryKeyString, @State)",
            entry, tran);
    }

    await tran.CommitAsync();
}
/// <summary>
/// Reports whether dbo.ModLists already holds a row for the given machine URL and hash.
/// </summary>
/// <param name="machineUrl">Machine URL of the modlist.</param>
/// <param name="hash">Hash the stored row must have to count as indexed.</param>
/// <returns><c>true</c> when a matching row exists; otherwise <c>false</c>.</returns>
public async Task<bool> HaveIndexedModlist(string machineUrl, Hash hash)
{
    await using var conn = await Open();

    // Table name cased as in the schema script (ModLists) so the query also works
    // on a case-sensitive collation.
    var result = await conn.QueryFirstOrDefaultAsync<string>(
        "SELECT MachineURL FROM dbo.ModLists WHERE MachineURL = @MachineUrl AND Hash = @Hash",
        new {MachineUrl = machineUrl, Hash = hash});

    return result != null;
}
/// <summary>
/// Loads every row of dbo.ModListArchives whose PrimaryKeyString does not start with
/// "NexusDownloader+State|" and rebuilds an <see cref="Archive"/> from each row.
/// </summary>
/// <returns>Archives with Size, Hash and the download state deserialized from the State column.</returns>
public async Task<List<Archive>> GetNonNexusModlistArchives()
{
    await using var conn = await Open();
    var rows = await conn.QueryAsync<(Hash Hash, long Size, string State)>(
        @"SELECT Hash, Size, State FROM dbo.ModListArchives WHERE PrimaryKeyString NOT LIKE 'NexusDownloader+State|%'");

    var archives = new List<Archive>();
    foreach (var (hash, size, stateJson) in rows)
    {
        archives.Add(new Archive
        {
            Size = size,
            Hash = hash,
            State = stateJson.FromJsonString<AbstractDownloadState>()
        });
    }

    return archives;
}
/// <summary>
/// Replaces the whole contents of dbo.ModListArchiveStatus with the given validation
/// results inside one transaction.
/// </summary>
/// <param name="results">
/// Archive/validity pairs; entries sharing the same (Hash, PrimaryKeyString) are collapsed
/// to one row because that pair forms the table's primary key.
/// </param>
public async Task UpdateNonNexusModlistArchivesStatus(IEnumerable<(Archive Archive, bool IsValid)> results)
{
    await using var conn = await Open();

    // Dispose the transaction deterministically: if any statement throws before the
    // commit, disposal rolls the work back instead of leaving it to connection teardown.
    await using var trans = await conn.BeginTransactionAsync();

    await conn.ExecuteAsync("DELETE FROM dbo.ModListArchiveStatus;", transaction: trans);

    foreach (var itm in results.DistinctBy(itm => (itm.Archive.Hash, itm.Archive.State.PrimaryKeyString)))
    {
        // PrimaryKeyStringHash is computed server-side from the key string.
        await conn.ExecuteAsync(
            @"INSERT INTO dbo.ModListArchiveStatus (PrimaryKeyStringHash, PrimaryKeyString, Hash, IsValid)
              VALUES (HASHBYTES('SHA2_256', @PrimaryKeyString), @PrimaryKeyString, @Hash, @IsValid)", new
            {
                PrimaryKeyString = itm.Archive.State.PrimaryKeyString,
                Hash = itm.Archive.Hash,
                IsValid = itm.IsValid
            }, trans);
    }

    await trans.CommitAsync();
}
}
}

View File

@ -14,7 +14,7 @@ namespace Wabbajack.Lib.Downloaders
string Name { get; set; }
string Author { get; set; }
string Version { get; set; }
string ImageURL { get; set; }
Uri ImageURL { get; set; }
bool IsNSFW { get; set; }
string Description { get; set; }
@ -55,7 +55,6 @@ namespace Wabbajack.Lib.Downloaders
[JsonIgnore]
public abstract object[] PrimaryKey { get; }
[JsonIgnore]
public string PrimaryKeyString
{
get

View File

@ -84,7 +84,7 @@ namespace Wabbajack.Lib.Downloaders
public string Name { get; set; }
public string Author { get; set; }
public string Version { get; set; }
public string ImageURL { get; set; }
public Uri ImageURL { get; set; }
public virtual bool IsNSFW { get; set; }
public string Description { get; set; }

View File

@ -63,20 +63,25 @@ namespace Wabbajack.Lib.Downloaders
?
.First().InnerHtml);
ImageURL = HttpUtility.HtmlDecode(node
var url = HttpUtility.HtmlDecode(node
.SelectNodes(
"//div[@class='ipsBox ipsSpacer_top ipsSpacer_double']/section/div[@class='ipsPad ipsAreaBackground']/div[@class='ipsCarousel ipsClearfix']/div[@class='ipsCarousel_inner']/ul[@class='cDownloadsCarousel ipsClearfix']/li[@class='ipsCarousel_item ipsAreaBackground_reset ipsPad_half']/span[@class='ipsThumb ipsThumb_medium ipsThumb_bg ipsCursor_pointer']")
?.First().GetAttributeValue("data-fullurl", "none"));
if (!string.IsNullOrWhiteSpace(ImageURL))
if (!string.IsNullOrWhiteSpace(url))
{
ImageURL = new Uri(url);
return true;
}
ImageURL = HttpUtility.HtmlDecode(node
url = HttpUtility.HtmlDecode(node
.SelectNodes(
"//article[@class='ipsColumn ipsColumn_fluid']/div[@class='ipsPad']/section/div[@class='ipsType_richText ipsContained ipsType_break']/p/a/img[@class='ipsImage ipsImage_thumbnailed']")
?.First().GetAttributeValue("src", ""));
if (string.IsNullOrWhiteSpace(ImageURL))
ImageURL = "";
if (!string.IsNullOrWhiteSpace(url))
{
ImageURL = new Uri(url);
}
return true;
}

View File

@ -141,7 +141,7 @@ namespace Wabbajack.Lib.Downloaders
public string Version { get; set; }
public string ImageURL { get; set; }
public Uri ImageURL { get; set; }
public bool IsNSFW { get; set; }

View File

@ -34,16 +34,27 @@ namespace Wabbajack.Lib.NexusApi
/// <summary>
/// Data holder for the details of a single Nexus mod.
/// </summary>
// NOTE(review): the snake_case property names presumably mirror the field names of the
// Nexus API response so they deserialize without attributes — confirm against the API client.
// NOTE(review): in this view mod_id, picture_url and uploaded_users_profile_url are each
// declared twice (once as string, once with the new position/type). Only one declaration of
// each can remain for the class to compile — confirm which lines the commit keeps.
public class ModInfo
{
public uint _internal_version { get; set; }
public string game_name { get; set; }
public string mod_id { get; set; }
public string name { get; set; }
public string summary { get; set; }
public string description { get; set; }
public Uri picture_url { get; set; }
public string mod_id { get; set; }
public long game_id { get; set; }
public bool allow_rating { get; set; }
public string domain_name { get; set; }
public long category_id { get; set; }
public string version { get; set; }
public long endorsement_count { get; set; }
public long created_timestamp { get; set; }
public DateTime created_time { get; set; }
public long updated_timestamp { get; set; }
public DateTime updated_time { get; set; }
public string author { get; set; }
public string uploaded_by { get; set; }
public string uploaded_users_profile_url { get; set; }
public string picture_url { get; set; }
public Uri uploaded_users_profile_url { get; set; }
public bool contains_adult_content { get; set; }
public string status { get; set; }
// Defaults to true when the property is never assigned.
public bool available { get; set; } = true;
}
public class MD5Response

View File

@ -21,7 +21,7 @@ namespace Wabbajack
{
State = state;
ImageObservable = Observable.Return(State.ImageURL)
ImageObservable = Observable.Return(State.ImageURL.ToString())
.ObserveOn(RxApp.TaskpoolScheduler)
.DownloadBitmapImage((ex) => Utils.Log($"Skipping slide for mod {State.Name}"))
.Replay(1)