wabbajack/Wabbajack.Lib/Downloaders/ModDBDownloader.cs
Timothy Baldridge db3b441d19 #### Version - 2.3.6.1 - 12/31/2020
* When IPS4 (e.g. LL) sites based on CEF fail to validate, they no longer hang the app
* If an IPS4 CEF site throws a 503 or 400 error, retry
* Clean out the cookies during IPS4 CEF downloads so that they don't cause 400 errors
* Limit the number of connections to IPS4 sites to 20 per minute (one per 6 seconds)
* If a site *does* time out, throw a log of the CEF state into `CEFStates` for easier debugging by the WJ team
* Wrote a new CLI utility to stress test the Verification routines.
* Ignore files that have `\Edit Scripts\Export\` in their path
2020-12-30 23:44:58 -07:00

125 lines
4.4 KiB
C#

using System;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using HtmlAgilityPack;
using Newtonsoft.Json;
using Wabbajack.Common;
using Wabbajack.Common.Serialization.Json;
using Wabbajack.Lib.Validation;
namespace Wabbajack.Lib.Downloaders
{
public class ModDBDownloader : IDownloader, IUrlDownloader
{
/// <summary>
/// Reads <c>General.directURL</c> from a parsed archive INI and hands it to the
/// URL-based <c>GetDownloaderState</c> overload.
/// </summary>
/// <param name="archiveINI">Dynamic INI object; accessed null-conditionally.</param>
/// <param name="quickMode">Not consulted by this downloader.</param>
/// <returns>Whatever the URL-based overload returns for the extracted value.</returns>
public async Task<AbstractDownloadState?> GetDownloaderState(dynamic archiveINI, bool quickMode)
{
    // Null-conditional access yields null when the INI object or its General member is null.
    var directUrl = archiveINI?.General?.directURL;

    // directUrl is dynamic, so the single-argument overload is bound at run time.
    return GetDownloaderState(directUrl);
}
/// <summary>
/// Recognizes ModDB "download start" URLs and wraps them in a <see cref="State"/>.
/// </summary>
/// <param name="url">Candidate URL; a null value is tolerated.</param>
/// <returns>
/// A new <see cref="State"/> when <paramref name="url"/> begins with
/// <c>https://www.moddb.com/downloads/start</c>; otherwise null.
/// </returns>
public AbstractDownloadState? GetDownloaderState(string url)
{
    // Ordinal comparison: this is a scheme/host/path prefix, not linguistic text,
    // so the match must not vary with the current culture.
    if (url != null && url.StartsWith("https://www.moddb.com/downloads/start", StringComparison.Ordinal))
    {
        return new State(url);
    }

    return null;
}
/// <summary>
/// This downloader performs no preparation work.
/// </summary>
/// <returns>An already-completed task.</returns>
// Returning Task.CompletedTask avoids an empty async state machine (and the CS1998 warning).
public Task Prepare() => Task.CompletedTask;
[JsonName("ModDBDownloader")]
public class State : AbstractDownloadState
{
/// <summary>The URL this state was constructed with.</summary>
public string Url { get; }

/// <summary>Key for this state: a single-element array holding <see cref="Url"/>.</summary>
[JsonIgnore]
public override object[] PrimaryKey
{
    get { return new object[] { Url }; }
}

/// <summary>Creates a state for the given URL.</summary>
/// <param name="url">Stored verbatim in <see cref="Url"/>.</param>
public State(string url) => Url = url;
/// <summary>Whitelist check for this state.</summary>
/// <param name="whitelist">Ignored; the answer does not depend on it.</param>
/// <returns>Always true.</returns>
// Everything from Moddb is whitelisted
public override bool IsWhitelisted(ServerWhitelist whitelist) => true;
/// <summary>
/// Downloads the archive by trying each mirror returned by <c>GetDownloadUrls</c> in order
/// until one succeeds.
/// </summary>
/// <param name="a">Archive being downloaded; its name is used in the log output.</param>
/// <param name="destination">Path passed through to the HTTP downloader.</param>
/// <returns>
/// True as soon as one mirror reports success; false when there are no mirrors or
/// every mirror reported failure without throwing.
/// </returns>
/// <remarks>
/// An exception thrown by any mirror other than the last is logged as a failed attempt and
/// the next mirror is tried. An exception thrown by the last mirror propagates to the caller.
/// </remarks>
public override async Task<bool> Download(Archive a, AbsolutePath destination)
{
    var urls = await GetDownloadUrls();
    Utils.Log($"Found {urls.Length} ModDB mirrors for {a.Name}");

    for (var idx = 0; idx < urls.Length; idx++)
    {
        var url = urls[idx];
        var isLastMirror = idx == urls.Length - 1;

        bool succeeded;
        try
        {
            // NOTE(review): assumes HTTPDownloader.State.Download returns Task<bool> like the
            // override in this class - confirm. Its result was previously discarded, so a
            // mirror that reported failure without throwing was treated as a success.
            succeeded = await new HTTPDownloader.State(url).Download(a, destination);
        }
        catch (Exception) when (!isLastMirror)
        {
            // The filter lets the final mirror's exception escape untouched; earlier
            // failures are recorded below and the loop moves on.
            succeeded = false;
        }

        if (succeeded)
        {
            return true;
        }

        if (!isLastMirror)
        {
            Utils.Log($"Download from {url} failed, trying next mirror");
        }
    }

    return false;
}
/// <summary>
/// Loads the "all mirrors" page for this download and extracts the mirror links,
/// ordered from the lowest reported load to the highest.
/// </summary>
/// <param name="token">Optional cancellation token; <see cref="CancellationToken.None"/> is used when null.</param>
/// <returns>Absolute mirror URLs (each prefixed with <c>https://www.moddb.com</c>); may be empty.</returns>
/// <exception cref="InvalidOperationException">
/// The path of <c>Url</c> contains no segment that parses as an integer.
/// </exception>
private async Task<string[]> GetDownloadUrls(CancellationToken? token = null)
{
    var uri = new Uri(Url);

    // The download id is the last path segment that parses as an integer.
    var modId = uri.AbsolutePath.Split('/').LastOrDefault(f => int.TryParse(f, out _));
    if (modId == null)
    {
        throw new InvalidOperationException($"Could not find a numeric ModDB download id in {Url}");
    }

    var mirrorUrl = $"https://www.moddb.com/downloads/start/{modId}/all";
    var doc = await new HtmlWeb().LoadFromWebAsync(mirrorUrl, token ?? CancellationToken.None);

    return doc.DocumentNode.Descendants()
        .Where(d => d.NodeType == HtmlNodeType.Element && d.HasClass("row"))
        .Select(d => new
        {
            Href = d.Descendants()
                .Where(s => s.Id == "downloadon")
                .Select(i => i.GetAttributeValue("href", ""))
                .FirstOrDefault(),
            // A row without a subheading has an unknown load; rank it last,
            // the same as a subheading whose load cannot be parsed.
            Load = d.Descendants()
                .Where(s => s.HasClass("subheading"))
                .Select(i => ParseMirrorLoad(i.InnerHtml))
                .DefaultIfEmpty(double.MaxValue)
                .First()
        })
        // Rows without a usable download anchor would otherwise turn into the bare site root.
        .Where(m => !string.IsNullOrEmpty(m.Href))
        .OrderBy(m => m.Load)
        .Select(m => "https://www.moddb.com" + m.Href)
        .ToArray();
}

/// <summary>
/// Extracts the load figure from a mirror subheading: the text after the last comma,
/// up to the first percent sign.
/// </summary>
/// <param name="subheadingHtml">Inner HTML of the subheading element.</param>
/// <returns>The parsed number, or <see cref="double.MaxValue"/> when it does not parse.</returns>
private static double ParseMirrorLoad(string subheadingHtml)
{
    var candidate = subheadingHtml.Split(',').Last().Split('%')[0];

    // Invariant culture: the page text is machine data, so parsing must not depend on
    // the decimal separator of the machine's locale.
    return double.TryParse(
        candidate,
        System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
        System.Globalization.CultureInfo.InvariantCulture,
        out var load)
        ? load
        : double.MaxValue;
}
/// <summary>
/// Verifies this state by fetching the mirror list for the download.
/// </summary>
/// <param name="a">Archive being verified; not consulted here.</param>
/// <param name="token">Optional cancellation token forwarded to the mirror lookup.</param>
/// <returns>True when at least one mirror link was found; otherwise false.</returns>
public override async Task<bool> Verify(Archive a, CancellationToken? token)
{
    var mirrors = await GetDownloadUrls(token);

    // With no mirrors Download has nothing to try and returns false,
    // so verification must not report success in that case.
    return mirrors.Length > 0;
}
/// <summary>Looks up the downloader that owns this state.</summary>
/// <returns>The <see cref="ModDBDownloader"/> instance obtained from <c>DownloadDispatcher</c>.</returns>
public override IDownloader GetDownloader() => DownloadDispatcher.GetInstance<ModDBDownloader>();
/// <summary>URL to show for this archive in a manifest.</summary>
/// <param name="a">Archive being described; not consulted here.</param>
/// <returns>The value of <c>Url</c>.</returns>
public override string GetManifestURL(Archive a) => Url;
/// <summary>Produces the meta INI lines describing this state.</summary>
/// <returns>Two lines: the <c>[General]</c> section header and a <c>directURL</c> entry holding <c>Url</c>.</returns>
public override string[] GetMetaIni()
{
    var iniLines = new string[2];
    iniLines[0] = "[General]";
    iniLines[1] = $"directURL={Url}";
    return iniLines;
}
}
}
}