using System; using System.Collections.Generic; using System.IO; using System.Linq; using System.Net; using System.Net.Http; using System.Text.RegularExpressions; using System.Threading; using System.Threading.Tasks; using System.Web; using F23.StringSimilarity; using HtmlAgilityPack; using Newtonsoft.Json; using Wabbajack.Common; using Wabbajack.Lib.LibCefHelpers; using Wabbajack.Lib.Validation; using Wabbajack.Lib.WebAutomation; namespace Wabbajack.Lib.Downloaders { interface IWaitForWindowDownloader { public Task WaitForNextRequestWindow(); } // IPS4 is the site used by LoversLab, VectorPlexus, etc. the general mechanics of each site are the // same, so we can fairly easily abstract the state. // Pass in the state type via TState public abstract class AbstractIPS4Downloader : AbstractNeedsLoginDownloader, IDownloader, IWaitForWindowDownloader where TState : AbstractIPS4Downloader.State, new() where TDownloader : IDownloader { private DateTime LastRequestTime = default; protected long RequestsPerMinute = 60; private TimeSpan RequestDelay => TimeSpan.FromMinutes(1) / RequestsPerMinute; protected AbstractIPS4Downloader(Uri loginUri, string encryptedKeyName, string cookieDomain, string loginCookie = "ips4_member_id") : base(loginUri, encryptedKeyName, cookieDomain, loginCookie) { } public async Task GetDownloaderState(dynamic archiveINI, bool quickMode) { Uri url = DownloaderUtils.GetDirectURL(archiveINI); return await GetDownloaderStateFromUrl(url, quickMode); } public async Task WaitForNextRequestWindow() { TimeSpan delay; lock (this) { if (LastRequestTime < DateTime.UtcNow - RequestDelay) { LastRequestTime = DateTime.UtcNow; delay = TimeSpan.Zero; } else { LastRequestTime += RequestDelay; delay = LastRequestTime - DateTime.UtcNow; } } Utils.Log($"Waiting for {delay.TotalSeconds} to make request via {typeof(TDownloader).Name}"); await Task.Delay(delay); } public async Task GetDownloaderStateFromUrl(Uri url, bool quickMode) { var absolute = true; if (url == null || url.Host != SiteURL.Host) return null; if (url.PathAndQuery.StartsWith("/applications/core/interface/file/attachment")) { return new TState { IsAttachment = true, FullURL = url.ToString() }; } if (url.PathAndQuery.StartsWith("/index.php?")) { var id2 = HttpUtility.ParseQueryString(url.Query)["r"]; var parsed = HttpUtility.ParseQueryString(url.Query); var name = parsed[null]!.Split("/", StringSplitOptions.RemoveEmptyEntries).Last(); return new TState { FullURL = url.AbsolutePath, FileID = id2!, FileName = name }; } if (url.PathAndQuery.StartsWith("/files/getdownload")) { return new TState { FullURL = url.ToString(), IsAttachment = true }; } if (url.PathAndQuery.StartsWith("/files/download/") && long.TryParse(url.PathAndQuery.Split("/").Last(), out var fileId)) { return new TState { FullURL = url.ToString(), IsAttachment = true }; } if (!url.PathAndQuery.StartsWith("/files/file/")) { if (string.IsNullOrWhiteSpace(url.Query)) return null; if (!url.Query.Substring(1).StartsWith("/files/file/")) return null; absolute = false; } var id = HttpUtility.ParseQueryString(url.Query)["r"]; var file = absolute ? url.AbsolutePath.Split('/').Last(s => s != "") : url.Query.Substring(1).Split('/').Last(s => s != ""); return new TState { FullURL = url.AbsolutePath, FileID = id!, FileName = file }; } public class State : AbstractDownloadState, IMetaState where TStateDownloader : IDownloader { public string FullURL { get; set; } = string.Empty; public bool IsAttachment { get; set; } public string FileID { get; set; } = string.Empty; public string FileName { get; set; } = string.Empty; // from IMetaState public Uri URL => IsAttachment ? new Uri("https://www.wabbajack.org/") : new Uri($"{Site}/files/file/{FileName}"); public string? Name { get; set; } public string? Author { get; set; } public string? Version { get; set; } public Uri? ImageURL { get; set; } public virtual bool IsNSFW { get; set; } public string? Description { get; set; } private static bool IsHTTPS => Downloader.SiteURL.AbsolutePath.StartsWith("https://"); private static string URLPrefix => IsHTTPS ? "https://" : "http://"; [JsonIgnore] public static string Site => string.IsNullOrWhiteSpace(Downloader.SiteURL.Query) ? $"{URLPrefix}{Downloader.SiteURL.Host}" : Downloader.SiteURL.ToString(); public static AbstractNeedsLoginDownloader Downloader => (AbstractNeedsLoginDownloader)(object)DownloadDispatcher.GetInstance(); [JsonIgnore] public override object[] PrimaryKey { get { return string.IsNullOrWhiteSpace(FileID) ? IsAttachment ? new object[] {Downloader.SiteURL, IsAttachment, FullURL} : new object[] {Downloader.SiteURL, FileName} : new object[] {Downloader.SiteURL, FileName, FileID}; } } public override bool IsWhitelisted(ServerWhitelist whitelist) { return true; } public override async Task Download(Archive a, AbsolutePath destination) { if (Downloader.IsCloudFlareProtected) await ((IWaitForWindowDownloader)Downloader).WaitForNextRequestWindow(); return await ResolveDownloadStream(a, destination, false); } private async Task ResolveDownloadStream(Archive a, AbsolutePath path, bool quickMode, CancellationToken? token = null) { TOP: string url; if (IsAttachment) { url = FullURL; } else { var csrfKey = await GetCsrfKey(token); if (!Downloader.IsCloudFlareProtected && csrfKey == null) { Utils.Log($"Returning null from IPS4 Downloader because no csrfKey was found"); return false; } var sep = Site.EndsWith("?") ? "&" : "?"; url = FileID == null ? $"{Site}/files/file/{FileName}/{sep}do=download&confirm=1&t=1&csrfKey={csrfKey}" : $"{Site}/files/file/{FileName}/{sep}do=download&r={FileID}&confirm=1&t=1&csrfKey={csrfKey}"; } if (Downloader.IsCloudFlareProtected) { using var driver = await Downloader.GetAuthedDriver(); var size = await driver.NavigateToAndDownload(new Uri(url), path, quickMode: quickMode, token); if (a.Size == 0 || size == 0 || a.Size == size) return true; Utils.Log($"Bad Header Content sizes {a.Size} vs {size}"); return false; } var streamResult = await GetDownloadAsync(new Uri(url)); if (streamResult.StatusCode != HttpStatusCode.OK) { Utils.ErrorThrow(new InvalidOperationException(), $"{Downloader.SiteName} servers reported an error for file: {FileID}"); } var contentType = streamResult.Content.Headers.ContentType; if (contentType!.MediaType != "application/json") { var headerVar = a.Size == 0 ? "1" : a.Size.ToString(); long headerContentSize = 0; if (streamResult.Content.Headers.Contains("Content-Length")) { headerVar = streamResult.Content.Headers.GetValues("Content-Length").FirstOrDefault(); if (headerVar != null) long.TryParse(headerVar, out headerContentSize); } if (a.Size != 0 && headerContentSize != 0 && a.Size != headerContentSize) { Utils.Log($"Bad Header Content sizes {a.Size} vs {headerContentSize}"); return false; } if (quickMode) { streamResult.Dispose(); return true; } await using (var os = await path.Create()) await using (var ins = await streamResult.Content.ReadAsStreamAsync()) { if (a.Size == 0) { Utils.Status($"Downloading {a.Name}"); await ins.CopyToAsync(os); } else { await ins.CopyToWithStatusAsync(headerContentSize, os, $"Downloading {a.Name}"); } } streamResult.Dispose(); return true; } // Sometimes LL hands back a json object telling us to wait until a certain time var times = (await streamResult.Content.ReadAsStringAsync()).FromJsonString(); var secs = times.Download - times.CurrentTime; for (int x = 0; x < secs; x++) { if (quickMode) return true; Utils.Status($"Waiting for {secs} at the request of {Downloader.SiteName}", Percent.FactoryPutInRange(x, secs)); Utils.Log($"Waiting for {secs} at the request of {Downloader.SiteName}, {secs - x} remaining"); await Task.Delay(1000); } streamResult.Dispose(); Utils.Status("Retrying download"); goto TOP; } private async Task GetCsrfKey(CancellationToken? token) { var csrfURL = string.IsNullOrWhiteSpace(FileID) ? $"{Site}/files/file/{FileName}/?do=download" : $"{Site}/files/file/{FileName}/?do=download&r={FileID}"; var html = await GetStringAsync(new Uri(csrfURL), token); var pattern = new Regex("(?<=csrfKey=).*(?=[&\"\'])|(?<=csrfKey: \").*(?=[&\"\'])"); var matches = pattern.Matches(html).Cast(); var csrfKey = matches.Where(m => m.Length == 32).Select(m => m.ToString()).FirstOrDefault(); return csrfKey; } private async Task DeleteOldDownloadCookies(Driver driver) { await driver.DeleteCookiesWhere(c => c.Name.StartsWith("ips4_downloads_delay_") && c.Value == "-1"); } private class WaitResponse { [JsonProperty("download")] public int Download { get; set; } [JsonProperty("currentTime")] public int CurrentTime { get; set; } } public override async Task Verify(Archive a, CancellationToken? token) { if (Downloader.IsCloudFlareProtected) await ((IWaitForWindowDownloader)Downloader).WaitForNextRequestWindow(); await using var tp = new TempFile(); var isValid = await ResolveDownloadStream(a, tp.Path, true, token: token); return isValid; } public override IDownloader GetDownloader() { return DownloadDispatcher.GetInstance(); } public override async Task<(Archive? Archive, TempFile NewFile)> FindUpgrade(Archive a, Func> downloadResolver) { await ((IWaitForWindowDownloader)Downloader).WaitForNextRequestWindow(); var files = await GetFilesInGroup(); var nl = new Levenshtein(); foreach (var newFile in files.OrderBy(f => nl.Distance(a.Name.ToLowerInvariant(), f.Name.ToLowerInvariant()))) { /* var existing = await downloadResolver(newFile); if (existing != default) return (newFile, new TempFile());*/ var tmp = new TempFile(); await DownloadDispatcher.PrepareAll(new[] {newFile.State}); if (await newFile.State.Download(newFile, tmp.Path)) { newFile.Size = tmp.Path.Size; var tmpHash = await tmp.Path.FileHashAsync(); if (tmpHash == null) return default; newFile.Hash = tmpHash.Value; return (newFile, tmp); } await tmp.DisposeAsync(); } return default; } public override async Task ValidateUpgrade(Hash srcHash, AbstractDownloadState newArchiveState) { return !string.IsNullOrWhiteSpace(FileID); } public async Task> GetFilesInGroup() { var token = new CancellationTokenSource(); token.CancelAfter(TimeSpan.FromMinutes(10)); var csrfKey = await GetCsrfKey(token.Token); if (csrfKey == null) { Utils.Log($"Could not load CRSF key"); return new List(); } var others = await GetHtmlAsync(new Uri($"{Site}/files/file/{FileName}?do=download&csrfKey={csrfKey}"), token.Token); var pairs = others.DocumentNode.SelectNodes("//a[@data-action='download']") .Select(item => (item.GetAttributeValue("href", ""), item.ParentNode.ParentNode.SelectNodes("//div//h4//span").First().InnerText)); List archives = new List(); foreach (var (url, name) in pairs) { var urlDecoded = HttpUtility.HtmlDecode(url); var ini = new[] {"[General]", $"directURL={urlDecoded}"}; var state = (AbstractDownloadState)(await DownloadDispatcher.ResolveArchive( string.Join("\n", ini).LoadIniString(), false)); if (state == null) continue; archives.Add(new Archive(state) {Name = name}); } return archives; } public override string GetManifestURL(Archive a) { return IsAttachment ? FullURL : $"{Site}/files/file/{FileName}/?do=download&r={FileID}"; } public async Task GetStringAsync(Uri uri, CancellationToken? token = null) { if (!Downloader.IsCloudFlareProtected) return await Downloader.AuthedClient.GetStringAsync(uri, token); using var driver = await Downloader.GetAuthedDriver(); await ((IWaitForWindowDownloader)Downloader).WaitForNextRequestWindow(); await DeleteOldDownloadCookies(driver); //var drivercookies = await Helpers.GetCookies("loverslab.com"); //var cookies = await ClientAPI.GetAuthInfo("loverslabcookies"); //await Helpers.IngestCookies(uri.ToString(), cookies); await driver.NavigateTo(uri, token); if ((token ?? CancellationToken.None).IsCancellationRequested) return ""; var source = await driver.GetSourceAsync(); /* Downloader.AuthedClient.Cookies.Add(drivercookies.Where(dc => dc.Name == "cf_clearance") .Select(dc => new Cookie { Name = dc.Name, Domain = dc.Domain, Value = dc.Value, Path = dc.Path }) .FirstOrDefault()); var source = await Downloader.AuthedClient.GetStringAsync(uri); */ return source; } public async Task GetHtmlAsync(Uri s, CancellationToken? token = null) { var body = await GetStringAsync(s, token); var doc = new HtmlDocument(); doc.LoadHtml(body); return doc; } public async Task GetDownloadAsync(Uri uri) { if (!Downloader.IsCloudFlareProtected) return await Downloader.AuthedClient.GetAsync(uri); using var driver = await Downloader.GetAuthedDriver(); TaskCompletionSource promise = new(); driver.DownloadHandler = uri1 => { promise.SetResult(uri); }; await driver.NavigateTo(uri); var url = await promise.Task; if (url == null) throw new Exception("No Url to download"); var location = await driver.GetLocation(); return await Helpers.GetClient(await Helpers.GetCookies(), location!.ToString()).GetAsync(uri); } public override string[] GetMetaIni() { if (IsAttachment) return new[] {"[General]", $"directURL={FullURL}"}; if (FileID == null) return new[] {"[General]", $"directURL={Site}/files/file/{FileName}"}; if (Site.EndsWith("?")) { return new[] { "[General]", $"directURL={Site}/files/file/{FileName}&do=download&r={FileID}&confirm=1&t=1" }; } return new[] { "[General]", $"directURL={Site}/files/file/{FileName}/?do=download&r={FileID}&confirm=1&t=1" }; } public virtual async Task LoadMetaData() { return false; } } } }