From eb1d5967111277562b5da48c1c2f91e37aee0930 Mon Sep 17 00:00:00 2001 From: erri120 Date: Thu, 9 Apr 2020 14:10:46 +0200 Subject: [PATCH] Better data scraping for loverslab --- .../Downloaders/LoversLabDownloader.cs | 34 +++++++++++++++---- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/Wabbajack.Lib/Downloaders/LoversLabDownloader.cs b/Wabbajack.Lib/Downloaders/LoversLabDownloader.cs index e3bd5740..1c8b295e 100644 --- a/Wabbajack.Lib/Downloaders/LoversLabDownloader.cs +++ b/Wabbajack.Lib/Downloaders/LoversLabDownloader.cs @@ -1,6 +1,7 @@ using System; using System.Linq; using System.Threading.Tasks; +using System.Web; using HtmlAgilityPack; using Wabbajack.Common; using Wabbajack.Lib.WebAutomation; @@ -41,18 +42,37 @@ namespace Wabbajack.Lib.Downloaders var doc = new HtmlDocument(); doc.LoadHtml(html); var node = doc.DocumentNode; - Name = node.SelectNodes("//h1[@class='ipsType_pageTitle ipsContained_container']/span")?.First().InnerHtml; - Author = node + + Name = HttpUtility.HtmlDecode(node + .SelectNodes( + "//h1[@class='ipsType_pageTitle ipsContained_container']/span[@class='ipsType_break ipsContained']") + ?.First().InnerHtml); + + Author = HttpUtility.HtmlDecode(node .SelectNodes( "//div[@class='ipsBox_alt']/div[@class='ipsPhotoPanel ipsPhotoPanel_tiny ipsClearfix ipsSpacer_bottom']/div/p[@class='ipsType_reset ipsType_large ipsType_blendLinks']/a") - ?.First().InnerHtml; - Version = node.SelectNodes("//section/h2[@class='ipsType_sectionHead']/span[@data-role='versionTitle']") + ?.First().InnerHtml); + + Version = HttpUtility.HtmlDecode(node + .SelectNodes("//section/h2[@class='ipsType_sectionHead']/span[@data-role='versionTitle']") ? 
- .First().InnerHtml; - ImageURL = node + .First().InnerHtml); + + ImageURL = HttpUtility.HtmlDecode(node .SelectNodes( "//div[@class='ipsBox ipsSpacer_top ipsSpacer_double']/section/div[@class='ipsPad ipsAreaBackground']/div[@class='ipsCarousel ipsClearfix']/div[@class='ipsCarousel_inner']/ul[@class='cDownloadsCarousel ipsClearfix']/li[@class='ipsCarousel_item ipsAreaBackground_reset ipsPad_half']/span[@class='ipsThumb ipsThumb_medium ipsThumb_bg ipsCursor_pointer']") - ?.First().GetAttributeValue("data-fullurl", "none"); + ?.First().GetAttributeValue("data-fullurl", "none")); + + if (!string.IsNullOrWhiteSpace(ImageURL)) + return true; + + ImageURL = HttpUtility.HtmlDecode(node + .SelectNodes( + "//article[@class='ipsColumn ipsColumn_fluid']/div[@class='ipsPad']/section/div[@class='ipsType_richText ipsContained ipsType_break']/p/a/img[@class='ipsImage ipsImage_thumbnailed']") + ?.First().GetAttributeValue("src", "")); + if (string.IsNullOrWhiteSpace(ImageURL)) + ImageURL = ""; + return true; } }