Merge pull request #695 from erri120/loverslab-meta-fixes

Better data scraping for loverslab
This commit is contained in:
Timothy Baldridge 2020-04-09 14:24:21 -06:00 committed by GitHub
commit d6e4ca6b1a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1,6 +1,7 @@
using System; using System;
using System.Linq; using System.Linq;
using System.Threading.Tasks; using System.Threading.Tasks;
using System.Web;
using HtmlAgilityPack; using HtmlAgilityPack;
using Wabbajack.Common; using Wabbajack.Common;
using Wabbajack.Lib.WebAutomation; using Wabbajack.Lib.WebAutomation;
@ -41,18 +42,37 @@ namespace Wabbajack.Lib.Downloaders
var doc = new HtmlDocument(); var doc = new HtmlDocument();
doc.LoadHtml(html); doc.LoadHtml(html);
var node = doc.DocumentNode; var node = doc.DocumentNode;
Name = node.SelectNodes("//h1[@class='ipsType_pageTitle ipsContained_container']/span")?.First().InnerHtml;
Author = node Name = HttpUtility.HtmlDecode(node
.SelectNodes(
"//h1[@class='ipsType_pageTitle ipsContained_container']/span[@class='ipsType_break ipsContained']")
?.First().InnerHtml);
Author = HttpUtility.HtmlDecode(node
.SelectNodes( .SelectNodes(
"//div[@class='ipsBox_alt']/div[@class='ipsPhotoPanel ipsPhotoPanel_tiny ipsClearfix ipsSpacer_bottom']/div/p[@class='ipsType_reset ipsType_large ipsType_blendLinks']/a") "//div[@class='ipsBox_alt']/div[@class='ipsPhotoPanel ipsPhotoPanel_tiny ipsClearfix ipsSpacer_bottom']/div/p[@class='ipsType_reset ipsType_large ipsType_blendLinks']/a")
?.First().InnerHtml; ?.First().InnerHtml);
Version = node.SelectNodes("//section/h2[@class='ipsType_sectionHead']/span[@data-role='versionTitle']")
Version = HttpUtility.HtmlDecode(node
.SelectNodes("//section/h2[@class='ipsType_sectionHead']/span[@data-role='versionTitle']")
? ?
.First().InnerHtml; .First().InnerHtml);
ImageURL = node
ImageURL = HttpUtility.HtmlDecode(node
.SelectNodes( .SelectNodes(
"//div[@class='ipsBox ipsSpacer_top ipsSpacer_double']/section/div[@class='ipsPad ipsAreaBackground']/div[@class='ipsCarousel ipsClearfix']/div[@class='ipsCarousel_inner']/ul[@class='cDownloadsCarousel ipsClearfix']/li[@class='ipsCarousel_item ipsAreaBackground_reset ipsPad_half']/span[@class='ipsThumb ipsThumb_medium ipsThumb_bg ipsCursor_pointer']") "//div[@class='ipsBox ipsSpacer_top ipsSpacer_double']/section/div[@class='ipsPad ipsAreaBackground']/div[@class='ipsCarousel ipsClearfix']/div[@class='ipsCarousel_inner']/ul[@class='cDownloadsCarousel ipsClearfix']/li[@class='ipsCarousel_item ipsAreaBackground_reset ipsPad_half']/span[@class='ipsThumb ipsThumb_medium ipsThumb_bg ipsCursor_pointer']")
?.First().GetAttributeValue("data-fullurl", "none"); ?.First().GetAttributeValue("data-fullurl", "none"));
if (!string.IsNullOrWhiteSpace(ImageURL))
return true;
ImageURL = HttpUtility.HtmlDecode(node
.SelectNodes(
"//article[@class='ipsColumn ipsColumn_fluid']/div[@class='ipsPad']/section/div[@class='ipsType_richText ipsContained ipsType_break']/p/a/img[@class='ipsImage ipsImage_thumbnailed']")
?.First().GetAttributeValue("src", ""));
if (string.IsNullOrWhiteSpace(ImageURL))
ImageURL = "";
return true; return true;
} }
} }