//	Spludlow Software
//	Copyright © Samuel P. Ludlow 2020 All Rights Reserved
//	Distributed under the terms of the GNU General Public License version 3
//	Distributed WITHOUT ANY WARRANTY; without implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE
//	https://www.spludlow.co.uk/LICENCE.TXT
//	The Spludlow logo is a registered trademark of Samuel P. Ludlow and may not be used without permission
//	v1.14

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

using System.IO;

namespace Spludlow.Html
{
	/// <summary>
	/// Some simple helpers for HtmlAgilityPack
	/// that also do the HTTP Get
	/// Can be handy for automating downloads from web pages
	/// </summary>
	public class Web
	{
		/// <summary>
		/// User-Agent value passed to every HTTP request made by this class.
		/// </summary>
		public static string UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36";

		/// <summary>
		/// Fetches the page at <paramref name="url"/> with no referer and extracts its links,
		/// passing the page URL itself as the absolute URL for link extraction.
		/// </summary>
		/// <param name="url">Address of the page to fetch.</param>
		/// <returns>The links returned by <c>Spludlow.Html.Parse.ExtractLinks</c>.</returns>
		public static string[] DownloadLinks(string url)
		{
			return DownloadLinks(url, null, url);
		}

		/// <summary>
		/// Fetches the page at <paramref name="url"/> and extracts its links,
		/// passing the page URL itself as the absolute URL for link extraction.
		/// </summary>
		/// <param name="url">Address of the page to fetch.</param>
		/// <param name="referer">Referer passed to the HTTP request, may be null.</param>
		/// <returns>The links returned by <c>Spludlow.Html.Parse.ExtractLinks</c>.</returns>
		public static string[] DownloadLinks(string url, string referer)
		{
			return DownloadLinks(url, referer, url);
		}

		/// <summary>
		/// Fetches the page at <paramref name="url"/> and extracts its links.
		/// </summary>
		/// <param name="url">Address of the page to fetch.</param>
		/// <param name="referer">Referer passed to the HTTP request, may be null.</param>
		/// <param name="absoluteUrl">Passed to the link extractor; presumably the base used to make relative links absolute (verify against Parse.ExtractLinks).</param>
		/// <returns>The links returned by <c>Spludlow.Html.Parse.ExtractLinks</c>.</returns>
		public static string[] DownloadLinks(string url, string referer, string absoluteUrl)
		{
			string html = Spludlow.Net.Http.GetText(url, null, null, referer, UserAgent);

			return Spludlow.Html.Parse.ExtractLinks(html, absoluteUrl);
		}

		/// <summary>
		/// Fetches the page at <paramref name="url"/> with no referer and extracts its image links,
		/// passing the page URL itself as the absolute URL for extraction.
		/// </summary>
		/// <param name="url">Address of the page to fetch.</param>
		/// <returns>The image links returned by <c>Spludlow.Html.Parse.ExtractImages</c>.</returns>
		public static string[] DownloadImageLinks(string url)
		{
			return DownloadImageLinks(url, null, url);
		}

		/// <summary>
		/// Fetches the page at <paramref name="url"/> and extracts its image links,
		/// passing the page URL itself as the absolute URL for extraction.
		/// </summary>
		/// <param name="url">Address of the page to fetch.</param>
		/// <param name="referer">Referer passed to the HTTP request, may be null.</param>
		/// <returns>The image links returned by <c>Spludlow.Html.Parse.ExtractImages</c>.</returns>
		public static string[] DownloadImageLinks(string url, string referer)
		{
			return DownloadImageLinks(url, referer, url);
		}

		/// <summary>
		/// Fetches the page at <paramref name="url"/> and extracts its image links.
		/// </summary>
		/// <param name="url">Address of the page to fetch.</param>
		/// <param name="referer">Referer passed to the HTTP request, may be null.</param>
		/// <param name="absoluteUrl">Passed to the image extractor; presumably the base used to make relative links absolute (verify against Parse.ExtractImages).</param>
		/// <returns>The image links returned by <c>Spludlow.Html.Parse.ExtractImages</c>.</returns>
		public static string[] DownloadImageLinks(string url, string referer, string absoluteUrl)
		{
			string html = Spludlow.Net.Http.GetText(url, null, null, referer, UserAgent);

			return Spludlow.Html.Parse.ExtractImages(html, absoluteUrl);
		}

		/// <summary>
		/// Fetches the links on a page and keeps only those containing at least one filter word.
		/// </summary>
		/// <param name="url">Address of the page to fetch.</param>
		/// <param name="referer">Referer passed to the HTTP request, may be null.</param>
		/// <param name="filter">Comma separated filter words; when null every link is returned unfiltered.</param>
		/// <returns>The matching links, in the order they were extracted.</returns>
		public static string[] DownloadLinksFilter(string url, string referer, string filter)
		{
			string[] completeLinks = DownloadLinks(url, referer);

			if (filter == null)
				return completeLinks;

			string[] filters = Spludlow.Text.Split(filter, ',', true, false);

			// Compare case-insensitively on both sides. Lower-casing only the link meant that
			// a filter word containing upper case characters could never match anything.
			return completeLinks
				.Where(link => filters.Any(filterWord => link.IndexOf(filterWord, StringComparison.OrdinalIgnoreCase) >= 0))
				.ToArray();
		}

		/// <summary>
		/// Fetches the links on a page, filters them, and downloads each one into a directory.
		/// The page URL is used as the referer for the file downloads.
		/// </summary>
		/// <param name="url">Address of the page to fetch links from.</param>
		/// <param name="referer">Referer passed when fetching the page, may be null.</param>
		/// <param name="directory">Directory the files are saved into.</param>
		/// <param name="filter">Comma separated filter words; when null every link is downloaded.</param>
		/// <returns>The links that were downloaded without error.</returns>
		public static string[] DownloadFiles(string url, string referer, string directory, string filter)
		{
			string[] links = DownloadLinksFilter(url, referer, filter);

			return DownloadFiles(links, url, directory);
		}

		/// <summary>
		/// Downloads each link starting with "http" into a directory.
		/// A failed download is logged as a warning and does not stop the remaining downloads.
		/// </summary>
		/// <param name="links">Links to download; links not starting with "http" are skipped.</param>
		/// <param name="referer">Referer passed to each download request, may be null.</param>
		/// <param name="directory">Directory the files are saved into.</param>
		/// <returns>The links (not the filenames) that were downloaded without error.</returns>
		public static string[] DownloadFiles(string[] links, string referer, string directory)
		{
			List<string> result = new List<string>();

			foreach (string link in links)
			{
				// Skip unsupported links before doing any filename work on them.
				if (link.StartsWith("http", StringComparison.Ordinal) == false)
					continue;

				string filename = Path.Combine(directory, GetFilename(link));

				filename = Spludlow.Io.Files.UniqueExistingName(filename);

				try
				{
					Spludlow.Net.Http.GetDataFile(link, filename, null, null, referer, UserAgent, false);

					result.Add(link);
				}
				catch (Exception ee)
				{
					Spludlow.Log.Warning("Spludlow.Html.Web; Download File Error:\t" + link, ee);
				}
			}

			return result.ToArray();
		}

		/// <summary>
		/// Derives a local filename from a URL: the last path segment, or when that is empty
		/// (for example the URL ends with a separator) the directory part instead.
		/// The result is passed through <c>Spludlow.Io.Paths.LegalFileName</c>.
		/// </summary>
		/// <param name="url">URL to derive the filename from.</param>
		/// <returns>The value returned by <c>Spludlow.Io.Paths.LegalFileName</c>.</returns>
		public static string GetFilename(string url)
		{
			string name = Path.GetFileName(url);

			if (string.IsNullOrEmpty(name) == true)
				name = Path.GetDirectoryName(url);

			return Spludlow.Io.Paths.LegalFileName(name);
		}
	}
}