// Spludlow Software
// Copyright © Samuel P. Ludlow 2020 All Rights Reserved
// Distributed under the terms of the GNU General Public License version 3
// Distributed WITHOUT ANY WARRANTY; without implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE
// https://www.spludlow.co.uk/LICENCE.TXT
// The Spludlow logo is a registered trademark of Samuel P. Ludlow and may not be used without permission
// v1.14
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.IO;
namespace Spludlow.Html
{
/// <summary>
/// Some simple helpers for HtmlAgilityPack
/// that also do the HTTP GET.
/// Can be handy for automating downloads from web pages.
/// </summary>
public class Web
{
    /// <summary>
    /// User-Agent header value passed to every HTTP request made by this class.
    /// </summary>
    public static string UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36";

    /// <summary>
    /// Downloads the page at <paramref name="url"/> (no referer) and extracts its links.
    /// </summary>
    /// <param name="url">Page to download; also passed on as the absolute URL for link extraction.</param>
    /// <returns>The links returned by <c>Spludlow.Html.Parse.ExtractLinks</c>.</returns>
    public static string[] DownloadLinks(string url)
    {
        return DownloadLinks(url, null, url);
    }

    /// <summary>
    /// Downloads the page at <paramref name="url"/> and extracts its links.
    /// </summary>
    /// <param name="url">Page to download; also passed on as the absolute URL for link extraction.</param>
    /// <param name="referer">Referer passed to the HTTP request; may be null.</param>
    /// <returns>The links returned by <c>Spludlow.Html.Parse.ExtractLinks</c>.</returns>
    public static string[] DownloadLinks(string url, string referer)
    {
        return DownloadLinks(url, referer, url);
    }

    /// <summary>
    /// Downloads the page at <paramref name="url"/> as text and extracts its links.
    /// </summary>
    /// <param name="url">Page to download.</param>
    /// <param name="referer">Referer passed to the HTTP request; may be null.</param>
    /// <param name="absoluteUrl">
    /// Passed straight through to <c>Spludlow.Html.Parse.ExtractLinks</c>
    /// (presumably the base used to resolve relative links - confirm against that helper).
    /// </param>
    /// <returns>The links returned by <c>Spludlow.Html.Parse.ExtractLinks</c>.</returns>
    public static string[] DownloadLinks(string url, string referer, string absoluteUrl)
    {
        string html = Spludlow.Net.Http.GetText(url, null, null, referer, UserAgent);
        return Spludlow.Html.Parse.ExtractLinks(html, absoluteUrl);
    }

    /// <summary>
    /// Downloads the page at <paramref name="url"/> (no referer) and extracts its image links.
    /// </summary>
    /// <param name="url">Page to download; also passed on as the absolute URL for image extraction.</param>
    /// <returns>The image links returned by <c>Spludlow.Html.Parse.ExtractImages</c>.</returns>
    public static string[] DownloadImageLinks(string url)
    {
        return DownloadImageLinks(url, null, url);
    }

    /// <summary>
    /// Downloads the page at <paramref name="url"/> and extracts its image links.
    /// </summary>
    /// <param name="url">Page to download; also passed on as the absolute URL for image extraction.</param>
    /// <param name="referer">Referer passed to the HTTP request; may be null.</param>
    /// <returns>The image links returned by <c>Spludlow.Html.Parse.ExtractImages</c>.</returns>
    public static string[] DownloadImageLinks(string url, string referer)
    {
        return DownloadImageLinks(url, referer, url);
    }

    /// <summary>
    /// Downloads the page at <paramref name="url"/> as text and extracts its image links.
    /// </summary>
    /// <param name="url">Page to download.</param>
    /// <param name="referer">Referer passed to the HTTP request; may be null.</param>
    /// <param name="absoluteUrl">
    /// Passed straight through to <c>Spludlow.Html.Parse.ExtractImages</c>
    /// (presumably the base used to resolve relative links - confirm against that helper).
    /// </param>
    /// <returns>The image links returned by <c>Spludlow.Html.Parse.ExtractImages</c>.</returns>
    public static string[] DownloadImageLinks(string url, string referer, string absoluteUrl)
    {
        string html = Spludlow.Net.Http.GetText(url, null, null, referer, UserAgent);
        return Spludlow.Html.Parse.ExtractImages(html, absoluteUrl);
    }

    /// <summary>
    /// Downloads the links of a page and keeps only those containing at least one filter word.
    /// </summary>
    /// <param name="url">Page to download.</param>
    /// <param name="referer">Referer passed to the HTTP request; may be null.</param>
    /// <param name="filter">
    /// Comma separated list of words (split with <c>Spludlow.Text.Split</c>);
    /// when null every link is returned unfiltered.
    /// </param>
    /// <returns>The links, in page order, that contain any of the filter words (ignoring case).</returns>
    public static string[] DownloadLinksFilter(string url, string referer, string filter)
    {
        string[] completeLinks = DownloadLinks(url, referer);

        if (filter == null)
        {
            return completeLinks;
        }

        string[] filters = Spludlow.Text.Split(filter, ',', true, false);

        List<string> links = new List<string>();

        foreach (string link in completeLinks)
        {
            foreach (string filterWord in filters)
            {
                // Compare ignoring case on both sides, so filter words that contain
                // upper-case characters still match (and no culture-sensitive ToLower is needed).
                if (link.IndexOf(filterWord, StringComparison.OrdinalIgnoreCase) >= 0)
                {
                    links.Add(link);
                    break;  // one matching word is enough; do not add the link twice
                }
            }
        }

        return links.ToArray();
    }

    /// <summary>
    /// Downloads the files linked from a page, optionally filtered, into a directory.
    /// </summary>
    /// <param name="url">Page to download links from; it is also used as the referer for the file downloads.</param>
    /// <param name="referer">Referer used when requesting the page itself; may be null.</param>
    /// <param name="directory">Directory the files are written to.</param>
    /// <param name="filter">Comma separated filter words, or null for all links (see <see cref="DownloadLinksFilter"/>).</param>
    /// <returns>The links that were downloaded without error.</returns>
    public static string[] DownloadFiles(string url, string referer, string directory, string filter)
    {
        string[] links = DownloadLinksFilter(url, referer, filter);

        // The page the links came from is the referer for the individual downloads.
        return DownloadFiles(links, url, directory);
    }

    /// <summary>
    /// Downloads each link that starts with "http" into <paramref name="directory"/>.
    /// A failure on one link is logged as a warning and does not stop the remaining downloads.
    /// </summary>
    /// <param name="links">Links to download; links not starting with "http" are skipped.</param>
    /// <param name="referer">Referer passed to each HTTP request; may be null.</param>
    /// <param name="directory">Directory the files are written to.</param>
    /// <returns>The links (not the file names) that were downloaded without error.</returns>
    public static string[] DownloadFiles(string[] links, string referer, string directory)
    {
        List<string> result = new List<string>();

        foreach (string link in links)
        {
            // Skip anything that is not downloadable before doing any file name work on it.
            if (link.StartsWith("http", StringComparison.Ordinal) == false)
            {
                continue;
            }

            try
            {
                // Building the target name is inside the try so that a link which cannot be
                // turned into a file name only fails this link, not the whole batch.
                string filename = Path.Combine(directory, GetFilename(link));
                filename = Spludlow.Io.Files.UniqueExistingName(filename);

                Spludlow.Net.Http.GetDataFile(link, filename, null, null, referer, UserAgent, false);
                result.Add(link);
            }
            catch (Exception ee)
            {
                Spludlow.Log.Warning("Spludlow.Html.Web; Download File Error:\t" + link, ee);
            }
        }

        return result.ToArray();
    }

    /// <summary>
    /// Derives a local file name from a URL.
    /// </summary>
    /// <param name="url">URL to derive the name from.</param>
    /// <returns>
    /// The last path segment of <paramref name="url"/>, or its directory part when the last
    /// segment is empty (URL ends with a separator), passed through <c>Spludlow.Io.Paths.LegalFileName</c>.
    /// </returns>
    public static string GetFilename(string url)
    {
        string name = Path.GetFileName(url);

        // URL ends with a separator, so there is no file part; fall back to the directory part.
        if (name == "")
        {
            name = Path.GetDirectoryName(url);
        }

        return Spludlow.Io.Paths.LegalFileName(name);
    }
}
}