//	Spludlow Software
//	Copyright © Samuel P. Ludlow 2020 All Rights Reserved
//	Distributed under the terms of the GNU General Public License version 3
//	Distributed WITHOUT ANY WARRANTY; without implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE
//	https://www.spludlow.co.uk/LICENCE.TXT
//	The Spludlow logo is a registered trademark of Samuel P. Ludlow and may not be used without permission
//	v1.14

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

using System.Drawing;
using System.Data;
using System.IO;

namespace Spludlow
{
	/// <summary>
	/// Mirrors web server log files to a local directory tree and processes them in stages:
	/// 00-RAW (mirrored files), 01-JOIN (one table per host/site), 02-CLEAN (reduced columns),
	/// 03-REPORT (unique IP counts per period), plus charting and IP analysis helpers.
	/// </summary>
	public class WebLogs
	{
		//	Beware: Web logs can contain security sensitive information!

		//	Access to the path 'C:\inetpub\logs' is denied.
		//		Add SpludlowGroup with default (Add & List permissions)
		//		Tick replace permissions on all children or do individually as below

		//	Access to the path 'C:\inetpub\logs\LogFiles\W3SVC1' is denied.
		//		Add SpludlowGroup with default (Add & List permissions)
		//		(Do for each SITE directory you want to give permission)

		//	cs(Referer) does not seem to be enabled by default before Windows 10/Server 2016

		//	NOTE(review): the column separators inside the TextTable.ReadText() header strings in this
		//	class are single spaces in the reviewed text. Whitespace in that text had been collapsed, so
		//	confirm the delimiter TextTable.ReadText expects (possibly tabs) before relying on them.

		/// <summary>HRESULT compared against IOException.HResult in Read(); reported there as "File Locked".</summary>
		private const uint SharingViolationHResult = 0x80070020;

		/// <summary>
		/// Queries the web sites of every configured host and combines them into one table
		/// with an additional leading "Host" column.
		/// </summary>
		/// <returns>A DataSet containing the single combined table; the DataSet is also sent to Spludlow.Log.Report.</returns>
		public static DataSet QuerySiteIds()
		{
			DataTable table = null;

			foreach (string host in Spludlow.Config.Hosts())
			{
				DataTable hostTable = Spludlow.Admin.WebQuerySites(host).Tables[0];

				//	The schema is taken from the first host's result
				if (table == null)
				{
					table = new DataTable();
					table.Columns.Add("Host", typeof(string));
					foreach (DataColumn column in hostTable.Columns)
						table.Columns.Add(column.ColumnName, column.DataType);
				}

				foreach (DataRow hostRow in hostTable.Rows)
				{
					table.ImportRow(hostRow);
					table.Rows[table.Rows.Count - 1]["Host"] = host;
				}
			}

			//	No hosts: return an empty table rather than failing on DataSet.Tables.Add(null)
			if (table == null)
			{
				table = new DataTable();
				table.Columns.Add("Host", typeof(string));
			}

			DataSet dataSet = new DataSet();
			dataSet.Tables.Add(table);

			Spludlow.Log.Report("WebLogs QuerySiteIds", dataSet);

			return dataSet;
		}

		/// <summary>
		/// Deletes "*.log" files, in each immediate sub directory of the root, that were last written before the given date.
		/// </summary>
		/// <param name="rootLogDirectory">Directory whose immediate sub directories are scanned.</param>
		/// <param name="beforeDate">Files with a last write time earlier than this are deleted.</param>
		public static void ClearLogFiles(string rootLogDirectory, DateTime beforeDate)
		{
			foreach (string logDirectory in Directory.GetDirectories(rootLogDirectory))
			{
				foreach (string logFilename in Directory.GetFiles(logDirectory, "*.log"))
				{
					if (File.GetLastWriteTime(logFilename) < beforeDate)
						File.Delete(logFilename);
				}
			}
		}

		/// <summary>
		/// Downloads the ".log" files of the selected sites on a host into
		/// localTargetDirectory\00-RAW\host\siteName, skipping files that already exist locally
		/// with the same last write time.
		/// </summary>
		/// <param name="host">Host to query and download from.</param>
		/// <param name="sitesIds">Site ids to mirror; other sites on the host are ignored.</param>
		/// <param name="localTargetDirectory">Local root directory; "\00-RAW" is appended.</param>
		/// <returns>
		/// A DataSet with one row per attempted download; Status is empty on success or holds the
		/// exception message on failure. Files skipped as already mirrored are not listed.
		/// </returns>
		public static DataSet MirrorLogs(string host, int[] sitesIds, string localTargetDirectory)
		{
			localTargetDirectory = localTargetDirectory + @"\00-RAW";

			DataTable table = Spludlow.Data.TextTable.ReadText(new string[] {
				"Host SiteId SiteName Filename Length LastWriteTime Status",
				"String Int32 String String UInt64 DateTime String",
			});

			string hostDirectory = localTargetDirectory + @"\" + host;
			if (Directory.Exists(hostDirectory) == false)
				Directory.CreateDirectory(hostDirectory);

			DataTable sitesTable = Spludlow.Admin.WebQuerySites(host).Tables[0];

			HashSet<int> wantedSiteIds = new HashSet<int>(sitesIds);

			foreach (DataRow siteRow in sitesTable.Rows)
			{
				int siteId = (int)(long)siteRow["SiteId"];

				if (wantedSiteIds.Contains(siteId) == false)
					continue;

				string siteName = (string)siteRow["SiteName"];
				string logDirectory = (string)siteRow["SiteLogFileDirectory"] + @"\W3SVC" + siteId;

				if (Spludlow.RemoteIO.DirectoryExists(host, logDirectory) == false)
				{
					Spludlow.Log.Warning("MirrorLogs; Log Directory does not exists: " + logDirectory);
					continue;
				}

				DataTable logFilesTable = Spludlow.Io.DirectoryList.List(host, logDirectory, false, false).Tables[0];

				string siteDirectory = hostDirectory + @"\" + siteName;
				if (Directory.Exists(siteDirectory) == false)
					Directory.CreateDirectory(siteDirectory);

				foreach (DataRow logFileRow in logFilesTable.Rows)
				{
					string remoteFilename = (string)logFileRow["Path"];

					if (remoteFilename.EndsWith(".log", StringComparison.Ordinal) == false)
						continue;

					string logName = Path.GetFileName(remoteFilename);
					string targetFilename = siteDirectory + @"\" + logName;

					DateTime remoteLastWriteTime = (DateTime)logFileRow["LastWriteTime"];

					//	Already mirrored and unchanged: nothing to download and nothing to report
					if (File.Exists(targetFilename) == true && remoteLastWriteTime == File.GetLastWriteTime(targetFilename))
						continue;

					string status = "";

					try
					{
						Spludlow.Call.Download(host, remoteFilename, targetFilename);

						//	Copy the remote time stamps so the unchanged check above works on the next run
						File.SetLastWriteTime(targetFilename, remoteLastWriteTime);
						File.SetLastAccessTime(targetFilename, (DateTime)logFileRow["LastAccessTime"]);
						File.SetCreationTime(targetFilename, (DateTime)logFileRow["CreationTime"]);
					}
					catch (Exception ee)
					{
						//	Deliberate best effort: record the failure and carry on with the next file
						status = ee.Message;
					}

					table.Rows.Add(host, siteId, siteName, logName, (ulong)logFileRow["Length"], logFileRow["LastWriteTime"], status);
				}
			}

			DataSet dataSet = new DataSet();
			dataSet.Tables.Add(table);

			Spludlow.Log.Report("WebLogs Mirror: " + host, dataSet);

			return dataSet;
		}

		/// <summary>
		/// Joins the mirrored logs from startDate onwards with no upper date limit.
		/// </summary>
		/// <param name="localRootDirectory">Local root directory containing "00-RAW".</param>
		/// <param name="startDate">Files last written before this are skipped.</param>
		public static void JoinLogs(string localRootDirectory, DateTime startDate)
		{
			JoinLogs(localRootDirectory, startDate, DateTime.MaxValue);
		}

		/// <summary>
		/// Rebuilds localRootDirectory\01-JOIN, writing one "host_site.txt" table for each
		/// host\site directory found under localRootDirectory\00-RAW.
		/// </summary>
		/// <param name="localRootDirectory">Local root directory containing "00-RAW".</param>
		/// <param name="startDate">Files last written before this are skipped.</param>
		/// <param name="beforeDate">Files last written on or after this are skipped.</param>
		public static void JoinLogs(string localRootDirectory, DateTime startDate, DateTime beforeDate)
		{
			string sourceDirectory = localRootDirectory + @"\00-RAW";
			string targetDirectory = localRootDirectory + @"\01-JOIN";

			//	The target is always rebuilt from scratch
			if (Directory.Exists(targetDirectory) == true)
				Directory.Delete(targetDirectory, true);
			Directory.CreateDirectory(targetDirectory);

			foreach (string hostDirectory in Directory.GetDirectories(sourceDirectory))
			{
				string host = Path.GetFileName(hostDirectory);

				foreach (string siteDirectory in Directory.GetDirectories(hostDirectory))
				{
					string site = Path.GetFileName(siteDirectory);

					DataSet dataSet = Read(siteDirectory, startDate, beforeDate);

					//	Read() returns null when no "#Fields:" line was found in any selected file
					if (dataSet == null)
						continue;

					DataTable table = dataSet.Tables[0];

					string filename = targetDirectory + @"\" + host + "_" + site + ".txt";

					Spludlow.Data.TextTable.Write(filename, table);
				}
			}
		}

		/// <summary>
		/// Reads the space delimited "*.log" files in a directory into a single table.
		/// The columns come from the first "#Fields:" line encountered, with a leading "DateTime"
		/// column parsed from the "date" and "time" fields.
		/// </summary>
		/// <param name="directory">Directory containing the log files.</param>
		/// <param name="startDate">Files last written before this are skipped, unless DateTime.MinValue.</param>
		/// <param name="beforeDate">Files last written on or after this are skipped, unless DateTime.MaxValue.</param>
		/// <returns>A DataSet holding the table, or null when no table was defined.</returns>
		/// <exception cref="ApplicationException">
		/// A data line appears before any "#Fields:" line, or a data line's field count does not match the columns.
		/// </exception>
		public static DataSet Read(string directory, DateTime startDate, DateTime beforeDate)
		{
			DataTable table = null;

			char[] separators = new char[] { ' ' };

			foreach (string filename in Directory.GetFiles(directory, "*.log"))
			{
				DateTime fileDate = File.GetLastWriteTime(filename);

				if (startDate != DateTime.MinValue && fileDate < startDate)
					continue;

				if (beforeDate != DateTime.MaxValue && fileDate >= beforeDate)
					continue;

				try
				{
					//	Only read access is needed; the default for this constructor would request read/write
					using (FileStream stream = new FileStream(filename, FileMode.Open, FileAccess.Read, FileShare.Read))
					using (StreamReader reader = new StreamReader(stream, Encoding.UTF8))
					{
						string line;
						while ((line = reader.ReadLine()) != null)
						{
							if (line.Length == 0)
								continue;

							//	Only the first "#Fields:" line defines the columns; later ones are skipped as comments
							if (table == null && line.StartsWith("#Fields:", StringComparison.Ordinal) == true)
							{
								table = new DataTable();
								table.Columns.Add("DateTime", typeof(DateTime));

								string[] fieldNames = line.Split(separators);

								//	Index 0 is the "#Fields:" token itself
								for (int index = 1; index < fieldNames.Length; ++index)
								{
									//	"cs(User-Agent)" becomes "cs-User-Agent"
									string columnName = fieldNames[index].Replace("(", "-").Replace(")", "");
									table.Columns.Add(columnName, typeof(string));
								}
							}

							if (line.StartsWith("#", StringComparison.Ordinal) == true)
								continue;

							if (table == null)
								throw new ApplicationException("Table not defined");

							string[] parts = line.Split(separators);

							if (parts.Length != table.Columns.Count - 1)
								throw new ApplicationException("Parts: " + parts.Length + ", Columns: " + table.Columns.Count);

							DataRow row = table.NewRow();

							//	Column 0 is the computed DateTime, so the fields start at column 1
							for (int index = 0; index < parts.Length; ++index)
								row[index + 1] = parts[index];

							//	Log files are machine written, so parse independently of the current culture
							row["DateTime"] = DateTime.Parse(row["date"] + " " + row["time"], System.Globalization.CultureInfo.InvariantCulture);

							if (table.Columns.Contains("cs-User-Agent") == true)
								row["cs-User-Agent"] = System.Web.HttpUtility.UrlDecode((string)row["cs-User-Agent"]);

							table.Rows.Add(row);
						}
					}
				}
				catch (IOException ee)
				{
					//	Rethrow with "throw;" so the original stack trace is preserved
					if ((uint)ee.HResult != SharingViolationHResult)
						throw;

					Spludlow.Log.Warning("File Locked: " + filename);   //	Should be doing in mirror running remote !!!
				}
			}

			if (table == null)
				return null;

			DataSet dataSet = new DataSet();
			dataSet.Tables.Add(table);
			return dataSet;
		}

		/// <summary>
		/// Rebuilds localRootDirectory\02-CLEAN by cleaning every "*.txt" table in localRootDirectory\01-JOIN.
		/// </summary>
		/// <param name="localRootDirectory">Local root directory containing "01-JOIN".</param>
		public static void CleanLogs(string localRootDirectory)
		{
			string sourceDirectory = localRootDirectory + @"\01-JOIN";
			string targetDirectory = localRootDirectory + @"\02-CLEAN";

			if (Directory.Exists(targetDirectory) == true)
				Directory.Delete(targetDirectory, true);
			Directory.CreateDirectory(targetDirectory);

			foreach (string sourceFilename in Directory.GetFiles(sourceDirectory, "*.txt"))
			{
				string targetFilename = targetDirectory + @"\" + Path.GetFileName(sourceFilename);

				DataTable table = Spludlow.Data.TextTable.ReadFile(sourceFilename);

				table = CleanLogs(table);

				Spludlow.Data.TextTable.Write(targetFilename, table);
			}
		}

		/// <summary>
		/// Reduces a joined log table to the columns DateTime, Status, Real, IP, Country and Path,
		/// dropping rows whose client IP contains ':' or is reported private by Spludlow.Net.IP.IsPrivate.
		/// </summary>
		/// <param name="table">Joined log table; reads "c-ip", "DateTime", "sc-status", "cs-User-Agent" and "cs-uri-stem".</param>
		/// <returns>The reduced table. Country is currently always empty.</returns>
		public static DataTable CleanLogs(DataTable table)
		{
			DataTable resultTable = Spludlow.Data.TextTable.ReadText(new string[] {
				"DateTime Status Real IP Country Path",
				"DateTime String Boolean String String String",
			});

			//	NOTE(review): only used by the disabled country lookup below. Kept because the
			//	constructor's side effects are not visible from here - confirm before removing.
			Spludlow.Net.IPWhoIsRange range = new Spludlow.Net.IPWhoIsRange();

			foreach (DataRow row in table.Rows)
			{
				string ip = (string)row["c-ip"];

				if (ip.Contains(":") == true || Spludlow.Net.IP.IsPrivate(ip) == true)
					continue;

				//	Country lookup is disabled; it was:
				//		try { country = range.CountryCode(ip); }
				//		catch (Exception ee) { Spludlow.Log.Warning("Bad CountryCode for IP: " + ip, ee); }
				string country = "";

				DateTime dateTime = (DateTime)row["DateTime"];
				string status = (string)row["sc-status"];
				string agent = (string)row["cs-User-Agent"];
				bool realAgent = Spludlow.Net.Http.IsUserAgentReal(agent);
				string path = (string)row["cs-uri-stem"];

				resultTable.Rows.Add(dateTime, status, realAgent, ip, country, path);
			}

			return resultTable;
		}

		/// <summary>
		/// Builds "Human" and "Bot" period reports (Quarter, Month, Week, Day) for every "*.txt" table in
		/// localRootDirectory\02-CLEAN, writes them to localRootDirectory\03-REPORT and logs them.
		/// </summary>
		/// <param name="localRootDirectory">Local root directory containing "02-CLEAN".</param>
		public static void Report(string localRootDirectory)
		{
			string directory = localRootDirectory + @"\02-CLEAN";
			string targetDirectory = localRootDirectory + @"\03-REPORT";

			if (Directory.Exists(targetDirectory) == true)
				Directory.Delete(targetDirectory, true);

			Spludlow.Periods.PeriodSize[] periodSizes = new Spludlow.Periods.PeriodSize[]
			{
				Spludlow.Periods.PeriodSize.Quarter,
				Spludlow.Periods.PeriodSize.Month,
				Spludlow.Periods.PeriodSize.Week,
				Spludlow.Periods.PeriodSize.Day,
			};

			DataSet dataSet = new DataSet();

			foreach (string filename in Directory.GetFiles(directory, "*.txt"))
			{
				string name = Path.GetFileNameWithoutExtension(filename);

				//	Read once per file; the report builder only reads from the table
				DataTable sourceTable = Spludlow.Data.TextTable.ReadFile(filename);

				foreach (Spludlow.Periods.PeriodSize periodSize in periodSizes)
				{
					DataTable humanTable = BuildPeriodReport(sourceTable, periodSize, true);
					humanTable.TableName = name + "_Human_" + periodSize.ToString();
					dataSet.Tables.Add(humanTable);

					DataTable botTable = BuildPeriodReport(sourceTable, periodSize, false);
					botTable.TableName = name + "_Bot_" + periodSize.ToString();
					dataSet.Tables.Add(botTable);
				}
			}

			Spludlow.Data.TextTable.WriteDirectory(targetDirectory, dataSet);

			Spludlow.Log.Report("WebLogs Report", dataSet);
		}

		/// <summary>
		/// Builds a report of unique IP counts per period and status code from a cleaned log file.
		/// </summary>
		/// <param name="sourceFilename">Cleaned log table file, as written by CleanLogs.</param>
		/// <param name="periodSize">Size of the reporting periods.</param>
		/// <param name="real">Value of the "Real" column to select rows on.</param>
		/// <returns>A table keyed on "PeriodDate" with one Int32 column per status code, "200" first.</returns>
		public static DataTable Report(string sourceFilename, Spludlow.Periods.PeriodSize periodSize, bool real)
		{
			DataTable table = Spludlow.Data.TextTable.ReadFile(sourceFilename);

			return BuildPeriodReport(table, periodSize, real);
		}

		/// <summary>
		/// Counts, for each period and status code, the distinct IPs in the rows whose "Real" column equals real.
		/// The source table is only read, never modified.
		/// </summary>
		private static DataTable BuildPeriodReport(DataTable table, Spludlow.Periods.PeriodSize periodSize, bool real)
		{
			List<string> allStatusCodes = new List<string>();

			DateTime minDate = DateTime.MaxValue;
			DateTime maxDate = DateTime.MinValue;

			//	Status columns and the date range come from every row, regardless of the "Real" value
			foreach (DataRow row in table.Rows)
			{
				string status = (string)row["Status"];

				if (allStatusCodes.Contains(status) == false)
					allStatusCodes.Add(status);

				DateTime dateTime = (DateTime)row["DateTime"];

				if (dateTime < minDate)
					minDate = dateTime;

				if (dateTime > maxDate)
					maxDate = dateTime;
			}

			//	Sorted, but with "200" always present and first
			allStatusCodes.Sort();
			allStatusCodes.Remove("200");
			allStatusCodes.Insert(0, "200");

			DateTime[] range = Spludlow.Periods.Range(periodSize, minDate, maxDate);

			DataTable resultTable = new DataTable();

			resultTable.Columns.Add("PeriodDate", typeof(DateTime));
			resultTable.PrimaryKey = new DataColumn[] { resultTable.Columns["PeriodDate"] };

			foreach (string status in allStatusCodes)
				resultTable.Columns.Add(status, typeof(int));

			//	One zero filled row per period, so periods without hits still appear
			foreach (DateTime date in range)
			{
				DataRow row = resultTable.NewRow();

				row[0] = date;
				for (int colIndex = 1; colIndex < resultTable.Columns.Count; ++colIndex)
					row[colIndex] = 0;

				resultTable.Rows.Add(row);
			}

			foreach (string status in allStatusCodes)
			{
				//	Only the distinct count per period is needed, so a set is enough
				Dictionary<DateTime, HashSet<string>> ipPeriods = new Dictionary<DateTime, HashSet<string>>();

				//	The status value comes from log data; escape quotes so it cannot break the filter
				string selectText = "Real = " + real.ToString() + " AND Status = '" + status.Replace("'", "''") + "'";

				foreach (DataRow row in table.Select(selectText))
				{
					string ip = (string)row["IP"];

					DateTime dateTime = (DateTime)row["DateTime"];

					DateTime periodDateTime = Spludlow.Periods.RoundDown(periodSize, dateTime);

					if (ipPeriods.ContainsKey(periodDateTime) == false)
						ipPeriods.Add(periodDateTime, new HashSet<string>());

					ipPeriods[periodDateTime].Add(ip);
				}

				foreach (DateTime periodDateTime in ipPeriods.Keys)
				{
					DataRow resultRow = resultTable.Rows.Find(periodDateTime);
					resultRow[status] = ipPeriods[periodDateTime].Count;
				}
			}

			return resultTable;
		}

		/// <summary>
		/// Prints a bar chart page for each report table in localRootDirectory\03-REPORT,
		/// limited to the most recent rows of each table.
		/// </summary>
		/// <param name="localRootDirectory">Local root directory containing "03-REPORT".</param>
		public static void Chart(string localRootDirectory)
		{
			string sourceDirectory = localRootDirectory + @"\03-REPORT";

			DataSet dataSet = Spludlow.Data.TextTable.ReadDirectory(sourceDirectory);

			Spludlow.Log.Report("Chart", dataSet);

			Spludlow.Printing.PrintDoc doc = new Spludlow.Printing.PrintDoc("A4*");

			foreach (DataTable table in dataSet.Tables)
			{
				//	Value columns, excluding the leading period column
				int width = table.Columns.Count - 1;

				//	Nothing to plot; also avoids dividing by zero below
				if (width < 1)
				{
					Spludlow.Log.Warning("Chart; Table has no value columns: " + table.TableName);
					continue;
				}

				doc.NewPage();

				//	Fewer periods are shown as the number of value columns grows
				int limit = 64 / width;

				int startIndex = table.Rows.Count - limit;
				if (startIndex < 0)
					startIndex = 0;

				DataTable limitTable = table.Clone();
				for (int index = startIndex; index < table.Rows.Count; ++index)
					limitTable.ImportRow(table.Rows[index]);

				Spludlow.Drawing.BarChart barChart = new Spludlow.Drawing.BarChart(doc);

				float border = 5;

				barChart.Plot(border, border, doc.Width - border * 2, doc.Height - border * 2, FixReportFirstDateColumn(limitTable), true);
			}

			Spludlow.Printing.Printer.Print(doc);
		}

		/// <summary>
		/// Copies a report table, replacing the first (DateTime) column with a string column holding
		/// the period name. The period size is parsed from the text after the last '_' in the table name.
		/// </summary>
		/// <param name="table">Report table whose name ends in "_" followed by a PeriodSize name.</param>
		/// <returns>A new table with the same name and the same remaining columns.</returns>
		public static DataTable FixReportFirstDateColumn(DataTable table)
		{
			string periodText = table.TableName;
			int index = periodText.LastIndexOf('_');
			periodText = periodText.Substring(index + 1);

			Spludlow.Periods.PeriodSize periodSize = (Spludlow.Periods.PeriodSize)Enum.Parse(typeof(Spludlow.Periods.PeriodSize), periodText);

			DataTable newTable = new DataTable(table.TableName);

			newTable.Columns.Add(table.Columns[0].ColumnName, typeof(string));
			for (int colIndex = 1; colIndex < table.Columns.Count; ++colIndex)
				newTable.Columns.Add(table.Columns[colIndex].ColumnName, table.Columns[colIndex].DataType);

			foreach (DataRow row in table.Rows)
			{
				DataRow newRow = newTable.NewRow();

				newRow[0] = Spludlow.Periods.PeriodNameRaw(periodSize, (DateTime)row[0]);

				for (int colIndex = 1; colIndex < table.Columns.Count; ++colIndex)
					newRow[colIndex] = row[colIndex];

				newTable.Rows.Add(newRow);
			}

			return newTable;
		}

		/// <summary>
		/// Writes a "name_IPReport.txt" table next to each "*.txt" table in the directory
		/// whose name does not contain an underscore, and logs each report.
		/// </summary>
		/// <param name="directory">Directory containing the source tables; reports are written to the same directory.</param>
		public static void IPReports(string directory)
		{
			foreach (string sourceFilename in Directory.GetFiles(directory, "*.txt"))
			{
				string name = Path.GetFileNameWithoutExtension(sourceFilename);

				if (name.Contains("_") == true) //	Source files will not have underscores, all generated files will
					continue;

				DataTable table = Spludlow.Data.TextTable.ReadFile(sourceFilename);

				table = IPReport(table);

				string targetFilename = directory + @"\" + name + "_IPReport.txt";

				Spludlow.Data.TextTable.Write(targetFilename, table);

				Spludlow.Log.Report("IP Report" + name, table);
			}
		}

		/// <summary>
		/// Counts occurrences of each "c-ip" value and adds WHOIS details
		/// ("country", "descr", "netname", "inetnum") and a "CountryName" to each row.
		/// </summary>
		/// <param name="webLogsTable">Log table containing a "c-ip" column.</param>
		/// <returns>The occurrence table with the added columns; rows without WHOIS text keep them null.</returns>
		public static DataTable IPReport(DataTable webLogsTable)
		{
			DataTable table = Spludlow.Data.ADO.CountOccurrence(webLogsTable, "c-ip");

			table.Columns.Add("CountryName", typeof(string));

			string[] columnNames = new string[] { "country", "descr", "netname", "inetnum" };
			foreach (string columnName in columnNames)
				table.Columns.Add(columnName, typeof(string));

			foreach (DataRow row in table.Rows)
			{
				string ip = (string)row["c-ip"];

				string whoisText = Spludlow.Net.IpWhoIs.WhoIs(ip);

				if (whoisText == null)
					continue;

				//	Keyed by WHOIS field name; each value is an indexable list, of which the first entry is used.
				//	"var" is used because the exact return type is not visible in the reviewed text.
				var info = Spludlow.Net.IpWhoIs.Parse(whoisText);

				foreach (string columnName in columnNames)
				{
					if (info.ContainsKey(columnName) == true)
						row[columnName] = info[columnName][0];
				}

				if (row.IsNull("country") == false)
				{
					string countryCode = (string)row["country"];
					string countryName = Spludlow.ISO3166.Alpha2EnglishName(countryCode);
					row["CountryName"] = countryName;
				}
			}

			return table;
		}

		//	DateTime	cs-method	cs-uri-stem	cs-uri-query	c-ip	cs-User-Agent	cs-Referer	sc-status

		//	cs-User-Agent

		/// <summary>
		/// Logs an occurrence count report for selected columns of every "*.txt" table found in
		/// each immediate sub directory of the root. Only "c-ip" is currently selected.
		/// </summary>
		/// <param name="localRootDirectory">Directory whose immediate sub directories are scanned.</param>
		public static void ColumnAnalysis(string localRootDirectory)
		{
			//	Other candidates: "cs-method", "cs-uri-stem", "c-ip", "cs-User-Agent", "cs-Referer", "sc-status"
			string[] columnNames = new string[] { "c-ip" };

			foreach (string hostDirectory in Directory.GetDirectories(localRootDirectory))
			{
				foreach (string filename in Directory.GetFiles(hostDirectory, "*.txt"))
				{
					string name = Path.GetFileName(filename);

					DataTable table = Spludlow.Data.TextTable.ReadFile(filename);

					foreach (string columnName in columnNames)
					{
						if (table.Columns.Contains(columnName) == false)
						{
							Spludlow.Log.Warning("ColumnAnalysis: " + name + ", column not found: " + columnName);
							continue;
						}

						DataTable countTable = Spludlow.Data.ADO.CountOccurrence(table, columnName);

						if (columnName == "sc-status")
						{
							countTable.Columns.Add("HttpStatus", typeof(string));

							foreach (DataRow row in countTable.Rows)
								row["HttpStatus"] = Spludlow.Net.Http.StatusCode(Int32.Parse((string)row["sc-status"]));
						}

						if (columnName == "cs-User-Agent")
						{
							countTable.Columns.Add("UserLength", typeof(int));
							countTable.Columns.Add("Real", typeof(bool));
							countTable.Columns.Add("User0", typeof(string));
							countTable.Columns.Add("User1", typeof(string));

							foreach (DataRow row in countTable.Rows)
							{
								string agent = ((string)row["cs-User-Agent"]).Trim();

								row["UserLength"] = agent.Length;

								int index = agent.IndexOf(' ');

								//	Single token agent: "Real" and "User1" are left null
								if (index == -1)
								{
									row["User0"] = agent;
									continue;
								}

								//	"Real" is only ever set to true; otherwise it stays null
								bool real = Spludlow.Net.Http.IsUserAgentReal(agent);
								if (real == true)
									row["Real"] = true;

								row["User0"] = agent.Substring(0, index).Trim();
								row["User1"] = agent.Substring(index + 1).Trim();
							}
						}

						Spludlow.Log.Report("ColumnAnalysis: " + name + " " + columnName, countTable);
					}
				}
			}
		}

		/// <summary>
		/// Logs an IP report for every "*.txt" table found in each immediate sub directory of the root,
		/// after removing rows whose "c-ip" contains ':' or is reported private by Spludlow.Net.IP.IsPrivate.
		/// </summary>
		/// <param name="localRootDirectory">Directory whose immediate sub directories are scanned.</param>
		public static void IPAnalysis(string localRootDirectory)
		{
			foreach (string hostDirectory in Directory.GetDirectories(localRootDirectory))
			{
				foreach (string filename in Directory.GetFiles(hostDirectory, "*.txt"))
				{
					string name = Path.GetFileName(filename);

					DataTable table = Spludlow.Data.TextTable.ReadFile(filename);

					DataTable filteredTable = table.Clone();
					foreach (DataRow row in table.Rows)
					{
						string ip = (string)row["c-ip"];
						if (ip.Contains(":") == true || Spludlow.Net.IP.IsPrivate(ip) == true)
							continue;

						filteredTable.ImportRow(row);
					}

					DataTable resultTable = IPReport(filteredTable);

					Spludlow.Log.Report("IPAnalysis: " + name, resultTable);
				}
			}
		}

		//	Hits above ok

		//	Detail
		//	Time Period
		//		IP real or bot (remote all private)
		//			Status
		//				individual IP
		//					paths in that IP

		/// <summary>
		/// Logs two tables for the given log table: the unique IPs per period with a country code,
		/// and the unique IP count per period. Only rows with status "200", an IP that does not
		/// contain ':' and is not private, and a user agent reported as real are counted.
		/// </summary>
		/// <param name="table">Joined log table; reads "sc-status", "c-ip", "cs-User-Agent" and "DateTime".</param>
		/// <param name="periodSize">Size of the reporting periods.</param>
		public static void WebLogReport(DataTable table, Spludlow.Periods.PeriodSize periodSize)
		{
			DataTable resultTable = Spludlow.Data.TextTable.ReadText(new string[] {
				"PeriodDate Country IP",
				"DateTime String String",
			});

			DataTable hitsTable = Spludlow.Data.TextTable.ReadText(new string[] {
				"PeriodDate Hits",
				"DateTime Int32",
			});

			//	A list per period keeps the IPs in first seen order for the result table
			Dictionary<DateTime, List<string>> periodIps = new Dictionary<DateTime, List<string>>();

			foreach (DataRow row in table.Rows)
			{
				string status = (string)row["sc-status"];
				string ip = (string)row["c-ip"];
				string agent = (string)row["cs-User-Agent"];

				bool remove = false;

				if (status != "200")
					remove = true;

				if (ip.Contains(":") == true || Spludlow.Net.IP.IsPrivate(ip) == true)
					remove = true;

				if (Spludlow.Net.Http.IsUserAgentReal(agent) == false)
					remove = true;

				if (remove == true)
					continue;

				DateTime dateTime = (DateTime)row["DateTime"];

				DateTime periodDateTime = Spludlow.Periods.RoundDown(periodSize, dateTime);

				if (periodIps.ContainsKey(periodDateTime) == false)
					periodIps.Add(periodDateTime, new List<string>());

				if (periodIps[periodDateTime].Contains(ip) == false)
					periodIps[periodDateTime].Add(ip);
			}

			foreach (DateTime periodDateTime in periodIps.Keys)
			{
				foreach (string ip in periodIps[periodDateTime])
				{
					string country = Spludlow.Net.IpWhoIs.CountryCode(ip);

					resultTable.Rows.Add(periodDateTime, country, ip);
				}

				hitsTable.Rows.Add(periodDateTime, periodIps[periodDateTime].Count);
			}

			Spludlow.Log.Report("resultTable", resultTable);

			Spludlow.Log.Report("hitsTable", hitsTable);
		}
	}
}