// Spludlow Software
// Copyright © Samuel P. Ludlow 2020 All Rights Reserved
// Distributed under the terms of the GNU General Public License version 3
// Distributed WITHOUT ANY WARRANTY; without implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE
// https://www.spludlow.co.uk/LICENCE.TXT
// The Spludlow logo is a registered trademark of Samuel P. Ludlow and may not be used without permission
// v1.14
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.IO;
using System.Data;
namespace Spludlow.Data
{
public class BasicCompanyData
{
// http://download.companieshouse.gov.uk/en_output.html
private static Encoding encoding = Encoding.UTF8;
///
/// Read table from part CSV file, ditch unwanted columns and fix datatypes based on mapping table
///
public static DataTable CleanTable(string csvFilename, Encoding csvEncoding, DataTable columnMappingTable)
{
// Fix empty datatypes to String
foreach (DataRow row in columnMappingTable.Rows)
{
if (row.IsNull("TargetName") == false && row.IsNull("DataType") == true)
row["DataType"] = "String";
}
// Create Result table
DataTable resultTable = new DataTable();
foreach (DataRow row in columnMappingTable.Rows)
{
if (row.IsNull("TargetName") == true)
continue;
resultTable.Columns.Add((string)row["TargetName"], Type.GetType("System." + (string)row["DataType"], true));
}
DataTable sourceTable = Spludlow.Data.CSV.Read(csvFilename, csvEncoding);
// Copy rows to target table based on mapping table
foreach (DataRow sourceRow in sourceTable.Rows)
{
DataRow targetRow = resultTable.NewRow();
foreach (DataRow mapRow in columnMappingTable.Rows)
{
if (mapRow.IsNull("TargetName") == true)
continue;
string dataType = (string)mapRow["DataType"];
string sourceName = (string)mapRow["SourceName"];
string targetName = (string)mapRow["TargetName"];
string textData = (string)sourceRow[sourceName];
object result = DBNull.Value;
switch (dataType)
{
case "DateTime":
if (textData.Length > 0)
result = Convert.ToDateTime(textData);
break;
case "Int32":
if (textData.Length > 0)
result = Convert.ToInt32(textData);
break;
case "String":
result = textData;
break;
default:
throw new ApplicationException("CombineTables; Unsupported datatype: " + dataType);
}
targetRow[targetName] = result;
}
resultTable.Rows.Add(targetRow);
}
return resultTable;
}
///
/// Combine the 4 Normalized SIC codes into a single table
///
public static void CombineSIC(string normalizationDirectory, string targetFilename)
{
int sicCount = 4;
DataTable resultTable = new DataTable();
resultTable.Columns.Add("SICCode", typeof(int));
resultTable.Columns.Add("SICText", typeof(string));
resultTable.PrimaryKey = new DataColumn[] { resultTable.Columns[0] };
for (int sicIndex = 0; sicIndex < sicCount; ++sicIndex)
{
DataTable table = Spludlow.Data.TextTable.ReadFile(normalizationDirectory + @"\SicText" + (sicIndex + 1).ToString() + ".txt", encoding);
foreach (DataRow sourceRow in table.Rows)
{
string sicText = (string)sourceRow[1];
if (sicText == "")
continue;
int sicCode = 0;
if (sicText != "None Supplied")
{
int index = sicText.IndexOf("-");
if (index == -1)
throw new ApplicationException("CombineSIC; Bad SIC Text: " + sicText);
sicCode = Int32.Parse(sicText.Substring(0, index).Trim());
sicText = sicText.Substring(index + 1).Trim();
}
DataRow resultRow = resultTable.Rows.Find(sicCode);
if (resultRow == null)
resultTable.Rows.Add(new object[] { sicCode, sicText });
}
}
DataView view = new DataView(resultTable);
view.Sort = "SICCode";
Spludlow.Data.TextTable.Write(targetFilename, view, encoding);
}
///
/// Create Final lookup tables from nornalized tables
///
public static void FinalNormalTables(string normalizationDirectory, string targetDirectory)
{
MakeFinalTable4CharPK(normalizationDirectory + @"\AccountCategory.txt", targetDirectory, "AccountCategories", "AccountCategoryId");
MakeFinalTable4CharPK(normalizationDirectory + @"\CompanyCategory.txt", targetDirectory, "CompanyCategories", "CompanyCategoryId");
MakeFinalTable4CharPK(normalizationDirectory + @"\CompanyStatus.txt", targetDirectory, "CompanyStatuses", "CompanyStatusId");
MakeFinalTableInt32PK(normalizationDirectory + @"\CountryOfOrigin.txt", targetDirectory, "CountryOfOrigins", "CountryOfOriginId");
}
public static void MakeFinalTable4CharPK(string sourceFilename, string finalDirectory, string tableName, string idColumnName)
{
string filename = finalDirectory + @"\" + tableName + ".txt";
DataTable sourceTable = Spludlow.Data.TextTable.ReadFile(sourceFilename, encoding);
DataTable targetTable = new DataTable();
targetTable.TableName = tableName;
targetTable.Columns.Add(idColumnName, typeof(string));
targetTable.Columns.Add("Description", typeof(string));
targetTable.PrimaryKey = new DataColumn[] { targetTable.Columns[0] };
foreach (DataRow sourceRow in sourceTable.Rows)
{
string description = (string)sourceRow[1];
string[] parts = description.Split(new char[] { ' ', '/', '(', ')', '-', ',', '.', '\'' }, StringSplitOptions.RemoveEmptyEntries);
string code;
switch (parts.Length)
{
case 0:
code = " ";
break;
case 1:
code = description.Substring(0, 4);
break;
case 2:
code = parts[0].Substring(0, 2) + parts[1].Substring(0, 2);
break;
case 3:
code = parts[0].Substring(0, 2) + parts[1].Substring(0, 1) + parts[2].Substring(0, 1);
break;
default:
code = parts[0].Substring(0, 1) + parts[1].Substring(0, 1) + parts[2].Substring(0, 1) + parts[parts.Length - 1].Substring(0, 1);
break;
}
targetTable.Rows.Add(new object[] { code.ToUpper(), description });
}
Spludlow.Data.TextTable.Write(filename, targetTable, encoding);
}
public static void MakeFinalTableInt32PK(string sourceFilename, string finalDirectory, string tableName, string idColumnName)
{
DataTable sourceTable = Spludlow.Data.TextTable.ReadFile(sourceFilename, encoding);
DataTable targetTable = new DataTable();
targetTable.TableName = tableName;
targetTable.Columns.Add(idColumnName, typeof(int));
targetTable.Columns.Add("Description", typeof(string));
targetTable.PrimaryKey = new DataColumn[] { targetTable.Columns[0] };
for (int index = 0; index < sourceTable.Rows.Count; ++index)
{
string description = (string)sourceTable.Rows[index][1];
targetTable.Rows.Add(new object[] { (index + 1), description });
}
Spludlow.Data.TextTable.Write(finalDirectory + @"\" + tableName + ".txt", targetTable, encoding);
}
public static void MakeSICResolver(string[] sourceFilenames, string targetFilename)
{
DataTable resolverTable = Spludlow.Data.TextTable.ReadText(new string[]
{
"CompanyNumber SICCode Position",
"String* Int32* Int32",
});
resolverTable.TableName = "CompaniesSIC";
foreach (string sourceFilename in sourceFilenames)
{
DataTable table = Spludlow.Data.TextTable.ReadFile(sourceFilename, encoding);
foreach (DataRow row in table.Rows)
{
string companyNumber = (string)row["CompanyNumber"];
for (int position = 1; position <= 4; ++position)
{
string sicText = (string)row["SicText" + position];
if (sicText.Length == 0)
continue;
int sicCode = 0;
if (sicText != "None Supplied")
{
int index = sicText.IndexOf("-");
if (index == -1)
throw new ApplicationException("CombineSIC; Bad SIC Text: " + sicText);
sicCode = Int32.Parse(sicText.Substring(0, index).Trim());
}
resolverTable.Rows.Add(companyNumber, sicCode, position);
}
}
}
Spludlow.Data.TextTable.Write(targetFilename, resolverTable, encoding);
}
public static void MakeCompaniesTableParts(string[] sourceDataFilenames, string sourceDirectory, string targetDirectory, DataTable mappingTable, string schemaDirectory)
{
DataSet schema = Spludlow.Data.Schemas.ReadDirectory(schemaDirectory);
Dictionary> reverseLookups = new Dictionary>();
foreach (DataRow mapRow in mappingTable.Rows)
{
string tableName = (string)mapRow["TableName"];
string keyName = (string)mapRow["KeyName"];
DataTable table = Spludlow.Data.TextTable.ReadFile(sourceDirectory + @"\" + tableName + ".txt", encoding);
Dictionary reverseLookup = new Dictionary(StringComparer.OrdinalIgnoreCase);
foreach (DataRow row in table.Rows)
reverseLookup.Add((string)row[1], row[0]);
reverseLookups.Add(keyName, reverseLookup);
}
foreach (string sourceFilename in sourceDataFilenames)
{
string targetFilename = targetDirectory + @"\" + Path.GetFileName(sourceFilename);
DataTable sourceTable = Spludlow.Data.TextTable.ReadFile(sourceFilename, encoding);
DataTable targetTable = Spludlow.Data.ADO.TableCreate("Companies", schema);
foreach (DataRow sourceRow in sourceTable.Rows)
{
DataRow targetRow = targetTable.NewRow();
foreach (DataColumn column in targetTable.Columns)
{
DataRow mapRow = mappingTable.Rows.Find(column.ColumnName);
if (mapRow == null)
{
targetRow[column] = sourceRow[column.ColumnName];
}
else
{
string keyName = (string)mapRow["KeyName"];
string sourceColumnName = (string)mapRow["SourceColumn"];
string sourceValue = (string)sourceRow[sourceColumnName];
targetRow[column] = reverseLookups[keyName][sourceValue];
}
}
targetTable.Rows.Add(targetRow);
}
Spludlow.Data.TextTable.Write(targetFilename, targetTable, encoding);
}
}
public static void ImportDatabase(string sourceDirectory, string companySourceDirectory, string schemaDirectory, string connectionString, string databaseName, string tempDirectory)
{
Spludlow.Data.IDAL database = Spludlow.Data.DAL.Create(connectionString);
if (database.DatabaseExists(databaseName) == true)
database.DatabaseDelete(databaseName);
database.DatabaseCreate(databaseName);
database.ChangeDatabase(databaseName);
DataSet schema = Spludlow.Data.Schemas.ReadDirectory(schemaDirectory);
string[] tableNames = Spludlow.Data.Schemas.TableNamesInOrder(schema);
foreach (string tableName in tableNames)
database.TableCreate(tableName, schema);
database.TableAddKeys(schema, "UI");
foreach (string tableName in tableNames)
{
if (tableName == "Companies")
continue;
string dataFilename = sourceDirectory + @"\" + tableName + ".txt";
DataTable table = Spludlow.Data.TextTable.ReadFile(dataFilename, encoding);
database.BulkInsert(table, tableName, tempDirectory);
Spludlow.Log.Info("BasicCompanyData, ImportDatabase; Done table: " + tableName + ", rows: " + table.Rows.Count);
}
string[] companyFilenames = Directory.GetFiles(companySourceDirectory, "*.txt");
foreach (string companyFilename in companyFilenames)
{
DataTable table = Spludlow.Data.TextTable.ReadFile(companyFilename, encoding);
database.BulkInsert(table, "Companies", tempDirectory);
Spludlow.Log.Info("BasicCompanyData, ImportDatabase; Done company table: " + companyFilename + ", rows: " + table.Rows.Count);
}
database.CommandTimeout(240);
database.TableAddKeys(schema, "F");
database.Operation("shrink log");
}
// Create mapping file seperate !!!!
// string[] lines = System.IO.File.ReadAllLines(@"C:\BasicCompanyData\ColumnNames.txt");
// lines = Spludlow.Text.Swivel(lines, false);
//File.WriteAllLines(@"C:\BasicCompanyData\ColumnMapping.txt", lines);
public static void CreateFullCleanSchema(string cleanDirectory, string combinedCompaniesFilename, string schemaDirectory) // Needs plenty of Memory
{
if (File.Exists(combinedCompaniesFilename) == true)
File.Delete(combinedCompaniesFilename);
string[] sourceFilenames = Directory.GetFiles(cleanDirectory, "*.txt");
if (sourceFilenames.Length != 5)
throw new ApplicationException("Wrong number of TXT files than expected in directory: " + cleanDirectory);
Spludlow.Data.TextTable.CombineTables(sourceFilenames, combinedCompaniesFilename, encoding);
DataTable table = Spludlow.Data.TextTable.ReadFile(combinedCompaniesFilename, encoding);
table.TableName = "Companies";
table.PrimaryKey = new DataColumn[] { table.Columns["CompanyNumber"] };
DataSet schema = Spludlow.Data.ADO.Schema(table, true);
Spludlow.Data.TextTable.WriteDirectory(schemaDirectory, schema, encoding);
}
public static void MakeNormalTablesSchema(string finalDirectory, string normalSchemaDirectory)
{
string[] dataFilenames = Directory.GetFiles(finalDirectory, "*.txt");
DataSet dataSet = new DataSet();
foreach (string dataFilename in dataFilenames)
{
string tableName = Path.GetFileNameWithoutExtension(dataFilename);
if (tableName == "Companies")
continue;
DataTable table = Spludlow.Data.TextTable.ReadFile(dataFilename, encoding);
table.TableName = tableName;
dataSet.Tables.Add(table);
}
DataSet schema = Spludlow.Data.ADO.Schema(dataSet, true);
Spludlow.Data.TextTable.WriteDirectory(normalSchemaDirectory, schema);
}
public static void CombineFinalCompaniesTable(string finalPartsDirectory, string finalFilename)
{
if (File.Exists(finalFilename) == true)
return;
string[] sourceFilenames = Directory.GetFiles(finalPartsDirectory, "*.txt");
if (sourceFilenames.Length != 5)
throw new ApplicationException("Wrong number of TXT files than expected in directory: " + finalPartsDirectory);
Spludlow.Data.TextTable.CombineTables(sourceFilenames, finalFilename, encoding);
}
}
}