// Spludlow Software
// Copyright © Samuel P. Ludlow 2020 All Rights Reserved
// Distributed under the terms of the GNU General Public License version 3
// Distributed WITHOUT ANY WARRANTY; without implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE
// https://www.spludlow.co.uk/LICENCE.TXT
// The Spludlow logo is a registered trademark of Samuel P. Ludlow and may not be used without permission
// v1.14
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.IO;
using System.Data;
using System.Xml;
namespace Spludlow.Data
{
///
/// Convert XML into relational data tables and some JSON->XML conversion
///
/// Identity PKs added and FKs connected up
///
/// Tested On:
/// MAME mame0191 2017-11-23
/// EWS some traced output 2017-10-03
/// ipv4-address-space.xml 2017-10-03
/// Security Event Logs Export 2018-08-17
///
public class XML
{
public static string ID_Suffix = "_SPLID";
public static string RelationDelimiter = "_SPLDEL_";
public static void ConvertRelational(string xmlFilename, string topElementName, string connectionString, string databaseName)
{
DataSet dataSet = Spludlow.Data.XML.ConvertRelational(xmlFilename, topElementName);
DataSet schema = Spludlow.Data.ADO.Schema(dataSet, true);
Spludlow.Data.Database.AutoAddForeignKeys(schema);
Spludlow.Data.IDAL database = Spludlow.Data.DAL.Create(connectionString);
if (database.DatabaseExists(databaseName) == true)
database.DatabaseDelete(databaseName);
Spludlow.Data.Schemas.ImportDatabase(schema, dataSet, connectionString, databaseName, null);
}
///
/// Convert XML into relational data tables
/// I 'think' in data tables not XML. When presented with data in XML this generic converter will give me something I can work with
/// This code was thrashed away at until it worked, no design just deep concentration, its a dogs dinner but it seems to work
///
public static DataSet ConvertRelational(string xmlFilename, string topElementName)
{
return ConvertRelational(xmlFilename, topElementName, typeof(int));
}
public static DataSet ConvertRelational(string xmlFilename, string topElementName, Type primaryKeyType)
{
XmlDocument xmlDocument = new XmlDocument();
xmlDocument.Load(xmlFilename);
return ConvertRelational(xmlDocument, topElementName, primaryKeyType);
}
public static DataSet ConvertRelational(XmlDocument xmlDocument, string topElementName)
{
return ConvertRelational(xmlDocument, topElementName, typeof(int));
}
public static DataSet ConvertRelational(XmlDocument xmlDocument, string topElementName, Type primaryKeyType)
{
// Analyse the XML to determine Tables and Fields
Dictionary> elementParts = FieldAnalysis(xmlDocument, topElementName, null);
DataSet dataSet = new DataSet();
// For each table
foreach (string tableName in elementParts.Keys)
{
string[] nameParts = SplitName(tableName);
XmlElement tableElement = null;
foreach (XmlNode node in xmlDocument.GetElementsByTagName(nameParts[1]))
{
if (node.NodeType != XmlNodeType.Element)
throw new ApplicationException("not an element");
if (nameParts[0] == null || nameParts[0] == node.ParentNode.Name)
{
tableElement = (XmlElement)node;
break;
}
}
if (tableElement == null)
continue;
DataTable table = new DataTable();
table.TableName = tableName;
// Add Primary Key
string primaryKeyName = table.TableName + ID_Suffix;
table.Columns.Add(primaryKeyName, primaryKeyType);
table.PrimaryKey = new DataColumn[] { table.Columns[0] };
table.Columns[0].AutoIncrement = true;
table.Columns[0].AutoIncrementSeed = 1;
// If not the top table then add a forign key that relates to the parent
if (tableElement.Name != topElementName)
{
string parentTableName = TableName(tableElement.ParentNode); // names !!!
table.Columns.Add(parentTableName + ID_Suffix, primaryKeyType);
//Spludlow.Log.Info(tableElement.Name + " " + parentTableName);
}
// Add all the data fields
for (int columnIndex = 0; columnIndex < elementParts[tableName].Count; ++columnIndex)
{
string columnName = elementParts[tableName][columnIndex];
table.Columns.Add(columnName, typeof(string));
}
dataSet.Tables.Add(table);
}
// Go through all the top elements calling the recursive processing method
foreach (XmlElement element in xmlDocument.GetElementsByTagName(topElementName))
{
ProcessElement(dataSet, element);
}
// Fix table and column names (dont all need parent_child convention)
// Find table names that need naming "parent_child"
List needParentNames = TablesNeedingParentInName(elementParts);
//Spludlow.Log.Warning("needParentNames", needParentNames.ToArray());
foreach (DataTable table in dataSet.Tables)
{
string[] names;
string name;
name = table.TableName;
names = SplitName(name);
if (needParentNames.Contains(names[1]) == false)
name = names[1];
name = name.Replace(Spludlow.Data.XML.RelationDelimiter, "");
table.TableName = name;
foreach (DataColumn column in table.Columns)
{
name = column.ColumnName;
if (name.EndsWith(Spludlow.Data.XML.ID_Suffix) == true)
{
names = SplitName(name.Substring(0, name.Length - Spludlow.Data.XML.ID_Suffix.Length));
if (needParentNames.Contains(names[1]) == false)
name = names[1] + Spludlow.Data.XML.ID_Suffix;
}
else
{
if (name.EndsWith("Value") == true && name.Contains(Spludlow.Data.XML.RelationDelimiter) == true)
{
names = SplitName(name.Substring(0, name.Length - 5));
if (needParentNames.Contains(names[1]) == false)
name = names[1] + "Value";
}
}
name = name.Replace(Spludlow.Data.XML.RelationDelimiter, "");
name = name.Replace(Spludlow.Data.XML.ID_Suffix, "_Id");
column.ColumnName = name;
}
}
return dataSet;
}
private static List TablesNeedingParentInName(Dictionary> elementParts)
{
// Child, List of parents
Dictionary> relations = new Dictionary>();
foreach (string tableName in elementParts.Keys)
{
int index = tableName.IndexOf(RelationDelimiter);
if (index == -1) // Top level
continue;
string[] parentChild = SplitName(tableName);
string parent = parentChild[0];
string child = parentChild[1];
if (relations.ContainsKey(child) == false)
relations.Add(child, new List());
if (relations[child].Contains(parent) == false)
relations[child].Add(parent);
}
List result = new List();
foreach (string child in relations.Keys)
{
if (relations[child].Count > 1)
result.Add(child);
}
return result;
}
private static void ProcessElement(DataSet dataSet, XmlElement element)
{
string tableName = TableName(element);
// Get table and create new row that will represent this element
DataTable table = dataSet.Tables[tableName];
DataRow row = table.NewRow();
// foreach column past the first primary key, which is auto generated
for (int columnIndex = 1; columnIndex < table.Columns.Count; ++columnIndex)
{
string columnName = table.Columns[columnIndex].ColumnName;
// If a foreign key find PK for last row in parent table
if (columnName.EndsWith(ID_Suffix) == true) // ??? Columns may already be named this way ??? !!!!!!!!!!!!!!!!!!!!!!!!!
{
//row[columnIndex] = 0;
string parentTableName = columnName.Substring(0, columnName.Length - ID_Suffix.Length);
DataTable parentTable = dataSet.Tables[parentTableName];
row[columnIndex] = parentTable.Rows[parentTable.Rows.Count - 1][0];
continue;
}
// Column is an attribute of this element
XmlAttribute attribute = element.Attributes[columnName];
if (attribute != null)
{
row[attribute.Name] = attribute.Value;
continue;
}
// Column is child element text (may be sub children with same name)
XmlNodeList childElements = element.GetElementsByTagName(columnName);
if (childElements.Count > 1)
{
XmlNode foundNode = null;
List list = new List();
foreach (XmlNode node in childElements)
{
if (node.ParentNode == element)
{
if (foundNode == null)
foundNode = node;
else
throw new ApplicationException("found multiple child nodes");
}
list.Add(node.Name);
}
if (foundNode == null)
continue; // throw new ApplicationException("node not found");
row[columnName] = GetText(foundNode);
continue;
}
if (childElements.Count == 1)
{
row[columnName] = GetText(childElements[0]);
continue;
}
// Column is the text of this element
if (columnName == table.TableName + "Value")
{
row[columnName] = GetText(element);
}
}
table.Rows.Add(row);
// For all the child elements
foreach (XmlNode childNode in element.ChildNodes)
{
if (childNode.NodeType == XmlNodeType.Element)
{
XmlElement childElement = (XmlElement)childNode;
string childTableName = TableName(childElement);
// Provide child element is a table
if (dataSet.Tables.Contains(childTableName) == true)
ProcessElement(dataSet, childElement);
}
}
}
private static string GetText(XmlNode element)
{
foreach (XmlNode node in element.ChildNodes)
{
if (node.NodeType == XmlNodeType.Text)
return node.Value;
}
return null;
}
public static Dictionary> FieldAnalysis(string sourcefilename, string topElementName, string outputFilename)
{
XmlDocument xmlDocument = new XmlDocument();
xmlDocument.Load(sourcefilename);
return FieldAnalysis(xmlDocument, topElementName, outputFilename);
}
public static Dictionary> FieldAnalysis(XmlDocument xmlDocument, string topElementName, string outputFilename)
{
Dictionary> elementParts = new Dictionary>();
List machineFields = new List();
foreach (XmlNode machineElement in xmlDocument.GetElementsByTagName(topElementName))
FieldAnalysisWalkElements(elementParts, machineElement);
// Remove columns that have been identified as tables (due to hitting an element early on without attributes that later on ends up having attributes)
foreach (string tableName in elementParts.Keys)
{
List removeColumnList = new List();
foreach (string columnName in elementParts[tableName])
{
string[] tableNameParts = SplitName(tableName);
string testTableName = tableNameParts[1] + RelationDelimiter + columnName; // Only work on tables with fuill name
if (elementParts.ContainsKey(testTableName) == true)
removeColumnList.Add(columnName);
}
foreach (string removeColumnName in removeColumnList)
{
//Spludlow.Log.Warning(removeColumnName);
elementParts[tableName].Remove(removeColumnName);
}
}
// Save to file if required
if (outputFilename != null)
{
List result = new List();
foreach (string key in elementParts.Keys)
{
result.Add(key);
foreach (string field in elementParts[key])
result.Add("\t" + field);
}
File.WriteAllLines(outputFilename, result.ToArray(), Encoding.UTF8);
}
return elementParts;
}
private static void FieldAnalysisWalkElements(Dictionary> elementParts, XmlNode element)
{
string tableName = TableName(element);
// Add element list if not already
if (elementParts.ContainsKey(tableName) == false)
elementParts.Add(tableName, new List());
List elementList = elementParts[tableName];
// All attributes will be columns
foreach (XmlAttribute attribute in element.Attributes)
{
if (elementList.Contains(attribute.Name) == false)
elementList.Add(attribute.Name);
}
Dictionary childElementCount = new Dictionary();
// For all child elements
foreach (XmlNode childNode in element.ChildNodes)
{
if (childNode.NodeType == XmlNodeType.Element)
{
XmlElement childElement = (XmlElement)childNode;
bool hasChildElements = false;
foreach (XmlNode subNode in childNode.ChildNodes)
{
if (subNode.NodeType == XmlNodeType.Element)
{
hasChildElements = true;
break;
}
}
if (childElementCount.ContainsKey(childElement.Name) == false)
childElementCount.Add(childElement.Name, 0);
// The child element has no atributes to it will be just another column (unless has multiple child elements with same name)
if (childElement.Attributes.Count == 0 && hasChildElements == false && ++childElementCount[childElement.Name] <= 1)
{
if (elementList.Contains(childElement.Name) == false)
elementList.Add(childElement.Name);
}
else
{
// Recursivly process child element
FieldAnalysisWalkElements(elementParts, childElement);
}
}
// If element has text add as another column
if (childNode.NodeType == XmlNodeType.Text)
{
string name = tableName + "Value";
if (elementList.Contains(name) == false)
elementList.Add(name);
}
}
}
private static string TableName(XmlNode element)
{
XmlNode parentNode = element.ParentNode;
if (parentNode != null)
{
if (parentNode.NodeType == XmlNodeType.Document)
return element.Name;
if (parentNode.NodeType != XmlNodeType.Element)
throw new ApplicationException("not element " + parentNode.Name + " " + parentNode.NodeType.ToString());
return parentNode.Name + RelationDelimiter + element.Name;
}
return element.Name;
}
private static string[] SplitName(string name)
{
int index = name.IndexOf(RelationDelimiter);
if (index == -1)
return new string[] { null, name };
return new string[] { name.Substring(0, index), name.Substring(index + RelationDelimiter.Length) };
}
public static string JsonToXml(string json)
{
using (MemoryStream xmlStream = new MemoryStream())
{
JsonToXml(json, xmlStream);
xmlStream.Position = 0;
using (StreamReader reader = new StreamReader(xmlStream))
return reader.ReadToEnd();
}
}
public static void JsonToXmlFile(string json, string xmlFilename)
{
using (FileStream xmlStream = new FileStream(xmlFilename, FileMode.Create))
{
JsonToXml(json, xmlStream);
}
}
public static string JsonFileToXml(string jsonFilename, Encoding jsonFileEncoding)
{
string json = File.ReadAllText(jsonFilename, jsonFileEncoding);
using (MemoryStream xmlStream = new MemoryStream())
{
JsonToXml(json, xmlStream);
xmlStream.Position = 0;
using (StreamReader reader = new StreamReader(xmlStream))
return reader.ReadToEnd();
}
}
public static void JsonFileToXmlFile(string jsonFilename, string xmlFilename, Encoding jsonFileEncoding)
{
string json = File.ReadAllText(jsonFilename, jsonFileEncoding);
using (FileStream xmlStream = new FileStream(xmlFilename, FileMode.Create))
{
JsonToXml(json, xmlStream);
}
}
public static void JsonToXml(string json, Stream xmlStream)
{
Encoding writerEncoding = Encoding.Default; // Thought these shoud be the same?
Encoding jsonReaderEncoding = Encoding.UTF8;
XmlDictionaryReaderQuotas quotas = new XmlDictionaryReaderQuotas();
using (MemoryStream jsonStream = new MemoryStream())
{
using (StreamWriter writer = new StreamWriter(jsonStream, writerEncoding, 4096, true))
{
writer.Write(json);
}
jsonStream.Position = 0;
using (XmlDictionaryReader jsonReader = System.Runtime.Serialization.Json.JsonReaderWriterFactory.CreateJsonReader(jsonStream, jsonReaderEncoding, quotas, null))
{
System.Xml.Linq.XDocument xDocument = System.Xml.Linq.XDocument.Load(jsonReader);
xDocument.Save(xmlStream);
}
}
}
}
}