// Spludlow Software // Copyright © Samuel P. Ludlow 2020 All Rights Reserved // Distributed under the terms of the GNU General Public License version 3 // Distributed WITHOUT ANY WARRANTY; without implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE // https://www.spludlow.co.uk/LICENCE.TXT // The Spludlow logo is a registered trademark of Samuel P. Ludlow and may not be used without permission // v1.14 using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Threading.Tasks; using System.IO; using System.Data; using System.Xml; namespace Spludlow.Data { /// /// Convert XML into relational data tables and some JSON->XML conversion /// /// Identity PKs added and FKs connected up /// /// Tested On: /// MAME mame0191 2017-11-23 /// EWS some traced output 2017-10-03 /// ipv4-address-space.xml 2017-10-03 /// Security Event Logs Export 2018-08-17 /// public class XML { public static string ID_Suffix = "_SPLID"; public static string RelationDelimiter = "_SPLDEL_"; public static void ConvertRelational(string xmlFilename, string topElementName, string connectionString, string databaseName) { DataSet dataSet = Spludlow.Data.XML.ConvertRelational(xmlFilename, topElementName); DataSet schema = Spludlow.Data.ADO.Schema(dataSet, true); Spludlow.Data.Database.AutoAddForeignKeys(schema); Spludlow.Data.IDAL database = Spludlow.Data.DAL.Create(connectionString); if (database.DatabaseExists(databaseName) == true) database.DatabaseDelete(databaseName); Spludlow.Data.Schemas.ImportDatabase(schema, dataSet, connectionString, databaseName, null); } /// /// Convert XML into relational data tables /// I 'think' in data tables not XML. When presented with data in XML this generic converter will give me something I can work with /// This code was thrashed away at until it worked, no design just deep concentration, its a dogs dinner but it seems to work /// public static DataSet ConvertRelational(string xmlFilename, string topElementName) { return ConvertRelational(xmlFilename, topElementName, typeof(int)); } public static DataSet ConvertRelational(string xmlFilename, string topElementName, Type primaryKeyType) { XmlDocument xmlDocument = new XmlDocument(); xmlDocument.Load(xmlFilename); return ConvertRelational(xmlDocument, topElementName, primaryKeyType); } public static DataSet ConvertRelational(XmlDocument xmlDocument, string topElementName) { return ConvertRelational(xmlDocument, topElementName, typeof(int)); } public static DataSet ConvertRelational(XmlDocument xmlDocument, string topElementName, Type primaryKeyType) { // Analyse the XML to determine Tables and Fields Dictionary> elementParts = FieldAnalysis(xmlDocument, topElementName, null); DataSet dataSet = new DataSet(); // For each table foreach (string tableName in elementParts.Keys) { string[] nameParts = SplitName(tableName); XmlElement tableElement = null; foreach (XmlNode node in xmlDocument.GetElementsByTagName(nameParts[1])) { if (node.NodeType != XmlNodeType.Element) throw new ApplicationException("not an element"); if (nameParts[0] == null || nameParts[0] == node.ParentNode.Name) { tableElement = (XmlElement)node; break; } } if (tableElement == null) continue; DataTable table = new DataTable(); table.TableName = tableName; // Add Primary Key string primaryKeyName = table.TableName + ID_Suffix; table.Columns.Add(primaryKeyName, primaryKeyType); table.PrimaryKey = new DataColumn[] { table.Columns[0] }; table.Columns[0].AutoIncrement = true; table.Columns[0].AutoIncrementSeed = 1; // If not the top table then add a forign key that relates to the parent if (tableElement.Name != topElementName) { string parentTableName = TableName(tableElement.ParentNode); // names !!! table.Columns.Add(parentTableName + ID_Suffix, primaryKeyType); //Spludlow.Log.Info(tableElement.Name + " " + parentTableName); } // Add all the data fields for (int columnIndex = 0; columnIndex < elementParts[tableName].Count; ++columnIndex) { string columnName = elementParts[tableName][columnIndex]; table.Columns.Add(columnName, typeof(string)); } dataSet.Tables.Add(table); } // Go through all the top elements calling the recursive processing method foreach (XmlElement element in xmlDocument.GetElementsByTagName(topElementName)) { ProcessElement(dataSet, element); } // Fix table and column names (dont all need parent_child convention) // Find table names that need naming "parent_child" List needParentNames = TablesNeedingParentInName(elementParts); //Spludlow.Log.Warning("needParentNames", needParentNames.ToArray()); foreach (DataTable table in dataSet.Tables) { string[] names; string name; name = table.TableName; names = SplitName(name); if (needParentNames.Contains(names[1]) == false) name = names[1]; name = name.Replace(Spludlow.Data.XML.RelationDelimiter, ""); table.TableName = name; foreach (DataColumn column in table.Columns) { name = column.ColumnName; if (name.EndsWith(Spludlow.Data.XML.ID_Suffix) == true) { names = SplitName(name.Substring(0, name.Length - Spludlow.Data.XML.ID_Suffix.Length)); if (needParentNames.Contains(names[1]) == false) name = names[1] + Spludlow.Data.XML.ID_Suffix; } else { if (name.EndsWith("Value") == true && name.Contains(Spludlow.Data.XML.RelationDelimiter) == true) { names = SplitName(name.Substring(0, name.Length - 5)); if (needParentNames.Contains(names[1]) == false) name = names[1] + "Value"; } } name = name.Replace(Spludlow.Data.XML.RelationDelimiter, ""); name = name.Replace(Spludlow.Data.XML.ID_Suffix, "_Id"); column.ColumnName = name; } } return dataSet; } private static List TablesNeedingParentInName(Dictionary> elementParts) { // Child, List of parents Dictionary> relations = new Dictionary>(); foreach (string tableName in elementParts.Keys) { int index = tableName.IndexOf(RelationDelimiter); if (index == -1) // Top level continue; string[] parentChild = SplitName(tableName); string parent = parentChild[0]; string child = parentChild[1]; if (relations.ContainsKey(child) == false) relations.Add(child, new List()); if (relations[child].Contains(parent) == false) relations[child].Add(parent); } List result = new List(); foreach (string child in relations.Keys) { if (relations[child].Count > 1) result.Add(child); } return result; } private static void ProcessElement(DataSet dataSet, XmlElement element) { string tableName = TableName(element); // Get table and create new row that will represent this element DataTable table = dataSet.Tables[tableName]; DataRow row = table.NewRow(); // foreach column past the first primary key, which is auto generated for (int columnIndex = 1; columnIndex < table.Columns.Count; ++columnIndex) { string columnName = table.Columns[columnIndex].ColumnName; // If a foreign key find PK for last row in parent table if (columnName.EndsWith(ID_Suffix) == true) // ??? Columns may already be named this way ??? !!!!!!!!!!!!!!!!!!!!!!!!! { //row[columnIndex] = 0; string parentTableName = columnName.Substring(0, columnName.Length - ID_Suffix.Length); DataTable parentTable = dataSet.Tables[parentTableName]; row[columnIndex] = parentTable.Rows[parentTable.Rows.Count - 1][0]; continue; } // Column is an attribute of this element XmlAttribute attribute = element.Attributes[columnName]; if (attribute != null) { row[attribute.Name] = attribute.Value; continue; } // Column is child element text (may be sub children with same name) XmlNodeList childElements = element.GetElementsByTagName(columnName); if (childElements.Count > 1) { XmlNode foundNode = null; List list = new List(); foreach (XmlNode node in childElements) { if (node.ParentNode == element) { if (foundNode == null) foundNode = node; else throw new ApplicationException("found multiple child nodes"); } list.Add(node.Name); } if (foundNode == null) continue; // throw new ApplicationException("node not found"); row[columnName] = GetText(foundNode); continue; } if (childElements.Count == 1) { row[columnName] = GetText(childElements[0]); continue; } // Column is the text of this element if (columnName == table.TableName + "Value") { row[columnName] = GetText(element); } } table.Rows.Add(row); // For all the child elements foreach (XmlNode childNode in element.ChildNodes) { if (childNode.NodeType == XmlNodeType.Element) { XmlElement childElement = (XmlElement)childNode; string childTableName = TableName(childElement); // Provide child element is a table if (dataSet.Tables.Contains(childTableName) == true) ProcessElement(dataSet, childElement); } } } private static string GetText(XmlNode element) { foreach (XmlNode node in element.ChildNodes) { if (node.NodeType == XmlNodeType.Text) return node.Value; } return null; } public static Dictionary> FieldAnalysis(string sourcefilename, string topElementName, string outputFilename) { XmlDocument xmlDocument = new XmlDocument(); xmlDocument.Load(sourcefilename); return FieldAnalysis(xmlDocument, topElementName, outputFilename); } public static Dictionary> FieldAnalysis(XmlDocument xmlDocument, string topElementName, string outputFilename) { Dictionary> elementParts = new Dictionary>(); List machineFields = new List(); foreach (XmlNode machineElement in xmlDocument.GetElementsByTagName(topElementName)) FieldAnalysisWalkElements(elementParts, machineElement); // Remove columns that have been identified as tables (due to hitting an element early on without attributes that later on ends up having attributes) foreach (string tableName in elementParts.Keys) { List removeColumnList = new List(); foreach (string columnName in elementParts[tableName]) { string[] tableNameParts = SplitName(tableName); string testTableName = tableNameParts[1] + RelationDelimiter + columnName; // Only work on tables with fuill name if (elementParts.ContainsKey(testTableName) == true) removeColumnList.Add(columnName); } foreach (string removeColumnName in removeColumnList) { //Spludlow.Log.Warning(removeColumnName); elementParts[tableName].Remove(removeColumnName); } } // Save to file if required if (outputFilename != null) { List result = new List(); foreach (string key in elementParts.Keys) { result.Add(key); foreach (string field in elementParts[key]) result.Add("\t" + field); } File.WriteAllLines(outputFilename, result.ToArray(), Encoding.UTF8); } return elementParts; } private static void FieldAnalysisWalkElements(Dictionary> elementParts, XmlNode element) { string tableName = TableName(element); // Add element list if not already if (elementParts.ContainsKey(tableName) == false) elementParts.Add(tableName, new List()); List elementList = elementParts[tableName]; // All attributes will be columns foreach (XmlAttribute attribute in element.Attributes) { if (elementList.Contains(attribute.Name) == false) elementList.Add(attribute.Name); } Dictionary childElementCount = new Dictionary(); // For all child elements foreach (XmlNode childNode in element.ChildNodes) { if (childNode.NodeType == XmlNodeType.Element) { XmlElement childElement = (XmlElement)childNode; bool hasChildElements = false; foreach (XmlNode subNode in childNode.ChildNodes) { if (subNode.NodeType == XmlNodeType.Element) { hasChildElements = true; break; } } if (childElementCount.ContainsKey(childElement.Name) == false) childElementCount.Add(childElement.Name, 0); // The child element has no atributes to it will be just another column (unless has multiple child elements with same name) if (childElement.Attributes.Count == 0 && hasChildElements == false && ++childElementCount[childElement.Name] <= 1) { if (elementList.Contains(childElement.Name) == false) elementList.Add(childElement.Name); } else { // Recursivly process child element FieldAnalysisWalkElements(elementParts, childElement); } } // If element has text add as another column if (childNode.NodeType == XmlNodeType.Text) { string name = tableName + "Value"; if (elementList.Contains(name) == false) elementList.Add(name); } } } private static string TableName(XmlNode element) { XmlNode parentNode = element.ParentNode; if (parentNode != null) { if (parentNode.NodeType == XmlNodeType.Document) return element.Name; if (parentNode.NodeType != XmlNodeType.Element) throw new ApplicationException("not element " + parentNode.Name + " " + parentNode.NodeType.ToString()); return parentNode.Name + RelationDelimiter + element.Name; } return element.Name; } private static string[] SplitName(string name) { int index = name.IndexOf(RelationDelimiter); if (index == -1) return new string[] { null, name }; return new string[] { name.Substring(0, index), name.Substring(index + RelationDelimiter.Length) }; } public static string JsonToXml(string json) { using (MemoryStream xmlStream = new MemoryStream()) { JsonToXml(json, xmlStream); xmlStream.Position = 0; using (StreamReader reader = new StreamReader(xmlStream)) return reader.ReadToEnd(); } } public static void JsonToXmlFile(string json, string xmlFilename) { using (FileStream xmlStream = new FileStream(xmlFilename, FileMode.Create)) { JsonToXml(json, xmlStream); } } public static string JsonFileToXml(string jsonFilename, Encoding jsonFileEncoding) { string json = File.ReadAllText(jsonFilename, jsonFileEncoding); using (MemoryStream xmlStream = new MemoryStream()) { JsonToXml(json, xmlStream); xmlStream.Position = 0; using (StreamReader reader = new StreamReader(xmlStream)) return reader.ReadToEnd(); } } public static void JsonFileToXmlFile(string jsonFilename, string xmlFilename, Encoding jsonFileEncoding) { string json = File.ReadAllText(jsonFilename, jsonFileEncoding); using (FileStream xmlStream = new FileStream(xmlFilename, FileMode.Create)) { JsonToXml(json, xmlStream); } } public static void JsonToXml(string json, Stream xmlStream) { Encoding writerEncoding = Encoding.Default; // Thought these shoud be the same? Encoding jsonReaderEncoding = Encoding.UTF8; XmlDictionaryReaderQuotas quotas = new XmlDictionaryReaderQuotas(); using (MemoryStream jsonStream = new MemoryStream()) { using (StreamWriter writer = new StreamWriter(jsonStream, writerEncoding, 4096, true)) { writer.Write(json); } jsonStream.Position = 0; using (XmlDictionaryReader jsonReader = System.Runtime.Serialization.Json.JsonReaderWriterFactory.CreateJsonReader(jsonStream, jsonReaderEncoding, quotas, null)) { System.Xml.Linq.XDocument xDocument = System.Xml.Linq.XDocument.Load(jsonReader); xDocument.Save(xmlStream); } } } } }