通过非制表符分隔符的非结构化 XML? [英] Unstructured XML via non-tab deliminator?

查看:18
本文介绍了通过非制表符分隔符的非结构化 XML?的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我们有一个复杂的 XML 结构,而且确实很大 (>500 MB).结构的 XSD 是:

有了 jdweng 的好回答,我们想要提取所有信息.

解决方案

决定艰难地创建SQL数据库.没有一个工具能很好地工作.我只装了几张桌子.如果需要,您可以添加更多.

步骤 1 在 SQL Server Management Studio 中使用以下查询创建数据库.如果您多次运行并且数据库或表已经存在,则查询可能会出错.

运行以下c#项目:

使用系统;使用 System.Collections.Generic;使用 System.Linq;使用 System.Text;使用 System.Threading.Tasks;使用 System.Data;使用 System.Data.SqlClient;使用 System.Xml;使用 System.Xml.Linq;使用 System.IO;命名空间 DrubBank{课堂节目{常量字符串 FILENAME = @"c:	empfull database.xml";静态无效主要(字符串 [] 参数){新的 UploadXml(文件名);}}公共类 UploadXml{常量字符串 INSERT_DRUG ="插入 [DrugBank].[dbo].[Drugs] (" +"[类型],[创建],[更新],[ID],[名称],[描述],[案例编号],[Unii],[状态]," +"【合成参考】、【适应症】、【药效学】、【作用机制】、【毒性】,"+"[代谢] , [吸收] ,[半衰期], [蛋白质结合],, +【淘汰路线】、【分发量】、【清仓】)"+"价值观" +"(@Type、@Created、@Updated、@ID、@Name、@Description、@Case_Number、@Unii、@State,"+@Synthesis_Reference,@Indication,@Pharmacodynamics,@Mechanism_of_Action,@Toxicity,"+@代谢,@吸收,@Half_Life,@Protein_Binding,"+@Route_of_Elimination,@Volume_of_Distribution,@Clearance)";常量字符串 INSERT_DRUG_LINK ="插入 [DrugBank].[dbo].[Links] (" +"[ID],[标题],[URL])" +"价值观" +"(@ID,@Title,@URL)";常量字符串 INSERT_DRUG_ARTICLE ="插入 [DrugBank].[dbo].[Articles] (" +"[ID],[Pubmed ID],[Citation])" +"价值观" +"(@ID,@Pubmed_ID, @Citation)";常量字符串 INSERT_DRUG_INTERACTION ="插入 [DrugBank].[dbo].[交互] (" +"[ID],[交互 ID],[描述])" +"价值观" +"(@ID,@Interaction_ID, @Description)";常量字符串 INSERT_DRUG_ID ="插入 [DrugBank].[dbo].[ID] (" +"[ID],[ALT ID])" +"价值观" +(@ID,@ALT_ID)";常量字符串 INSERT_DRUG_PRODUCT ="插入 [DrugBank].[dbo].[Products] (" +"[ID],[Name],[Labeller], [NDC ID], [NDC Product Code], [DPD ID]," +[EMA 产品代码]、[EMA MA 编号]、[开始营销时间]、[结束营销时间]、[剂型]",+"[Strength],[Route],[FDA Application Number],[Generic],[Over the Counter],[Approved],[Country],[Source])" +"价值观" +"(@ID,@Name,@Labeller,@NDC_ID,@NDC_Product_Code,@DPD_ID," +"@EMA_Product_Code、@EMA_MA_Number、@Started_Marketing_On、@Ended_Marketing_On、@Dosage_Form," +@Strength,@Route,@FDA_Application_Number,@Generic,@Over_the_Counter,@Approved,@Country,@Source)";常量字符串 INSERT_DRUG_MIXTURE ="插入 [DrugBank].[dbo].[混合物] (" +"[ID]、[姓名]、[成分])" +"价值观" +"(@ID, @Name, @ingredients)";常量字符串 INSERT_DRUG_PACKAGER ="插入 [DrugBank].[dbo].[Packagers] (" +"[ID]、[名称]、[URL])" +"价值观" +"(@ID, @Name, @URL)";常量字符串 INSERT_DRUG_PRICE ="插入 [DrugBank].[dbo].[价格] (" +"[ID]、[描述]、[成本]、[货币]、[单位])" +"价值观" +(@ID,@Description,@Cost,@Currency,@Unit)";常量字符串 INSERT_DRUG_CATEGORY ="插入 [DrugBank].[dbo].[Categories] (" +"[ID]、[类别]、[网格 ID])" +"价值观" +(@ID,@Category,@Mesh_ID)";常量字符串 INSERT_DRUG_ORGANISM ="插入 [DrugBank].[dbo].[有机体] (" +"[ID],[有机体])" +"价值观" +(@ID,@Organism)";常量字符串 INSERT_DRUG_PATENT ="插入 [DrugBank].[dbo].[专利] (" +"[ID]、[Number]、[Country]、[Approved]、[Expires]、[Pediatric Extension])" +"价值观" +(@ID、@Number、@Country、@Approved、@Expires、@Pediatric_Extension)";常量字符串 INSERT_DRUG_SEQUENCE ="插入 [DrugBank].[dbo].[Sequences] (" +"[ID]、[格式]、[类型]、[序列])" +"价值观" +(@ID,@Format,@Type,@Sequence)";常量字符串 INSERT_DRUG_PROPERTY ="插入 [DrugBank].[dbo].[Properties] (" +"[ID]、[种类]、[值]、[来源])" +"价值观" +(@ID,@Kind,@Value,@Source)";常量字符串 INSERT_DRUG_IDENTIFIER ="插入 [DrugBank].[dbo].[标识符] (" +"[ID]、[资源]、[标识符])" +"价值观" +(@ID,@Resource,@Identifier)";常量字符串 INSERT_DRUG_ENZYM ="插入 [药物库].[dbo].[酶] (" +"[ID], [UniprotID])" +"价值观" +(@ID,@UniprotID)";SqlCommand productCmd = null;SqlCommand 交互Cmd = null;SqlCommand articleCmd = null;SqlCommand linkCmd = null;SqlCommand drugCmd = null;SqlCommand idCmd = null;SqlCommand mixCmd = null;SqlCommand packagerCmd = null;SqlCommand priceCmd = null;SqlCommand categoryCmd = null;SqlCommand有机体Cmd = null;SqlCommand 专利Cmd = null;SqlCommand sequenceCmd = null;SqlCommand propertyCmd = null;SqlCommand 标识符Cmd = null;SqlCommand enzymCmd = null;公共 UploadXml(字符串文件名){字符串 connStr = DrugBank.Properties.Settings.Default.DrugBankConnectionString;SqlConnection conn = new SqlConnection(connStr);conn.Open();drugCmd = new SqlCommand(INSERT_DRUG, conn);drugCmd.Parameters.Add("@Type", SqlDbType.VarChar, 20);drugCmd.Parameters.Add("@Created", SqlDbType.DateTime);drugCmd.Parameters.Add("@Updated", SqlDbType.DateTime);drugCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);drugCmd.Parameters.Add("@Name", SqlDbType.VarChar, 50);drugCmd.Parameters.Add("@Description", SqlDbType.VarChar);drugCmd.Parameters.Add("@Case_Number", SqlDbType.VarChar, 20);drugCmd.Parameters.Add("@Unii", SqlDbType.VarChar, 20);drugCmd.Parameters.Add("@State", SqlDbType.VarChar, 20);drugCmd.Parameters.Add("@Synthesis_reference", SqlDbType.VarChar, 1024);drugCmd.Parameters.Add("@Indication", SqlDbType.VarChar);drugCmd.Parameters.Add("@Pharmacodynamics", SqlDbType.VarChar, 1024);drugCmd.Parameters.Add("@Mechanism_of_Action", SqlDbType.VarChar, 1024);drugCmd.Parameters.Add("@Toxicity", SqlDbType.VarChar, 1024);drugCmd.Parameters.Add("@Metabolism", SqlDbType.VarChar);drugCmd.Parameters.Add("@Absorption", SqlDbType.VarChar, 1024);drugCmd.Parameters.Add("@Half_Life", SqlDbType.VarChar, 256);drugCmd.Parameters.Add("@Protein_Binding", SqlDbType.VarChar, 64);drugCmd.Parameters.Add("@Route_of_Elimination", SqlDbType.VarChar);drugCmd.Parameters.Add("@Volume_of_Distribution", SqlDbType.VarChar);drugCmd.Parameters.Add("@Clearance", SqlDbType.VarChar);idCmd = new SqlCommand(INSERT_DRUG_ID, conn);idCmd.Parameters.Add("@ID", SqlDbType.VarChar, 256);idCmd.Parameters.Add("@ALT_ID", SqlDbType.VarChar, 20);articleCmd = new SqlCommand(INSERT_DRUG_ARTICLE, conn);articleCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);articleCmd.Parameters.Add("@Pubmed_ID", SqlDbType.VarChar, 256);articleCmd.Parameters.Add("@Citation", SqlDbType.VarChar, 20);linkCmd = new SqlCommand(INSERT_DRUG_LINK, conn);linkCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);linkCmd.Parameters.Add("@Title", SqlDbType.VarChar, 256);linkCmd.Parameters.Add("@URL", SqlDbType.VarChar, 64);interactionCmd = new SqlCommand(INSERT_DRUG_INTERACTION, conn);interactionCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);interactionCmd.Parameters.Add("@Interaction_ID", SqlDbType.VarChar, 20);interactionCmd.Parameters.Add("@Description", SqlDbType.VarChar, 256);productCmd = new SqlCommand(INSERT_DRUG_PRODUCT, conn);productCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);productCmd.Parameters.Add("@Name", SqlDbType.VarChar, 128);productCmd.Parameters.Add("@Labeller", SqlDbType.VarChar, 64);productCmd.Parameters.Add("@NDC_ID", SqlDbType.VarChar, 20);productCmd.Parameters.Add("@NDC_Product_Code", SqlDbType.VarChar, 20);productCmd.Parameters.Add("@DPD_ID", SqlDbType.VarChar, 20);productCmd.Parameters.Add("@EMA_Product_Code", SqlDbType.VarChar, 20);productCmd.Parameters.Add("@EMA_MA_Number", SqlDbType.VarChar, 20);productCmd.Parameters.Add("@Started_Marketing_On", SqlDbType.DateTime2, 20);productCmd.Parameters.Add("@Ended_Marketing_On", SqlDbType.DateTime2, 20);productCmd.Parameters.Add("@Dosage_Form", SqlDbType.VarChar, 64);productCmd.Parameters.Add("@Strength", SqlDbType.VarChar, 20);productCmd.Parameters.Add("@Route", SqlDbType.VarChar, 20);productCmd.Parameters.Add("@FDA_Application_Number", SqlDbType.VarChar, 20);productCmd.Parameters.Add("@Generic", SqlDbType.Bit);productCmd.Parameters.Add("@Over_the_Counter", SqlDbType.Bit);productCmd.Parameters.Add("@Approved", SqlDbType.Bit);productCmd.Parameters.Add("@Country", SqlDbType.VarChar, 20);productCmd.Parameters.Add("@Source", SqlDbType.VarChar, 20);mixCmd = new SqlCommand(INSERT_DRUG_MIXTURE, conn);mixCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);mixCmd.Parameters.Add("@Name", SqlDbType.VarChar, 64);mixCmd.Parameters.Add("@Ingredients", SqlDbType.VarChar, 64);packagerCmd = new SqlCommand(INSERT_DRUG_PACKAGER, conn);packagerCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);packagerCmd.Parameters.Add("@Name", SqlDbType.VarChar, 64);packagerCmd.Parameters.Add("@URL", SqlDbType.VarChar, 64);priceCmd = new SqlCommand(INSERT_DRUG_PRICE, conn);priceCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);priceCmd.Parameters.Add("@Description", SqlDbType.VarChar, 128);priceCmd.Parameters.Add("@Cost", SqlDbType.Decimal);priceCmd.Parameters.Add("@Currency", SqlDbType.VarChar,20);priceCmd.Parameters.Add("@Unit", SqlDbType.VarChar, 20);categoryCmd = new SqlCommand(INSERT_DRUG_CATEGORY, conn);categoryCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);categoryCmd.Parameters.Add("@Category", SqlDbType.VarChar, 128);categoryCmd.Parameters.Add("@Mesh_ID", SqlDbType.VarChar, 20);OrganicCmd = new SqlCommand(INSERT_DRUG_ORGANISM, conn);有机体Cmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);OrganicCmd.Parameters.Add("@Organism", SqlDbType.VarChar, 128);PatentCmd = 新的 SqlCommand(INSERT_DRUG_PATENT, conn);PatentCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);PatentCmd.Parameters.Add("@Number", SqlDbType.VarChar, 20);PatentCmd.Parameters.Add("@Country", SqlDbType.VarChar, 20);PatentCmd.Parameters.Add("@Approved", SqlDbType.DateTime2);PatentCmd.Parameters.Add("@Expires", SqlDbType.DateTime2);PatentCmd.Parameters.Add("@Pediatric_Extension", SqlDbType.Bit);sequenceCmd = new SqlCommand(INSERT_DRUG_SEQUENCE, conn);sequenceCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);sequenceCmd.Parameters.Add("@Format", SqlDbType.VarChar, 20);sequenceCmd.Parameters.Add("@Sequence", SqlDbType.VarChar);sequenceCmd.Parameters.Add("@Type", SqlDbType.VarChar, 20);propertyCmd = new SqlCommand(INSERT_DRUG_PROPERTY, conn);propertyCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);propertyCmd.Parameters.Add("@Kind", SqlDbType.VarChar, 20);propertyCmd.Parameters.Add("@Value", SqlDbType.VarChar, 20);propertyCmd.Parameters.Add("@Source", SqlDbType.VarChar, 20);identifierCmd = new SqlCommand(INSERT_DRUG_IDENTIFIER, conn);identifierCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);identifierCmd.Parameters.Add("@Resource", SqlDbType.VarChar, 64);identifierCmd.Parameters.Add("@Identifier", SqlDbType.VarChar, 64);enzymCmd = new SqlCommand(INSERT_DRUG_ENZYM, conn);enzymCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);enzymCmd.Parameters.Add("@UniprotID", SqlDbType.VarChar, 20);XmlReader 阅读器 = XmlReader.Create(filename);而(!reader.EOF){if (reader.Name !="drug"){reader.ReadToFollowing("药物");}如果 (!reader.EOF){XElement 药物 = (XElement)XElement.ReadFrom(reader);string primaryID = (string)drug.Elements().Where(x => (x.Name.LocalName == "drugbank-id") && (x.Attribute("primary") != null)).FirstOrDefault();AddDrug(conn, drug, primaryID);AddArticles(conn, drug, primaryID);AddInteractions(conn, drug, primaryID);AddProducts(conn, drug, primaryID);AddMixtures(conn, drug, primaryID);AddPackagers(conn, drug, primaryID);AddPrices(conn, drug, primaryID);AddCategories(conn, drug, primaryID);AddOrganisms(conn, drug, primaryID);AddPatents(conn, drug, primaryID);AddSequences(conn, drug, primaryID);AddProperties(conn, drug, primaryID);AddIdentifiers(conn, drug, primaryID);AddEnzymes(conn, drug, primaryID);}}}

We have a complex XML Structure and really a big one (>500 MB). the XSD of the structure is: This XSD

As we know this is a complex one. and because of size or non-tab deliminator structure, I couldn't convert it to a readable better presentation.

I want to read this file via C# and search the drug name. what is wrong via my code?

        try
        {
            XmlReader xmlFile;
            xmlFile = XmlReader.Create("C:\Users\Dr\Desktop\full database.xml", new XmlReaderSettings());
            DataSet ds = new DataSet();
            ds.ReadXml(xmlFile);
            dataGridView1.DataSource = ds.Tables[0];
        }
        catch (Exception ex)
        {
            MessageBox.Show(ex.ToString());
        }

My error is as follows:

How I can search inside this XML and get the information around Drug name?

Update: Sample XML

The XML Structure

With nice answer of jdweng we want to extract all information.

解决方案

Decided to create the SQL database the hard way. None of the tools worked well. I only crate a few of the tables. You can add more if needed.

Step 1 Crate Database in SQL Server Management Studio using following query. Query may give errors if you run multiple times and database or table already exists.

The run following c# project :

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Data;
using System.Data.SqlClient;
using System.Xml;
using System.Xml.Linq;
using System.IO;

namespace DrubBank
{
    class Program
    {
        const string FILENAME = @"c:	empfull database.xml";
        static void Main(string[] args)
        {
            new UploadXml(FILENAME);
        }
    }
    public class UploadXml
    {
        const string INSERT_DRUG =
            "INSERT INTO [DrugBank].[dbo].[Drugs] (" +
            "[Type],[Created],[Updated],[ID],[Name],[Description],[Case Number],[Unii],[State]," +
            "[Synthesis Reference],[Indication] ,[Pharmacodynamics] ,[Mechanism of Action], [Toxicity]," +
            "[Metabolism] , [Absorption] ,[Half Life], [Protein Binding]," +
            "[Route of Eelimination], [Volume of Distribution] ,[Clearance])" +
            " VALUES " +
            "(@Type, @Created, @Updated, @ID, @Name, @Description, @Case_Number, @Unii, @State," +
             "@Synthesis_Reference,@Indication ,@Pharmacodynamics ,@Mechanism_of_Action, @Toxicity," +
             "@Metabolism , @Absorption ,@Half_Life, @Protein_Binding," +
             "@Route_of_Elimination, @Volume_of_Distribution ,@Clearance)";

        const string INSERT_DRUG_LINK =
            "INSERT INTO [DrugBank].[dbo].[Links] (" +
            "[ID],[Title],[URL])" +
            " VALUES " +
            "(@ID,@Title, @URL)";

        const string INSERT_DRUG_ARTICLE =
            "INSERT INTO [DrugBank].[dbo].[Articles] (" +
            "[ID],[Pubmed ID],[Citation])" +
            " VALUES " +
            "(@ID,@Pubmed_ID, @Citation)";

        const string INSERT_DRUG_INTERACTION =
            "INSERT INTO [DrugBank].[dbo].[Interactions] (" +
            "[ID],[Interaction ID],[Description])" +
            " VALUES " +
            "(@ID,@Interaction_ID, @Description)";

        const string INSERT_DRUG_ID =
            "INSERT INTO [DrugBank].[dbo].[IDs] (" +
            "[ID],[ALT ID])" +
            " VALUES " +
            "(@ID, @ALT_ID)";

        const string INSERT_DRUG_PRODUCT =
            "INSERT INTO [DrugBank].[dbo].[Products] (" +
            "[ID],[Name],[Labeller], [NDC ID], [NDC Product Code], [DPD ID]," +
            "[EMA Product Code],[EMA MA Number],[Started Marketing On], [Ended Marketing On], [Dosage Form]," +
            "[Strength],[Route],[FDA Application Number],[Generic],[Over the Counter],[Approved],[Country],[Source])" +
            " VALUES " +
            "(@ID,@Name,@Labeller, @NDC_ID, @NDC_Product_Code,@DPD_ID," +
            "@EMA_Product_Code,@EMA_MA_Number,@Started_Marketing_On, @Ended_Marketing_On, @Dosage_Form," +
            "@Strength,@Route,@FDA_Application_Number,@Generic,@Over_the_Counter,@Approved,@Country,@Source)";

        const string INSERT_DRUG_MIXTURE =
            "INSERT INTO [DrugBank].[dbo].[Mixtures] (" +
            "[ID], [Name] , [ingredients])" +
            " VALUES " +
            "(@ID, @Name, @ingredients)";

        const string INSERT_DRUG_PACKAGER =
             "INSERT INTO [DrugBank].[dbo].[Packagers] (" +
             "[ID], [Name], [URL])" +
             " VALUES " +
             "(@ID, @Name, @URL)";

        const string INSERT_DRUG_PRICE = 
             "INSERT INTO [DrugBank].[dbo].[Prices] (" +
             "[ID], [Description], [Cost], [Currency], [Unit])" +
             " VALUES " +
             "(@ID, @Description, @Cost, @Currency, @Unit)";

        const string INSERT_DRUG_CATEGORY =
            "INSERT INTO [DrugBank].[dbo].[Categories] (" +
            "[ID], [Category], [Mesh ID])" +
            " VALUES " +
            "(@ID, @Category, @Mesh_ID)";

        const string INSERT_DRUG_ORGANISM =
            "INSERT INTO [DrugBank].[dbo].[Organisms] (" +
            "[ID], [Organism])" +
            " VALUES " +
            "(@ID, @Organism)";

        const string INSERT_DRUG_PATENT =
            "INSERT INTO [DrugBank].[dbo].[Patents] (" +
            "[ID], [Number], [Country], [Approved], [Expires], [Pediatric Extension]) " +
            " VALUES " +
            "(@ID, @Number, @Country, @Approved, @Expires, @Pediatric_Extension) ";

        const string INSERT_DRUG_SEQUENCE =
            "INSERT INTO [DrugBank].[dbo].[Sequences] (" +
            "[ID], [Format], [Type], [Sequence])" +
            " VALUES " +
            "(@ID, @Format, @Type, @Sequence)";

        const string INSERT_DRUG_PROPERTY =
            "INSERT INTO [DrugBank].[dbo].[Properties] (" +
            "[ID], [Kind], [Value], [Source])" +
            " VALUES " +
            "(@ID, @Kind, @Value, @Source)";

        const string INSERT_DRUG_IDENTIFIER =
            "INSERT INTO [DrugBank].[dbo].[Identifiers] (" +
            "[ID], [Resource], [Identifier])" +
            " VALUES " +
            "(@ID, @Resource, @Identifier)";

        const string INSERT_DRUG_ENZYM =
            "INSERT INTO [DrugBank].[dbo].[Enzymes] (" +
            "[ID], [UniprotID])" +
            " VALUES " +
            "(@ID, @UniprotID)";

        SqlCommand productCmd = null;
        SqlCommand interactionCmd = null;
        SqlCommand articleCmd = null;
        SqlCommand linkCmd = null;
        SqlCommand drugCmd = null;
        SqlCommand idCmd = null;
        SqlCommand mixtureCmd = null;
        SqlCommand packagerCmd = null;
        SqlCommand priceCmd = null;
        SqlCommand categoryCmd = null;
        SqlCommand organismCmd = null;
        SqlCommand patentCmd = null;
        SqlCommand sequenceCmd = null;
        SqlCommand propertyCmd = null;
        SqlCommand identifierCmd = null;
        SqlCommand enzymCmd = null;

        public UploadXml(string filename)
        {
            string connStr = DrugBank.Properties.Settings.Default.DrugBankConnectionString;
            SqlConnection conn = new SqlConnection(connStr);
            conn.Open();

            drugCmd = new SqlCommand(INSERT_DRUG, conn);

            drugCmd.Parameters.Add("@Type", SqlDbType.VarChar, 20);
            drugCmd.Parameters.Add("@Created", SqlDbType.DateTime);
            drugCmd.Parameters.Add("@Updated", SqlDbType.DateTime);
            drugCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);
            drugCmd.Parameters.Add("@Name", SqlDbType.VarChar, 50);
            drugCmd.Parameters.Add("@Description", SqlDbType.VarChar);
            drugCmd.Parameters.Add("@Case_Number", SqlDbType.VarChar, 20);
            drugCmd.Parameters.Add("@Unii", SqlDbType.VarChar, 20);
            drugCmd.Parameters.Add("@State", SqlDbType.VarChar, 20);

            drugCmd.Parameters.Add("@Synthesis_reference", SqlDbType.VarChar, 1024);
            drugCmd.Parameters.Add("@Indication", SqlDbType.VarChar);
            drugCmd.Parameters.Add("@Pharmacodynamics", SqlDbType.VarChar, 1024);
            drugCmd.Parameters.Add("@Mechanism_of_Action", SqlDbType.VarChar, 1024);
            drugCmd.Parameters.Add("@Toxicity", SqlDbType.VarChar, 1024);
            drugCmd.Parameters.Add("@Metabolism", SqlDbType.VarChar);
            drugCmd.Parameters.Add("@Absorption", SqlDbType.VarChar, 1024);
            drugCmd.Parameters.Add("@Half_Life", SqlDbType.VarChar, 256);
            drugCmd.Parameters.Add("@Protein_Binding", SqlDbType.VarChar, 64);
            drugCmd.Parameters.Add("@Route_of_Elimination", SqlDbType.VarChar);
            drugCmd.Parameters.Add("@Volume_of_Distribution", SqlDbType.VarChar);
            drugCmd.Parameters.Add("@Clearance", SqlDbType.VarChar);

            idCmd = new SqlCommand(INSERT_DRUG_ID, conn);
            idCmd.Parameters.Add("@ID", SqlDbType.VarChar, 256);
            idCmd.Parameters.Add("@ALT_ID", SqlDbType.VarChar, 20);

            articleCmd = new SqlCommand(INSERT_DRUG_ARTICLE, conn);

            articleCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);
            articleCmd.Parameters.Add("@Pubmed_ID", SqlDbType.VarChar, 256);
            articleCmd.Parameters.Add("@Citation", SqlDbType.VarChar, 20);

            linkCmd = new SqlCommand(INSERT_DRUG_LINK, conn);
            linkCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);
            linkCmd.Parameters.Add("@Title", SqlDbType.VarChar, 256);
            linkCmd.Parameters.Add("@URL", SqlDbType.VarChar, 64);

            interactionCmd = new SqlCommand(INSERT_DRUG_INTERACTION, conn);

            interactionCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);
            interactionCmd.Parameters.Add("@Interaction_ID", SqlDbType.VarChar, 20);
            interactionCmd.Parameters.Add("@Description", SqlDbType.VarChar, 256);

            productCmd = new SqlCommand(INSERT_DRUG_PRODUCT, conn);

            productCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);
            productCmd.Parameters.Add("@Name", SqlDbType.VarChar, 128);
            productCmd.Parameters.Add("@Labeller", SqlDbType.VarChar, 64);
            productCmd.Parameters.Add("@NDC_ID", SqlDbType.VarChar, 20);
            productCmd.Parameters.Add("@NDC_Product_Code", SqlDbType.VarChar, 20);
            productCmd.Parameters.Add("@DPD_ID", SqlDbType.VarChar, 20);
            productCmd.Parameters.Add("@EMA_Product_Code", SqlDbType.VarChar, 20);
            productCmd.Parameters.Add("@EMA_MA_Number", SqlDbType.VarChar, 20);
            productCmd.Parameters.Add("@Started_Marketing_On", SqlDbType.DateTime2, 20);
            productCmd.Parameters.Add("@Ended_Marketing_On", SqlDbType.DateTime2, 20);
            productCmd.Parameters.Add("@Dosage_Form", SqlDbType.VarChar, 64);
            productCmd.Parameters.Add("@Strength", SqlDbType.VarChar, 20);
            productCmd.Parameters.Add("@Route", SqlDbType.VarChar, 20);
            productCmd.Parameters.Add("@FDA_Application_Number", SqlDbType.VarChar, 20);
            productCmd.Parameters.Add("@Generic", SqlDbType.Bit);
            productCmd.Parameters.Add("@Over_the_Counter", SqlDbType.Bit);
            productCmd.Parameters.Add("@Approved", SqlDbType.Bit);
            productCmd.Parameters.Add("@Country", SqlDbType.VarChar, 20);
            productCmd.Parameters.Add("@Source", SqlDbType.VarChar, 20);


            mixtureCmd = new SqlCommand(INSERT_DRUG_MIXTURE, conn);

            mixtureCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);
            mixtureCmd.Parameters.Add("@Name", SqlDbType.VarChar, 64);
            mixtureCmd.Parameters.Add("@Ingredients", SqlDbType.VarChar, 64);

            packagerCmd = new SqlCommand(INSERT_DRUG_PACKAGER, conn);

            packagerCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);
            packagerCmd.Parameters.Add("@Name", SqlDbType.VarChar, 64);
            packagerCmd.Parameters.Add("@URL", SqlDbType.VarChar, 64);


            priceCmd = new SqlCommand(INSERT_DRUG_PRICE, conn);

            priceCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);
            priceCmd.Parameters.Add("@Description", SqlDbType.VarChar, 128);
            priceCmd.Parameters.Add("@Cost", SqlDbType.Decimal);
            priceCmd.Parameters.Add("@Currency", SqlDbType.VarChar,20);
            priceCmd.Parameters.Add("@Unit", SqlDbType.VarChar, 20);


            categoryCmd = new SqlCommand(INSERT_DRUG_CATEGORY, conn);

            categoryCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);
            categoryCmd.Parameters.Add("@Category", SqlDbType.VarChar, 128);
            categoryCmd.Parameters.Add("@Mesh_ID", SqlDbType.VarChar, 20);

            organismCmd = new SqlCommand(INSERT_DRUG_ORGANISM, conn);

            organismCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);
            organismCmd.Parameters.Add("@Organism", SqlDbType.VarChar, 128);

            patentCmd = new SqlCommand(INSERT_DRUG_PATENT, conn);

            patentCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);
            patentCmd.Parameters.Add("@Number", SqlDbType.VarChar, 20);
            patentCmd.Parameters.Add("@Country", SqlDbType.VarChar, 20);
            patentCmd.Parameters.Add("@Approved", SqlDbType.DateTime2);
            patentCmd.Parameters.Add("@Expires", SqlDbType.DateTime2);
            patentCmd.Parameters.Add("@Pediatric_Extension", SqlDbType.Bit);

            sequenceCmd = new SqlCommand(INSERT_DRUG_SEQUENCE, conn);

            sequenceCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);
            sequenceCmd.Parameters.Add("@Format", SqlDbType.VarChar, 20);
            sequenceCmd.Parameters.Add("@Sequence", SqlDbType.VarChar);
            sequenceCmd.Parameters.Add("@Type", SqlDbType.VarChar, 20);

            propertyCmd = new SqlCommand(INSERT_DRUG_PROPERTY, conn);

            propertyCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);
            propertyCmd.Parameters.Add("@Kind", SqlDbType.VarChar, 20);
            propertyCmd.Parameters.Add("@Value", SqlDbType.VarChar, 20);
            propertyCmd.Parameters.Add("@Source", SqlDbType.VarChar, 20);

            identifierCmd = new SqlCommand(INSERT_DRUG_IDENTIFIER, conn);

            identifierCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);
            identifierCmd.Parameters.Add("@Resource", SqlDbType.VarChar, 64);
            identifierCmd.Parameters.Add("@Identifier", SqlDbType.VarChar, 64);

            enzymCmd = new SqlCommand(INSERT_DRUG_ENZYM, conn);

            enzymCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);
            enzymCmd.Parameters.Add("@UniprotID", SqlDbType.VarChar, 20);

            XmlReader reader = XmlReader.Create(filename);
            while (!reader.EOF)
            {
                if (reader.Name != "drug")
                {
                    reader.ReadToFollowing("drug");
                }
                if (!reader.EOF)
                {
                    XElement drug = (XElement)XElement.ReadFrom(reader);
                    string primaryID = (string)drug.Elements().Where(x => (x.Name.LocalName == "drugbank-id") && (x.Attribute("primary") != null)).FirstOrDefault();

                    AddDrug(conn, drug, primaryID);
                    AddArticles(conn, drug, primaryID);
                    AddInteractions(conn, drug, primaryID);
                    AddProducts(conn, drug, primaryID);
                    AddMixtures(conn, drug, primaryID);
                    AddPackagers(conn, drug, primaryID);
                    AddPrices(conn, drug, primaryID);
                    AddCategories(conn, drug, primaryID);
                    AddOrganisms(conn, drug, primaryID);
                    AddPatents(conn, drug, primaryID);
                    AddSequences(conn, drug, primaryID);
                    AddProperties(conn, drug, primaryID);
                    AddIdentifiers(conn, drug, primaryID);
                    AddEnzymes(conn, drug, primaryID);
                }
            }

        }

这篇关于通过非制表符分隔符的非结构化 XML?的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆