XDocument 的性能 [英] XDocument performance

查看:148
本文介绍了 XDocument 性能问题的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

当你使用 XDocument.Load 创建一个新的 XDocument 时,它是打开 XML 文件并保留一份本地副本,还是持续从硬盘读取该文件?如果是持续读取,有没有更快的解析 XML 的方法?



 XDocument x = XDocument.Load("file.xml");


解决方案

有几个衡量指标需要考虑:




  1. 线性遍历速度(例如读取/加载)

  2. 按需查询速度



直接回答这个问题:XDocument 使用 XmlReader 逐个读取元素并创建相应的 XElement 实例,从而把文档加载到内存中(见下面的代码)。因此,它应该相当快(对大多数用途来说足够快),但在解析大型文档时可能会消耗大量内存。



如果你的需求仅限于无需把文档保留在内存中就能完成的操作,那么直接使用原始的 XmlReader 进行遍历是一个很好的选择。由于它既不创建重要的结构,也不解析节点之间的关系(例如链接父节点和子节点),其性能将优于其他方法。但是,它几乎不具备按需查询能力;你可以对每个节点中的值作出响应,却无法把文档作为一个整体来查询。如果需要再次查看文档,就必须重新遍历整个文档。



相比之下,XDocument 的遍历耗时更长,因为它要实例化新对象并执行基本的结构性工作。它消耗的内存也与源文档的大小成正比。作为这些代价的回报,你将获得出色的查询能力。



也可以把这两种方法结合起来,正如 Jon Skeet 所提到的,具体做法见:Streaming Into LINQ to XML Using C# Custom Iterators and XmlReader。



XDocument Load() 的源代码

public static XDocument Load(Stream stream, LoadOptions options)
{
    XmlReaderSettings xmlReaderSettings = XNode.GetXmlReaderSettings(options);
    XDocument result;
    using (XmlReader xmlReader = XmlReader.Create(stream, xmlReaderSettings))
    {
        result = XDocument.Load(xmlReader, options);
    }
    return result;
}

//它调用...

public static XDocument Load(XmlReader reader, LoadOptions options)
{
    if (reader == null)
    {
        throw new ArgumentNullException("reader");
    }
    if (reader.ReadState == ReadState.Initial)
    {
        reader.Read();
    }
    XDocument xDocument = new XDocument();
    if ((options & LoadOptions.SetBaseUri) != LoadOptions.None)
    {
        string baseURI = reader.BaseURI;
        if (baseURI != null && baseURI.Length != 0)
        {
            xDocument.SetBaseUri(baseURI);
        }
    }
    if ((options & LoadOptions.SetLineInfo) != LoadOptions.None)
    {
        IXmlLineInfo xmlLineInfo = reader as IXmlLineInfo;
        if (xmlLineInfo != null && xmlLineInfo.HasLineInfo())
        {
            xDocument.SetLineInfo(xmlLineInfo.LineNumber, xmlLineInfo.LinePosition);
        }
    }
    if (reader.NodeType == XmlNodeType.XmlDeclaration)
    {
        xDocument.Declaration = new XDeclaration(reader);
    }
    xDocument.ReadContentFrom(reader, options);
    if (!reader.EOF)
    {
        throw new InvalidOperationException(Res.GetString("InvalidOperation_ExpectedEndOfFile"));
    }
    if (xDocument.Root == null)
    {
        throw new InvalidOperationException(Res.GetString("InvalidOperation_MissingRoot"));
    }
    return xDocument;
}

//它调用...

internal void ReadContentFrom(XmlReader r, LoadOptions o)
{
    if ((o & (LoadOptions.SetBaseUri | LoadOptions.SetLineInfo)) == LoadOptions.None)
    {
        this.ReadContentFrom(r);
        return;
    }
    if (r.ReadState != ReadState.Interactive)
    {
        throw new InvalidOperationException(Res.GetString("InvalidOperation_ExpectedInteractive"));
    }
    XContainer xContainer = this;
    XNode xNode = null;
    NamespaceCache namespaceCache = default(NamespaceCache);
    NamespaceCache namespaceCache2 = default(NamespaceCache);
    string text = ((o & LoadOptions.SetBaseUri) != LoadOptions.None) ? r.BaseURI : null;
    IXmlLineInfo xmlLineInfo = ((o & LoadOptions.SetLineInfo) != LoadOptions.None) ? (r as IXmlLineInfo) : null;
    while (true)
    {
        string baseURI = r.BaseURI;
        switch (r.NodeType)
        {
        case XmlNodeType.Element:
        {
            XElement xElement = new XElement(namespaceCache.Get(r.NamespaceURI).GetName(r.LocalName));
            if (text != null && text != baseURI)
            {
                xElement.SetBaseUri(baseURI);
            }
            if (xmlLineInfo != null && xmlLineInfo.HasLineInfo())
            {
                xElement.SetLineInfo(xmlLineInfo.LineNumber, xmlLineInfo.LinePosition);
            }
            if (r.MoveToFirstAttribute())
            {
                do
                {
                    XAttribute xAttribute = new XAttribute(namespaceCache2.Get((r.Prefix.Length == 0) ? string.Empty : r.NamespaceURI).GetName(r.LocalName), r.Value);
                    if (xmlLineInfo != null && xmlLineInfo.HasLineInfo())
                    {
                        xAttribute.SetLineInfo(xmlLineInfo.LineNumber, xmlLineInfo.LinePosition);
                    }
                    xElement.AppendAttributeSkipNotify(xAttribute);
                }
                while (r.MoveToNextAttribute());
                r.MoveToElement();
            }
            xContainer.AddNodeSkipNotify(xElement);
            if (r.IsEmptyElement)
            {
                goto IL_30A;
            }
            xContainer = xElement;
            if (text != null)
            {
                text = baseURI;
                goto IL_30A;
            }
            goto IL_30A;
        }
        case XmlNodeType.Text:
        case XmlNodeType.Whitespace:
        case XmlNodeType.SignificantWhitespace:
            if ((text != null && text != baseURI) || (xmlLineInfo != null && xmlLineInfo.HasLineInfo()))
            {
                xNode = new XText(r.Value);
                goto IL_30A;
            }
            xContainer.AddStringSkipNotify(r.Value);
            goto IL_30A;
        case XmlNodeType.CDATA:
            xNode = new XCData(r.Value);
            goto IL_30A;
        case XmlNodeType.EntityReference:
            if (!r.CanResolveEntity)
            {
                goto Block_25;
            }
            r.ResolveEntity();
            goto IL_30A;
        case XmlNodeType.ProcessingInstruction:
            xNode = new XProcessingInstruction(r.Name, r.Value);
            goto IL_30A;
        case XmlNodeType.Comment:
            xNode = new XComment(r.Value);
            goto IL_30A;
        case XmlNodeType.DocumentType:
            xNode = new XDocumentType(r.LocalName, r.GetAttribute("PUBLIC"), r.GetAttribute("SYSTEM"), r.Value, r.DtdInfo);
            goto IL_30A;
        case XmlNodeType.EndElement:
        {
            if (xContainer.content == null)
            {
                xContainer.content = string.Empty;
            }
            XElement xElement2 = xContainer as XElement;
            if (xElement2 != null && xmlLineInfo != null && xmlLineInfo.HasLineInfo())
            {
                xElement2.SetEndElementLineInfo(xmlLineInfo.LineNumber, xmlLineInfo.LinePosition);
            }
            if (xContainer == this)
            {
                return;
            }
            if (text != null && xContainer.HasBaseUri)
            {
                text = xContainer.parent.BaseUri;
            }
            xContainer = xContainer.parent;
            goto IL_30A;
        }
        case XmlNodeType.EndEntity:
            goto IL_30A;
        }
        break;
        IL_30A:
        if (xNode != null)
        {
            if (text != null && text != baseURI)
            {
                xNode.SetBaseUri(baseURI);
            }
            if (xmlLineInfo != null && xmlLineInfo.HasLineInfo())
            {
                xNode.SetLineInfo(xmlLineInfo.LineNumber, xmlLineInfo.LinePosition);
            }
            xContainer.AddNodeSkipNotify(xNode);
            xNode = null;
        }
        if (!r.Read())
        {
            return;
        }
    }
    goto IL_2E1;
    Block_25:
    throw new InvalidOperationException(Res.GetString("InvalidOperation_UnresolvedEntityReference"));
    IL_2E1:
    throw new InvalidOperationException(Res.GetString("InvalidOperation_UnexpectedNodeType", new object[]
    {
        r.NodeType
    }));
}


When you create a new XDocument using XDocument.Load, does it open the XML file and keep a local copy, or does it continuously read the document from the hard drive? If it does continuously read, is there a faster way to parse XML?

XDocument x = XDocument.Load("file.xml");

解决方案

There are a couple of measurements to consider:

  1. Linear traversal speed (e.g. reading/loading)
  2. On-demand query speed

To answer the immediate question: XDocument uses an XmlReader to load the document into memory by reading each element and creating corresponding XElement instances (see code below). As such, it should be quite fast (fast enough for most purposes), but it may consume a large amount of memory when parsing a large document.

A raw XmlReader is an excellent choice for traversal if your needs are limited to that which can be done without retaining the document in memory. It will outperform other methods since no significant structure is created nor resolved with relation to other nodes (e.g. linking parent and child nodes). However, on-demand query ability is almost non-existent; you can react to values found in each node, but you can't query the document as a whole. If you need to look at the document a second time, you have to traverse the whole thing again.

By comparison, an XDocument will take longer to traverse because it instantiates new objects and performs basic structural tasks. It will also consume memory proportionate to the size of the source. In exchange for these trade-offs, you gain excellent query abilities.

It may be possible to combine the approaches, as mentioned by Jon Skeet and shown here: Streaming Into LINQ to XML Using C# Custom Iterators and XmlReader.

Source for XDocument Load()

/// <summary>
/// Loads an <see cref="XDocument"/> from a stream by wrapping the stream in an
/// <see cref="XmlReader"/> and delegating to the reader-based overload.
/// </summary>
/// <param name="stream">The stream the XML is read from.</param>
/// <param name="options">Load options; used both to derive the reader settings and for the delegated load.</param>
/// <returns>The document produced by the reader-based <c>Load</c> overload.</returns>
public static XDocument Load(Stream stream, LoadOptions options)
{
    XmlReaderSettings settings = XNode.GetXmlReaderSettings(options);

    // The reader is disposed when the using block exits, i.e. after the
    // document has been fully built by the delegated call.
    using (XmlReader reader = XmlReader.Create(stream, settings))
    {
        return XDocument.Load(reader, options);
    }
}

// which calls...

/// <summary>
/// Builds an <see cref="XDocument"/> from an <see cref="XmlReader"/>.
/// </summary>
/// <param name="reader">The reader to consume; advanced once if it is still in its initial state.</param>
/// <param name="options">
/// Flags controlling whether the document's base URI and line information are recorded;
/// also passed on to <c>ReadContentFrom</c>.
/// </param>
/// <returns>The populated document.</returns>
/// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
/// <exception cref="InvalidOperationException">
/// The reader is not at end-of-file after the content was read, or the document has no root.
/// </exception>
public static XDocument Load(XmlReader reader, LoadOptions options)
{
    if (reader == null)
    {
        throw new ArgumentNullException("reader");
    }

    // A reader that has not been advanced yet is moved onto its first node.
    if (reader.ReadState == ReadState.Initial)
    {
        reader.Read();
    }

    bool recordBaseUri = (options & LoadOptions.SetBaseUri) != LoadOptions.None;
    bool recordLineInfo = (options & LoadOptions.SetLineInfo) != LoadOptions.None;

    XDocument document = new XDocument();

    if (recordBaseUri)
    {
        string uri = reader.BaseURI;
        if (!string.IsNullOrEmpty(uri))
        {
            document.SetBaseUri(uri);
        }
    }

    if (recordLineInfo)
    {
        // Only readers that implement IXmlLineInfo and report having line info are used.
        IXmlLineInfo position = reader as IXmlLineInfo;
        if (position != null && position.HasLineInfo())
        {
            document.SetLineInfo(position.LineNumber, position.LinePosition);
        }
    }

    if (reader.NodeType == XmlNodeType.XmlDeclaration)
    {
        document.Declaration = new XDeclaration(reader);
    }

    document.ReadContentFrom(reader, options);

    // Anything left unread after the content means the input was not a single document.
    if (!reader.EOF)
    {
        throw new InvalidOperationException(Res.GetString("InvalidOperation_ExpectedEndOfFile"));
    }

    if (document.Root == null)
    {
        throw new InvalidOperationException(Res.GetString("InvalidOperation_MissingRoot"));
    }

    return document;
}

// which calls...

/// <summary>
/// Reads nodes from <paramref name="r"/> and adds them to this container, recording
/// base-URI and/or line information on the created nodes when <paramref name="o"/> asks for it.
/// </summary>
/// <param name="r">The reader to consume; must be in the <see cref="ReadState.Interactive"/> state when annotations are requested.</param>
/// <param name="o">Load options; when neither SetBaseUri nor SetLineInfo is set, the single-argument overload is used instead.</param>
/// <exception cref="InvalidOperationException">
/// The reader is not interactive, an entity reference cannot be resolved,
/// or a node type not handled by the switch is encountered.
/// </exception>
internal void ReadContentFrom(XmlReader r, LoadOptions o)
{
    // No annotations requested: defer to the overload that takes only the reader.
    if ((o & (LoadOptions.SetBaseUri | LoadOptions.SetLineInfo)) == LoadOptions.None)
    {
        this.ReadContentFrom(r);
        return;
    }

    if (r.ReadState != ReadState.Interactive)
    {
        throw new InvalidOperationException(Res.GetString("InvalidOperation_ExpectedInteractive"));
    }

    XContainer current = this;      // container currently receiving nodes
    XNode pending = null;           // node created by a case, attached after the switch
    NamespaceCache elementNames = default(NamespaceCache);
    NamespaceCache attributeNames = default(NamespaceCache);

    // Null means base-URI tracking is off; otherwise the base URI last adopted for the current container.
    string trackedBaseUri = null;
    if ((o & LoadOptions.SetBaseUri) != LoadOptions.None)
    {
        trackedBaseUri = r.BaseURI;
    }

    // Null means line tracking is off (or the reader does not implement IXmlLineInfo).
    IXmlLineInfo lineInfo = null;
    if ((o & LoadOptions.SetLineInfo) != LoadOptions.None)
    {
        lineInfo = r as IXmlLineInfo;
    }

    do
    {
        string nodeBaseUri = r.BaseURI;

        switch (r.NodeType)
        {
            case XmlNodeType.Element:
            {
                XElement element = new XElement(elementNames.Get(r.NamespaceURI).GetName(r.LocalName));
                if (trackedBaseUri != null && trackedBaseUri != nodeBaseUri)
                {
                    element.SetBaseUri(nodeBaseUri);
                }
                if (lineInfo != null && lineInfo.HasLineInfo())
                {
                    element.SetLineInfo(lineInfo.LineNumber, lineInfo.LinePosition);
                }

                if (r.MoveToFirstAttribute())
                {
                    do
                    {
                        // Attributes without a prefix are looked up under the empty namespace.
                        string attributeNamespace = (r.Prefix.Length == 0) ? string.Empty : r.NamespaceURI;
                        XAttribute attribute = new XAttribute(attributeNames.Get(attributeNamespace).GetName(r.LocalName), r.Value);
                        if (lineInfo != null && lineInfo.HasLineInfo())
                        {
                            attribute.SetLineInfo(lineInfo.LineNumber, lineInfo.LinePosition);
                        }
                        element.AppendAttributeSkipNotify(attribute);
                    }
                    while (r.MoveToNextAttribute());

                    r.MoveToElement();
                }

                current.AddNodeSkipNotify(element);

                // A non-empty element becomes the container for the nodes that follow.
                if (!r.IsEmptyElement)
                {
                    current = element;
                    if (trackedBaseUri != null)
                    {
                        trackedBaseUri = nodeBaseUri;
                    }
                }
                break;
            }

            case XmlNodeType.Text:
            case XmlNodeType.Whitespace:
            case XmlNodeType.SignificantWhitespace:
                // A dedicated XText node is created only when there is an annotation to attach;
                // otherwise the string is added to the container directly.
                if ((trackedBaseUri != null && trackedBaseUri != nodeBaseUri) || (lineInfo != null && lineInfo.HasLineInfo()))
                {
                    pending = new XText(r.Value);
                }
                else
                {
                    current.AddStringSkipNotify(r.Value);
                }
                break;

            case XmlNodeType.CDATA:
                pending = new XCData(r.Value);
                break;

            case XmlNodeType.EntityReference:
                if (!r.CanResolveEntity)
                {
                    throw new InvalidOperationException(Res.GetString("InvalidOperation_UnresolvedEntityReference"));
                }
                r.ResolveEntity();
                break;

            case XmlNodeType.ProcessingInstruction:
                pending = new XProcessingInstruction(r.Name, r.Value);
                break;

            case XmlNodeType.Comment:
                pending = new XComment(r.Value);
                break;

            case XmlNodeType.DocumentType:
                pending = new XDocumentType(r.LocalName, r.GetAttribute("PUBLIC"), r.GetAttribute("SYSTEM"), r.Value, r.DtdInfo);
                break;

            case XmlNodeType.EndElement:
            {
                if (current.content == null)
                {
                    current.content = string.Empty;
                }

                XElement closing = current as XElement;
                if (closing != null && lineInfo != null && lineInfo.HasLineInfo())
                {
                    closing.SetEndElementLineInfo(lineInfo.LineNumber, lineInfo.LinePosition);
                }

                // The end tag of the container we started in ends the read.
                if (current == this)
                {
                    return;
                }

                if (trackedBaseUri != null && current.HasBaseUri)
                {
                    trackedBaseUri = current.parent.BaseUri;
                }
                current = current.parent;
                break;
            }

            case XmlNodeType.EndEntity:
                break;

            default:
                throw new InvalidOperationException(Res.GetString("InvalidOperation_UnexpectedNodeType", new object[]
                {
                    r.NodeType
                }));
        }

        // Attach whatever node the case above produced, annotated as requested.
        if (pending != null)
        {
            if (trackedBaseUri != null && trackedBaseUri != nodeBaseUri)
            {
                pending.SetBaseUri(nodeBaseUri);
            }
            if (lineInfo != null && lineInfo.HasLineInfo())
            {
                pending.SetLineInfo(lineInfo.LineNumber, lineInfo.LinePosition);
            }
            current.AddNodeSkipNotify(pending);
            pending = null;
        }
    }
    while (r.Read());
}

这篇关于 XDocument 性能的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆