有一个错误Traverse()
这导致它多次迭代节点。
被窃听的代码
public IEnumerable<HtmlNode> Traverse()
{
foreach (var node in _context)
{
yield return node;
foreach (var child in Children().Traverse())
yield return child;
}
}
public SharpQuery Children()
{
return new SharpQuery(_context.SelectMany(n => n.ChildNodes).Where(n => n.NodeType == HtmlNodeType.Element), this);
}
public SharpQuery(IEnumerable<HtmlNode> nodes, SharpQuery previous = null)
{
if (nodes == null) throw new ArgumentNullException("nodes");
_previous = previous;
_context = new List<HtmlNode>(nodes);
}
测试代码
static void Main(string[] args)
{
var sq = new SharpQuery(@"
<a>
<b>
<c/>
<d/>
<e/>
<f>
<g/>
<h/>
<i/>
</f>
</b>
</a>");
var nodes = sq.Traverse();
Console.WriteLine("{0} nodes: {1}", nodes.Count(), string.Join(",", nodes.Select(n => n.Name)));
Console.ReadLine();
Output
19 个节点:#document,a,b,c,g,h,i,d,g,h,i,e,g,h,i,f,g,h,i
预期输出
每个字母 a-i 打印一次。
似乎无法弄清楚哪里出了问题......node.ChildNodes
does返回的只是直接子项吧? (来自 HtmlAgilityPack)
如果您想自己尝试运行,请参加完整课程(精简版)。
public class SQLite
{
private readonly List<HtmlNode> _context = new List<HtmlNode>();
private readonly SQLite _previous = null;
public SQLite(string html)
{
var doc = new HtmlDocument();
doc.LoadHtml(html);
_context.Add(doc.DocumentNode);
}
public SQLite(IEnumerable<HtmlNode> nodes, SQLite previous = null)
{
if (nodes == null) throw new ArgumentNullException("nodes");
_previous = previous;
_context = new List<HtmlNode>(nodes);
}
public IEnumerable<HtmlNode> Traverse()
{
foreach (var node in _context)
{
yield return node;
foreach (var child in Children().Traverse())
yield return child;
}
}
public SQLite Children()
{
return new SQLite(_context.SelectMany(n => n.ChildNodes).Where(n => n.NodeType == HtmlNodeType.Element), this);
}
}