Class HtmlNode
The HtmlNode class represents a single DOM element in an HTML or XML document.
Inheritance
System.Object
HtmlNode
Inherited Members
System.Object.Equals(System.Object)
System.Object.Equals(System.Object, System.Object)
System.Object.GetHashCode()
System.Object.GetType()
System.Object.MemberwiseClone()
System.Object.ToString()
System.Object.ReferenceEquals(System.Object, System.Object)
Assembly: IronWebScraper.dll
Syntax
Properties
Attributes
Gets the attributes of the HTML node (such as href, class, style etc).
Declaration
public Dictionary<string, string> Attributes { get; }
Property Value
Type |
Description |
System.Collections.Generic.Dictionary<System.String, System.String> |
The attributes as a Dictionary<string, string>
|
ChildNodes
An array of nodes nested within the HtmlNode.
Declaration
public HtmlNode[] ChildNodes { get; }
Property Value
InnerHtml
Gets the HtmlNode's inner Html as a string. InnerHtml will not include the current tag's own html markup, only that nested inside it.
Declaration
public string InnerHtml { get; }
Property Value
Type |
Description |
System.String |
|
InnerText
Gets the text content of the HtmlNode as might be shown to a user.
Text will be trimmed, with all groups of whitespace turned into a single space character.
Declaration
public string InnerText { get; }
Property Value
Type |
Description |
System.String |
|
InnerTextClean
Gets the text content of the HtmlNode as might be shown to a user.
Declaration
public string InnerTextClean { get; }
Property Value
Type |
Description |
System.String |
|
NodeName
Name of the HtmlNode such as "div", "p" or "#text"
Declaration
public string NodeName { get; }
Property Value
Type |
Description |
System.String |
|
NodeType
Type of the HtmlNode
Possible values are: "ELEMENT_NODE", "TEXT_NODE", "CDATA_SECTION_NODE", "COMMENT_NODE", "DOCUMENT_NODE", "DOCUMENT_TYPE_NODE", "DOCUMENT_FRAGMENT_NODE"
Declaration
public string NodeType { get; }
Property Value
Type |
Description |
System.String |
|
OuterHtml
Gets the HtmlNode's outer Html as a string. OuterHTML will include the tag's own html markup.
Declaration
public string OuterHtml { get; }
Property Value
Type |
Description |
System.String |
|
ParentNode
Gets the Parent (enclosing) tag for this HtmlNode.
Declaration
public HtmlNode ParentNode { get; }
Property Value
Type |
Description |
HtmlNode |
The parent node as an HtmlNode.
|
TextContent
Gets the text content of the HtmlNode and all of its descendants.
Declaration
public string TextContent { get; }
Property Value
Type |
Description |
System.String |
|
TextContentClean
Gets the text content of the HtmlNode, trimmed, with all groups of whitespace turned into a single space character.
TextContent will include the content of non-printable elements such as style and script nodes.
Declaration
public string TextContentClean { get; }
Property Value
Type |
Description |
System.String |
|
Methods
Css(String)
Uses CSS selectors to find all child nodes matching selector.
This works in the same way as $('.ClassName') in jQuery or querySelectorAll() in JavaScript.
Declaration
public HtmlNode[] Css(string selector)
Parameters
Type |
Name |
Description |
System.String |
selector |
|
Returns
CssExists(String)
Uses CSS selectors to find if there are any matching nodes within the Response Document.
This works in the same way as $('.ClassName').length > 0 in jQuery or querySelectorAll().length > 0 in JavaScript.
Declaration
public bool CssExists(string selector)
Parameters
Type |
Name |
Description |
System.String |
selector |
|
Returns
Type |
Description |
System.Boolean |
|
GetAttribute(String)
Gets a single attribute value from the HtmlNode. Returns null if that attribute is not present.
Declaration
public string GetAttribute(string Attribute)
Parameters
Type |
Name |
Description |
System.String |
Attribute |
|
Returns
Type |
Description |
System.String |
|
GetElementById(String)
Synonym of JavaScript's getElementById function. Searches inside the current HtmlNode.
Declaration
public HtmlNode GetElementById(string id)
Parameters
Type |
Name |
Description |
System.String |
id |
|
Returns
GetElementsByTagName(String)
Synonym of JavaScript's getElementsByTagName function. Searches inside the current HtmlNode.
Declaration
public HtmlNode[] GetElementsByTagName(string tagName)
Parameters
Type |
Name |
Description |
System.String |
tagName |
|
Returns
HasAtribute(String)
Tests if the HtmlNode has a given attribute such as "class" or "href"
Declaration
public bool HasAtribute(string Attribute)
Parameters
Type |
Name |
Description |
System.String |
Attribute |
|
Returns
Type |
Description |
System.Boolean |
|
QuerySelector(String)
Synonym of JavaScript's querySelector function. Searches inside the current HtmlNode.
Declaration
public HtmlNode QuerySelector(string selector)
Parameters
Type |
Name |
Description |
System.String |
selector |
|
Returns
QuerySelectorAll(String)
Synonym of JavaScript's querySelectorAll function. Searches inside the current HtmlNode.
Declaration
public HtmlNode[] QuerySelectorAll(string selector)
Parameters
Type |
Name |
Description |
System.String |
selector |
|
Returns
XPath(String)
Queries the HtmlNode to return all descendant nodes matching an XPath expression.
Declaration
public HtmlNode[] XPath(string expression)
Parameters
Type |
Name |
Description |
System.String |
expression |
|
Returns