C# 网页抓取器

VB C#

using IronWebScraper;

public class Program
{
    private static void Main(string[] args)
    {
        var ScrapeJob = new BlogScraper();
        ScrapeJob.Start();
    }
}

public class BlogScraper : WebScraper
{
    public override void Init()
    {
        LoggingLevel = LogLevel.All;
        Request("https://www.zyte.com/blog/", Parse);
    }

    public override void Parse(Response response)
    {
        foreach (HtmlNode title_link in response.Css(".oxy-post-title"))
        {
            string strTitle = title_link.TextContentClean;
            Scrape(new ScrapedData() { { "Title", strTitle } });
        }

        if (response.CssExists("div.oxy-easy-posts-pages > a[href]"))
        {
            string next_page = response.Css("div.oxy-easy-posts-pages > a[href]")[0].Attributes["href"];
            Request(next_page, Parse);
        }
    }
}

Imports IronWebScraper

Public Class Program
	Public Shared Sub Main(ByVal args() As String)
		Dim ScrapeJob = New BlogScraper()
		ScrapeJob.Start()
	End Sub
End Class

Public Class BlogScraper
	Inherits WebScraper

	Public Overrides Sub Init()
		LoggingLevel = LogLevel.All
		Request("https://www.zyte.com/blog/", AddressOf Parse)
	End Sub

	Public Overrides Sub Parse(ByVal response As Response)
		For Each title_link As HtmlNode In response.Css(".oxy-post-title")
			Dim strTitle As String = title_link.TextContentClean
			Scrape(New ScrapedData() From {
				{ "Title", strTitle }
			})
		Next title_link

		If response.CssExists("div.oxy-easy-posts-pages > a[href]") Then
			Dim next_page As String = response.Css("div.oxy-easy-posts-pages > a[href]")(0).Attributes("href")
			Request(next_page, AddressOf Parse)
		End If
	End Sub
End Class

Install-Package IronWebScraper

C# 网页抓取器

Name: IronWebScraper
Brand: Iron Software
Availability: InStock
Rating: 4.72 (37 reviews)

IronWebScraper 提供了一个强大的框架，使用 C# 代码从网站提取数据和文件。

使用NuGet将IronWebScraper安装到您的项目中
创建一个扩展WebScraper的类
创建一个Init方法，该方法使用Request方法解析至少一个URL。
创建一个Parse方法来处理请求，并进一步Request更多页面。使用 response.Css 和 jQuery 风格的 CSS 选择器来操作 HTML 元素。
在您的应用程序中，请创建一个网页抓取类的实例并调用Start();方法。
阅读我们的C# 网络抓取教程，学习如何使用 IronWebScraper 创建高级网络爬虫