C# 網頁抓取工具
IronWebScraper 提供了一個強大的框架，使用 C# 代碼從網站提取數據和文件。
using IronWebScraper;

/// <summary>
/// Console entry point: creates a <see cref="BlogScraper"/> and starts it.
/// </summary>
public class Program
{
    private static void Main(string[] args)
    {
        // Start() is not defined on BlogScraper itself, so it comes from the WebScraper base.
        // NOTE(review): presumably Start() is what triggers Init() — confirm in the IronWebScraper docs.
        var job = new BlogScraper();
        job.Start();
    }
}

/// <summary>
/// Scraper that records the title of every post found on the blog listing page
/// and then queues the first pagination link it finds for the same treatment.
/// </summary>
public class BlogScraper : WebScraper
{
    // Entry URL queued by Init().
    private const string StartUrl = "https://www.zyte.com/blog/";

    // CSS selector for the elements whose text is stored as "Title".
    private const string TitleSelector = ".oxy-post-title";

    // CSS selector for pagination anchors that carry an href attribute.
    private const string PagerLinkSelector = "div.oxy-easy-posts-pages > a[href]";

    /// <summary>
    /// Sets logging to <c>LogLevel.All</c> and queues the start URL with
    /// <see cref="Parse"/> as its response handler.
    /// </summary>
    public override void Init()
    {
        LoggingLevel = LogLevel.All;
        Request(StartUrl, Parse);
    }

    /// <summary>
    /// Emits one scraped record per matched title element, then follows the
    /// first pagination link when one exists.
    /// </summary>
    /// <param name="response">The response handed to this callback for a requested page.</param>
    public override void Parse(Response response)
    {
        foreach (HtmlNode titleNode in response.Css(TitleSelector))
        {
            string title = titleNode.TextContentClean;
            Scrape(new ScrapedData() { { "Title", title } });
        }

        // Nothing to follow when the page has no pagination anchors.
        if (!response.CssExists(PagerLinkSelector))
        {
            return;
        }

        // The first matching anchor's href is used as the next page to request.
        var firstPagerLink = response.Css(PagerLinkSelector)[0];
        string nextPageUrl = firstPagerLink.Attributes["href"];
        Request(nextPageUrl, Parse);
    }
}
Imports IronWebScraper

''' <summary>
''' Console entry point: creates a BlogScraper and starts it.
''' </summary>
Public Class Program
    Public Shared Sub Main(args As String())
        ' Start() is not defined on BlogScraper itself, so it comes from the WebScraper base.
        ' NOTE(review): presumably Start() is what triggers Init() - confirm in the IronWebScraper docs.
        Dim job = New BlogScraper()
        job.Start()
    End Sub
End Class

''' <summary>
''' Scraper that records the title of every post found on the blog listing page
''' and then queues the first pagination link it finds for the same treatment.
''' </summary>
Public Class BlogScraper
    Inherits WebScraper

    ' Entry URL queued by Init().
    Private Const StartUrl As String = "https://www.zyte.com/blog/"

    ' CSS selector for the elements whose text is stored as "Title".
    Private Const TitleSelector As String = ".oxy-post-title"

    ' CSS selector for pagination anchors that carry an href attribute.
    Private Const PagerLinkSelector As String = "div.oxy-easy-posts-pages > a[href]"

    ''' <summary>
    ''' Sets logging to LogLevel.All and queues the start URL with Parse as its response handler.
    ''' </summary>
    Public Overrides Sub Init()
        LoggingLevel = LogLevel.All
        Request(StartUrl, AddressOf Parse)
    End Sub

    ''' <summary>
    ''' Emits one scraped record per matched title element, then follows the
    ''' first pagination link when one exists.
    ''' </summary>
    ''' <param name="response">The response handed to this callback for a requested page.</param>
    Public Overrides Sub Parse(response As Response)
        For Each titleNode As HtmlNode In response.Css(TitleSelector)
            Dim title As String = titleNode.TextContentClean
            Scrape(New ScrapedData() From {{"Title", title}})
        Next

        ' Nothing to follow when the page has no pagination anchors.
        If Not response.CssExists(PagerLinkSelector) Then
            Return
        End If

        ' The first matching anchor's href is used as the next page to request.
        Dim nextPageUrl As String = response.Css(PagerLinkSelector)(0).Attributes("href")
        Request(nextPageUrl, AddressOf Parse)
    End Sub
End Class
Install-Package IronWebScraper
IronWebScraper 提供了一個強大的框架，使用 C# 代碼從網站提取數據和文件。