C# Web Scraper

C# + VB.Net: C# Web Scraper
using IronWebScraper;

namespace WebScrapingProject
{
    /// <summary>
    /// Console entry point for the scraping sample.
    /// </summary>
    class MainClass
    {
        /// <summary>
        /// Creates a <see cref="BlogScraper"/> and starts it.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        public static void Main(string[] args)
        {
            BlogScraper blogScraper = new BlogScraper();
            blogScraper.Start();
        }
    }

    /// <summary>
    /// Scraper that records the text of every title link on a blog page and
    /// then requests the page linked from the "prev-post" element, if any.
    /// </summary>
    class BlogScraper : WebScraper
    {
        // First URL handed to Request from Init.
        private const string StartUrl = "https://blog.scrapinghub.com";

        // Selector for the anchors whose text is recorded as "Title".
        private const string TitleLinkSelector = "h2.entry-title a";

        // Selector for the link that Parse follows to the next page to scrape.
        private const string FollowLinkSelector = "div.prev-post > a[href]";

        /// <summary>
        /// Sets the logging level to <c>All</c> and requests the start URL,
        /// passing <see cref="Parse"/> as the response handler.
        /// </summary>
        public override void Init()
        {
            LoggingLevel = WebScraper.LogLevel.All;
            Request(StartUrl, Parse);
        }

        /// <summary>
        /// Passes one "Title" record to <c>Scrape</c> per matching title link,
        /// then requests the follow-up page when its link is present.
        /// </summary>
        /// <param name="response">The response to read titles and the follow-up link from.</param>
        public override void Parse(Response response)
        {
            foreach (var headingAnchor in response.Css(TitleLinkSelector))
            {
                string title = headingAnchor.TextContentClean;
                var record = new ScrapedData() { { "Title", title } };
                Scrape(record);
            }

            // Nothing further to request when the page has no follow-up link.
            if (!response.CssExists(FollowLinkSelector))
            {
                return;
            }

            var followLink = response.Css(FollowLinkSelector)[0];
            var followUrl = followLink.Attributes["href"];
            Request(followUrl, Parse);
        }
    }
}
Imports IronWebScraper

Namespace WebScrapingProject
	''' <summary>
	''' Console entry point for the scraping sample.
	''' </summary>
	Friend Class MainClass
		''' <summary>
		''' Creates a BlogScraper and starts it.
		''' </summary>
		''' <param name="args">Command-line arguments; not read by this method.</param>
		Public Shared Sub Main(ByVal args() As String)
			Dim crawler = New BlogScraper()
			Call crawler.Start()
		End Sub
	End Class

	''' <summary>
	''' Scraper that records the text of every title link on a blog page and
	''' then requests the page linked from the "prev-post" element, if any.
	''' </summary>
	Friend Class BlogScraper
		Inherits WebScraper

		' First URL handed to Request from Init.
		Private Const StartUrl As String = "https://blog.scrapinghub.com"

		' Selector for the anchors whose text is recorded as "Title".
		Private Const TitleLinkSelector As String = "h2.entry-title a"

		' Selector for the link that Parse follows to the next page to scrape.
		Private Const FollowLinkSelector As String = "div.prev-post > a[href]"

		''' <summary>
		''' Sets the logging level to All and requests the start URL,
		''' passing Parse as the response handler.
		''' </summary>
		Public Overrides Sub Init()
			LoggingLevel = WebScraper.LogLevel.All
			Request(StartUrl, AddressOf Parse)
		End Sub

		''' <summary>
		''' Passes one "Title" record to Scrape per matching title link,
		''' then requests the follow-up page when its link is present.
		''' </summary>
		''' <param name="response">The response to read titles and the follow-up link from.</param>
		Public Overrides Sub Parse(ByVal response As Response)
			For Each headingAnchor In response.Css(TitleLinkSelector)
				Dim title As String = headingAnchor.TextContentClean
				Dim record = New ScrapedData() From {{"Title", title}}
				Scrape(record)
			Next

			' Nothing further to request when the page has no follow-up link.
			If Not response.CssExists(FollowLinkSelector) Then
				Return
			End If

			Dim followLink = response.Css(FollowLinkSelector)(0)
			Dim followUrl = followLink.Attributes("href")
			Request(followUrl, AddressOf Parse)
		End Sub
	End Class
End Namespace

IronWebScraper provides a powerful framework for extracting data and files from websites using C# code.

  1. Install IronWebScraper into your project using NuGet
  2. Create a class that extends WebScraper
  3. Create an Init method that uses the Request method to request at least one URL, naming the method that will parse the response.
  4. Create a Parse method to process the responses and, where needed, Request more pages. Use response.Css to work with HTML elements using jQuery-style CSS selectors
  5. In your application, create an instance of your web scraping class and call its Start() method
  6. Read our C# web scraping tutorials to learn how to create advanced web crawlers using IronWebScraper