PhantomJS automation of a website leads to my IP getting blocked
I'm using PhantomJS to automate a page. What I do is:
do{ console.log(i); i++; page.open(url); { phantom.page.sendevent('mousemove'); } while (page.loading); if(page.injectjs('./search.js') == false){ console.log("search.js failed") } var links = page.evaluate(function(json){ return search(json) },json) console.log(links); } while(links == "")
So this leads to me opening the website repeatedly until what I'm looking for appears. But it also leads to me getting IP banned. What can I do to get around this?
Your IP is getting banned because the script generates too many requests to the website in too little time. So you need to throttle the requests, i.e. apply a pause between them.
I would rewrite the script like this:
// PhantomJS script: repeatedly load `url` and run search() inside the page
// until it returns a non-empty result, pausing between attempts so the site
// is not hit with back-to-back requests.
//
// `var` is used throughout on purpose: PhantomJS ships an ES5-era JavaScript
// engine, so `let`/`const` are not safe to rely on here.
var page = require('webpage').create();
var url = "http://www.website.tld/";
var json = {"some" : "json"};

// Delay between attempts, in milliseconds. This pause is the throttle that
// keeps the request rate low.
var RETRY_DELAY_MS = 3000;

// Number of attempts made so far (logged on every attempt).
var i = 0;

/**
 * Schedule another getlinks() attempt after RETRY_DELAY_MS.
 *
 * @param {string} url  - Address of the page to load.
 * @param {Object} json - Argument forwarded to search() in the page.
 */
function scheduleRetry(url, json) {
  setTimeout(function () {
    getlinks(url, json);
  }, RETRY_DELAY_MS);
}

/**
 * Load `url`, inject ./search.js, and evaluate search(json) in the page.
 * If the result is empty, wait and try again; otherwise print it and exit.
 *
 * search() is expected to be defined by ./search.js (not visible here).
 *
 * @param {string} url  - Address of the page to load.
 * @param {Object} json - Argument passed to search() inside the page.
 */
function getlinks(url, json) {
  i++;
  console.log(i);

  // Use the open() completion callback instead of spinning on page.loading:
  // it fires once the load has finished and reports whether it succeeded.
  page.open(url, function (status) {
    if (status !== 'success') {
      // The page did not load; try again after the usual pause rather than
      // injecting into a page that is not there.
      console.log("failed to load " + url);
      scheduleRetry(url, json);
      return;
    }

    if (page.injectJs('./search.js') === false) {
      // Without search.js there is nothing to evaluate, and retrying would
      // not help, so stop with a non-zero exit code.
      console.log("search.js failed");
      phantom.exit(1);
      return;
    }

    var links = page.evaluate(function (json) {
      return search(json);
    }, json);

    // Loose equality is kept deliberately: the return type of search() is
    // not visible here, and `== ""` matches both "" and an empty array.
    if (links == "") {
      // No links scraped yet: wait, then try again.
      scheduleRetry(url, json);
      return;
    }

    console.log(links);
    phantom.exit();
  });
}

getlinks(url, json);
Comments
Post a Comment