Perl proxy leecher
This is probably the worst hack ever. I received it from an Austrian freelance programmer in spring 2006, and it is pretty much unreadable if you don't know how HTML::Parser works. The script itself works, but it is very slow.
It has full cookie and referrer spoofing support, so it can leech from sites that use either of these methods for protection. It might be a good starting point if you ever want to go through all those proxy leech sites automatically. You can use its source code to build a faster proxy leecher.
#!C:/Perl/bin/Perl -w
use LWP::UserAgent;
use HTTP::Request;
use HTTP::Response;
use HTTP::Cookies;
use HTML::Parser 3.00 ();
# Pages to scrape for proxy addresses, in fetch order.
# The main loop sends entry N of @refererlist as the Referer for entry N of
# this list, so both lists must keep the same length and ordering.
my @sitelist = qw(
    http://www.proxy4free.com
    http://www.samair.ru/proxy/
    http://www.proxylist.com.ru/
    http://publicproxyservers.com
    http://www.proxyleech.com/proxylist.txt
    http://www.proxy4free.com/page1.html
    http://proxyz.net/index.php?menu=anonproxys
    http://www.publicproxyservers.com/page1.html
    http://www.anonymitychecker.com
    http://www.proxylist.com.ru/proxy-server/anonymous-proxy-1-2/
    http://www.anonymitychecker.com/page1.html
    http://www.publicproxyservers.com/page3.html
    http://www.proxy4free.com/page2.html
    http://www.my-proxy.com/list/index.php?list=s1
    http://proxyz.net/index.php?menu=anonproxys&page=5
    http://www.proxylist.com.ru/proxy-server/anonymous-proxy-1-4/
    http://www.my-proxy.com/list/index.php?list=1
    http://www.samair.ru/proxy/proxy-02.htm
    http://www.freeproxylists.com/rss
    http://proxyz.net/index.php?menu=anonproxys&page=2
    http://www.proxy4free.com/page4.html
    http://www.cybersyndrome.net/pla3.html
    http://www.anonymitychecker.com/page4.html
    http://www.proxylist.com.ru/proxy-server/anonymous-proxy-1-3/
    http://www.publicproxyservers.com/page2.html
    http://free-proxy-servers.com/free_proxies.php
    http://www.my-proxy.com/list/index.php?list=2
    http://www.anonymitychecker.com/page2.html
    http://www.proxylist.com.ru/proxy-server/anonymous-proxy-1-5/
    http://www.proxyforest.com/proxy.htm
    http://www.anonymitychecker.com/page5.html
    http://www.publicproxyservers.com/page5.html
    http://www.my-proxy.com/list/index.php?list=s2
    http://www.proxy4free.com/page3.html
    http://proxyz.net/index.php?menu=anonproxys&page=3
    http://www.my-proxy.com/list/index.php?list=3
    http://www.proxylist.com.ru/proxy-server/anonymous-proxy-1-1/
    http://www.anonymitychecker.com/page6.html
    http://www.steganos.com/?area=updateproxylist
    http://www.anonymitychecker.com/page3.html
    http://www.publicproxyservers.com/page4.html
    http://www.proxy4free.com/page5.html
    http://proxyz.net/index.php?menu=anonproxys&page=4
    http://eliteproxy.us/proxy.php
    http://theone.ru/proxy/
    http://proxy.mazafaka.ru/
    http://proxy.mazafaka.ru/?c=all&t=all&m=5&checked=y
    http://nntime.com/
    http://nntime.com/index.php?start=51
    http://nntime.com/index.php?start=101
    http://nntime.com/index.php?start=151
    http://nntime.com/index.php?start=201
    http://www.freeproxy.ru/
    http://www.freeproxy.ru/en/free_proxy/get.htm
    http://www.freeproxy.ru/download/lists/goodproxy.txt
    http://www.digitalcybersoft.com/ProxyList/
    http://www.digitalcybersoft.com/ProxyList/fresh-proxy-list.shtml
    http://www.digitalcybersoft.com/ProxyList/fresh-proxy-list.shtml?L3
);
# Referer header values, one per entry of @sitelist and in the same order:
# the main loop sends element N of this list with the request for element N
# of @sitelist.
my @refererlist = qw(
    http://www.proxy4free.com
    http://www.samair.ru/proxy/
    http://www.proxylist.com.ru/
    http://publicproxyservers.com
    http://www.proxyleech.com
    http://www.proxy4free.com/page1.html
    http://proxyz.net/
    http://www.publicproxyservers.com/
    http://www.anonymitychecker.com
    http://www.proxylist.com.ru/
    http://www.anonymitychecker.com
    http://www.publicproxyservers.com/
    http://www.proxy4free.com/
    http://www.my-proxy.com/
    http://proxyz.net/index.php
    http://www.proxylist.com.ru/
    http://www.my-proxy.com/
    http://www.samair.ru/proxy/
    http://www.freeproxylists.com/
    http://proxyz.net/index.php
    http://www.proxy4free.com
    http://www.cybersyndrome.net
    http://www.anonymitychecker.com
    http://www.proxylist.com.ru
    http://www.publicproxyservers.com
    http://free-proxy-servers.com
    http://www.my-proxy.com/
    http://www.anonymitychecker.com/
    http://www.proxylist.com.ru/proxy-server/
    http://www.proxyforest.com/
    http://www.anonymitychecker.com/
    http://www.publicproxyservers.com//
    http://www.my-proxy.com/list/index.php
    http://www.proxy4free.com/page3.html
    http://proxyz.net/index.php?menu=anonproxys&page=3
    http://www.my-proxy.com/list/index.php?list=3
    http://www.proxylist.com.ru/proxy-server/anonymous-proxy-1-5/
    http://www.anonymitychecker.com/page4.html
    http://www.steganos.com/
    http://www.anonymitychecker.com/page1.html
    http://www.publicproxyservers.com/page2.html
    http://www.proxy4free.com/page2.html
    http://proxyz.net/index.php?menu=anonproxys&page=2
    http://eliteproxy.us/proxy.php
    http://theone.ru/
    http://theone.ru
    http://proxy.mazafaka.ru/
    http://nntime.com/
    http://nntime.com/
    http://nntime.com/
    http://nntime.com/
    http://nntime.com/
    http://www.freeproxy.ru/
    http://www.freeproxy.ru/en/free_proxy/get.htm
    http://www.freeproxy.ru/download/lists/goodproxy.txt
    http://www.digitalcybersoft.com/
    http://www.digitalcybersoft.com/ProxyList/
    http://www.digitalcybersoft.com/ProxyList/fresh-proxy-list.shtml
);
# Working files used while scraping:
#   $target - body of the page fetched most recently
#   $rip    - text extracted from $target by the HTML::Parser handlers
#   $final  - accumulated "ip:port" lines collected from every page
my $target = "C:/Programme/Apache Group/Apache2/htdocs/proxy/proxy.html";
my $rip = "C:/Programme/Apache Group/Apache2/htdocs/proxy/list.html";
my $final ="C:/Programme/Apache Group/Apache2/htdocs/proxy/final.txt";

# Per-tag nesting counters, updated by tag() and consulted by text().
my %inside;

# CGI response header; everything printed to STDOUT afterwards is the body.
print "Content-type: text/html\n\n";

# Instantiate the HTTP client.
# $request is only declared here: the main loop assigns a real request for
# each site. (It used to be initialised from an undefined $url, which built
# a meaningless request that was never sent.)
my $request;
my $cookie_jar = HTTP::Cookies->new;
my $client = LWP::UserAgent->new();
$client->agent('Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; iOpus-I-M; .NET CLR 1.1.4322)');
$client->cookie_jar($cookie_jar);
$client->timeout(20);

# Start every run with an empty result file; opening for write truncates it.
# The handle lives in its own block so it does not leak into the rest of the
# script.
{
    open(my $final_fh, '>', $final) or die "cant open file $final: $!";
    close($final_fh) or die "cant close file $final: $!";
}

# Index into @refererlist for the site currently being processed.
my $arrayprogress=0;
# Fetch every page in @sitelist (sending the matching @refererlist entry as
# Referer), reduce it to plain text, and append every "ip:port" pair found in
# that text to $final.
#
# Fixes relative to the previous version:
#   * A page that fails to download is skipped. Before, the stale copy of the
#     previous page left in $target was parsed again, appending duplicates.
#   * %inside is cleared before each parse, so an unbalanced <script>/<style>
#     on one page cannot suppress the text of all following pages.
#   * The site/referer index comes from the loop itself, so skipping a page
#     cannot shift the referers of later pages.
#   * The no-op extract_cookies() call on an undefined $response is gone; the
#     user agent already stores response cookies in the jar assigned to it.
for my $index (0 .. $#sitelist) {
    my $page = $sitelist[$index];

    my $request = HTTP::Request->new(GET => $page);
    $request->referer($refererlist[$index]);
    my $hash = $client->request($request);

    # Show the cookies collected so far in the CGI output.
    print $cookie_jar->as_string,"<br>\n";

    if ($hash->is_error()) {
        warn "skipping $page: " . $hash->status_line . "\n";
        next;
    }

    # Save the page body; the parser below reads it back from disk.
    open(my $target_fh, '>', $target) or die "cant open file $target: $!";
    print {$target_fh} $hash->content();
    close($target_fh) or die "cant close file $target: $!";

    # Strip the markup. RIP must remain a bareword handle because the text()
    # handler prints to it by name.
    %inside = ();
    open(RIP, '>', $rip) or die "cant open file $rip: $!";
    HTML::Parser->new(api_version => 3,
                      handlers => [start => [\&tag, "tagname, '+1'"],
                                   end => [\&tag, "tagname, '-1'"],
                                   text => [\&text, "dtext"],
                                  ],
                      marked_sections => 1,
                     )->parse_file($target) || die "Can't open file: $!\n";
    close(RIP) or die "cant close file $rip: $!";

    # Read the extracted text back in one piece.
    open(my $rip_fh, '<', $rip) or die("$!\n");
    my $data = do { local $/; <$rip_fh> };
    close($rip_fh);
    $data = '' unless defined $data;

    # remove comments
    $data =~ s/#.*$//gm;

    # Match IP/PORT pairs: a dotted quad, then a run of whitespace or colons,
    # then digits followed by at least one more whitespace or colon.
    my @matches = ($data =~ /(\d+\.\d+\.\d+\.\d+)[\n\r\s\:]+(\d+)[\n\r\s\:]+/sg);

    # Append this page's results to the combined list.
    open(my $final_fh, '>>', $final) or die "cant open file $final: $!";
    while (my ($ip, $port) = splice(@matches, 0, 2)) {
        print {$final_fh} $ip.":".$port."\n";
    }
    close($final_fh) or die "cant close file $final: $!";
}
# HTML::Parser handler for start and end tags.
#
# Arguments (as configured by the handler argspecs in the main loop):
#   $tag - name of the tag that was opened or closed
#   $num - '+1' for a start tag, '-1' for an end tag
#
# Adjusts the nesting counter for $tag in %inside, which text() consults to
# skip <script> and <style> content, and writes a single space to RIP.
#
# The space used to go to STDOUT, where it had no effect on the extracted
# text. Written to RIP it keeps the text of adjacent elements apart, so
# "<td>1.2.3.4</td><td>80</td>" yields "1.2.3.4 80" rather than the fused
# "1.2.3.480" that the IP/port pattern cannot match.
sub tag
{
    my($tag, $num) = @_;
    $inside{$tag} += $num;
    print RIP " "; # emitted for every tag, not only block-level ones
    return;
}
# HTML::Parser text handler: copies one chunk of document text to the RIP
# filehandle, except while the %inside counters show that the parser is
# currently within a <script> or <style> element.
sub text
{
    my ($chunk) = @_;
    my $suppressed = $inside{script} || $inside{style};
    return if $suppressed;
    print RIP $chunk;
}