Perl - LWP

perl

First example:

#!/usr/bin/perl
use strict;
use warnings;
use LWP();
my $url = 'http://192.168.10.9:30102/';
my $ua = LWP::UserAgent->new();
my $agent_string = 'Mozilla/4.0 (compatible, MSIE 6.0; 
    Windows NT 5.1; SV1; Maxthon; .NET CLR 1.)';
$ua->agent($agent_string);
my $h = HTTP::Headers->new();
$h->header('User-Agent' => $agent_string);
$h->header('Accept' =>  '
    image/gif, 
    image/x-bitmap, 
    image/jpeg, 
    image/pjpeg, 
    application/vnd.ms-excel, 
    application/vnd.ms-powerpoint, 
    application/msword, 
    application/x-shockwave-flash, 
    */*
');
$h->header('Accept-Language' => 'en-us');
$h->header('Accept-Encoding' => 'gzip, deflate');
my $request = HTTP::Request->new('GET',$url,$h,'');
my $response = $ua->request($request);

Second example:

This code demonstrate:

  1. Reading from a text file
  2. Writing to a text file
  3. How to make HTTP POST request, and process the response
  4. The use of split
  5. Having the request go through a local Fiddler proxy
use HTTP::Request::Common;
use LWP::UserAgent;
use Time::HiRes qw/usleep/;

$ua = LWP::UserAgent->new;
$ua->proxy(['http', 'https'], 'http://127.0.0.1:8888/');

my $cnt = 0;
my $successCnt = 0;
my $failureCnt = 0;
open (MYFILE, 'SJC_AD_File.txt');
open (OUTFILE, '>>UnableToAuthenticate.txt');
<MYFILE>;
while (<MYFILE>) {
     chomp;
    $cnt = $cnt + 1;
    $line = $_;
    if (! testAuthentication($line)) {
        print OUTFILE $line, "\n";
        $failureCnt = $failureCnt + 1;
    } else {
        $successCnt = $successCnt + 1;
    }
}
close (MYFILE); 
close (OUTFILE);
print "Success: $successCnt, Failure: $failureCnt, Total: $cnt\n";

sub testAuthentication {
    my ($line) = @_;
    my $loginID, $lastName, $firstName, $dn;
    ($loginID, $lastName, $firstName, $dn) = split /\|/,$line;
    $loginID = $loginID . '@sjhc.com';
    print "$loginID\n$dn\n----\n";

    my $response = $ua->request(
        POST 'URL', [
            param1=> 'v1', 
            param2 => "v2", 
            param3 => $dn
        ]
    );
    if ($response->is_success) {
        $body = $response->decoded_content;
        if ($body =~ /LDAP Authentication Failure/) {
            return 0;
        } elsif ($body =~ /Qxpert -- Unknown Error/) {
            return 0;
        } elsif ($body !~ /MyCQIBody/) {
            print $body, "\n";
            exit;
        } else {
            return 1;
        }
    } else {
        return 0;
    }
    usleep(100);
}

Third example:

#!/usr/bin/perl
use CGI;
use LWP::UserAgent;
use HTTP::Response;
use HTTP::Request;
use HTTP::Request::Common qw(POST);
$ua = LWP::UserAgent->new;
$request = HTTP::Request->new(GET => 'some URL');
$response = $ua->request($request);
if ($response->is_success) {
    $fxrate = $response->header('fxrate');
    print "Content-Type: text/html\n\nfxrate:$fxrate";
} else {
    print "Content-Type: text/html\n\nError";
}

How can we read a header?

#!/usr/bin/perl
use CGI;
use LWP::UserAgent;
use HTTP::Response;
use HTTP::Request;
use HTTP::Request::Common qw(POST);
$ua = LWP::UserAgent->new;
$request = HTTP::Request->new(GET => 'some URL');
$response = $ua->request($request);
if ($response->is_success) {
    $fxrate = $response->header('fxrate');
    print "Content-Type: text/html\n\nfxrate:$fxrate";
} else {
    print "Content-Type: text/html\n\nError";
}

Code to fetch a large zip file behind a login form:

This code use WWW::Mechanize to submit username and password to the web server. It then parse the result, to determine the URL for the zip file, and download a large zip file.

use HTML::TreeBuilder;
use WWW::Mechanize;
my $mech = WWW::Mechanize->new();
$mech->get('https://multumdc.cernerworks.com/');
$mech->submit_form(fields => {clientid => 'clientid'});
$mech->submit_form(fields    => {
    username => 'username', 
    password => "password", 
    client_name => 'client name', 
    client_id => 'client_id', 
    action => 'sign_in'
});
my $body = $mech->content();
$root = HTML::TreeBuilder->new_from_content($body);
$root->elementify();
@elements = $root->find('a');
$len = scalar(@elements);
$i = 0;
$url = "";
for ($i = 0; $i < $len; $i++) {
    $element = $elements[$i];
    if ($element->attr('href') =~ /\.exe/i) {
        $url = 'https://multumdc.cernerworks.com/' . $element->attr('href');
        last;
    }
}
my $outFile = dirname($0) . '\\db\\' . 'Lexicon.exe';
print "\n", $outFile, "\n";

$mech->get($url);
$mech->save_content($outFile);
exit;

Code to fetch a web page and extract its title:

This code demonstrate:

  1. using Getopt::Long to parse command-line options
  2. using WWW::Mechanize to fetch web page
  3. using HTML::TreeBuilder to extract the title of web page
  4. using IO::File to read a text file
  5. using IO::File to write a text file
#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Long;
use IO::File ();
use LWP ();
use HTML::TreeBuilder;
use WWW::Mechanize;

my $infile;
my $outfile;

GetOptions(
    "infile=s" => \$infile,
    "outfile=s" => \$outfile
);

my $fho = IO::File->new();
if (! $fho->open(">>$outfile")) {
    die("Error: Not able to open $outfile for writing.");
}

my $mech = WWW::Mechanize->new();

my $fhi = IO::File->new();
my $cnt = 1;
if ($fhi->open("<$infile")) {
    my $line;
    while ($line = <$fhi>) {
        chomp($line);
        if ($line eq "") {
            next;
        }
        my $uri = URI->new($line);
        if ($uri->scheme eq "http") {
            $uri->scheme("https");
        }
        $line = $uri->as_string();
        $mech->get($line);
        my $body = $mech->content();
        my $root = HTML::TreeBuilder->new_from_content($body);
        $root->elementify();
        my ($title) = $root->look_down('_tag','title');
        $title = $title->as_text;
        $title =~ s/\-\s*YouTube//g;
        print $fho $line . ' - ' . $title,"\n";
        print STDOUT "Processing line #". $cnt . ",URL: " . $line,"\n";
        $cnt = $cnt + 1;
    }
} else {
    die("Error: Not able to open $infile for reading.");
}
$fhi->close();
$fho->close();

http://www.perlmonks.org/?node_id=280461

Unless otherwise stated, the content of this page is licensed under Creative Commons Attribution-ShareAlike 3.0 License