Skip to content

Day 4 JobFilter in Perl

Packages
|—Job
|    |—JobAd.pm

package Job::JobAd;

use strict;
use warnings;

# Constructor.
# Builds a job-ad object whose fields (_site, _id, _url, _shortDesc,
# _description) all start out undefined. Arguments after the class name
# are ignored.
sub new {
    my ($class) = @_;

    my %fields = (
        _site        => undef,
        _id          => undef,
        _url         => undef,
        _shortDesc   => undef,
        _description => undef,
    );

    return bless \%fields, $class;
}

# Getter/setter for the _site field.
# A defined argument replaces the stored value; an absent or undef
# argument leaves it untouched. Always returns the current value.
sub site {
    my $self  = shift;
    my $value = shift;

    if ( defined $value ) {
        $self->{_site} = $value;
    }

    return $self->{_site};
}

# Getter/setter for the _id field (same contract as site()).
sub id {
    my $self  = shift;
    my $value = shift;

    if ( defined $value ) {
        $self->{_id} = $value;
    }

    return $self->{_id};
}

# Getter/setter for the _url field (same contract as site()).
sub url {
    my $self  = shift;
    my $value = shift;

    if ( defined $value ) {
        $self->{_url} = $value;
    }

    return $self->{_url};
}

# Getter/setter for the _shortDesc field (same contract as site()).
sub shortDesc {
    my $self  = shift;
    my $value = shift;

    if ( defined $value ) {
        $self->{_shortDesc} = $value;
    }

    return $self->{_shortDesc};
}

# Getter/setter for the _description field (same contract as site()).
sub description {
    my $self  = shift;
    my $value = shift;

    if ( defined $value ) {
        $self->{_description} = $value;
    }

    return $self->{_description};
}

1;

|    |—Site.pm

package Job::Site;

use strict;
use warnings;

use Utils::Web;
use Utils::Counter;

# Constructor.
# Builds a generic job-site description. All string fields start out
# undefined; the page counter starts at 1 and allows 10 iterations
# (change the limit with pagesToDownload()). Arguments after the class
# name are ignored.
sub new {
    my $class = shift;

    my $self = {
        _displayName        => undef,
        _url                => undef,
        _searchString       => undef,
        _adString           => undef,
        _keywords           => undef,
        _counter            => Utils::Counter->new({startWith => 1, maxIterations => 10}),
    };

    return bless $self, $class;
}

# Getter/setter for _displayName.
# A defined argument is stored; the current value is always returned.
sub displayName {
    my ( $self, $param1 ) = @_;
    $self->{_displayName} = $param1 if defined($param1);
    return $self->{_displayName};
}

# Getter/setter for _url (the prefix prepended to the search string).
sub url {
    my ( $self, $param1 ) = @_;
    $self->{_url} = $param1 if defined($param1);
    return $self->{_url};
}

# Getter/setter for _searchString, the template in which <<KEYWORDS>>
# and <<COUNTER>> are substituted by _getNextPageUrl().
sub searchString {
    my ( $self, $param1 ) = @_;
    $self->{_searchString} = $param1 if defined($param1);
    return $self->{_searchString};
}

# Getter/setter for _adString.
sub adString {
    my ( $self, $param1 ) = @_;
    $self->{_adString} = $param1 if defined($param1);
    return $self->{_adString};
}

# Getter/setter for the number of pages to download.
# The value is not stored on this object; it is the counter's
# maxIterations setting.
sub pagesToDownload {
    my ( $self, $param1 ) = @_;
    $self->{_counter}->maxIterations($param1) if defined($param1);
    return $self->{_counter}->maxIterations();
}

# Getter/setter for _keywords (substituted for <<KEYWORDS>>).
sub keywords {
    my ( $self, $param1 ) = @_;
    $self->{_keywords} = $param1 if defined($param1);
    return $self->{_keywords};
}

# Getter/setter for the page counter object itself.
sub counter {
    my ( $self, $param1 ) = @_;
    $self->{_counter} = $param1 if defined($param1);
    return $self->{_counter};
}


# Builds the URL of the next result page.
# Advances the counter by one step, then fills the first <<KEYWORDS>> and
# the first <<COUNTER>> placeholder of the search string and appends the
# result to the site URL. Unset url/keywords/searchString are treated as
# empty strings so that a half-configured site does not emit
# "uninitialized value" warnings.
# NOTE(review): keywords are inserted verbatim (no URL escaping) - confirm
# callers pass already-encoded keywords.
sub _getNextPageUrl {
    my ( $self ) = @_;

    my $url          = defined $self->{_url}          ? $self->{_url}          : '';
    my $keywords     = defined $self->{_keywords}     ? $self->{_keywords}     : '';
    my $searchString = defined $self->{_searchString} ? $self->{_searchString} : '';
    my $counter      = $self->{_counter}->getNext();

    $searchString =~ s/<<KEYWORDS>>/$keywords/;
    $searchString =~ s/<<COUNTER>>/$counter/;

    return $url . $searchString;
}


# Download and process a website
# Override this method on each subclass
#
# Fetches one page per remaining counter iteration and returns the
# concatenated page contents. A page that could not be fetched is replaced
# by an "ERROR - ..." line. Returns undef when the counter has no
# iterations left; the counter is not reset here, so a second call fetches
# nothing unless the caller resets it.
# Utils::Web->new_instance() builds a new Utils::Web object on each call.
sub processSite {
    my ( $self ) = @_;

    my $content = undef;
    my $web = Utils::Web->new_instance();

    while($self->{_counter}->hasNext()) {
        my $nextp = $self->_getNextPageUrl();
        my $result = $web->getPage($nextp);
        # Fixed: the message used to end in a literal "n" instead of a newline.
        $result = "ERROR - Could not get the page [$nextp]\n" if !defined($result);

        $content .= $result;
    }

    return $content;
}

1;

|    |—Site
|        |—Seek.pm

package Job::Site::Seek;

use strict;
use warnings;

use base 'Job::Site';

use Utils::Web;

# Constructor.
# Creates a Job::Site pre-configured for the Seek job board: display name,
# base URL, search-page template (with <<KEYWORDS>> and <<COUNTER>>
# placeholders) and ad-page template (with a <<JOB_ID>> placeholder).
sub new {
    my ($class) = @_;

    # The parent constructor already blesses the object into $class,
    # so no second bless is needed here.
    my $self = $class->SUPER::new();

    $self->{_displayName} = "Seek - Melbourne";
    $self->{_url} = "http://www.seek.com.au";
    $self->{_searchString} = "/JobSearch?DateRange=31&location=1002&Keywords=<<KEYWORDS>>&page=<<COUNTER>>";
    $self->{_adString} = "/job/<<JOB_ID>>";

    return $self;
}

# Download and process a website
#
# Fetches one search-result page per remaining counter iteration and
# returns the concatenated contents (undef if there was nothing to fetch).
# As in Job::Site::processSite, a page for which getPage() yields undef is
# replaced by an "ERROR - ..." line instead of being appended as undef.
sub processSite {
    my ( $self ) = @_;

    my $content = undef;
    my $web = Utils::Web->new_instance();

    while($self->{_counter}->hasNext()) {
        my $nextp  = $self->_getNextPageUrl();
        my $result = $web->getPage($nextp);
        $result = "ERROR - Could not get the page [$nextp]\n" if !defined($result);

        $content .= $result;
    }

    return $content;
}

1;

|—Utils
     |—Counter.pm

package Utils::Counter;

use strict;
use warnings;

# Constructor.
# Optional argument: a hash reference with any of the keys
#   startWith     - first value returned by getNext() (defaults to 1,
#                   applied lazily on the first getNext())
#   increment     - step added on every subsequent getNext() (default 1)
#   maxIterations - number of getNext() calls after which hasNext()
#                   becomes false (default 100)
sub new {
    my ( $class, $param1 ) = @_;

    my $self = {
        _startWith      => undef,
        _current        => undef,
        _increment      => 1,
        _maxIterations  => 100,
        _currIteration  => 0,
    };

    if(defined($param1)) {
        # Fixed: this used to read the non-existent key "start", so a
        # caller-supplied startWith was silently discarded.
        $self->{_startWith} = $param1->{startWith} if exists($param1->{startWith});

        $self->{_increment} = $param1->{increment} if exists($param1->{increment});

        $self->{_maxIterations} = $param1->{maxIterations} if exists($param1->{maxIterations});
    }

    return bless $self, $class;
}


# Getter/setter for the step size.
# A defined argument is stored; the current value is always returned.
sub increment {
    my ( $self, $param1 ) = @_;
    $self->{_increment} = $param1 if defined($param1);
    return $self->{_increment};
}

# Getter/setter for the starting value.
sub startWith {
    my ( $self, $param1 ) = @_;
    $self->{_startWith} = $param1 if defined($param1);
    return $self->{_startWith};
}

# Getter/setter for the iteration limit checked by hasNext().
sub maxIterations {
    my ( $self, $param1 ) = @_;
    $self->{_maxIterations} = $param1 if defined($param1);
    return $self->{_maxIterations};
}

# Getter/setter for the number of iterations consumed so far.
sub currIteration {
    my ( $self, $param1 ) = @_;
    $self->{_currIteration} = $param1 if defined($param1);
    return $self->{_currIteration};
}

# Positions the counter on its first value.
# Defaults startWith to 1 when it was never set, zeroes the iteration
# count and makes the current value equal to startWith.
sub _init {
    my ( $self ) = @_;

    if(!defined($self->{_startWith})) {
        $self->{_startWith} = 1;
    }

    $self->{_currIteration} = 0;
    $self->{_current} = $self->{_startWith};

    return;
}

# Rewinds the counter: the iteration count goes back to 0 and the current
# value is cleared, so the next getNext() starts again from startWith.
sub reset {
    my ( $self ) = @_;

    $self->{_currIteration} = 0;
    $self->{_current} = undef;

    return;
}

# Returns 1 while fewer than maxIterations values have been handed out,
# 0 otherwise.
sub hasNext {
    my ( $self ) = @_;

    return $self->{_currIteration} < $self->{_maxIterations} ? 1 : 0;
}

# Returns the next value and counts one iteration.
# The first call (or the first after reset()) yields startWith; every
# later call adds the increment. The limit is not enforced here - callers
# are expected to check hasNext() first.
sub getNext {
    my ( $self ) = @_;

    if(!defined($self->{_current})) {
        $self->_init();
    }
    else {
        $self->{_current} += $self->{_increment};
    }

    $self->{_currIteration}++;

    return $self->{_current};
}

# A module must end with a true value to be loadable via use/require.
1;

     |—Web.pm

package Utils::Web;

use strict;
use warnings;

use base 'Class::Singleton';

use HTTP::Request;
use HTTP::Response;
use LWP::UserAgent;

# Builds a Utils::Web object with its own LWP::UserAgent, identifying
# itself with a fixed browser-like User-Agent string.
# Calling this method directly always creates a fresh object; use
# Utils::Web->instance() to obtain the shared singleton.
sub new_instance {
    my $class = shift;
    my $self  = bless { }, $class;

    $self->{_userAgent} = LWP::UserAgent->new;
    $self->{_userAgent}->agent("Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0)");

    return $self;
}

# Hook that Class::Singleton's instance() calls the first time it is used.
# Without it, instance() would fall back to the base-class constructor and
# return an object that has no _userAgent.
sub _new_instance {
    my $class = shift;
    return $class->new_instance(@_);
}


# Routes http and ftp requests through the proxy at $url and registers
# $user/$password as credentials on the user agent.
# NOTE(review): LWP::UserAgent::credentials() is documented as taking a
# "host:port" netloc and a realm; a full proxy URL with an empty realm is
# passed here - confirm that proxy authentication actually works.
sub setProxy {
    my ( $self, $url, $user, $password ) = @_;

    $self->{_userAgent}->credentials( $url, '', $user, $password );
    $self->{_userAgent}->proxy(['http', 'ftp'], $url);

    return;
}


# Fetches $url with a GET request.
# Returns the response body on success. On a non-successful response
# it returns nothing (undef in scalar context), so callers can tell a
# failed download from page content, as Job::Site::processSite expects.
sub getPage{
    my ( $self, $url ) = @_;

    my $request  = HTTP::Request->new('GET', $url, [ 'Content-length' => 0]);
    my $response = $self->{_userAgent}->request($request);

    return unless $response->is_success;

    return $response->content;
}

1;

.

Leave a Comment

Leave a comment