Day 4 JobFilter in Perl
Packages
|—Job
| |—JobAd.pm
package Job::JobAd;

use strict;
use warnings;

# Plain data holder for a single job advertisement.
#
# Every attribute lives in the blessed hash under an underscore-prefixed
# key and is exposed through a combined getter/setter of the same name.

# Constructor. Takes no arguments; every attribute starts out undefined.
sub new {
    my ($class) = @_;

    my %ad = (
        _site        => undef,
        _id          => undef,
        _url         => undef,
        _shortDesc   => undef,
        _description => undef,
    );

    return bless \%ad, $class;
}

# Shared getter/setter body used by all public accessors below.
# Stores $value in $slot only when $value is defined (so an attribute
# cannot be reset to undef through its accessor) and returns the value
# currently held in the slot.
sub _get_or_set {
    my ( $ad, $slot, $value ) = @_;

    if ( defined $value ) {
        $ad->{$slot} = $value;
    }

    return $ad->{$slot};
}

# Accessor: the site the ad belongs to.
sub site        { return _get_or_set( $_[0], '_site',        $_[1] ); }

# Accessor: the ad identifier.
sub id          { return _get_or_set( $_[0], '_id',          $_[1] ); }

# Accessor: the ad URL.
sub url         { return _get_or_set( $_[0], '_url',         $_[1] ); }

# Accessor: the short description.
sub shortDesc   { return _get_or_set( $_[0], '_shortDesc',   $_[1] ); }

# Accessor: the full description.
sub description { return _get_or_set( $_[0], '_description', $_[1] ); }

1;
| |—Site.pm
package Job::Site;

use strict;
use warnings;

use Utils::Web;
use Utils::Counter;

# Base class describing a job site: where it lives, how its search URL is
# built, and how many result pages to download. Subclasses fill in the
# site-specific strings and may override processSite().

# Constructor. Takes no arguments. All descriptive attributes start out
# undefined; the page counter starts at 1 and allows 10 pages by default
# (change it through pagesToDownload()).
sub new {
    my $class = shift;

    my $self = {
        _displayName  => undef,
        _url          => undef,
        _searchString => undef,
        _adString     => undef,
        _keywords     => undef,
        _counter      => Utils::Counter->new(
            { startWith => 1, maxIterations => 10 }
        ),
    };

    return bless $self, $class;
}

# Accessor: human-readable site name.
# An undefined argument leaves the stored value untouched.
sub displayName {
    my ( $self, $param1 ) = @_;
    $self->{_displayName} = $param1 if defined($param1);
    return $self->{_displayName};
}

# Accessor: base URL of the site (prefix of every search URL).
sub url {
    my ( $self, $param1 ) = @_;
    $self->{_url} = $param1 if defined($param1);
    return $self->{_url};
}

# Accessor: search path template; may contain the <<KEYWORDS>> and
# <<COUNTER>> placeholders that _getNextPageUrl() fills in.
sub searchString {
    my ( $self, $param1 ) = @_;
    $self->{_searchString} = $param1 if defined($param1);
    return $self->{_searchString};
}

# Accessor: path template for a single advertisement.
sub adString {
    my ( $self, $param1 ) = @_;
    $self->{_adString} = $param1 if defined($param1);
    return $self->{_adString};
}

# Accessor: number of result pages to download. The value is not stored
# on this object; it is the page counter's maxIterations.
sub pagesToDownload {
    my ( $self, $param1 ) = @_;
    $self->{_counter}->maxIterations($param1) if defined($param1);
    return $self->{_counter}->maxIterations();
}

# Accessor: search keywords substituted into the search string.
sub keywords {
    my ( $self, $param1 ) = @_;
    $self->{_keywords} = $param1 if defined($param1);
    return $self->{_keywords};
}

# Accessor: the Utils::Counter object that drives paging.
sub counter {
    my ( $self, $param1 ) = @_;
    $self->{_counter} = $param1 if defined($param1);
    return $self->{_counter};
}

# Builds the URL of the next result page.
#
# Advances the page counter by one step, then replaces the first
# <<KEYWORDS>> and the first <<COUNTER>> placeholder in a copy of the
# search string and appends the result to the base URL.
# Returns the URL as a string.
sub _getNextPageUrl {
    my ($self) = @_;

    my $url          = $self->{_url};
    my $searchString = $self->{_searchString};
    my $counter      = $self->{_counter}->getNext();

    # Keywords are optional: substitute an empty string instead of
    # interpolating undef (same resulting URL, no warning).
    my $keywords = defined $self->{_keywords} ? $self->{_keywords} : '';

    $searchString =~ s/<<KEYWORDS>>/$keywords/;
    $searchString =~ s/<<COUNTER>>/$counter/;

    return $url . $searchString;
}

# Download and process a website.
# Override this method on each subclass.
#
# Fetches every remaining page allowed by the counter and returns the
# concatenated page contents. A page that could not be fetched (getPage
# returned undef) contributes a one-line error marker instead. Returns
# undef when the counter has no iterations left on entry.
sub processSite {
    my ($self) = @_;

    my $content = undef;
    my $web     = Utils::Web->new_instance();

    while ( $self->{_counter}->hasNext() ) {
        my $pageUrl = $self->_getNextPageUrl();
        my $page    = $web->getPage($pageUrl);

        # Terminate the marker with a real newline so consecutive
        # failures do not run together in the output.
        $page = "ERROR - Could not get the page [$pageUrl]\n"
            if !defined($page);

        $content .= $page;
    }

    return $content;
}

1;
| |—Site
| | |—Seek.pm
package Job::Site::Seek;

use strict;
use warnings;

use base 'Job::Site';
use Utils::Web;

# Job::Site specialisation preconfigured for Seek (Melbourne search).

# Constructor. Takes no arguments; builds a Job::Site and fills in the
# Seek-specific display name, base URL, search template and ad template.
sub new {
    my ($class) = @_;

    # Call the constructor of the parent class.
    my $self = $class->SUPER::new();

    $self->{_displayName}  = "Seek - Melbourne";
    $self->{_url}          = "http://www.seek.com.au";
    $self->{_searchString} = "/JobSearch?DateRange=31&location=1002&Keywords=<<KEYWORDS>>&page=<<COUNTER>>";
    $self->{_adString}     = "/job/<<JOB_ID>>";

    bless $self, $class;
    return $self;
}

# Download and process a website.
#
# Seek needs no site-specific handling yet, so this reuses the parent's
# download loop instead of keeping a private copy of it. That way a page
# that cannot be fetched is reported the same way as for every other
# site rather than being appended as an undefined value.
# Returns whatever Job::Site::processSite returns.
sub processSite {
    my ($self) = @_;
    return $self->SUPER::processSite();
}

1;
|—Utils
| |—Counter.pm
package Utils::Counter;

use strict;
use warnings;

# Bounded arithmetic counter: yields startWith, startWith + increment, ...
# for at most maxIterations calls to getNext().

# Constructor.
#
# $param1 (optional) is a hash reference that may contain:
#   startWith     - first value returned by getNext() (1 if never set)
#   increment     - step between consecutive values   (default 1)
#   maxIterations - number of values hasNext() allows (default 100)
sub new {
    my ( $class, $param1 ) = @_;

    my $self = {
        _startWith     => undef,
        _current       => undef,
        _increment     => 1,
        _maxIterations => 100,
        _currIteration => 0,
    };

    if ( defined($param1) ) {
        # Read the same key that is tested: the value of 'startWith'.
        $self->{_startWith} = $param1->{startWith}
            if exists( $param1->{startWith} );
        $self->{_increment} = $param1->{increment}
            if exists( $param1->{increment} );
        $self->{_maxIterations} = $param1->{maxIterations}
            if exists( $param1->{maxIterations} );
    }

    return bless $self, $class;
}

# Accessor: step added on every getNext() after the first.
# An undefined argument leaves the stored value untouched.
sub increment {
    my ( $self, $param1 ) = @_;
    $self->{_increment} = $param1 if defined($param1);
    return $self->{_increment};
}

# Accessor: first value of the sequence. It is only consulted when the
# sequence (re)starts, i.e. on the first getNext() or the first one
# after reset().
sub startWith {
    my ( $self, $param1 ) = @_;
    $self->{_startWith} = $param1 if defined($param1);
    return $self->{_startWith};
}

# Accessor: maximum number of values handed out before hasNext()
# turns false.
sub maxIterations {
    my ( $self, $param1 ) = @_;
    $self->{_maxIterations} = $param1 if defined($param1);
    return $self->{_maxIterations};
}

# Accessor: number of values handed out so far.
sub currIteration {
    my ( $self, $param1 ) = @_;
    $self->{_currIteration} = $param1 if defined($param1);
    return $self->{_currIteration};
}

# Starts a sequence: defaults startWith to 1 when it was never set,
# clears the iteration count and positions the counter on startWith.
sub _init {
    my ($self) = @_;

    if ( !defined( $self->{_startWith} ) ) {
        $self->{_startWith} = 1;
    }

    $self->{_currIteration} = 0;
    $self->{_current}       = $self->{_startWith};

    return;
}

# Rewinds the counter; the next getNext() starts over from startWith.
sub reset {
    my ($self) = @_;

    $self->{_currIteration} = 0;
    $self->{_current}       = undef;

    return;
}

# Returns 1 while fewer than maxIterations values have been handed out,
# 0 otherwise.
sub hasNext {
    my ($self) = @_;
    return $self->{_currIteration} < $self->{_maxIterations} ? 1 : 0;
}

# Returns the next value of the sequence and counts the iteration.
# The limit is not enforced here; callers are expected to check
# hasNext() first.
sub getNext {
    my ($self) = @_;

    if ( !defined( $self->{_current} ) ) {
        # First call (or first call after reset): start the sequence.
        $self->_init();
    }
    else {
        $self->{_current} += $self->{_increment};
    }

    $self->{_currIteration}++;

    return $self->{_current};
}

# A module must end with a true value so that require/use succeed.
1;
| |—Web.pm
package Utils::Web;

use strict;
use warnings;

use base 'Class::Singleton';
use HTTP::Request;
use HTTP::Response;
use LWP::UserAgent;

# Thin singleton wrapper around an LWP::UserAgent configured with a
# browser-like agent string.

# Construction hook for Class::Singleton: instance() calls the method
# named _new_instance, and only the first time it is invoked. Creates
# the user agent and sets its agent string.
sub _new_instance {
    my $class = shift;

    my $self = bless {}, $class;

    $self->{_userAgent} = LWP::UserAgent->new;
    $self->{_userAgent}
        ->agent("Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0)");

    return $self;
}

# Backward-compatible entry point for existing callers. Returns the
# shared instance (creating it on first use), so settings applied with
# setProxy() are seen by every caller instead of being lost on a
# freshly built object.
sub new_instance {
    my $class = shift;
    return $class->instance(@_);
}

# Routes http and ftp requests through the proxy at $url and registers
# $user / $password with the user agent.
#
# NOTE(review): LWP::UserAgent documents the first argument of
# credentials() as a "host:port" network location and the second as the
# realm; confirm that passing the proxy URL and an empty realm matches
# the proxy in use.
sub setProxy {
    my ( $self, $url, $user, $password ) = @_;

    $self->{_userAgent}->credentials( $url, '', $user, $password );
    $self->{_userAgent}->proxy( [ 'http', 'ftp' ], $url );

    return;
}

# Fetches $url with a GET request.
#
# Returns the response body on success. Returns nothing (undef in scalar
# context) when the response is not successful, so callers can tell a
# failed download from real page content.
sub getPage {
    my ( $self, $url ) = @_;

    my $request
        = HTTP::Request->new( 'GET', $url, [ 'Content-length' => 0 ] );
    my $response = $self->{_userAgent}->request($request);

    return unless $response->is_success;
    return $response->content;
}

1;
Leave a Comment