|
If there had been extra time, I was going to talk about my googling current-awareness program. I thought it was a simple but effective use of SOAP. If you want to run it, download it.
1 #!/usr/bin/perl
2
3 =head1 NAME
4
5 googling - current awareness via email
6
7 =head1 SYNOPSIS
8
9 googling --query='funkadelic' --email='george@clinton.com' \
10 --key='IXJGrPF_THIS_WONT_WORK_1bB8+FCO'
11
12 =head1 DESCRIPTION
13
14 googling will perform a search of Google and send results to an email
15 or to STDOUT if no email is given.
16
17 =head1 OPTIONS
18
19 =head2 --query
20
21 You must pass this in since it's the query you want to run against google.
22
23 =head2 --email
24
25 If you'd like to have results sent via email use this option. You can use
26 it more than once if you'd like the report to go to more than one address.
27 If not supplied results will go to STDOUT. It's a useful option if you want
28 (as I did) to run the query from cron.
29
30 =head2 --db
31
32 Specify the sqlite database to use. If not supplied it defaults to
33 googling.db in the pwd.
34
35 =head2 --key
36
37 Specify your Google API Key. See http://www.google.com/apis/ to get your
38 own.
39
40 =head2 --limit
41
42 If you'd like to stop checking google after a certain number of hits use
43 this option. Default is 100.
44
45 =head2 --debug
46
47 See diagnostic information.
48
49 =head2 --help
50
51 See this message.
52
53 =head1 SEE ALSO
54
55 =head1 AUTHOR
56
57 =over 4
58
59 =item Ed Summers E<lt>ehs@pobox.comE<gt>
60
61 =back
62
63 =cut
64
65 use strict;
66 use warnings;
67 use Getopt::Long;
68 use Pod::Usage;
69 use SOAP::Lite;
70 use DBI;
71 use Mail::Send;
72
## gather options
my ( $limit, $query, $db, $debug, $help, $googleId, @emails );
GetOptions(
    'limit:i' => \$limit,
    'query:s' => \$query,
    'email:s' => \@emails,
    'key:s'   => \$googleId,
    'db:s'    => \$db,
    'debug!'  => \$debug,
    'help!'   => \$help,
) or pod2usage( 1 );

## asking for help is a successful run; a missing query or key is not.
## the values are tested for emptiness rather than truth so that a query
## of '0' is still accepted.
pod2usage( 0 ) if $help;
if (   !defined $query
    or $query eq ''
    or !defined $googleId
    or $googleId eq '' )
{
    pod2usage( 1 );
}

$limit = 100           if !$limit;
$db    = 'googling.db' if !$db;

## --debug: diagnostics go to STDERR so they never mix with the results
## that are printed on STDOUT when no email address was given
my $diag = sub { print STDERR 'googling: ', @_, "\n" if $debug; };
$diag->( "query='$query' limit=$limit db='$db'" );


## database access to remember what results have been seen
## in previous runs
my $dbh = getDbh( $db )
    or die "googling: unable to open database '$db': "
    . ( $DBI::errstr || 'unknown error' ) . "\n";
my $select = $dbh->prepare(
    q{select count(*) from seen where url = ? and query = ?}
) or die "googling: unable to prepare select: " . $dbh->errstr . "\n";
my $insert = $dbh->prepare(
    q{insert into seen (url,query) values(?,?)}
) or die "googling: unable to prepare insert: " . $dbh->errstr . "\n";


## build the variables that we are going to pass to our SOAP request.
## these are the same for every page of results; only 'start' and
## 'maxResults' change and they are built inside the loop.
my $key            = SOAP::Data->name( 'key' )->value( $googleId );
my $q              = SOAP::Data->name( q          => $query )->type( 'string' );
my $filter         = SOAP::Data->name( filter     => 1 )->type( 'boolean' );
my $restrict       = SOAP::Data->name( restrict   => '' )->type( 'string' );
my $safeSearch     = SOAP::Data->name( safeSearch => 0 )->type( 'boolean' );
my $langRestrict   = SOAP::Data->name( lr         => '' )->type( 'string' );
my $inputEncoding  = SOAP::Data->name( ie         => '' )->type( 'string' );
my $outputEncoding = SOAP::Data->name( oe         => '' )->type( 'string' );

## the client does not change between pages, so build it only once
my $google = SOAP::Lite
    ->uri( 'urn:GoogleSearch' )
    ->proxy( 'http://api.google.com/search/beta2' );

## storage variables
my @results;                 # [ url, snippet ] pairs waiting to be mailed
my $startElement = 0;        # offset of the next result to ask for
my $searchFailed = 0;        # set when a request to google fails

## while we haven't exceeded the citation limit
PAGE:
while ( $startElement < $limit ) {

    ## google limits query results to a set of 10 at a time; ask for
    ## fewer on the last page so that we never look past --limit
    my $remaining = $limit - $startElement;
    my $pageSize  = $remaining < 10 ? $remaining : 10;
    my $start      = SOAP::Data->name( start      => $startElement )->type( 'int' );
    my $maxResults = SOAP::Data->name( maxResults => $pageSize )->type( 'int' );
    $diag->( "requesting $pageSize results starting at $startElement" );

    ## issue the query. a failure here must not throw away what was
    ## collected so far: those urls are already recorded as seen and would
    ## never be reported again, so we stop paging and still send the report.
    my $response = eval {
        $google->doGoogleSearch( $key, $q, $start, $maxResults, $filter,
            $restrict, $safeSearch, $langRestrict, $inputEncoding,
            $outputEncoding );
    };
    if ( !$response ) {
        warn "googling: request failed at offset $startElement: ",
            ( $@ || 'unknown error' ), "\n";
        $searchFailed = 1;
        last PAGE;
    }
    if ( $response->fault ) {
        warn "googling: search failed at offset $startElement: ",
            $response->faultstring, "\n";
        $searchFailed = 1;
        last PAGE;
    }
    my $page = $response->result;
    if ( ref( $page ) ne 'HASH' ) {
        warn "googling: unexpected response at offset $startElement\n";
        $searchFailed = 1;
        last PAGE;
    }

    ## if we didn't get any more results we're done. an end index that is
    ## missing or does not move forward would otherwise loop forever.
    my $endIndex = $page->{ endIndex };
    last PAGE if !defined $endIndex or $endIndex <= $startElement;

    ## go through each item in the result set
    foreach my $item ( @{ $page->{ resultElements } || [] } ) {

        ## look in the db to see if we've seen this url for this query before
        $select->execute( $item->{ URL }, $query );
        my ( $seen ) = $select->fetchrow_array();
        next if $seen;

        ## it's a new url: add it to the database (so we don't report it
        ## again), then either print it or keep it for the email report
        $insert->execute( $item->{ URL }, $query );
        if ( !@emails ) {
            print $item->{ URL }, "\n", $item->{ snippet }, "\n", "\n";
        }
        else {
            push( @results, [ $item->{ URL }, $item->{ snippet } ] );
        }
    }

    ## set the next start element to the index of the last item in this
    ## result set
    $startElement = $endIndex;

}

## if new results were found, and we have been asked to send an email
if ( @results and @emails ) {
    $diag->( 'mailing ', scalar( @results ), ' new results to ',
        join( ',', @emails ) );
    my $msg = Mail::Send->new();
    $msg->to( join( ',', @emails ) );
    $msg->subject( "googling: $query" );
    my $fh    = $msg->open( 'qmail' );
    my $count = 1;
    foreach my $hit ( @results ) {
        my ( $url, $snippet ) = @{ $hit };
        $fh->print( "[$count] ", $url, "\n" );
        if ( $snippet ) { $fh->print( $snippet, "\n" ); }
        $fh->print( "\n" );
        $count++;
    }
    $fh->close();
}

## disconnect from db
$select->finish();
$insert->finish();
$dbh->disconnect();


## done; report a failed search to the caller (useful under cron)
exit 1 if $searchFailed;
190
191
## getDbh( $db )
##
## Connect to the sqlite database file named by $db and make sure the
## 'seen' table (url, query) is present. Returns the connected database
## handle; dies with a message if the connection or the table creation
## fails.
sub getDbh {
    my $db = shift;

    ## connecting creates the database file when it does not exist yet
    my $dbh = DBI->connect( "dbi:SQLite:$db" )
        or die "getDbh: unable to connect to '$db': "
        . ( $DBI::errstr || 'unknown error' ) . "\n";

    ## create the table only when it is missing. this replaces the old
    ## "does the file exist?" test, which left an existing but empty
    ## database file without the table.
    $dbh->do(
        'create table if not exists seen (url varchar(500), query varchar(500))'
    ) or die "getDbh: unable to create table 'seen' in '$db': "
        . ( $dbh->errstr || 'unknown error' ) . "\n";

    return $dbh;
}
203
204
|