#!/usr/bin/perl # This program will grab webpages, scrape some content and generate # a JavaScript formatted version of the scraped output, and XHTML 1.0-Strict # output of Wunderground and NWS local forecasts. # # Ken True - http://saratoga-weather.org/ # webmaster@saratoga-weather.org # # This program resides in the WeatherLink Templates directory and runs once an hour via # a Windows XP automatic task. # # the contents of the output JavaScript is processed by WeatherLink as a template with output # file type of '.js' and uploaded as the weather station uploads are scheduled. # # the webpage just has a simple script to place on the page: # # # # # Perl V5.8.0 (806) with LWP library. # # Version 1.09 adds two more outputs: # forecast-inc.htx (WU and NWS forecast icons) # forecast-full-inc.htx (NWS forecast icons with text description) # # Note: the NWS forecast works ONLY with the SF/Monterey website: www.wrh.noaa.gov so it's not # useful outside the West Coast area (probably). # # As with all free programs, use at your own risk ;-) # # Ken True webmaster@saratoga-weather.org # $version = 'Grab-forecast.pl V1.09 - 23-Feb-2006'; require LWP::UserAgent; require HTTP::Headers; $outfilename = './forecast.htx'; $outfilenameHTML = './forecast-inc.htx'; #XHTML 1.0 compliant (hopefully) $outfilenameHTMLFULL = './forecast-full-inc.htx'; # Full NWS forecast $quiet = 0; $|=1; my %CONDS = ( 'PartlyCloudy','PtCldy', 'Partly Cloudy','PtCldy', 'MostlyCloudy','MostCldy', 'Clear','Clr' ); $ua = new LWP::UserAgent; print "$version\n"; open (OUTPUT, ">$outfilename") || die "..unable to open $outfilename: $!\n"; open (HTML, ">$outfilenameHTML") || die "..unable to open $outfilenameHTML: $!\n"; open (HTMLFULL, ">$outfilenameHTMLFULL") || die "..unable to open $outfilenameHTMLFULL: $!\n"; open (LOG,">>forecast.log"); &get_weather_WU; &get_weather_NWS; close OUTPUT; close LOG; close HTML; close HTMLFULL; print STDERR "..wrote to $outfilename and $outfilenameHTML.\n"; exit; #---------------------------------------------------------------------------- sub get_weather_WU { my($req) = new HTTP::Request('GET', 'http://www.wunderground.com/cgi-bin/findweather/getForecast?query=95070'); $req->header('Cache-control' => 'no-cache'); $req->header('Pragma' => 'no-cache'); my($content) = $ua->request($req)->content(); my($hdrs) = $ua->request($req)->headers; my($dt) = scalar localtime(time); print "..$dt X-Cache: ",$hdrs->header('X-Cache'),"\n"; my(@lines) = split(/\n/,$content); # open (IN,"WUNewSample.txt"); # while () { # s/\n//; # push(@lines,$_) if $_; # } my($i) = 0; my($line) = ""; my(@newlines); while ($i<=$#lines) { if ($lines[$i] =~ /5-Day Forecast/) { # $i = $i -2; last; } $i++; } return unless $i; print OUTPUT <, , as of // (Pacific) // var wtext = ''; wtext = '' document.write(wtext); wtext = '
'+ ''+ ' '+ EOH1 ; print HTML <
EOH1A ; my @days = (); my @conds = (); my @temps = (); while ($i<=$#lines) { $line = $lines[$i]; $line =~ s!href="/cgi-bin/findweather!href="http://www.wunderground.com/cgi-bin/findweather!; $line =~ s!'!''!g; $line =~ s!5-Day Forecast!

Weather Underground 5-Day Forecast!; $line =~ s!

!
!; $line =~ s!
!!; $line =~ s!
!
!gi; $line =~ s!id="b"!!; $line =~ s!]+)!!tr>!g; $line =~ s!TD>!td>!g; $line =~ s!TABLE>!table>!g; $line =~ s!bgcolor="#f6f6f6" style="!style="background-color: #F6F6F6; !; if ($line =~ m/href\="/ && $line =~ /\&/) { $line =~ s!\&!\&!g; } print OUTPUT " \'$line\'\+\n" unless $line =~ /DisplayMOS.asp|^\s+for$|ZIP|Today''s|DisplayPollen/; print HTML "$line\n" unless $line =~ /DisplayMOS.asp|^\s+for$|ZIP|Today''s|DisplayPollen/; # Save our data from the WeatherUnderground forecast if ($line =~ m!

([^\<]+)

!) { push(@days,substr($1,0,3)); } if ($line =~ m/align\=center class\=smalltable \>([^\<]+)\]+\>//g; $t =~ s/\°\;//g; $t =~ s/\s+//g; push(@temps,$t); } last if $line =~ m||; $i++; } #print STDERR join(":",@days), "\n"; #print STDERR join(":",@conds), "\n"; #print STDERR join(":",@temps), "\n"; $line = join("\t",join(":",@days),join(":",@conds),join(":",@temps)); #print LOG "$dt\tWUG\t$line\n"; print OUTPUT <Forecast as of $dt Pacific

'; document.write(wtext); EOH2 ; print HTML <

Forecast as of $dt Pacific


EOH2A ; } # end --- get_weather #---------------------------------------------------------------------------- sub get_weather_NWS { my($req) = new HTTP::Request('GET', 'http://www.wrh.noaa.gov/total_forecast/printable_forecast.php?wfo=mtr&zone=caz008&fire=&county=cac085&dgtl=1&lat=37.26389&lon=-122.02194'); $req->header('Cache-control' => 'no-cache'); $req->header('Pragma' => 'no-cache'); my($content) = $ua->request($req)->content(); my($hdrs) = $ua->request($req)->headers; # print $content; # print $hdrs->as_string, "\n"; my($dt) = scalar localtime(time); my($dow) = substr($dt,0,3); print "..$dt X-Cache: ",$hdrs->header('X-Cache'),"\n"; my(@lines) = split(/\n/,$content); my($i) = 0; my($line) = ""; my(@newlines); while ($i<=$#lines) { if ($lines[$i] =~ s/Forecast For: /National Weather Service Forecast for: /) { last; } $i++; } return unless $i; print OUTPUT <'+ ''+ ' '+ EOH1 ; print HTML <
EOH1A ; print HTMLFULL <
EOH1B ; my @days = (); my @conds = (); my @temps = (); my $toggle = 0; $ntabs = 0; while ($i<=$#lines) { $line = $lines[$i]; $line =~ s!/images/total_forecast!http://www.wrh.noaa.gov/images/total_forecast!; $line =~ s!'!"!g; $line =~ s/ //g; $line =~ s!alt="([^<]+)
!alt="$1 !; #Fix NOAA alt= tags for XHTML $line =~ s!
!
!gi; $line =~ s!]+)!([^<]+)!$2!; $line =~ s!([^<]+)!$2!; $line =~ s!!!; $line =~ s!!!; $line =~ s!border=0!border="0"!; if ($line =~ m!\°\;!) { my $t = $line; $t =~ s/\<[^\>]+\>//g; $t =~ s/\°\;F//g; $t =~ s/Lo|Hi//g; $t =~ s/\s+//g; push(@temps,$t); } if ($line =~ m/

|
Night/) { my $t = $line; $t =~ s/\<[^\>]+\>//g; $t =~ s/\s+//g; if ($t) { if ($t =~ /Today|Tonight|Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday/i) { $t = substr($t,0,3); $t = $dow if $t =~ /Tod|Ton/; $t .= "N" if $line =~ /Night/i; } $t = $CONDS{$t} if defined $CONDS{$t}; if ($toggle) { push(@conds,$t); $toggle = 0; } else { push(@days,$t); $toggle = 1; } } } print OUTPUT " \'$line\'\+\n"; print HTML "$line\n"; print HTMLFULL "$line\n"; $ntabs++ if $line =~ '
!!; $line =~ s!
'; last if $ntabs >= 3; $i++; } print OUTPUT <

'; document.write(wtextnws); EOH2 ; print HTML < EOH2A ; print HTMLFULL <

NWS Forecast Details


EOH2B ; #print STDERR join(":",@days), "\n"; #print STDERR join(":",@conds), "\n"; #print STDERR join(":",@temps), "\n"; $line = join("\t",join(":",@days),join(":",@conds),join(":",@temps)); print LOG "$dt\tNWS\t$line\n"; # now process the rest of the forecast for the full page my $doit = 0; while ($i<=$#lines) { $line = $lines[$i]; if ($line =~ m!!
!gi; # $line =~ s!!!; # $line =~ s!^$!!; $line =~ s!([^<]+)!$2!; $line =~ s!!

!; $line =~ s!^$!

!; print HTMLFULL "$line\n"; last if ($line =~ m!! && $doit); $i++; } print HTMLFULL < EOF3C ; } # end --- get_weather