#!/usr/bin/perl # Release history # # 2009-07-07 - Beta 0.3.1.4 # IGNOREUSER config option to ignore everything from a user - links and commands, both # # 2009-07-07 - Beta 0.3.1.3 # LWP::UserAgent object set to a MAXGETSIZE which limits the most it can fetch per HTTP GET # # 2009-06-23 - Beta 0.3.1.2 # !google improved to show link and description in a cleaner fashion # Can now take a config file as the first argument # # 2009-06-22 - Beta 0.3.1.1 # !google will now search google # Case insensitive triggers # !version # !about / !trigger take arguements to get specific value / description # Custom triggers can be added via the config file - can be modified and reloaded on the fly # # 2009-06-22 - Beta 0.3.0 # Added deNaughty to remove bad words -> add a per-channel deNaughty switch? # Added alt_nicks in case one fails # Added the URL host name to the printed response # !about # !triggers # !quit -> Owner only # !reload -> doesn't affect the stuff that happens on startup like nick, channels and nickserv # Reads a hash from a config file and evals to get the settings # Sends off a /msg nickserv identify NSPASSWD # TIMETOLIVE can be used to set how long before the same URL can be repeated # # 2009-06-21 - Beta 0.2.0 # Added a URL_LIMIT to prevent flooding - max URLs titled per line # # 2009-06-21 - Beta 0.1 # Connects to multiple channels # Recognizes http(?:s)?:// and www. urls # Reports HTTP status on non-200 # Reports content type for non-"^text/html.*" # Reports title or lack of # No config file support yet # No extra triggers # Feature list - working on it a little bit at a time # Reads a config file in hash-style # - list of channels to listen for # - per-channel choice to print titles to channel or PM to owner # - specify one owner # - specify a list of people that can operate (ie send commands) # - specify a URL-regex (to overwrite a default) # A trigger-hash with regex's that are triggers and associated function calls # !reload conf file trigger # !shutup/noshutup triggers - per channel # !about trigger # bad-word filtering to prevent kicks # URL-limit per line to prevent spam floods # Limit to the same URL no more than once every $REMEMBER seconds # identify to nickserv package Titler; use Bot::BasicBot; @ISA = 'Bot::BasicBot'; use LWP::UserAgent; use HTML::TokeParser; use Data::Dumper; use warnings; use strict; my %about = ( Author => 'Isaac Good', License => 'MIT/X11', Version => 'Titler - 2009-06-23 - Beta 0.3.1.4', ); my %config; my $CONFIG_FILE = $ARGV[0] || 'config.pl'; my %triggers = ( about => [ \&cmdAbout, 'List info about the bot. Say !about for just one entry.' ], quit => [ \&cmdQuit, 'For the owner only.' ], triggers => [ \&cmdShowTriggers, 'Lists triggers. Do !trigger for details.' ], reload => [ \&loadConfig, 'Reloads the config file. Owner only.' ], google => [ \&googleSearch, 'Search Google!' ], version => [ sub { shift(@_)->say( { channel => shift(@_)->{'channel'}, body => " Version: $about{'Version'}" } ) }, 'Prints the version.' ], ); my $SELF; # --------- Bot::BasicBot callback methods we overwrite # The main function - where we parse incoming messages sub said { my ($self, $args) = @_; return unless( $args->{'body'} ); printf "%-10s | %10s | %s\n", $args->{'channel'}, $args->{'who'}, $args->{'body'} if ( $config{'PRINTCHAT'} ); # Stop here on IGNOREUSER return if ( $config{'IGNOREUSER'} and grep { lc($_) eq lc($args->{'who'}) } @{ $config{'IGNOREUSER'} } ); my $count = 0; my $regex = $config{'REGEX'}; for my $url ( $args->{'body'} =~ /$regex/g ) { next if ( $self->isRecentUrl( $url ) ); $count++; my $response = getUrl($url); my $message; if ( $response->code() != 200 ) { $message = "Failed to fetch document: " . $response->status_line(); } elsif ( $response->header('content-type') !~ qr{^text/html} ) { $message = "Not an HTML document. Content Type: " . $response->header('content-type'); } elsif ( not defined $response->title() ) { $message = "No title found."; } else { $message = "Title: " . $response->title(); } $message = "$message (at " . $response->request->uri()->host() . ")"; $message = deNaughty($message); $self->say( { channel => $args->{'channel'}, body => $message } ); last if ( $count >= $config{'URL_LIMIT'} ); } my $trigger; # Built in triggers for $trigger ( keys %triggers ) { &{ $triggers{$trigger}->[0] }($self, $args) if ( $args->{'body'} =~ /^!$trigger/i ); } # Triggers from the config file for $trigger ( keys %{ $config{'TRIGGERS'} } ) { &{ $config{'TRIGGERS'}->{$trigger}->[0] }($self, $args) if ( $args->{'body'} =~ /^!$trigger/i ); } return 0; } sub connected { $SELF->say( { channel => 'nickserv', body => 'identify ' . $config{'NSPASSWD'} } ) if ( $config{'NSPASSWD'} ); } # --------- Helper functions sub isRecentUrl { my ($self, $url) = @_; return unless( $config{'TIMETOLIVE'} ); $self->{'seen'} = [ grep { $_->{'when'} + $config{'TIMETOLIVE'} > time } @{ $self->{'seen'} } ]; return 1 if ( grep { $_->{'url'} eq $url } @{ $self->{'seen'} } ); push ( @{ $self->{'seen'} }, { when => time, url => $url } ); return; } sub getUrl { my ($url) = @_; # Prepend http:// if needed $url = "http://$url" unless ( $url =~ qr(^http://) ); my $response = $SELF->{'ua'}->get($url); return $response; } sub cmsg { my ( $self, $nick, $str ) = @_; $self->say( { channel => 'msg', who => $nick, body => $str } ); } sub loadConfig { my ($self, $args) = @_; return if ( defined $args and exists $args->{'who'} and $args->{'who'} ne $config{'OWNER'} ); open my $FH, "<", $CONFIG_FILE; my $file = join(" ", <$FH>); %config = eval ( $file ); } sub deNaughty { my ($msg) = @_; my $replace = $config{'CENSOR'}; open my $FH, "<", $config{'BADWORDS'} || return $msg; while (my $line = <$FH>) { chomp $line; # Expand the mIRC '*' to the perl '.*' $line =~ s/[*]/.*/g; $msg =~ s/$line/$replace/ig; } close $FH; return $msg; } # --------- cmds that get !triggered sub cmdAbout { my ($self, $args) = @_; my ($trig, $spec) = split(/\s/, $args->{'body'}); $spec = lc($spec); $spec =~ s/^(.)/uc($1)/e; if ($spec and exists $about{$spec}) { $self->say( { channel => $args->{'channel'}, body => " $spec: $about{$spec}" } ); } else { $self->say( { channel => $args->{'channel'}, body => " $_: $about{$_}" } ) for ( keys %about ); } } sub cmdQuit { my ($self, $args) = @_; if ( $args->{'who'} eq $config{'OWNER'} ) { exit; } else { $self->say( { channel => $args->{'channel'}, body => $args->{'who'} . ": Shoo!" } ); } } sub cmdShowTriggers { my ($self, $args) = @_; my ($trig, $spec) = split(/\s/, $args->{'body'}); $spec = lc($spec); if ($spec and exists $triggers{$spec}) { $self->say( { channel => $args->{'channel'}, body => " $spec: $triggers{$spec}->[1]" } ); } elsif ($spec and exists $config{'TRIGGERS'}->{$spec}) { $self->say( { channel => $args->{'channel'}, body => " $spec: $config{'TRIGGERS'}->{$spec}->[1]" } ); } else { $self->say( { channel => $args->{'channel'}, body => "Default triggers are: " . join(", ", sort keys %triggers ) } ); $self->say( { channel => $args->{'channel'}, body => "Extra triggers are: " . join(", ", sort keys %{ $config{'TRIGGERS'} } ) } ); } } sub googleSearch { my ($self, $args) = @_; my $term = $args->{'body'}; # Remove the first word my @arr = split(/\s/, $term); $term = join('+', @arr[1..$#arr]) || return; # URL encode $term =~ s/\%([A-Fa-f0-9]{2})/pack('C', hex($1))/seg; # Search and parse my $response = $self->{'ua'}->get("http://www.google.com/search?q=$term&hl=en&num=1"); $response = $response->decoded_content(); my $p = HTML::TokeParser->new( \$response ); my $t; while( $t = $p->get_tag('h2') ) { last if ( $p->get_trimmed_text() eq 'Search Results' ); } $t = $p->get_tag('a'); my $url = $t->[1]->{'href'}; my $title = $p->get_phrase(); $p->get_tag('div'); my $description = $p->get_trimmed_text('b'); $self->say( { channel => $args->{'channel'}, body => "[GOOG] $title ( $url )" } ); $self->say( { channel => $args->{'channel'}, body => "Desc: $description" } ); } # --------- Create and start the Bot loadConfig(); $about{'Owner'} = $config{'OWNER'}; $SELF = Titler->new( server => $config{'SERVER'} || die, channels => $config{'CHAN'} || die, nick => $config{'NICK'} || die, alt_nicks => $config{'ANICK'} || die, username => $config{'NAME'} || die, port => $config{'PORT'} || die, ); $SELF->{'ua'} = LWP::UserAgent->new( agent => $config{'USERAGENT' } || die, timeout => $config{'UATIMEOUT' } || die, max_size => $config{'MAXGETSIZE'} || die, ); $SELF->{'seen'} = []; $SELF->run();