#!/usr/bin/perl -w
# laconica2IRC.pl
# Posts your dents to IRC.
# Based on an RSS->IRC script customized for the Identica microblogging service. 
#
# Petr Baudis (c) 2004, public domain
# Slightly inspired by Stefan "tommie" Tomanek's newsline.pl.
# Modified by Jean-Marc Liotier for the Laconi.ca microblogging service
#
# Initial version modified by Jean-Marc Liotier was found in
# IRC Hacks: 100 Industrial-Strength Tips & Tools
# By Paul Mutton
# Published by O'Reilly, 2004
# ISBN 059600687X, 9780596006877
#
# Modifications by Jean-Marc Liotier to rssbot.pl found in "IRC Hacks"
# to let his Laconica stream pass as himself on an IRC channel :
# - 20090122 IRC password authentication
# - 20090122 Eliminated a buggy string sanitation loop
# - 20090122 Only print the title
# - 20090122 Removal of Laconi.ca user name and extraneous punctuation
# - 20090513 Optional @replies filtering
#
# Known bugs and limitations :
# - JML 20090123 Dies gruesomely if RSS feed is unreachable. A try/catch would
# be nice.
# - JML 20090122 Does not persist anything to disk, so if it exits and restarts
# it will ignore the items that appeared while it was not running: the first
# fetch after a start only records the current state of the feed.
#
# Unusual dependencies :
# Net::IRC alias libnet-irc-perl in Debian
# XML::RSS alias libxml-rss-perl in Debian
#
# The original rssbot.pl was a way to announce items from an RSS stream in an
# IRC channel. What I wanted is to have my IRC user "say" my own Identi.ca
# entries just as if I was typing them myself.
#
# So once I got the original script working (after isolating and commenting out
# a malfunctioning string sanitation loop) I modified the output to include
# only the content, and filtered the user name prefix out of it.
#
# I wanted my own IRC user "say" the output, and that part was very easy
# because I use Bip, an IRC proxy that supports multiple clients on one irc
# server connection. This script was just going to be another client, and that
# is why I added password authentication. Bip is available in Debian and is
# very handy : I usually have an IRC client at home, one in the office,
# occasionally a CGI-IRC, rarely a mobile client and now this script - and to
# the dwellers of my favorite IRC channel there is no way to tell. And
# whichever client I choose, I never miss anything thanks to logging and
# replay on login. Screen with a command-line IRC client provides part of this
# functionality, but the zero-maintenance Bip does so much more and is so
# reliable that one has to wonder if my friends cling to Irssi and Screen out
# of sheer traditionalism.
#
# All that remained to do was to launch the script in a sane way. To control
# this sort of simple and permanently executed piece of code and keep it from
# misbehaving, Daemon is a good way. Available in Debian, it proved its worth
# when the RSS file went missing during the Identi.ca upgrade and the script
# crashed every time it tried to access it for lack of exception catching. Had I
# simply put it in an infinite loop, it would have hogged significant
# resources just by running in circles like a headless chicken. Daemon not
# only restarted it after each crash, but also killed it after a set number of
# retries in a set duration - thus preventing any interference with the rest of
# what runs on our server. Here is the Daemon launch command that I have used :
#
# #!/bin/bash
# path=/usr/local/bin/laconica2IRC
# daemon -a 16 -L 16 -M 3 -D $path -N -n laconica2IRC_JML -r -O $path/laconica2IRC.log -o $path/laconica2IRC.log $path/laconica2IRC.pl
#
# And that's it... Less cut and paste from Identi.ca to my favorite IRC
# hangout, and my IRC friends who have not yet adopted microblogging don't feel
# left out of my updates anymore. And I can still jump into IRC from time to
# time for a real time chat. I have the best of both worlds - what more could I
# ask ?

use strict;

### Configuration section.
# Every setting is a package global so that the handlers defined further down
# in this file can read it.

# Nick to connect with, and the server, port and channel to talk to.
our $nick    = 'mynick';
our $server  = 'irc.example.net';
our $port    = 6667;
our $channel = '#test';

# Delay between two checks of the RSS feed, in seconds.
our $refresh = 11 * 60;

# Password can be left empty if you don't authenticate to your IRC server.
# Bip password format is 'nick:password:channel' but yours may be different -
# probably a simple password. Public IRC servers require no password, so this
# field is empty by default.
our $password = '';

# Account whose feed is relayed. It is also the "username: " prefix that gets
# removed from each item before it is posted.
our $identica_user = 'liotier';

# I used it with Identi.ca but you could use it with any Laconi.ca instance.
our $rss_url = "http://identi.ca/${identica_user}/rss";

# Show @replies ("yes" or "no"). Only the exact value "no" hides them; it is
# the value shipped here.
our $replies = 'no';

### Preamble.
use POSIX;
use LWP::UserAgent;
use Net::IRC;
use XML::RSS;

### IRC connection initialization.
# $irc is the Net::IRC object whose event loop is started at the end of the
# script; $conn is the single server connection every handler and the RSS
# poller talk through.
our ($irc, $conn);
$irc = Net::IRC->new;
print "Connecting to server ".$server.":".$port." with nick ".$nick."...\n";
my %conn_args = (
   Nick    => $nick,
   Server  => $server,
   Port    => $port,
   Ircname => 'IRC RSS announcement automaton',
);
# Only hand over a password when one is configured.
# NOTE(review): Net::IRC appears to send PASS whenever the Password argument
# is defined, even if it is empty - confirm against the installed version.
$conn_args{Password} = $password if (defined $password and length $password);
# newconn is documented to return a false value when the connection cannot be
# established; stop here with a readable message instead of failing later on
# the first method call made on an undefined $conn.
$conn = $irc->newconn (%conn_args)
   or die "Can't connect to IRC server ".$server.":".$port."\n";
# Handler for the server's welcome event: as soon as we are connected, log
# the fact on standard output and ask to join the configured channel.
# Receives the connection object first; the event object is not needed.
sub on_connect {
   my $connection = shift;
   print "Joining channel ${channel}...\n";
   $connection->join ($channel);
}
$conn->add_handler (welcome => \&on_connect);

# Handler for the end-of-names event: it only logs on standard output that
# the channel has been joined. Neither handler argument is used.
sub on_joined {
   print "Joined channel ${channel}...\n";
}
$conn->add_handler (endofnames => \&on_joined);

# It is a good custom to reply to the CTCP VERSION request.
#
# Arguments:
#   $self  - the connection the request arrived on
#   $event - the request; only the sender's nick is used
#
# The reply text has to start with the CTCP tag it answers ("VERSION").
# NOTE(review): Net::IRC's ctcp_reply is expected to send its second argument
# as the complete CTCP payload without adding the tag itself - confirm against
# the Net::IRC::Connection documentation.
sub on_cversion {
   my ($self, $event) = @_;
   $self->ctcp_reply ($event->nick, 'VERSION IRC RSS announcement automaton');
}
$conn->add_handler ('cversion', \&on_cversion);

### The RSS feed
use vars qw (@items);

# Fetches the RSS from server and returns a list of RSS items.
#
# The feed address is read from the global $rss_url. Returns the items of the
# feed (the hash references built by XML::RSS) in feed order, or an empty list
# when the download fails or the document cannot be parsed. Both failures are
# reported on standard output, like the rest of this script's logging.
sub fetch_rss {
   my $ua = LWP::UserAgent->new (env_proxy => 1, keep_alive => 1, timeout => 30);
   my $response = $ua->get ($rss_url);
   unless ($response->is_success) {
      print "Could not fetch RSS feed: ".$response->status_line."\n";
      return;
   }

   my $rss = XML::RSS->new;
   # The parser throws an exception on a document it cannot read; trap it so
   # that a temporarily broken feed only costs us one refresh period.
   my $parsed = eval { $rss->parse ($response->content); 1 };
   unless ($parsed) {
      my $error = $@ || 'unknown error';
      chomp $error;
      print "Could not parse RSS feed: ".$error."\n";
      return;
   }

   my @fetched = @{$rss->{items} || []};
   foreach my $item (@fetched) {
      # Make sure to strip any possible newlines and similar stuff, so that a
      # title always fits on a single IRC line. (An earlier version of this
      # loop used "=" instead of "=~" and wiped the titles.)
      $item->{title} =~ s/\s/ /g if (defined $item->{title});
   }
   return @fetched;
}

# Attempts to find some newly appeared RSS items.
#
# Arguments:
#   $old - reference to the list of items seen on the previous check
#   $new - reference to the list of items just fetched
# Both lists are assumed to be ordered newest first.
#
# Returns the items of @$new located before the first item whose title equals
# the title of $old->[0]. Returns the empty list when @$old is empty (first
# run), when @$new is empty, or when nothing changed. Returns all of @$new
# when the old head item is no longer present. Neither list is modified.
sub delta_rss {
   my ($old, $new) = @_;
   # If @$old is empty, it means this is the first run and
   # we will therefore not do anything.
   return () unless ($old and @$old);
   # Nothing fetched means nothing new. Returning here also keeps us from
   # touching $new->[0], which would create a bogus element in the caller's
   # array.
   return () unless ($new and @$new);

   # We take the first item of @$old and look for it in @$new. Anything before
   # its position in @$new is a newly appeared item.
   my $sync = $old->[0]->{title};
   $sync = '' unless (defined $sync);

   my $fresh = 0;
   foreach my $item (@$new) {
      # We are comparing the titles which might not be 100% reliable but RSS
      # streams really should not contain multiple items with same title.
      # A missing title is treated as an empty one.
      my $title = defined $item->{title} ? $item->{title} : '';
      last if ($title eq $sync);
      $fresh++;
   }
   return @$new[0 .. $fresh - 1];
}

# Check RSS feed periodically.
#
# Fetches the feed and posts to $channel every item that appeared since the
# previous successful fetch, in reverse feed order (oldest first when the feed
# lists its newest item first). It then remembers the fetched items in @items
# and arms an alarm so that it runs again in $refresh seconds. A failed or
# empty fetch leaves @items untouched. Installed as the ALRM signal handler;
# its arguments are ignored.
sub check_rss {
   print "Checking RSS feed [".$rss_url."]...\n";
   my @new_items = fetch_rss ();
   if (@new_items) {
      my @delta = delta_rss (\@items, \@new_items);
      foreach my $item (reverse @delta) {
         # Only the title is posted; other attributes of the item (such as
         # its link) are available if you want them.
         my $title = $item->{title};
         next unless (defined $title);
         # Laconi.ca specific: remove the "username: " prefix. The user name
         # is quoted and the match is tied to the start of the line, so that
         # neither regex metacharacters in the name nor the same text further
         # into the message can make us cut the wrong thing.
         $title =~ s/^\s*\Q$identica_user\E: //;
         # Optional @replies filtering. This should be filtered earlier in
         # the flow, but this works.
         next if ($replies eq "no" and $title =~ /^\@/);
         $conn->privmsg ($channel, $title);
      }
      @items = @new_items;
   }
   alarm $refresh;
}

# Polling is driven by the ALRM signal: check_rss ends by arming a new alarm,
# so the handler has to be installed before the first call below.
$SIG{ALRM} = \&check_rss;
# First check. Nothing has been remembered yet, so it posts nothing and only
# records the current state of the feed.
check_rss();

# Start the IRC loop.
$irc->start;