spool2txt

#!/usr/bin/perl

# This PERL program splits up a mail spool of the form...
#
#
# From edward_g_speyer@hotmail.com
# HEADER_STUFF
# HEADER_STUFF
#
#
# content
# content
# content
# [dd-mm-yyyy] with a possible `part#' but not implemented yet
# placeName
#
#
# ______...
# etc.
#
# ...by creating ppppppii.txt files where:
#
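# o pppppp is the first run of (up to) 6 word characters of placeName
# o ii is the day of the month for August dates, or the day of the
#   month plus 31 for any other month (a day count starting at 01-08)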
# o txt is a standard extension
#
# the file contains the place, the date and then the content lines
# concatenated to a single line, but this is not strictly necessary
# for the txt2HTML conversion.
# All the files are stored in the relative path ../TXTs/ .
# --egs

# --- PREPARATION ---

print "spool2txt::::::Opening mail spool for conversion...\n";

# Open the SPOOL filehandle directly on the newSpool file (no need to
# pipe it through `cat'). Without the spool there is nothing to do, so
# give up straight away instead of silently producing no output.
open (SPOOL, "<", "newSpool")
    or die "spool2txt::::::Cannot open newSpool: $!\n";
# REMOVED AS PART OF THE CHANGES IN &findtags --> &placeidExistPrep;

# --- TXT COLLECTION ---

print "spool2txt::::::Processing spool for text files...\n";

# until the spool hits EOF, let &findTags consume one e-mail at a time;
# it fills %places, %dates, %contents and @placeids as it goes...
while (!eof(SPOOL)) {
    findTags();
}
close (SPOOL);

# --- OUTPUT TO FILES ---

# Write one ../TXTs/<placeid>.txt per collected placeid: the place, the
# date and the folded content, one per line.
foreach my $placeid (@placeids) {

    print "spool2txt::::::NEW! Processing --> `$places{$placeid}\' - $dates{$placeid}...\n";

    # a file that cannot be written is reported and skipped so that the
    # remaining reports are still converted...
    if (!open (PFILE, ">", "../TXTs/$placeid.txt")) {
        warn "spool2txt::::::Cannot write ../TXTs/$placeid.txt: $!\n";
        next;
    }
    print PFILE "$places{$placeid}\n";
    print PFILE "$dates{$placeid}\n";
    print PFILE "$contents{$placeid}\n";

    # buffered write errors only surface at close...
    close (PFILE)
        or warn "spool2txt::::::Error closing ../TXTs/$placeid.txt: $!\n";
}

# --- SUBROUTINES ---

# --- &findTags ---
# to read the next e-mail from the SPOOL filehandle and store its
# report in %places, %dates and %contents (keyed by placeid), pushing
# the placeid onto @placeids.
#
# An e-mail starts at a `From ...@hotmail.com' line and ends at the
# first line containing `____' (the hotmail .sig). Working backwards
# from the .sig: the last non-blank line is the place, the line before
# it is the date, and the lines before that (back to the nearest blank
# line) are the content.
#
# Unterminated or invalid e-mails are reported on STDOUT and skipped.
#
# usage: &findTags;
# rtrns: nothing
sub findTags {
    # these variables only survive this code block...
    my (@txt, $line, $place, $date, $content, $placeid, $suspicion, $thisLine);

    # find the `From' line, unless the previous call overshot into this
    # e-mail and has therefore already consumed it...
    if ($findTagsFromSeen) {
        $findTagsFromSeen = 0;
    }
    else {
        do {
            $line = <SPOOL>;

            # or stop &findTags if EOF is reached...
            return unless defined($line);
        } until ($line =~ /^From .*\@hotmail\.com/);
    }

    # then push all lines of text until the beginning
    # of the `hotmail' .sig (i.e. lots of __s) into @txt...
    while (defined($line = <SPOOL>) && $line !~ /____/) {

        # muppet testing! Did we overshoot or not terminate?...
        my $overshot = ($line =~ /^From .*\@hotmail\.com/);
        if ($overshot || eof(SPOOL)) {

            # the `From' line just read belongs to the NEXT e-mail;
            # flag it so the next call does not skip that e-mail...
            $findTagsFromSeen = 1 if $overshot;

            print "spool2txt::::::!!!!!Unterminated e-mail: examining...";
            ($suspicion) = join(" ", @txt) =~ /(\[\d{2,2}-\d{2,2}-\d{4,4}\].{20,20})/;
            if (!$suspicion) {
                print "malformed date, no text extracted\n";
            }
            else {
                print "helpful text???\`$suspicion\'\n";
            }
            return;
        }

        # remove \n (and only \n) and add to @txt...
        chomp($line);
        push(@txt, $line);
    }

    # discard terminal blank lines (i.e. ^$): the first non-blank line
    # from the end is the place. The defined() test stops the loop when
    # @txt runs out (an empty or all-blank e-mail)...
    do {
        $place = pop(@txt);
    } while (defined($place) && $place =~ /^$/);

    # extract the date...
    $date = pop(@txt);

    # now that the $place and $date vars are got, see if they are legit:
    # the date must look like [dd-mm-yyyy] and the place must contain at
    # least one word character, otherwise &placeidCreate cannot build a
    # meaningful placeid from them...
    if (
        !defined($place)
        || !defined($date)
        || $date !~ /\[\d{2,2}-\d{2,2}-\d{4,4}]/
        || $place !~ /\w/
    ) {
        print "spool2txt::::::INVALID ENTRY IN SPOOL!\n";
        return;
    }

    # an echo for mike and me...
    print "spool2txt::::::Found report on --> `$place\' - $date...\n";

    # If this is an append (date followed by `&'), then flag it so.
    # $version is a global; nothing else in this file reads it...
    $version = ($date =~ /\]\&/) ? "add" : "new";

    # Fold in the content, last line first, until a blank line (or the
    # start of @txt) is reached. Starting from a single space keeps the
    # stored text byte-for-byte what earlier versions produced...
    $content = " ";
    while (defined($thisLine = pop(@txt)) && $thisLine !~ /^$/) {
        $content = $thisLine . " " . $content;
    }

    # Create a placeid for this TXT block...
    $placeid = placeidCreate($place, $date);

    # I HAVE REMOVED THIS CHECK SO ALL E-MAILS ARE CONVERTED
    # USING THE MOST RECENT REPORT AS THE .TXT FILE --egs21

    # if the placeid has not been created yet...
    # REMOVED --> if (!&placeidExist($placeid)) {

    # Store the three pieces of data in %arrays...
    $places{$placeid}   = $place;
    $dates{$placeid}    = $date;
    $contents{$placeid} = $content;

    # and the placeid in the @array...
    push(@placeids, $placeid);

    # REMOVED --> }

    return;
}

# --- &placeidCreate ---
# to build a placeid[entifier] out of a place name and a date string:
# the first run of up to six word characters of the place, followed by
# a day number taken from the `[dd-mm' part of the date.
# usage: &placeidCreate($somePlace,$someDate);
# rtrns: somePid string
sub placeidCreate {
    my ($somePlace, $someDate) = @_;

    # the leading (at most six) word characters of the place name...
    my ($stem) = $somePlace =~ /(\w{1,6})/;

    # the day and month digits out of [xx-xx-xxxx]...
    my ($day, $month) = $someDate =~ /\[(\d\d)\-(\d\d)/;

    # August dates keep their day digits untouched; every other month
    # is counted as the day plus the 31 days of August...
    my $offset;
    if ($month == 8) {
        $offset = $day;
    }
    else {
        $offset = $day + 31;
    }

    # glue the two halves together...
    return $stem . $offset;
}

# --- &placeidExistPrep ---
# preparation for &placeidExist: appends the placeid of every
# ../TXTs/*.txt file (its name minus the .txt extension) to
# @existingPlaceids.
# usage: &placeidExistPrep;
# rtrns: nothing
sub placeidExistPrep {
    # glob() lists the files without spawning `ls', so there is no pipe
    # to check or close, and an empty folder simply yields no entries...
    foreach my $path (glob("../TXTs/*.txt")) {

        # only push when the name really matched, so a stale $1 from an
        # earlier match can never be stored...
        if ($path =~ /(\w+)\.txt$/) {
            push(@existingPlaceids, $1);
        }
    }

    return;
}

# --- &placeidExist ---
# to determine whether a placeid is already listed in the global
# @existingPlaceids:
# usage: &placeidExist($somePid);
# rtrns: boolean '1' or '0' (as strings)
sub placeidExist {
    my ($somePid) = @_;

    # if the argument is in @existingPlaceids return '1'...
    foreach my $existingPlaceid (@existingPlaceids) {
        return "1" if $somePid eq $existingPlaceid;
    }

    # otherwise return '0'...
    return "0";
}