#!/usr/bin/perl
# This PERL program splits up a mail spool of the form...
#
#
# From edward_g_speyer@hotmail.com
# HEADER_STUFF
# HEADER_STUFF
#
#
# content
# content
# content
# [dd-mm-yyyy] with a possible `part#' but not implemented yet
# placeName
#
#
# ______...
# etc.
#
# ...by creating ppppppii.txt files where:
#
# o pppppp is the first (up to) 6 word characters of placeName
# o ii is the day count running on from 01-08: the day of the month
#   for August dates, the day of the month plus 31 otherwise
# o txt is a standard extension
#
# the file contains the place name, the date line and then the content
# lines concatenated to a single line, but this is not strictly
# necessary for the txt2HTML conversion.
# All the files are stored in the relative path ../TXTs/ .
# --egs
# --- PREPARATION ---
print "spool2txt::::::Opening mail spool for conversion...\n";
# Open the SPOOL filehandle directly on the newSpool file.  The handle
# stays a bareword global because &findTags reads from SPOOL by name.
# A spool that cannot be opened is fatal: there is nothing to convert.
open(SPOOL, '<', 'newSpool')
    or die "spool2txt::::::Cannot open newSpool: $!\n";
# REMOVED AS PART OF THE CHANGES IN &findtags --> &placeidExistPrep;
# --- TXT COLLECTION ---
print "spool2txt::::::Processing spool for text files...\n";
# until SPOOL hits EOF, let &findTags pull out one report per call;
# it fills %places, %dates, %contents and @placeids as it goes...
while (!eof(SPOOL)) {
    findTags();
}
close(SPOOL);
# --- OUTPUT TO FILES ---
# One file per collected placeid: place, date and folded content, each
# on its own line.  A placeid that occurs more than once in @placeids is
# simply rewritten with the same (most recent) data held in the hashes.
foreach my $placeid (@placeids) {
    print "spool2txt::::::NEW! Processing --> `$places{$placeid}\' - $dates{$placeid}...\n";
    my $txtFile = "../TXTs/${placeid}.txt";
    # a file that cannot be written is reported and skipped so that the
    # remaining reports are still converted...
    unless (open(PFILE, '>', $txtFile)) {
        warn "spool2txt::::::Cannot write $txtFile: $!\n";
        next;
    }
    print PFILE "$places{$placeid}\n";
    print PFILE "$dates{$placeid}\n";
    print PFILE "$contents{$placeid}\n";
    # buffered write errors only surface at close...
    close(PFILE)
        or warn "spool2txt::::::Error closing $txtFile: $!\n";
}
# --- SUBROUTINES ---
# --- &findTags ---
# to find the next report in SPOOL -- the text between a
# `From ...@hotmail.com' line and the `hotmail' .sig rule (a run of
# underscores) -- and place its data into %places %dates %contents
# and @placeids
# usage: &findTags;
# rtrns: nothing
# notes: reads from the global SPOOL filehandle and sets the global
#        $version to "add" or "new" for the report just parsed.
sub findTags {
    # these variables only survive this code block...
    my (@txt, $line, $sawFrom);
    # find the `From' line...
    while (defined($line = <SPOOL>)) {
        if ($line =~ /^From .*\@hotmail\.com/) {
            $sawFrom = 1;
            last;
        }
        # or stop &findTags if EOF is reached...
        return if eof(SPOOL);
    }
    # nothing left to read at all...
    return unless $sawFrom;
    # then push all lines of text until the beginning
    # of the `hotmail' .sig (i.e. lots of __s) into @txt...
    while (defined($line = <SPOOL>)) {
        last if $line =~ /____/;
        # muppet testing! Did we overshoot or not terminate?...
        # NOTE(review): on an overshoot the next message's `From' line
        # has already been consumed here, so that message is skipped by
        # the following call -- confirm whether that matters.
        if ($line =~ /^From .*\@hotmail\.com/ || eof(SPOOL)) {
            print "spool2txt::::::!!!!!Unterminated e-mail: examining...";
            my ($suspicion) = join(" ", @txt) =~ /(\[\d{2,2}-\d{2,2}-\d{4,4}\].{20,20})/;
            if (!$suspicion) {
                print "malformed date, no text extracted\n";
            }
            else {
                print "helpful text???\`$suspicion\'\n";
            }
            return;
        }
        # remove \n and add to @txt...
        chomp($line);
        push(@txt, $line);
    }
    # discard terminal blankLines (i.e. ^$); the first non-blank line
    # from the end is the place.  The loop also stops when @txt runs
    # out, so a message with no text at all cannot spin forever...
    my $place = '';
    while (@txt && $place =~ /^$/) {
        $place = pop(@txt);
    }
    # extract the date (the line just above the place)...
    my $date = @txt ? pop(@txt) : '';
    # now that the $place and $date vars are got, see if they are legit.
    # BOTH must be valid: a bad date cannot be turned into a placeid, and
    # the place must be a single run of word characters...
    if (
        $date !~ /\[\d{2,2}-\d{2,2}-\d{4,4}\]/
        ||
        $place !~ /^\w+$/
    ) {
        print "spool2txt::::::INVALID ENTRY IN SPOOL!\n";
        return;
    }
    # an echo for mike and me...
    print "spool2txt::::::Found report on --> `$place\' - $date...\n";
    # If this is an append, then flag it so...
    if ($date =~ /\]\&/) { $version = "add"; } else { $version = "new"; }
    # Fold in the content, working backwards until a blank line is
    # reached (or @txt runs out).  The initial " " keeps the folded text
    # byte-identical to what earlier runs produced (it ends up as
    # trailing padding after the last content line)...
    my $content = " ";
    while (defined(my $thisLine = pop(@txt))) {
        last if $thisLine =~ /^$/;
        $content = $thisLine . " " . $content;
    }
    # Create a placeid for this TXT block...
    my $placeid = placeidCreate($place, $date);
    # I HAVE REMOVED THIS CHECK SO ALL E-MAILS ARE CONVERTED
    # USING THE MOST RECENT REPORT AS THE .TXT FILE --egs21
    # if the placeid has not been created yet...
    # REMOVED --> if (!&placeidExist($placeid)) {
    # Store the three pieces of data in %arrays...
    $places{$placeid}   = $place;
    $dates{$placeid}    = $date;
    $contents{$placeid} = $content;
    # and the placeid in the @array...
    push(@placeids, $placeid);
    # REMOVED --> }
}
# --- &placeidCreate ---
# to build a placeid[entifier] from a place name and a date line:
# usage: &placeidCreate($somePlace,$someDate);
# rtrns: somePid string (place stem followed by the day number)
sub placeidCreate {
    my ($somePlace, $someDate) = @_;
    # the stem is the first run of word characters, capped at 6...
    my ($stem) = $somePlace =~ /(\w{1,6})/;
    # pull the day and month out of the leading [dd-mm of the date...
    my ($dd, $mm) = $someDate =~ /\[(\d\d)\-(\d\d)/;
    # August keeps the day exactly as written; any other month is
    # counted on past the 31 days of August...
    my $dayNumber;
    if ($mm == 8) {
        $dayNumber = $dd;
    }
    else {
        $dayNumber = $dd + 31;
    }
    # glue the two halves together...
    return $stem . $dayNumber;
}
# --- &placeidExistPrep ---
# preparation for &placeidExist: appends the placeid of every
# ../TXTs/*.txt file to the global @existingPlaceids
# usage: &placeidExistPrep;
# rtrns: nothing
sub placeidExistPrep {
    # list the TXTs folder with the builtin glob rather than a piped
    # `ls', so there is no unchecked, never-closed pipe handle...
    foreach my $txtPath (glob('../TXTs/*.txt')) {
        # keep the name minus the .txt extension; a path that does not
        # match is skipped instead of pushing a stale capture...
        next unless $txtPath =~ /(\w+)\.txt$/;
        push(@existingPlaceids, $1);
    }
}
# --- &placeidExist ---
# to determine whether a placeid is listed in the global
# @existingPlaceids:
# usage: &placeidExist($somePid);
# rtrns: boolean '1' or '0' (as strings)
sub placeidExist {
    my ($somePid) = @_;
    # if the argument is in @existingPlaceids return '1'; the loop
    # variable is lexical, so nothing leaks out of this sub...
    foreach my $existingPlaceid (@existingPlaceids) {
        return "1" if $somePid eq $existingPlaceid;
    }
    # otherwise return '0'...
    return "0";
}