txt2HTML

#!/usr/bin/perl

# This Perl program takes files of the form
# pppppp*.txt from the TXTs/ directory
# and assembles them into HTML pages based
# on each place name denoted by pppppp, and
# each file has its own section sorted by
# date with extra versions concatenated under
# the same date.
# --egs

# -------- COLLECT THE DATA TOGETHER --------
#
# Input : ../TXTs/<prefix>*   line 1 = place name, line 2 = date,
#                             remaining lines = report text.
# Output: ../HTMLs/<prefix>.html, built from the six template blocks
#         returned by &codeCapture.

# check for an argument...
if (!$ARGV[0]) {die "give an argument!!\n";}

# reduce the place name to <=6 letters.  Only \w characters survive, so the
# prefix is safe to splice into the file names built below.
my ($placeBit) = $ARGV[0] =~ /(\w{1,6})/;

# without this check an argument such as "--" would leave the prefix empty
# and every file in TXTs/ would be merged into "../HTMLs/.html".
if (!defined $placeBit) {
    die "txt2HTML: no usable place prefix in '$ARGV[0]'\n";
}

# announce the prefix only once it has actually been worked out...
print "txt2HTML:::::::Finding files with $placeBit prefix\n";

# get a list of files with that 6 letter prefix, and store the bare file
# names in @files.  The list is sorted explicitly so that the processing
# order (and therefore the section order on the page) is deterministic.
my @files;
foreach my $path (sort { $a cmp $b } glob("../TXTs/$placeBit*")) {
    next unless -f $path;                       # ignore directories etc.
    push(@files, $1) if $path =~ m{TXTs/(.*)};
}
if (!@files) {
    warn "txt2HTML: no files found matching ../TXTs/$placeBit*\n";
}

# @dates keeps each date once, in order of first appearance; %contents maps
# a date to the text shown under it.
my @dates;
my %contents;
my $placeName = '';

# for each of these files, do the following...
foreach my $fileName (@files) {
    print "txt2HTML:::::::Extracting information from $fileName...\n";

    # open it, and store its lines (without line endings) in @lines...
    my $inPath = "../TXTs/$fileName";
    open(my $in, '<', $inPath)
        or die "txt2HTML: cannot read $inPath: $!\n";
    my @lines;
    while (my $line = <$in>) {
        # chomp, not chop: a final line without a newline must keep its
        # last character.
        chomp($line);
        push(@lines, $line);
    }
    close($in);

    # a report needs at least the place line and the date line...
    if (@lines < 2) {
        warn "txt2HTML: skipping $fileName (no place/date header)\n";
        next;
    }

    # sort the data from @lines...
    my $thisPlace   = shift(@lines);
    my $thisDate    = shift(@lines);
    my $thisContent = join(' ', @lines);

    # Everything is processed by pseudoDate.  Extra versions carrying a
    # date that was already seen are concatenated under that date instead
    # of replacing the earlier text and printing the section twice.
    if (exists $contents{$thisDate}) {
        $contents{$thisDate} .= "\n" . $thisContent;
    }
    else {
        push(@dates, $thisDate);
        $contents{$thisDate} = $thisContent;
    }

    # and store the placeName (the last file read wins)...
    $placeName = $thisPlace;
}

# Capture the template into 6 code blocks...
my ($b1, $b2, $b3, $b4, $b5, $b6) = codeCapture();

print "txt2HTML:::::::Writing out $placeBit.html...\n";

# open output fileHandle...
my $outPath = "../HTMLs/$placeBit.html";
open(my $out, '>', $outPath)
    or die "txt2HTML: cannot write $outPath: $!\n";

# Print the first header block...
print $out $b1 . $placeName . "\n" . $b2;

# foreach report (i.e. date) create the text...
foreach my $date (@dates) {
    print $out $b3 . $date . "\n" . $b4 . $contents{$date} . "\n" . $b5;
}

# close off the file with the last block...
print $out $b6;

# buffered write errors (e.g. a full disk) only show up at close time...
close($out) or die "txt2HTML: error writing $outPath: $!\n";

# -------- SUBROUTINES --------

# --- &codeCapture ---
# to split the PAGE_TEMP.html file into the code
# blocks, ($b1, $b2, $b3, $b4, $b5, $b6)
# usage: &codeCapture
# rtrns: 6 string list, as above

sub codeCapture {
    # Reads ../templates/PAGE_TEMP.html and cuts it into the six fragments
    # that surround the three placeholder lines.  Takes no arguments.
    #
    # Returns a list of exactly six strings ($b1 .. $b6); a fragment that
    # covers no lines is returned as '' so the positions never shift.
    # Dies if the template cannot be read or its markers are missing or
    # out of order.
    #
    # Marker lines (0-based indices into @code):
    #   lA  line ending in "PLACE_CT"    -> replaced by the place name
    #   lB  line after "</PLACE_CT>"
    #   lC  line ending in "DATE_CT"     -> replaced by the date
    #   lD  line ending in "CONTENT_CT"  -> replaced by the content
    #   lE  line after "</CONTENT_CT>"
    #
    # Fragments (start inclusive, end exclusive):
    #   b1 = [0,    lA)    b2 = [lA+1, lB)    b3 = [lB, lC)
    #   b4 = [lC+1, lD)    b5 = [lD+1, lE)    b6 = [lE, end of file)
    my $template = '../templates/PAGE_TEMP.html';

    # @code stays a package global because &htmlFragGet reads it.  It is
    # emptied first so that a second call does not append to the old copy.
    @code = ();
    open(my $tfh, '<', $template)
        or die "txt2HTML: cannot read $template: $!\n";
    while (my $line = <$tfh>) {push(@code, $line);}
    close($tfh);

    # Scan every line, including the last one.  If a marker occurs more
    # than once the last occurrence wins.
    my ($lA, $lB, $lC, $lD, $lE);
    for my $i (0 .. $#code) {
        my $now = $code[$i];
        if ($now =~ /PLACE_CT\n/)       {$lA = $i;}
        if ($now =~ /<\/PLACE_CT>/)     {$lB = $i + 1;}
        if ($now =~ /DATE_CT\n/)        {$lC = $i;}
        if ($now =~ /CONTENT_CT\n/)     {$lD = $i;}
        if ($now =~ /<\/CONTENT_CT>/)   {$lE = $i + 1;}
    }

    # Refuse a malformed template: with a missing or misplaced marker the
    # fragment ranges below would be meaningless.
    my @markers = (
        ['PLACE_CT',      $lA],
        ['</PLACE_CT>',   $lB],
        ['DATE_CT',       $lC],
        ['CONTENT_CT',    $lD],
        ['</CONTENT_CT>', $lE],
    );
    for my $marker (@markers) {
        die "txt2HTML: marker $marker->[0] not found in $template\n"
            unless defined $marker->[1];
    }
    unless ($lA < $lB && $lB <= $lC && $lC < $lD && $lD < $lE) {
        die "txt2HTML: markers in $template are out of order\n";
    }

    # Fetch each fragment in scalar context and map "nothing" to '' so
    # that the caller always receives six elements.
    my @ranges = (
        [0,       $lA],            #b1
        [$lA + 1, $lB],            #b2
        [$lB,     $lC],            #b3
        [$lC + 1, $lD],            #b4
        [$lD + 1, $lE],            #b5
        [$lE,     scalar(@code)],  #b6
    );
    my @blocks;
    for my $range (@ranges) {
        my $fragment = htmlFragGet($range->[0], $range->[1]);
        push(@blocks, defined $fragment ? $fragment : '');
    }
    return @blocks;
}

# --- &htmlFragGet ---
# to get sections of the @code array from
# a to b excluding line b
# usage: &htmlFragGet($a, $b);
sub htmlFragGet {
    # Joins lines $from .. $to-1 of the package-global @code array into
    # one string.
    #   $_[0]  index of the first line to include
    #   $_[1]  index one past the last line to include
    # Always returns exactly one string.  An empty or inverted range gives
    # '' (not an empty list), so a call placed inside a list never drops an
    # element.  An end index past the array is clamped to its length.
    my ($from, $to) = @_;
    $from = 0 unless defined $from;
    $to   = 0 unless defined $to;

    # never read past the end of the template
    $to = scalar(@code) if $to > scalar(@code);

    return '' if $from >= $to;
    return join('', @code[$from .. $to - 1]);
}