txt2HTML


#!/usr/bin/perl




# This Perl program takes files of the form

# pppppp*.txt from the TXTs/ directory

# and assembles them into HTML pages based

# on each place name denoted by pppppp, and

# each file has its own section sorted by

# date with extra versions concatenated under

# the same date.

# --egs







# -------- COLLECT THE DATA TOGETHER --------




# check for an argument...
if (!defined $ARGV[0] || $ARGV[0] eq '') {die "give an argument!!\n";}

# reduce the place name to <=6 letters.
# This has to happen BEFORE the progress message below, which
# interpolates $placeBit (it used to print an empty prefix).
my ($placeBit) = $ARGV[0] =~ /(\w{1,6})/;

# An argument with no word characters would leave $placeBit undefined
# and make the file search below match every place, so refuse it.
if (!defined $placeBit) {
    die "txt2HTML:::::::no usable place prefix in '$ARGV[0]'\n";
}

print "txt2HTML:::::::Finding files with $placeBit prefix\n";

# get a list of files with that 6 letter prefix,
# and store in @files (bare file names, in sorted order)...
# $placeBit only contains \w characters, so it is safe in a glob pattern.
my $txtDir = '../TXTs';
my @paths  = glob("$txtDir/$placeBit*");
my @files;
foreach my $path (sort @paths) {
    next unless -f $path;                  # ignore directories etc.
    my ($name) = $path =~ m{([^/]+)$};
    push(@files, $name) if defined $name;
}
if (!@files) {
    warn "txt2HTML:::::::no files found matching $txtDir/$placeBit*\n";
}

# Reports are keyed by their date line:
#   @dates     - each distinct date, in the order first seen
#   %contents  - date => text of all versions filed under that date
#   $placeName - place name taken from the last file read
my (@dates, %contents, $placeName);

# for each of these files, do the following...
foreach my $openFile (@files) {
    print "txt2HTML:::::::Extracting information from $openFile...\n";

    # open it, and store its lines (without line endings) in @lines...
    my $fh;
    if (!open($fh, '<', "$txtDir/$openFile")) {
        warn "txt2HTML:::::::cannot read $txtDir/$openFile: $!\n";
        next;
    }
    my @lines = <$fh>;
    close($fh);
    # chomp, not chop: a final line without a newline must keep
    # its last character.
    chomp(@lines);

    # line 1 is the place, line 2 the date, the rest is the content...
    my $thisPlace = shift(@lines);
    my $thisDate  = shift(@lines);
    if (!defined $thisDate) {
        warn "txt2HTML:::::::$openFile has no date line, skipping\n";
        next;
    }
    my $thisContent = join(' ', @lines);

    # Extra versions for a date we have already seen are concatenated
    # under that date instead of overwriting the earlier text and
    # listing the date twice.
    if (exists $contents{$thisDate}) {
        $contents{$thisDate} .= ' ' . $thisContent;
    }
    else {
        push(@dates, $thisDate);
        $contents{$thisDate} = $thisContent;
    }

    # and store the placeName...
    $placeName = $thisPlace;
}
$placeName = '' unless defined $placeName;

# Capture the template into 6 code blocks...
my ($b1, $b2, $b3, $b4, $b5, $b6) = codeCapture();

print "txt2HTML:::::::Writing out $placeBit.html...\n";

# open output fileHandle...
my $outPath = "../HTMLs/$placeBit.html";
open(my $out, '>', $outPath)
    or die "txt2HTML:::::::cannot write $outPath: $!\n";

# Print the first header block...
print $out $b1.$placeName."\n".$b2;

# foreach report (i.e. date) create the text...
foreach my $date (@dates) {
    print $out $b3.$date."\n".$b4.$contents{$date}."\n".$b5;
}

# close off the file with the last block...
print $out $b6;

# buffered write errors only show up at close, so check it.
close($out)
    or die "txt2HTML:::::::error writing $outPath: $!\n";










# -------- SUBROUTINES --------




# --- &codeCapture ---

# to split the PAGE_TEMP.html file into the code

# blocks, ($b1, $b2, $b3, $b4, $b5, $b6)

# usage: &codeCapture

# rtrns: 6 string list, as above







sub codeCapture {
    # Loads ../templates/PAGE_TEMP.html into the package array @code
    # (the array &htmlFragGet slices), finds the marker lines and
    # returns the six template fragments ($b1 .. $b6) as a list of
    # exactly six strings.  Dies if the template cannot be read or if
    # a marker is missing or out of order.
    our @code;
    my $templatePath = '../templates/PAGE_TEMP.html';

    open(my $template, '<', $templatePath)
        or die "txt2HTML:::::::cannot read $templatePath: $!\n";
    @code = <$template>;        # plain assignment also clears old lines
    close($template);

# --How To Split Up Your Template File--
# line no.
# 0  --         |       <-.
#          <PLACE_CT>     |__ b1
#               |         |
# lA --    PLACE_CT     <-' <:::::::::::::{$place}
#               |           <-.__ b2
#         </PLACE_CT>         |
# lB --         |       <-. <-'
#          <DATE_CT>      |-- b3
#               |         |
# lC --    DATE_CT      <-' <:::::::::::::{$date}
#               |           <-.
#          </DATE_CT>         |
#               |             |-- b4
#         <CONTENT_CT>        |
#               |             |
# lD --   CONTENT_CT        <-' <:::::::::{$content}
#               |       <-.__ b5
#         </CONTENT_CT>   |
# lE --         |       <-' <-.__ b6 (the rest)
#               |           <-'

    # Scan EVERY line, including the last one, which the old
    # "$d < $#code" condition skipped.  If a marker occurs more than
    # once the last occurrence wins.
    my ($lA, $lB, $lC, $lD, $lE);
    foreach my $d (0 .. $#code) {
        my $now = $code[$d];
        # A placeholder line ends with the bare tag name; \s*$ also
        # accepts CRLF endings and a final line without a newline.
        if ($now =~ /PLACE_CT\s*$/)    {$lA = $d;}
        if ($now =~ /<\/PLACE_CT>/)    {$lB = $d + 1;}
        if ($now =~ /DATE_CT\s*$/)     {$lC = $d;}
        if ($now =~ /CONTENT_CT\s*$/)  {$lD = $d;}
        if ($now =~ /<\/CONTENT_CT>/)  {$lE = $d + 1;}
    }

    # A missing or misplaced marker would otherwise produce inverted
    # ranges and scrambled (or never-ending) fragment extraction.
    my @found = (
        ['PLACE_CT',      $lA],
        ['</PLACE_CT>',   $lB],
        ['DATE_CT',       $lC],
        ['CONTENT_CT',    $lD],
        ['</CONTENT_CT>', $lE],
    );
    foreach my $marker (@found) {
        if (!defined $marker->[1]) {
            die "txt2HTML:::::::marker $marker->[0] not found in $templatePath\n";
        }
    }
    if (!($lA < $lB && $lB <= $lC && $lC < $lD && $lD < $lE)) {
        die "txt2HTML:::::::markers are out of order in $templatePath\n";
    }

    # [from, to) line ranges for b1 .. b6, as in the diagram above.
    my @ranges = (
        [0,       $lA],             #b1
        [$lA + 1, $lB],             #b2
        [$lB,     $lC],             #b3
        [$lC + 1, $lD],             #b4
        [$lD + 1, $lE],             #b5
        [$lE,     scalar(@code)],   #b6
    );

    # Call the helper in scalar context and map "nothing" to '' so an
    # empty fragment can never drop out of the six-element result.
    my @blocks;
    foreach my $range (@ranges) {
        my $frag = htmlFragGet($range->[0], $range->[1]);
        push(@blocks, defined $frag ? $frag : '');
    }
    return @blocks;
}
# --- &htmlFragGet ---
# to get sections of the @code array from
# a to b excluding line b
# usage: &htmlFragGet($a, $b);
# rtrns: the lines $code[$a] .. $code[$b-1] joined into one string;
#        always exactly one string, '' when the range is empty,
#        inverted ($a >= $b) or lies outside @code.
sub htmlFragGet {
    our @code;                      # template lines filled in by &codeCapture
    my ($from, $to) = @_;

    # undef counts as 0, as it did in the numeric comparison before.
    $from = 0 unless defined $from;
    $to   = 0 unless defined $to;

    # Clamp to the array so out-of-range requests cannot index past
    # the ends of @code.
    $from = 0             if $from < 0;
    $to   = scalar(@code) if $to > scalar(@code);

    # An inverted range used to recurse without end, and an empty one
    # returned an empty list in list context; both now yield ''.
    return '' if $from >= $to;

    return join('', @code[$from .. $to - 1]);
}