#!/bin/sh
#if [ -x /usr/bin/perl5.00503 ]; then
# exec perl5.00503 -S -x -- "$0" "$@"
#else
exec perl -S -CSD -x -- "$0" "$@"
#fi
#!perl -w -CSD
# $Id: format-people-html,v 1.39 2022/10/13 13:51:54 jbw Exp $
use utf8;
use English;
use strict;
use User::pwent;
my $JBW_Home;
BEGIN {
my $pw = getpwnam('jbw') || getpwnam('user') || die "Can't find user jbw or user: $OS_ERROR";
$JBW_Home = $pw->dir;
push (@INC, "$JBW_Home/share/share/perl");
push (@INC, "$JBW_Home/church/lib/perl"); }
use JBW::Utils;
use Getopt::Long;
require TextDB;
#print STDERR "A\n";
#my $List_File = glob ('~/admin/people/people');
my $List_File = glob ('~/{jbw-cvs/,}admin/people/peopl[e]');
#print STDERR "List_File: [$List_File]\n";
#exit 1;
my $Short_Prog_Name = $PROGRAM_NAME;
$Short_Prog_Name =~ s(.*/)();
#print STDERR "B\n";
sub get_people_hash {
my $list_file = shift;
my %file_hash;
my @settings = (key_field => 'key',
repeat_join => ', ',
field_name => '^([^\n]*?[^\s][^\n]*?):\s*');
#print STDERR "F\n";
tie %file_hash, 'TextDB', $list_file, "h", @settings;
#print STDERR "G\n";
# Why does the next line take several minutes under Perl 5.6.0?
#my %real_hash = %file_hash;
my %real_hash;
my ($key, $val);
while (($key,$val) = each %file_hash) {
$real_hash{$key} = $val; }
#print STDERR "H\n";
untie %file_hash;
#print STDERR "I\n";
return \%real_hash; }
# Complaints about TextDB:
#
# * Can't forbid some field names from being repeated within a record.
# * Can't parse one field differently from another.
# * Can't complain about text not belonging to a valid record.
# * Can't complain about records without keys when using key_field.
# * Can't complain about two records with same key.
sub dump_hash_hash {
my $hashref = shift;
foreach my $key (sort (keys (%$hashref))) {
print "key: [$key]\n";
foreach my $field_name (sort (keys %{$hashref->{$key}})) {
print "field_name: [$field_name], ";
print "value: [$hashref->{$key}->{$field_name}]\n"; }
print "\n"; }}
sub dump_hash {
my $hashref = shift;
foreach my $key (sort (keys (%$hashref))) {
print "key: [$key]\n";
print "value: [$hashref->{$key}]\n"; }}
sub format_one_person_html {
my ($Key, $people_hash_ref) = @_;
my $Record;
if (! ($Record = $people_hash_ref->{$Key})) {
die "no record for key: [$Key]"; }
if (! $Record->{'name'}) {
die "record without name for key: [$Key]"; }
my $Line = '
';
#$Line .= "";
if ($Record->{'url'}) {
$Line .= "{'url'}\">"; }
$Line .= $Record->{'name'};
if ($Record->{'url'}) {
$Line .= ''; }
if ($Record->{'institution'}) {
$Line .= " - $Record->{'institution'}" }
if ($Record->{'e-mail'} && ! $Record->{'url'}) {
my $email = $Record->{'e-mail'};
$email =~ m/^([^@]*)@(.*)$/;
my $mbox = $1;
my $host = $2;
$host =~ s/\./ dOt /g;
$Line .= " (e-mail: $mbox aT $host)";
#$Line .= " ({'e-mail'}\">e-mail)";
}
$Line .= "\n";
if ($Record->{'topic'} || $Record->{'note'}) {
$Line .= "";
if ($Record->{'topic'}) {
$Line .= $Record->{'topic'}; }
if ($Record->{'note'}) {
$Line .= " ($Record->{'note'})"; }
$Line .= "\n"; }
return $Line; }
sub format_people_html_list {
my ($list_name, $people_hash_ref) = @_;
#print STDERR "list_name: [$list_name]\n";
my $HTML = '';
#print STDERR "Keys: @{[keys %$people_hash_ref]}\n";
foreach my $key (sort (keys (%$people_hash_ref))) {
#print STDERR "key: [$key]\n";
if ((defined $$people_hash_ref{$key}{'list'})
&& ($$people_hash_ref{$key}{'list'} =~ m/(^| )$list_name($|,)/)) {
#print STDERR "key in list\n";
$HTML .= format_one_person_html($key,$people_hash_ref);}}
return $HTML; }
sub update_lists_in_file {
my ($html_file, $people_hash_ref) = @_;
with_file_contents $html_file, sub {
s{(\n).*^( *)}{
#(print STDERR "2: $2\n"),
$1 . format_people_html_list($2, $people_hash_ref) . $3
}mseg; }; }
sub generate_abbrev_list {
my ($names) = @_;
my (@abbrev_list);
#print STDERR "names: [$names]\n";
while ($names =~ s/([A-Z])(?:[^ ]*[^ .]) /$1. /) {
#print STDERR "names after 1 more abbrev: [$names]\n";
push (@abbrev_list, $names);
# Also do all the abbreviations without the periods.
my $names_no_periods = $names;
$names_no_periods =~ s/([A-Z])\. /$1 /g;
push (@abbrev_list, $names_no_periods); }
return @abbrev_list; }
sub all_name_abbreviations {
my ($name, $surname) = @_;
my %abbrevs;
#print STDERR "name: [$name]\n";
$abbrevs{$name} = 1;
if ((defined $surname) && ($name =~ m/\Q$surname/)) {
my $prefix = $PREMATCH;
my $suffix = $POSTMATCH;
#print STDERR "surname: [$surname], prefix: [$prefix], suffix: [$suffix]\n";
$abbrevs{$surname} = 1;
$abbrevs{ucfirst ($surname)} = 1;
if ($prefix !~ m/^\s*$/) {
if ($suffix !~ m/^\s*$/) {
print STDERR "name: [$name], surname: [$surname]\n";
print STDERR "prefix: [$prefix], suffix: [$suffix]\n";
die "unhandled"; }
foreach my $abbrev_prefix (generate_abbrev_list ($prefix)) {
$abbrevs{$abbrev_prefix . $surname} = 1; }}
elsif ($suffix !~ m/^\s*$/) {
# I don't know whether it is sensible to abbreviate
# Chinese names, so I won't.
1; }
else {
# WTF? Surname only? Indonesian perhaps?
1; }}
else {
# We guess each possible division into given name and
# surname.
foreach my $abbrev_name (generate_abbrev_list ($name)) {
$abbrevs{$abbrev_name} = 1; }}
#print STDERR ("names: " . join ('; ', keys %abbrevs) . "\n");
return (keys %abbrevs); }
sub get_people_url_maps {
my $people_hash_ref = shift;
my %name_to_url;
foreach my $person (values %$people_hash_ref) {
if ((defined $$person{'url'}) && ($$person{'url'} ne '')) {
my $url = $$person{'url'};
my $surname = (defined $$person{'surname'}
? $$person{'surname'}
: undef);
my @names;
foreach my $key ('name', 'full-name', 'informal-name',
'formal-name', 'alt-name', 'short-name') {
#print STDERR "record key: [$$person{'key'}], field name: [$key]\n";
if (defined $$person{$key}) {
#print STDERR "value: [$$person{$key}]\n";
push (@names, $$person{$key}); }}
if (defined $$person{'alt-cite-names'}) {
push (@names,
split (/; */, $$person{'alt-cite-names'})); }
grep {
grep {
if (! defined $name_to_url{$_}) {
$name_to_url{$_} = $url; }
elsif ($name_to_url{$_} ne $url) {
print STDERR ("$Short_Prog_Name: WARNING: "
. "\"$_\" already mapped to $name_to_url{$_}.\n");
print STDERR ("Ignoring new mapping to $url.\n"); }}
all_name_abbreviations ($_, $surname); }
@names; }}
# foreach my $name (keys %name_to_url) {
# my $abbrev_name = $name;
# while ($abbrev_name =~ s/([A-Z])(?:[^ ]*[^ .]) /$1. /) {
# $name_to_url{$abbrev_name} = $name_to_url{$name}; }}
return \%name_to_url; }
sub verbosify_latex {
local $_ = shift;
# *** TODO write comment explaining the backslashing!
s/É/(?:\\\{\\\\'E}|\\\\'E|\\\\'\\\{E}|{\\\\'\\\{E}}|É)/g;
s/é/(?:\\\{\\\\'e}|\\\\'e|\\\\'\\\{e}|{\\\\'\\\{e}}|é)/g;
s/í/(?:\\\{\\\\'i}|\\\\'i|\\\\'\\\{i}|{\\\\'\\\{i}}|í)/g;
s/á/(?:\\\{\\\\'a}|\\\\'a|\\\\'\\\{a}|{\\\\'\\\{a}}|á)/g;
s/a̧/(?:\\\{\\\\k a}|\\\\k a|\\\\k\\\{a}|{\\\\k\\\{a}}|a̧)/g;
s/č/(?:\\\{\\\\v c}|\\\\v c|\\\\v\\\{c}|{\\\\v\\\{c}}|č)/g;
s/ /(?:~|\\s+)/g;
return $_; }
sub deverbosify_latex {
local $_ = shift;
# *** TODO write comment explaining the backslashing!
s/\{\\\'E}|\\\'E|\{\\\'\{E}}|\\\'\{E}/É/g;
s/\{\\\'e}|\\\'e|\{\\\'\{e}}|\\\'\{e}/é/g;
s/\{\\\'i}|\\\'i|\{\\\'\{i}}|\\\'\{i}/í/g;
s/\{\\\'a}|\\\'a|\{\\\'\{a}}|\\\'\{a}/á/g;
s/\{\\k a}|\\k a|\{\\k\{a}}|\\k\{a}/a̧/g;
s/\{\\v c}|\\v c|\{\\v\{c}}|\\v\{c}/č/g;
s/~/ /g;
s/\s+/ /g;
return $_; }
sub link_latex {
my ($name, $url) = @_;
return "\\href{$url}{$name}"; }
sub make_link_pattern_latex {
my ($offset, $name_patt) = @_;
$offset++;
my $patt
= ("\\\\href\\{[^\}]*\\}\\{($name_patt)\\}(?{\$main::name=\$$offset})"
. "|\\\\(?:cite[a-z]*|bibitem|href|url)(?:\\s*\\[[^\]]*\\])?\\s*\\{[^\}]*\\}(?{\$main::name=undef})"
. "|% LocalWords.*(?{\$main::name=undef})");
return ($patt, $offset); }
sub verbosify_html {
local $_ = shift;
#print STDERR "sample: [á], before: [$_]\n";
s/á/(?:á|á)/g;
#print STDERR "after: [$_]\n";
s/é/(?:é|é)/g;
s/í/(?:í|í)/g;
s/ø/(?:ø|ø)/g;
s/ /(?: |\\s+)/g;
return $_; }
sub deverbosify_html {
local $_ = shift;
s/á/á/g;
s/é/é/g;
s/í/í/g;
s/ø/ø/g;
s/ / /g;
s/\s+/ /g;
return $_; }
sub link_html {
my ($name, $url) = @_;
return "$name"; }
sub make_link_pattern_html {
my ($offset, $name_patt) = @_;
$offset++;
my $patt
= ("($name_patt)(?{\$main::name=\$$offset})"
. "|(?is:\"]|\"[^\"]*\")*>(?:(?!).)*)(?{\$main::name=undef})"
. "|(?is:(?:(?!).)*)(?{\$main::name=undef})");
return ($patt, $offset); }
#print STDERR "C\n";
use re 'eval';
#print STDERR "D\n";
sub link_names_in_file {
my ($file, $name_to_url_ref, $update_only) = @_;
my $verbosify_ref;
my $deverbosify_ref;
my $link_ref;
my $make_link_pattern_ref;
my $is_bbl = 0;
if ($file =~ m/\.(?:tex|bbl)$/) {
($is_bbl = 1) if ($file =~ m /\.bbl$/);
$verbosify_ref = \&verbosify_latex;
$deverbosify_ref = \&deverbosify_latex;
$link_ref = \&link_latex;
$make_link_pattern_ref = \&make_link_pattern_latex; }
elsif ($file =~ m/html$/) {
$verbosify_ref = \&verbosify_html;
$deverbosify_ref = \&deverbosify_html;
$link_ref = \&link_html;
$make_link_pattern_ref = \&make_link_pattern_html; }
else {
die "unhandled"; }
my $name_patt1
= join ('|',
grep {
#print STDERR "A: [$_]\n";
s/\./\\./;
#print STDERR "B: [$_]\n";
$_ = &$verbosify_ref ($_);
#print STDERR "C: [$_]\n";
}
# We sort in reverse order in the hope that this will
# cause Perl to prefer longer matches when one version
# of a name is a prefix of another.
(sort { $b cmp $a }
keys %$name_to_url_ref));
my ($name_patt2, $offset)
= &$make_link_pattern_ref (0, $name_patt1);
$offset++;
# The empty pattern matching at beginning is to work around a Perl
# bug where the assignment to $main::name doesn't happen for the
# first match for some bizarre reason.
my $name_patt3 = "^(?{\$main::name=undef})|$name_patt2";
my $name_patt4;
if (! $update_only) {
#print STDERR "update_only false\n";
$name_patt4
= ($name_patt3
#. "|((? \$Show_Help,
"version" => \$Show_Version,
"update-lists" => \$Update_Lists,
"link-names" => \$Link_Names,
"update-name-links" => \$Update_Name_Links,
)) {
show_help();
exit 1; }
show_version() if $Show_Version;
show_help() if $Show_Help;
exit 0 if ($Show_Version || $Show_Help);
if ((! $Update_Lists) && (! $Link_Names) && (! $Update_Name_Links)) {
print STDERR "$Short_Prog_Name: Must supply at least one of these options:\n";
print STDERR " --update-lists, --link-names, --update-name-links\n";
exit 1; }
if ($Link_Names && $Update_Name_Links) {
print STDERR "$Short_Prog_Name: Must supply at at most one of these options:\n";
print STDERR " --link-names, --update-name-links\n";
exit 1; }
foreach my $File (@ARGV) {
if ($Update_Lists) {
update_lists_in_file ($File, $People_Hash_Ref); }
if ($Link_Names || $Update_Name_Links) {
link_names_in_file ($File, $People_Url_Maps, $Update_Name_Links); }}
# # Old implementation from before when I discovered TextDB.
#
# my %Field_Cumulative_Separator;
# $Field_Cumulative_Separator{'topic'} = ', ';
#
# sub fill_researcher_hash_old {
# my $list_file = shift;
# # print "list_file: [$list_file]\n";
# my $ListString = get_file_contents($list_file);
# my @Records = split (/^\n/m, $ListString);
# #print "Records: [@Records]\n";
# foreach my $Record (@Records) {
# if ($Record !~ m/^(([-a-z0-9]+:.*\n)+)$/) {
# die "malformed record: [$Record]"; }
# chop $Record;
# #print STDERR "Record: [$Record]\n";
# my @Fields = split (/\n/, $Record);
# my %Field;
# while (my $Field = shift (@Fields)) {
# ($Field =~ m/^([-a-z0-9]+): *(.*)$/) || die "impossible";
# if ($Field{$1}) {
# if ($Field_Cumulative_Separator{$1}) {
# $Field{$1} .= $Field_Cumulative_Separator{$1} . $2; }
# else {
# die "duplicate field: [$1], record: [$Record]\n"; }}
# else {
# $Field{$1} = $2; }}
# if (! $Field{'key'}) {
# die "bad record: [$Record]\n"; }
# if ($Researcher{$Field{'key'}}) {
# die "duplicate records for key: [$Field{'key'}]\n"; }
# $Researcher{$Field{'key'}} = \%Field; }}
# Local variables:
# mode: perl
# end: