#!/bin/sh #if [ -x /usr/bin/perl5.00503 ]; then # exec perl5.00503 -S -x -- "$0" "$@" #else exec perl -S -CSD -x -- "$0" "$@" #fi #!perl -w -CSD # $Id: format-people-html,v 1.39 2022/10/13 13:51:54 jbw Exp $ use utf8; use English; use strict; use User::pwent; my $JBW_Home; BEGIN { my $pw = getpwnam('jbw') || getpwnam('user') || die "Can't find user jbw or user: $OS_ERROR"; $JBW_Home = $pw->dir; push (@INC, "$JBW_Home/share/share/perl"); push (@INC, "$JBW_Home/church/lib/perl"); } use JBW::Utils; use Getopt::Long; require TextDB; #print STDERR "A\n"; #my $List_File = glob ('~/admin/people/people'); my $List_File = glob ('~/{jbw-cvs/,}admin/people/peopl[e]'); #print STDERR "List_File: [$List_File]\n"; #exit 1; my $Short_Prog_Name = $PROGRAM_NAME; $Short_Prog_Name =~ s(.*/)(); #print STDERR "B\n"; sub get_people_hash { my $list_file = shift; my %file_hash; my @settings = (key_field => 'key', repeat_join => ', ', field_name => '^([^\n]*?[^\s][^\n]*?):\s*'); #print STDERR "F\n"; tie %file_hash, 'TextDB', $list_file, "h", @settings; #print STDERR "G\n"; # Why does the next line take several minutes under Perl 5.6.0? #my %real_hash = %file_hash; my %real_hash; my ($key, $val); while (($key,$val) = each %file_hash) { $real_hash{$key} = $val; } #print STDERR "H\n"; untie %file_hash; #print STDERR "I\n"; return \%real_hash; } # Complaints about TextDB: # # * Can't forbid some field names from being repeated within a record. # * Can't parse one field differently from another. # * Can't complain about text not belonging to a valid record. # * Can't complain about records without keys when using key_field. # * Can't complain about two records with same key. sub dump_hash_hash { my $hashref = shift; foreach my $key (sort (keys (%$hashref))) { print "key: [$key]\n"; foreach my $field_name (sort (keys %{$hashref->{$key}})) { print "field_name: [$field_name], "; print "value: [$hashref->{$key}->{$field_name}]\n"; } print "\n"; }} sub dump_hash { my $hashref = shift; foreach my $key (sort (keys (%$hashref))) { print "key: [$key]\n"; print "value: [$hashref->{$key}]\n"; }} sub format_one_person_html { my ($Key, $people_hash_ref) = @_; my $Record; if (! ($Record = $people_hash_ref->{$Key})) { die "no record for key: [$Key]"; } if (! $Record->{'name'}) { die "record without name for key: [$Key]"; } my $Line = '

'; #$Line .= ""; if ($Record->{'url'}) { $Line .= "{'url'}\">"; } $Line .= $Record->{'name'}; if ($Record->{'url'}) { $Line .= ''; } if ($Record->{'institution'}) { $Line .= " - $Record->{'institution'}" } if ($Record->{'e-mail'} && ! $Record->{'url'}) { my $email = $Record->{'e-mail'}; $email =~ m/^([^@]*)@(.*)$/; my $mbox = $1; my $host = $2; $host =~ s/\./ dOt /g; $Line .= " (e-mail: $mbox aT $host)"; #$Line .= " ({'e-mail'}\">e-mail)"; } $Line .= "\n"; if ($Record->{'topic'} || $Record->{'note'}) { $Line .= "

"; if ($Record->{'topic'}) { $Line .= $Record->{'topic'}; } if ($Record->{'note'}) { $Line .= " ($Record->{'note'})"; } $Line .= "\n"; } return $Line; } sub format_people_html_list { my ($list_name, $people_hash_ref) = @_; #print STDERR "list_name: [$list_name]\n"; my $HTML = ''; #print STDERR "Keys: @{[keys %$people_hash_ref]}\n"; foreach my $key (sort (keys (%$people_hash_ref))) { #print STDERR "key: [$key]\n"; if ((defined $$people_hash_ref{$key}{'list'}) && ($$people_hash_ref{$key}{'list'} =~ m/(^| )$list_name($|,)/)) { #print STDERR "key in list\n"; $HTML .= format_one_person_html($key,$people_hash_ref);}} return $HTML; } sub update_lists_in_file { my ($html_file, $people_hash_ref) = @_; with_file_contents $html_file, sub { s{(\n).*^( *)}{ #(print STDERR "2: $2\n"), $1 . format_people_html_list($2, $people_hash_ref) . $3 }mseg; }; } sub generate_abbrev_list { my ($names) = @_; my (@abbrev_list); #print STDERR "names: [$names]\n"; while ($names =~ s/([A-Z])(?:[^ ]*[^ .]) /$1. /) { #print STDERR "names after 1 more abbrev: [$names]\n"; push (@abbrev_list, $names); # Also do all the abbreviations without the periods. my $names_no_periods = $names; $names_no_periods =~ s/([A-Z])\. /$1 /g; push (@abbrev_list, $names_no_periods); } return @abbrev_list; } sub all_name_abbreviations { my ($name, $surname) = @_; my %abbrevs; #print STDERR "name: [$name]\n"; $abbrevs{$name} = 1; if ((defined $surname) && ($name =~ m/\Q$surname/)) { my $prefix = $PREMATCH; my $suffix = $POSTMATCH; #print STDERR "surname: [$surname], prefix: [$prefix], suffix: [$suffix]\n"; $abbrevs{$surname} = 1; $abbrevs{ucfirst ($surname)} = 1; if ($prefix !~ m/^\s*$/) { if ($suffix !~ m/^\s*$/) { print STDERR "name: [$name], surname: [$surname]\n"; print STDERR "prefix: [$prefix], suffix: [$suffix]\n"; die "unhandled"; } foreach my $abbrev_prefix (generate_abbrev_list ($prefix)) { $abbrevs{$abbrev_prefix . $surname} = 1; }} elsif ($suffix !~ m/^\s*$/) { # I don't know whether it is sensible to abbreviate # Chinese names, so I won't. 1; } else { # WTF? Surname only? Indonesian perhaps? 1; }} else { # We guess each possible division into given name and # surname. foreach my $abbrev_name (generate_abbrev_list ($name)) { $abbrevs{$abbrev_name} = 1; }} #print STDERR ("names: " . join ('; ', keys %abbrevs) . "\n"); return (keys %abbrevs); } sub get_people_url_maps { my $people_hash_ref = shift; my %name_to_url; foreach my $person (values %$people_hash_ref) { if ((defined $$person{'url'}) && ($$person{'url'} ne '')) { my $url = $$person{'url'}; my $surname = (defined $$person{'surname'} ? $$person{'surname'} : undef); my @names; foreach my $key ('name', 'full-name', 'informal-name', 'formal-name', 'alt-name', 'short-name') { #print STDERR "record key: [$$person{'key'}], field name: [$key]\n"; if (defined $$person{$key}) { #print STDERR "value: [$$person{$key}]\n"; push (@names, $$person{$key}); }} if (defined $$person{'alt-cite-names'}) { push (@names, split (/; */, $$person{'alt-cite-names'})); } grep { grep { if (! defined $name_to_url{$_}) { $name_to_url{$_} = $url; } elsif ($name_to_url{$_} ne $url) { print STDERR ("$Short_Prog_Name: WARNING: " . "\"$_\" already mapped to $name_to_url{$_}.\n"); print STDERR ("Ignoring new mapping to $url.\n"); }} all_name_abbreviations ($_, $surname); } @names; }} # foreach my $name (keys %name_to_url) { # my $abbrev_name = $name; # while ($abbrev_name =~ s/([A-Z])(?:[^ ]*[^ .]) /$1. /) { # $name_to_url{$abbrev_name} = $name_to_url{$name}; }} return \%name_to_url; } sub verbosify_latex { local $_ = shift; # *** TODO write comment explaining the backslashing! s/É/(?:\\\{\\\\'E}|\\\\'E|\\\\'\\\{E}|{\\\\'\\\{E}}|É)/g; s/é/(?:\\\{\\\\'e}|\\\\'e|\\\\'\\\{e}|{\\\\'\\\{e}}|é)/g; s/í/(?:\\\{\\\\'i}|\\\\'i|\\\\'\\\{i}|{\\\\'\\\{i}}|í)/g; s/á/(?:\\\{\\\\'a}|\\\\'a|\\\\'\\\{a}|{\\\\'\\\{a}}|á)/g; s/a̧/(?:\\\{\\\\k a}|\\\\k a|\\\\k\\\{a}|{\\\\k\\\{a}}|a̧)/g; s/č/(?:\\\{\\\\v c}|\\\\v c|\\\\v\\\{c}|{\\\\v\\\{c}}|č)/g; s/ /(?:~|\\s+)/g; return $_; } sub deverbosify_latex { local $_ = shift; # *** TODO write comment explaining the backslashing! s/\{\\\'E}|\\\'E|\{\\\'\{E}}|\\\'\{E}/É/g; s/\{\\\'e}|\\\'e|\{\\\'\{e}}|\\\'\{e}/é/g; s/\{\\\'i}|\\\'i|\{\\\'\{i}}|\\\'\{i}/í/g; s/\{\\\'a}|\\\'a|\{\\\'\{a}}|\\\'\{a}/á/g; s/\{\\k a}|\\k a|\{\\k\{a}}|\\k\{a}/a̧/g; s/\{\\v c}|\\v c|\{\\v\{c}}|\\v\{c}/č/g; s/~/ /g; s/\s+/ /g; return $_; } sub link_latex { my ($name, $url) = @_; return "\\href{$url}{$name}"; } sub make_link_pattern_latex { my ($offset, $name_patt) = @_; $offset++; my $patt = ("\\\\href\\{[^\}]*\\}\\{($name_patt)\\}(?{\$main::name=\$$offset})" . "|\\\\(?:cite[a-z]*|bibitem|href|url)(?:\\s*\\[[^\]]*\\])?\\s*\\{[^\}]*\\}(?{\$main::name=undef})" . "|% LocalWords.*(?{\$main::name=undef})"); return ($patt, $offset); } sub verbosify_html { local $_ = shift; #print STDERR "sample: [á], before: [$_]\n"; s/á/(?:á|á)/g; #print STDERR "after: [$_]\n"; s/é/(?:é|é)/g; s/í/(?:í|í)/g; s/ø/(?:ø|ø)/g; s/ /(?: |\\s+)/g; return $_; } sub deverbosify_html { local $_ = shift; s/á/á/g; s/é/é/g; s/í/í/g; s/ø/ø/g; s/ / /g; s/\s+/ /g; return $_; } sub link_html { my ($name, $url) = @_; return "$name"; } sub make_link_pattern_html { my ($offset, $name_patt) = @_; $offset++; my $patt = ("($name_patt)(?{\$main::name=\$$offset})" . "|(?is:\"]|\"[^\"]*\")*>(?:(?!).)*)(?{\$main::name=undef})" . "|(?is:(?:(?!).)*)(?{\$main::name=undef})"); return ($patt, $offset); } #print STDERR "C\n"; use re 'eval'; #print STDERR "D\n"; sub link_names_in_file { my ($file, $name_to_url_ref, $update_only) = @_; my $verbosify_ref; my $deverbosify_ref; my $link_ref; my $make_link_pattern_ref; my $is_bbl = 0; if ($file =~ m/\.(?:tex|bbl)$/) { ($is_bbl = 1) if ($file =~ m /\.bbl$/); $verbosify_ref = \&verbosify_latex; $deverbosify_ref = \&deverbosify_latex; $link_ref = \&link_latex; $make_link_pattern_ref = \&make_link_pattern_latex; } elsif ($file =~ m/html$/) { $verbosify_ref = \&verbosify_html; $deverbosify_ref = \&deverbosify_html; $link_ref = \&link_html; $make_link_pattern_ref = \&make_link_pattern_html; } else { die "unhandled"; } my $name_patt1 = join ('|', grep { #print STDERR "A: [$_]\n"; s/\./\\./; #print STDERR "B: [$_]\n"; $_ = &$verbosify_ref ($_); #print STDERR "C: [$_]\n"; } # We sort in reverse order in the hope that this will # cause Perl to prefer longer matches when one version # of a name is a prefix of another. (sort { $b cmp $a } keys %$name_to_url_ref)); my ($name_patt2, $offset) = &$make_link_pattern_ref (0, $name_patt1); $offset++; # The empty pattern matching at beginning is to work around a Perl # bug where the assignment to $main::name doesn't happen for the # first match for some bizarre reason. my $name_patt3 = "^(?{\$main::name=undef})|$name_patt2"; my $name_patt4; if (! $update_only) { #print STDERR "update_only false\n"; $name_patt4 = ($name_patt3 #. "|((? \$Show_Help, "version" => \$Show_Version, "update-lists" => \$Update_Lists, "link-names" => \$Link_Names, "update-name-links" => \$Update_Name_Links, )) { show_help(); exit 1; } show_version() if $Show_Version; show_help() if $Show_Help; exit 0 if ($Show_Version || $Show_Help); if ((! $Update_Lists) && (! $Link_Names) && (! $Update_Name_Links)) { print STDERR "$Short_Prog_Name: Must supply at least one of these options:\n"; print STDERR " --update-lists, --link-names, --update-name-links\n"; exit 1; } if ($Link_Names && $Update_Name_Links) { print STDERR "$Short_Prog_Name: Must supply at at most one of these options:\n"; print STDERR " --link-names, --update-name-links\n"; exit 1; } foreach my $File (@ARGV) { if ($Update_Lists) { update_lists_in_file ($File, $People_Hash_Ref); } if ($Link_Names || $Update_Name_Links) { link_names_in_file ($File, $People_Url_Maps, $Update_Name_Links); }} # # Old implementation from before when I discovered TextDB. # # my %Field_Cumulative_Separator; # $Field_Cumulative_Separator{'topic'} = ', '; # # sub fill_researcher_hash_old { # my $list_file = shift; # # print "list_file: [$list_file]\n"; # my $ListString = get_file_contents($list_file); # my @Records = split (/^\n/m, $ListString); # #print "Records: [@Records]\n"; # foreach my $Record (@Records) { # if ($Record !~ m/^(([-a-z0-9]+:.*\n)+)$/) { # die "malformed record: [$Record]"; } # chop $Record; # #print STDERR "Record: [$Record]\n"; # my @Fields = split (/\n/, $Record); # my %Field; # while (my $Field = shift (@Fields)) { # ($Field =~ m/^([-a-z0-9]+): *(.*)$/) || die "impossible"; # if ($Field{$1}) { # if ($Field_Cumulative_Separator{$1}) { # $Field{$1} .= $Field_Cumulative_Separator{$1} . $2; } # else { # die "duplicate field: [$1], record: [$Record]\n"; }} # else { # $Field{$1} = $2; }} # if (! $Field{'key'}) { # die "bad record: [$Record]\n"; } # if ($Researcher{$Field{'key'}}) { # die "duplicate records for key: [$Field{'key'}]\n"; } # $Researcher{$Field{'key'}} = \%Field; }} # Local variables: # mode: perl # end: