#!/usr/local/bin/perl # # bibview : view a BibTeX database and interactively search through it # Copyright 1992, Dana Jacobsen (jacobsd@cs.orst.edu) # #version = "0.1.0"; # 25 Aug 92 jacobsd Wrote original version #version = "0.2.0"; # 25 Aug 92 jacobsd added options #version = "0.2.1"; # 26 Aug 92 jacobsd added options $version = "0.2.2"; # 26 Aug 92 jacobsd fast status, command changes # # todo: # things in help that are not yet implemented # understand multiple bibliographies # allow "show 1-10 15" # support for AND and OR # # All bug-fixes, suggestions, flames, and compliments gladly accepted. # $ignorewords = "of and the in a on to for from an with by at as its"; $displaymode = 'brief'; $autoshow = 0; $sortoutput = 0; while (@ARGV) { $_ = shift @ARGV; /^--$/ && do { push(@files, @ARGV); undef @ARGV; next; }; /^-deb/ && do { $debugging = 1; next; }; /^-wor/ && do { $worddebug = 1; next; }; push (@files, $_); } # globals: # # $records{$citekey} : the full entry, verbatim # $authors{$citekey} : the author field # $titles{$citekey} : the title field # $inauthors{$name} : a $; seperated list of citekeys # $intitles{$word} : a $; seperated list of citekeys # # @result : a numbered array with citekeys of last result print "bibview $version by Dana Jacobsen, 1992\n"; foreach $infile (@files) { &readbibfile($infile); } $timetoquit = 0; until ($timetoquit) { print "> "; chop($command = ); $_ = $command; if (/^load /i) { ($dummy, $infile) = split; &readbibfile($infile); } elsif (/^write /i) { } elsif (/^\w+\s*=/i) { &handlesearch($command); print " -- ", ($#result == 0) ? "1 entry" : ($#result == -1 ? "no" : $#result+1, " entries"), " found.\n"; $autoshow && &printrecs(0, @result); } elsif (s/^and\s+(\w+\s*=)/$1/i) { local(%dummyaar); local(@dresult) = @result; &handlesearch($_); grep($dummyaar{$_}++, @dresult); @result = grep($dummyaar{$_}, @result); print " -- ", ($#result == 0) ? "1 entry" : ($#result == -1 ? "no" : $#result+1, " entries"), " found.\n"; $autoshow && &printrecs(0, @result); } elsif (s/^or\s+(\w+\s*=)/$1/i) { local(@iresult, %dummyaar); local(@dresult) = @result; &handlesearch($_); grep($dummyaar{$_}++, @dresult); @iresult = grep(!$dummyaar{$_}, @result); @result = (@dresult, @iresult); print " -- ", ($#result == 0) ? "1 entry" : ($#result == -1 ? "no" : $#result+1, " entries"), " found.\n"; $autoshow && &printrecs(0, @result); } elsif (/^set\s+display\s+brief/i) { $displaymode = 'brief'; print "display mode set to $displaymode.\n"; } elsif (/^set\s+display\s+(full|bibtex)/i) { $displaymode = 'full'; print "display mode set to $displaymode.\n"; } elsif (/^set\s+autoshow\s+true/i) { $autoshow = 1; print "autoshow set to true.\n"; } elsif (/^set\s+autoshow\s+false/i) { $autoshow = 0; print "autoshow set to false.\n"; } elsif (/^set\s+sort\s+true/i) { $sortoutput = 1; print "output sorting set to true.\n"; } elsif (/^set\s+sort\s+false/i) { $sortoutput = 0; print "output sorting set to false.\n"; } elsif (/^sort/i) { @result = &titlesort(@result); } elsif (/^status/i) { print $#result+1, " records in last search result.\n"; if ($] > 4.019) { print scalar(keys(%records)), " records loaded.\n"; print scalar(keys(%inauthors)), " unique author last names.\n"; print scalar(keys(%intitles)), " unique words in titles.\n"; } else { $count = 0; $count++ while each %records; print $count, " records loaded.\n"; $count = 0; $count++ while each %inauthors; print $count, " unique author last names.\n"; $count = 0; $count++ while each %intitles; print $count, " unique words in titles.\n"; } print "display mode is $displaymode.\n"; print "autoshow is ", $autoshow ? "true" : "false", ".\n"; print "automatic output sorting is ", $sortoutput ? "true" : "false", ".\n"; } elsif (/^select\s+all/i) { @result = sort keys(%records); } elsif (/^display\s+authors/i) { local(@dresult) = sort keys(%inauthors); print join("\n", @dresult), "\n"; } elsif (/^display\s+titles/i) { local(@dresult) = sort keys(%intitles); print join("\n", @dresult), "\n"; } elsif (/^find\s+duplicates/i) { &finddups(); } elsif (/^debug\s+on/i) { $debugging = 1; } elsif (/^debug\s+off/i) { $debugging = 0; } elsif (/^(detail|brief|show)/i) { $oldmode = $displaymode; if (s/^detail\s*//) { $displaymode = 'full'; } elsif (s/^brief\s*//) { $displaymode = 'brief'; } elsif (s/^show\s*//) { # nothing } /^$/ && ($_ = "all"); foreach $num (split) { $debugging && print "$num\n"; if ($num eq all) { &printrecs(0, @result); } else { &printrecs($num-1, $result[$num-1]); } } $displaymode = $oldmode; } elsif (/^quit/i) { $timetoquit = 1; } elsif (/^(\?|help)/i) { print "load -- load \n"; print "a= -- find author exactly matching \n"; print "a=/ -- find author matching the regex \n"; print "t= -- find title containing the word \n"; print "t=/ -- find title containing the regex \n"; print " = / -- find field containing the regex \n"; print "and -- logical and the results with \n"; print "or -- logical or the results with \n"; print "set display brief -- use one line per record when displaying\n"; print "set display full -- use BibTeX record when displaying\n"; print "set autoshow true -- display results after search\n"; print "set autoshow false -- don't display results after search\n"; print "select all -- select all records loaded\n"; print "show [all | ..] -- show th result(s) or all\n"; print "brief [all | ..] -- display result(s) in brief format\n"; print "detail [all | ..] -- display result in BibTeX format\n"; print "status -- show status of bibview\n"; print "quit -- quit bibview\n"; print "! -- execute via the shell\n"; print "-----------------------\n"; print "not implemented yet:\n"; print "write -- write search results to \n"; print "load all in -- load all files in biblio directory\n"; print "unload -- remove entries from \n"; print "unload all in -- remove entries from biblios in \n"; print "display mode lib -- display records in tagged field format\n"; print "delete [all | ..] -- delete results from memory\n"; print "find duplicates -- selects duplicate entries\n"; } elsif (/^\s*$/) { # nothing } elsif (s/^!//) { system($_); } else { print "Unrecognized command. Use ? for help.\n"; } } ######################################## ######################################## ######################################## sub readbibfile { local ($file) = @_; local ($num) = 0; local ($dups) = 0; local ($oldpipe) = $|; # this little gem is from Larry Wall -- expand ~user. $file =~ s#^(~([a-z0-9]+))(/.*)?$#((getpwnam($2))[7]||$1).$3#e; # this is mine -- handle ~/file $file =~ s#^(~)(/.*)?$#((getpwnam(getlogin))[7]||$1).$2#e; open (IN, $file) || open (IN, "$file.bib") || ((warn "Can't open $file: $!\n"), return 0); $| = 1; print "loading $file.."; while (! eof(IN)) { $key = &bibtexread(*IN); if ((!$records{$key}) && $key) { $num++; ($num % 50) || print "."; $records{$key} = $entry{FULL}; $authors{$key} = $entry{author}; if ($entry{booktitle}) { if ($entry{title}) { $titles{$key} = $entry{title} . ' in ' . $entry{booktitle}; } else { $titles{$key} = $entry{booktitle}; } } else { $titles{$key} = $entry{title}; } foreach $auth (split(/ and /, $entry{author})) { $name = &parsename($auth); $name =~ tr/A-Za-z0-9\-//cd; # delete non-alphanumerics $name =~ tr/A-Z/a-z/; # everything lowercase $inauthors{$name} .= $; . $key; } foreach $word (split(/\s+/, $titles{$key})) { $word =~ tr/A-Za-z0-9\-//cd; $word =~ tr/A-Z/a-z/; if ($word && (index($ignorewords, $word) == -1)) { if ($worddebug) { print "$word\n"; } $intitles{$word} .= $; . $key; } } } else { $key && $debugging && print "Duplicate cite key: not adding $key\n"; $key && $dups++; } } $| = $oldpipe; print "$num entries."; print $dups ? " $dups duplicate cite keys.\n" : "\n"; } ######################################## sub handlesearch { local($_) = @_; local($afield, $lfield, $lvalue, $lval, $cite, $val); local(%resaar); local(%atolar) = ('a', 'author', 't', 'title', ); @result = (); if ( ($afield, $lvalue) = /^(\w)\s*=\s*(.*)$/ ) { $lfield = $atolar{$afield}; if (!$lfield) { print "No abbreviation for $afield. Spell out the field name.\n"; return; } } else { ($lfield, $lvalue) = /^(\w+)\s*=\s*(.*)$/; $lfield =~ tr/A-Z/a-z/; } $debugging && print "lfield: $lfield, lvalue: $lvalue\n"; if (substr($lvalue, 0, 1) eq '/') { # regex search substr($lvalue, 0, 1) = ''; print "$lfield is /$lvalue/"; if ($lfield eq author) { while (($cite, $val) = each %authors) { ($val =~ /$lvalue/i) && (push(@result, $cite)); } } elsif ($lfield eq title) { while (($cite, $val) = each %titles) { ($val =~ /$lvalue/i) && (push(@result, $cite)); } } else { # long search. # (this implementation could result in false matches) while (($cite, $val) = each %records) { ($val =~ /$lfield\s*=.*$lvalue/i) && (push(@result, $cite)); } } } else { # exact search print "$lfield is $lvalue"; $lvalue =~ tr/A-Za-z0-9\-//cd; $lvalue =~ tr/A-Z/a-z/; if ($lfield =~ /^author$/) { @result = split(/$;/, $inauthors{$lvalue}); } elsif ($lfield =~ /^title$/) { @result = split(/$;/, $intitles{$lvalue}); } else { print " -- Exact matching on $lfield not available.\n"; return; } shift @result; } # weed out any duplicates that might have cropped up grep($resaar{$_}++, @result); @result = grep($resaar{$_}-- == 1, @result); $sortoutput && (@result = &titlesort(@result)); } ######################################## sub titlesort { return(sort { $titles{$a} cmp $titles{$b}; } @_); } ######################################## sub printrecs { local($cite, $auth, $names); local($num) = shift(@_); foreach $cite (@_) { next if (!$cite); $num++; $debugging && print "cite: $cite\n"; if ($displaymode eq full) { print $records{$cite}, "\n"; } else { $names = ''; foreach $auth (split(/ and /, $authors{$cite})) { $names .= ', ' . &parsename($auth); } $names =~ s/^, //; $debugging && print ":$num:$names:$titles{$cite}:\n"; printf "%3d %-20s %-50s\n", $num, substr($names, 0, 20), substr($titles{$cite}, 0, 50); } } } ######################################## # sets @result to a list of citekeys of duplicates sub finddups { local($curtitle); local($cite, $ocite, $name); local($type, $otype); local(@auths, @restauths); local(%resaar); @result = (); foreach $name (keys %inauthors) { $debugging && print "$name:"; @auths = split(/$;/, $inauthors{$name}); shift @auths; @restauths = @auths; foreach $cite (@auths) { $curtitle = $titles{$cite}; ($type) = ($records{$cite} =~ /^\s*@\s*(\w+)/); shift(@restauths); foreach $ocite (@restauths) { next if $cite eq $ocite; ($otype) = ($records{$ocite} =~ /^\s*@\s*(\w+)/); next if $type ne $otype; next if $curtitle ne $titles{$ocite}; # author in common, same type, and same title push(@result, $cite, $ocite); } } } $debugging && print "\n"; grep($resaar{$_}++, @result); @result = grep($resaar{$_}-- == 1, @result); print "", ($#result == 0) ? "1 duplicate" : ($#result == -1 ? "no" : $#result+1, " duplicates"), " found.\n"; } ######################################## # # parsename takes a name in BibTeX format, and parses it into # parts. It returns the last name. The following globals are # set: # $First, $von, $Last, $Jr sub parsename { local($name) = @_; local($doinglast) = 0; local($part) = 0; local($p1, $p2, $p3); local($sing, $dummy); ($dummy, $sing, $dummy) = $name =~ /(^|\s){(.*)}(\s|$)/; $name =~ s/(^|\s){(.*)}(\s|$)/$1ASingleNameString$3/; $First = $von = $Last = $Jr = ''; ($p1, $p2, $p3) = split(/,/, $name, 3); if ($p3) { $First = $p3; $Jr = $p2; if ($p1 =~ s/^\s*{(.*)}\s*$/$1/) { $Last = $p1; } else { while ($p1 =~ /^[a-z]/) { ($part) = $p1 =~ /^(\S+)/; $p1 =~ s/^(\S+)\s*//; $von .= ' ' . $part; } $Last = $p1; } } elsif ($p2) { $First = $p2; if ($p1 =~ s/^\s*{(.*)}\s*$/$1/) { $Last = $p1; } else { while ($p1 =~ /^[a-z]/) { ($part) = $p1 =~ /^(\S+)/; $p1 =~ s/^(\S+)\s*//; $von .= ' ' . $part; } $Last = $p1; } } else { if ($p1 =~ s/^\s*{(.*)}\s*$/$1/) { $Last = $p1; } else { while ($p1 =~ /^[A-Z]/) { ($part) = $p1 =~ /^(\S+)/; $p1 =~ s/^(\S+)\s*//; $First .= ' ' . $part; } while ($p1 =~ /^[a-z]/) { ($part) = $p1 =~ /^(\S+)/; $p1 =~ s/^(\S+)\s*//; $von .= ' ' . $part; } if ($p1) { $Last = $p1; } else { ($Last) = $First =~ /\s+(\S+)\s*$/; $First =~ s/\s+\S+\s*$//; } } } $Last =~ s/ASingleNameString/$sing/; $Last =~ s/^\s+//; $von =~ s/^\s+//; $First =~ s/^\s+//; $Jr =~ s/^\s+//; # handle "et al" or "others" if ( (!$Last) && ($von =~ /^(others|et\.?\s*al)\.?$/i) ) { $Last = "others"; $von = ''; } if ($debugging) { $name =~ s/ASingleNameString/$sing/; $name =~ s/^\s+//; printf "%20s: %-15s %-10s %-20s %-10s\n", $name, $First, $von, $Last, $Jr; } return ($Last); } ######################################## # # split out the field parsing into a seperate routine. # so we read in verbatim, then call &bibtexexplode to seperate # into %entry. # sub bibtexread { local(*FILE) = @_; local($braces) = 1; local($ent, $delim); local($field, $value, @values); %entry = (); while () { last if /^\s*@/; } if (!/,/) { # preamble is split on multiple lines $ent = $_; while () { $ent .= $_; last if /,/; } $_ = $ent; } return 0 if eof(FILE); return 0 if /^\s*@\s*string/i; return 0 if /^\s*@\s*preamble/i; $ent = $_; ( ($entry{type}, $delim, $entry{citekey}) = /^\s*@\s*(\w+)\s*([{(])\s*(\S+)\s*,\s*$/) || do { print "Error getting line: $_\n"; return 0; }; $debugging && print $entry{type}, $delim, $entry{citekey}, "\n"; if ($delim eq '{') { while () { $ent .= $_; $braces += s/{/{/g; $braces -= s/}/}/g; last if ($braces <= 0); } $entry{FULL} = $ent; $ent =~ s/}\s*$//; } else { while () { $ent .= $_; last if $ent =~ s/[)]\s*$//; } $entry{FULL} = $ent . ')'; } $ent =~ s/\s+/ /g; @values = split(/,\s*(\w+)\s*=\s*/, $ent); $debugging && print join("//", @values), "\n"; shift(@values); # zap the beginning info while (@values) { $field = shift(@values); $field =~ tr/A-Z/a-z/; $value = shift(@values); $value =~ s/^\s*{(.*)}\s*$/$1/; $value =~ s/^\s*"(.*)"\s*$/$1/; $entry{$field} = $value; } return($entry{citekey}); }