#!/usr/bin/perl -w

eval 'exec /usr/bin/perl -w -S $0 ${1+"$@"}'
    if 0; # not running under some shell
#========================================================================
#
# mimetypes
#
# DESCRIPTION
#   Script for extracting the mimetypes on the current host
#
# AUTHOR
#   Bryce Harrington <bryce@osdl.org>
#
# COPYRIGHT
#   Copyright (C) 2002 Bryce W. Harrington.  All Rights Reserved.
#
#   This module is free software; you can redistribute it and/or
#   modify it under the same terms as Perl itself.
#
#------------------------------------------------------------------------
#
# $Id: mimetypes,v 1.6 2002/06/06 07:15:02 bryce Exp $
#
# $Log: mimetypes,v $
# Revision 1.6  2002/06/06 07:15:02  bryce
# Adding webpages
#
# Revision 1.5  2002/06/05 20:43:11  bryce
# Testing build/install and versioning process
#
# Revision 1.4  2002/06/05 20:35:57  bryce
# Fixing up VERSION to automatically update itself with CVS's revision.
#
# Revision 1.3  2002/06/05 20:30:58  bryce
# Moving completed todo items from mimetypes to Changes.  All done.  :-)
#
# Revision 1.2  2002/06/05 19:21:57  bryce
# Adding mega optimization, eliminating a few last quirks, tinkered around
# with install/uninstall process, and writing most of the rest of the POD
# documentation.
#
# Revision 1.1  2002/06/04 19:40:25  bryce
# Initial checkin of mimetypes utility
#

use strict;
use Pod::Usage;
use Getopt::Long;
use File::Find;

use vars qw($VERSION);
$VERSION = sprintf("%d.%d", q$Revision: 1.6 $ =~ /(\d+)\.(\d+)/);

# Config variables
our $opt_debug           = 0;
our $opt_help            = 0;
our $opt_man             = 0;
our $opt_version         = 0;
our $opt_skipKde         = 0;
our $opt_skipGnome       = 0;
our $opt_skipBlank       = 0;
our $opt_gnomePrefix     = '/usr/share/mime-info';
our $opt_kdePrefix       = '/usr/share';

# Global data structures
my %assoc;			# Table of ext's->mimetypes

#------------------------------------------------------------------------
# Commandline option processing
#------------------------------------------------------------------------

# Process options
Getopt::Long::Configure ("bundling");
GetOptions( "help|h"          , # print usage, then exit
            "debug|d=i"       , # display debug messages during execution
            "version|V"       , # display version, then exit
            "man"             , # display manual (POD)
	    "skipKde"         , # don't scan KDE registry
	    "skipGnome"       , # don't scan GNOME registry
	    "skipBlank"       , # don't display unassociated mimetypes
	    "gnomePrefix=s"   , # path for where GNOME mime info is located
	    "kdePrefix=s"       # path for where KDE mime info is located
            )
    || pod2usage(0);
GetOptions('debug');
pod2usage(-verbose => 0, -exit => 1)  if ($opt_help);
pod2usage(-verbose => 2, -exit => 1)  if ($opt_man);

if ($opt_version) {
    print "$VERSION\n";
    exit 0;
}

#####################################################################

sub trim {
    my ($buf) = @_;
    $buf =~ s/^\s+//g;
    $buf =~ s/\s+$//g;
    return $buf;
}

sub parseGnomeFile {
    my ($filename) = $File::Find::name;
    return unless -f && ($filename =~ /\.mime$/);
    # From GNOME's mime-info/gnome.mime file:
    #
    # Applications can provide more mime types by installing other
    # .mime files in the $share/mime-info directory.
    #
    # The format of this file is:
    #
    # mime-type
    #         ext[,prio]:   list of extensions for this mime-type
    #         regex[,prio]: a regular expression that matches the filename
    #
    # more than one ext: and regex: fields can be present.

    # Parsing GNOME's mimetype info is algorithmically simpler than KDE
    # since patterns and extensions are kept together and we don't need
    # to go through the filename association step to map them.

    open(FILE, "<$filename")
	|| die "Could not open $filename for reading GNOME mime types\n";

    my $mimetype = '';
    while (my $line = <FILE>) {
	chomp($line);
	print STDERR "$line\n" if $opt_debug>2;

	# Skip comments and blank lines
	next if ($line =~ /^\s*\#/ || $line =~ /^\s*$/);

	if ($line =~ /^\w/) {
	    # We have a new mimetype
	    $mimetype = trim($line);

	    # Strip out any trailing colons
	    $mimetype =~ s/:$//g;

	    # This ensures the mimetype gets printed even if
	    # no extensions get associated with it
	    unless ($opt_skipBlank) {
		$assoc{$mimetype} = $mimetype;
	    }

	} elsif ($line =~ /^\s+ext/) {
	    # Line contains a list of file extensions
	    
	    my @buf = split /\:/, $line;
	    shift @buf;
	    my $buffer = join ':', @buf;
	    foreach my $ext (split /\s+/, $buffer) {
		$assoc{trim($ext)} = $mimetype;
	    }

	} elsif ($line =~ /^\s+regex/) {
	    # Line is a regular expression - We'll just skip these
	}

    }    
    close(FILE);
}

sub parseKdeFile {
    return unless -f && ($File::Find::name =~ /\.desktop$/);
    my ($filename) = $File::Find::name;
    my $mimetype = '';
    my $pattern  = '';

    # KDE Mimetype info is stored within individual application's
    # resource files, as the parameters MimeType and Patterns.
    # Thus, we must extract these two lines for all applications 
    # within the KDE tree

    # Scan for MimeType= and Patterns= lines
    open (FILE, "<$filename") || die "Could not open $filename";

    while (my $line = <FILE>) {
	if ($line =~ /^MimeType=(.*)$/) {
	    print STDERR $line if $opt_debug>2;
	    $mimetype = trim($1);
	} elsif ($line =~ /^Patterns=(.*)$/) {
	    print STDERR $line if $opt_debug>2;
	    $pattern = $1;
	}
    }
    close(FILE);

    # This ensures the mimetype gets printed even if
    # no extensions get associated with it
    unless ($opt_skipBlank) {
	$assoc{$mimetype} = $mimetype;
    }

    # Strip out the *'s (unnecessary)
    $pattern =~ s/\*//g;

    my @patterns = split /[\;\s+]/, trim($pattern);
    foreach $pattern (@patterns) {
	# Strip any leading .'s
	$pattern =~ s/^\.//;
	# Add to registry unless it looks like a regexp
	if ($pattern =~ /^[\w\+\.\,\-\~\%]+$/) {
	    $assoc{trim($pattern)} = $mimetype;
	}
	# else - it has unusual punctuation or funky characters 
	# maybe it's a regexp?  If so, skip for now
    }
}

# Takes an input stream of mime info from an apache formatted file
sub parseApacheFile {
    my ($filename) = @_;

    open (FILE, "$filename") 
	|| die "Could not open Apache mimeinfo file $filename";
    while (my $line = <FILE>) {
	chomp($line);
	print STDERR "$line\n" if $opt_debug>2;
	next if $line =~ /^\s*\#/;

	my ($mimetype, @exts) = split /\s+/, $line;
	next unless $mimetype;
	$mimetype = trim($mimetype);

	# This ensures the mimetype gets printed even if
	# no extensions get associated with it
	unless ($opt_skipBlank) {
	    push @exts, $mimetype;
	}
	foreach my $ext (@exts) {
	    # Strip any leading .'s
	    $ext =~ s/^\.//;
	    # Add to registry
	    $assoc{trim($ext)} = $mimetype;
	}
    }
    close(FILE);

}

#######################################################################
# Main program

# Gather the GNOME mime info
if (-e $opt_gnomePrefix && ! $opt_skipGnome) {
    print STDERR "Gathering registered GNOME mimetypes\n";
    # Foreach file (*.mime) - recursively search
    find(\&parseGnomeFile, ($opt_gnomePrefix));
}

if (-e $opt_kdePrefix && ! $opt_skipKde) {
    print STDERR "Gathering registered KDE mimetypes\n";
    # Foreach file (*.desktop) - recursively search
    find(\&parseKdeFile, ($opt_kdePrefix));
}

# Parse any apache formatted files given by the user on commandline
foreach my $filename (@ARGV) {
    print STDERR "Loading mimetypes from $filename\n" if $opt_debug>0;
    parseApacheFile($filename);
}

my %exts;
# Invert the hash to get the normalized map of mimetype to extensionw
print STDERR "Inverting hash\n" if $opt_debug>0;
while (my ($ext, $mimetype) = each(%assoc)) {
    # Ignore anything that has unusual characters
    print STDERR "$ext, $mimetype\n" if $opt_debug>2;
    next unless $mimetype =~ /^[\/\w\-\+\.\~]+$/; # Only allow these
    push(@{$exts{trim($mimetype)}}, trim($ext));
}

# Finally, display all of the mimetype info, merged into apache style
print STDERR "Displaying results\n" if $opt_debug>0;
my $type = '';
foreach my $mimetype (sort keys %exts) {
    my ($newtype) = split /\//, $mimetype;
    if ($newtype ne $type) {
	print "\n# ".ucfirst($newtype)." values\n";
	$type = $newtype;
    }
    print "$mimetype ";
    print ' ' x (40-length($mimetype));
    foreach my $ext (sort @{$exts{$mimetype}}) {
	next if ($ext eq $mimetype || $ext eq '');
	print " $ext";
    }
    print "\n";
}

=pod

=head1 NAME

mimetypes - Script for extracting the mimetypes on the current host

=head1 SYNOPSIS

mimetypes [options] [file ...]

 Options:
   -h, --help                    print usage, then exit
       --man                     display manual (POD)
   -d, --debug                   display debug messages while executing
   -V, --version                 display version, then exit
       --skipKde                 don't scan KDE registry
       --skipGnome               don't scan GNOME registry
       --skipBlank               don't display unassociated mimetypes
       --gnomePrefix             path for location of GNOME mime info
                                 default:  /usr/share/mime-info
       --kdePrefix               path for location of KDE mime info
                                 default:  /usr/share

 Inputs:  
       None

 Outputs:  
       An Apache-formatted listing of mimetypes will be printed to
       stdout.

 Errors:
       Warnings about inaccessible directories is normal and can be 
       safely ignored.

=head1 DESCRIPTION

B<mimetypes> will scan the current machine and extract the registered
mimetypes for KDE and GNOME applications, merge the information and
return a sorted, formatted list of mimetypes and file extensions (if
any).

=head1 OPTIONS

=over 8

=item B<-h, --help>

Print a brief help message and exits.

=item B<--man>

Prints the manual page and exits.

=item B<-d, --debug=N>

Prints various debug messages to stdout during execution.  Specify the
debug level as a positive integer; the higher the number the more
output.  

=item B<-V, --version>

Displays the current version of the program and exits.

=item B<--skipKde>

Suppresses scanning of the KDE mimetype registry.  This causes mimetypes
to only display mimetype mappings for GNOME (and whatever files provided
on commandline).

=item B<--skipGnome>

Suppresses scanning of the GNOME mimetype registry.  This causes mimetypes
to only display mimetype mappings for KDE (and whatever files provided
on commandline).

=item B<--skipBlank>

Suppresses display of mimetypes that have no file extensions mapped to
them.  

=item B<--gnomePrefix=/usr/share/mime-info>

Path to where GNOME's mime information files (such as gnome.mime,
gnome-vfs.mime, etc.) are located.  By default, the directory
/usr/share/mime-info is used.


=item B<--kdePrefix>

Path to where KDE's resource files (*.desktop) can be found.  By default
the directory /usr/share is assumed.

=back

=head1 PREREQUISITES

This script requires the C<File::Find>, C<Pod::Usage> and
C<Getopt::Long> modules.

=head1 TOPIC CATEGORIES

Utilities

=head1 BUGS

Warnings about not having permission to cd into certain directories is
normal and can safely be ignored.

The GNOME and KDE mimetype registries include some entries with regular
expressions (e.g., [.|]gtkrc, [Mm]akefile, README.*, etc.)  For GNOME,
these regular expressions are explicitly identified and thereby ignored.
For KDE, they are not pointed out, so the mimetypes program attempts to
identify and exclude them.  However, some patterns, that include only
alphanumerics and/or the punctuation characters _.,+-~% will be allowed
through, because those characters are allowable in file extensions.

=head1 SEE ALSO

L<mime.types|mime.types>

L<mime.confs|mime.confs>

L<gnome-mime|gnome-mime>

=head1 COPYRIGHT

 mimetypes is Copyright (c) 2002, by Bryce W. Harrington. 
 All rights reserved.

 You may distribute this code under the terms of either the GNU General
 Public License or the Artistic License, as specified in the Perl README
 file.

=cut

=head1 AUTHOR

Bryce Harringtion <bryce@osdl.org> <bryce@neptune.net>

=head1 REVISION

Revision: $Revision: 1.6 $

=cut
