#! /usr/bin/python
################################################################################
#
# File:         bibmakeindex.py
# RCS:          $Header: /u/testbed/CVSROOT/dldev/src/interbib2007/Scripts/bibmakeindex,v 1.1 2007/07/28 02:53:46 paepcke Exp $
# Description:  Interactive script to index a set of files
# Author:       Andreas Paepcke, Stanford University
# Created:      Thu Jul  4 08:04:01 1996
# Modified:     Fri Jul 27 19:51:00 2007 (Andreas Paepcke) paepcke@SNUGGLES
# Language:     Python
# Package:      N/A
# Status:       Experimental (Do Not Distribute)
#
# (c) Copyright 1996, Stanford University, all rights reserved.
#
################################################################################
#
# Revisions:
#
# Wed Mar 19 07:52:27 1997 (Andreas Paepcke) paepcke@Haddock.Stanford.EDU
#  Added expansion of directory name, so '.' and env vars will work.
################################################################################

# NOTE: For some reason this runs incorrectly on DECstations such as Phoebe:
#       it creates empty .idx.dat files instead of proper .idx files.

# This is a script which indexes InterBib databases to make them searchable
# with InterBib's search engine. You can pass a list of files, or just specify
# a directory.
#
# Options:
#
#  * -v  run verbose
#  * -d  the following path is a directory. Indexer will find all files
#        with no extension and try to index them
#  * -c  conservative: Don't index a file for which a .idx file already exists
#        next to it (i.e. '<file>.idx' is present)
#
# Examples:
#
#   bibmakeindex -vd .             ;; index current dir
#   bibmakeindex -v foo bar baz    ;; index files foo, bar, baz in current dir
#   bibmakeindex -vc foo bar baz   ;; index foo, bar and baz unless
#                                  ;; .idx files exist
# The index generator is part of Arturo Crespo's search engine.

import bibsearch
import os
import sys
import getopt


# Help text printed whenever the command line cannot be used. The second
# and third lines carry leading blanks so they line up under the first.
usage = (
  "usage: bibmakeindex [-vc] -d <directory>\n"
  "   or: bibmakeindex [-vc] <InterBib-file-Name1> <InterBib-file-Name2>...\n"
  "                    (With '-c' dbs with index in current directory are ignored)"
)

# Module-level option defaults. The Indexer class keeps its own copies of
# these flags on each instance.
verbose, doDirectory = 0, 0

class Indexer:
  """Command-line front end that indexes InterBib database files.

  Parses the options found in sys.argv, validates the file or directory
  arguments, and hands every accepted database file to the bibsearch
  index engine.

  Attributes:
    verbose:      1 if '-v' was given; progress messages are printed.
    doDirectory:  1 if '-d' was given; the first argument is a directory.
    conservative: 1 if '-c' was given; a file is skipped when
                  '<file>.idx' already exists.
    indexEngine:  bibsearch.bibSearch instance used to build the indexes.
  """

  # Class-level defaults; __init__ assigns the same values per instance.
  verbose = 0
  doDirectory = 0
  conservative = 0

  def __init__(self):
    """Create an indexer with all options off and a fresh index engine."""
    self.verbose = 0
    self.doDirectory = 0
    self.conservative = 0
    self.indexEngine = bibsearch.bibSearch()

  def _expandPath(self, path):
    """Return path with a leading '~' and environment variables expanded."""
    return os.path.expandvars(os.path.expanduser(path))

  def maybeInterBibFileName(self, fileName):
    """Return 1 if fileName has no file name extension, else 0.

    Only the name is examined (after '~'/variable expansion); the file
    system is not consulted.
    """
    fileName = self._expandPath(fileName)
    if os.path.splitext(fileName)[1] == '':
      return 1
    return 0

  def inputErrorCheck(self):
    """Parse and validate the command line held in sys.argv.

    Sets self.verbose, self.doDirectory and self.conservative according
    to the '-v', '-d' and '-c' options.

    Returns:
      The list of positional arguments on success.  With '-d' the first
      element is the expanded directory name; without '-d' every element
      is an expanded file name that exists and has no extension.
      Returns 0 (after printing a diagnostic) when the command line is
      unusable.
    """
    try:
      optlist, args = getopt.getopt(sys.argv[1:], 'vdc')
    except getopt.error as info:
      # The exception must be converted explicitly: str + exception
      # object raises TypeError instead of printing the message.
      print("Illegal option: " + str(info))
      print(usage)
      return 0

    for option, _value in optlist:
      if option == '-v':
        self.verbose = 1
      elif option == '-d':
        self.doDirectory = 1
      elif option == '-c':
        self.conservative = 1

    if not args:
      print(usage)
      return 0

    if self.doDirectory:
      # Expand before testing, so that '~/dbs' or '$DBS' given literally
      # are recognised as directories.
      directory = self._expandPath(args[0])
      if not os.path.isdir(directory):
        print("'" + args[0] + "' " + 'is not a readable directory.')
        print(usage)
        return 0
      args[0] = directory
      return args

    # File mode: validate (and later index) the same, expanded, names.
    args = [self._expandPath(fileName) for fileName in args]
    for fileName in args:
      try:
        os.stat(fileName)
      except os.error:
        print("Cannot access file '" + fileName + "'")
        return 0
      if not self.maybeInterBibFileName(fileName):
        print("Only InterBib databases can be indexed. They don't have file name extensions.")
        print(usage)
        return 0
    return args

  # Index the files in the list of file names. We assume they are all
  # legal InterBib db files:

  def indexFileList(self, fileNames):
    """Create an index for every file named in fileNames.

    In conservative mode a file is skipped when '<file>.idx' already
    exists.  For each remaining file the engine's createIndex() and
    closeIndex() are called in turn.
    """
    for fileName in fileNames:
      if self.conservative and os.path.isfile(fileName + '.idx'):
        if self.verbose:
          print('Ignoring ' + fileName + ": index file already exists")
        continue
      if self.verbose:
        print('Indexing ' + fileName + '...')
      self.indexEngine.verbosity = self.verbose
      self.indexEngine.createIndex(fileName)
      self.indexEngine.closeIndex()
      if self.verbose:
        print('   done with ' + fileName + '.')

  def indexDirectory(self, dir):
    """Index every extension-less regular file directly inside dir.

    '.' is replaced by the current working directory.  Entries with a
    file name extension, and entries that are not regular files (for
    example sub-directories such as 'CVS'), are skipped; in verbose mode
    a message is printed for each skipped entry.  Entries are processed
    in sorted order so runs are reproducible.
    """
    if dir == '.':
      dir = os.getcwd()
    fileNames = []
    for entry in sorted(os.listdir(dir)):
      fileName = os.path.join(dir, entry)
      if not self.maybeInterBibFileName(fileName):
        if self.verbose:
          print("Ignoring '" + fileName + "': Only InterBib databases can be indexed. They don't have extensions.")
        continue
      if not os.path.isfile(fileName):
        # A directory has no extension either, but is not a database.
        if self.verbose:
          print("Ignoring '" + fileName + "': not a regular file.")
        continue
      fileNames.append(fileName)
    if fileNames:
      self.indexFileList(fileNames)
    
def main():
  """Run the indexer on the command line held in sys.argv.

  Returns 0 when the command line is rejected by
  Indexer.inputErrorCheck(); otherwise indexes the requested directory
  or files and returns None.
  """
  indexer = Indexer()
  # Get a list of files or a list of one element w/ a dir name, & error-check
  # the command line:
  filesOrDirectory = indexer.inputErrorCheck()
  if not filesOrDirectory:
    return(0)
  if indexer.doDirectory:
    indexer.indexDirectory(filesOrDirectory[0])
  else:
    indexer.indexFileList(filesOrDirectory)


# Only run when executed as a script, so that importing this module
# (e.g. to reuse Indexer) does not start indexing as a side effect.
if __name__ == '__main__':
  main()
