#! /usr/bin/python
################################################################################
#
# File:         bibload.py
# RCS:          $Header: /u/testbed/CVSROOT/dldev/src/interbib2007/Scripts/bibload,v 1.1 2007/07/28 02:53:46 paepcke Exp $
# Description:  Pre-load bibliography dbs into one InterBib database
# Author:       Andreas Paepcke, Stanford University
# Created:      Mon Oct 30 00:16:44 1995
# Modified:     Fri Jul 27 19:49:46 2007 (Andreas Paepcke) paepcke@SNUGGLES
# Language:     Python
# Package:      N/A
# Status:       Experimental (Do Not Distribute)
#
# (c) Copyright 1995, Stanford University, all rights reserved.
#
################################################################################
#
# Revisions:
#
# Thu Dec 28 23:49:50 1995 (Andreas Paepcke) paepcke@AndreasHomePC
#  Correctly passed duplicate recovery into bibdb.
# Wed Dec  6 22:53:43 1995 (Andreas Paepcke) paepcke@Stingray
#  Added acceptance of refer files.
################################################################################

# Usage: bibload [-q] [-v] [-d <directory>] [-p <duplicate-policy>] <outFileName> <bibFile1> <bibFile2>...

# Takes all the bib files (may be mixed InterBib dbs, BibTeX or Refer files)
# and puts them all into one InterBib database in outFileName. The InterBib out
# db may already exist. In that case, the files are imported into it.
#
# The optional -p option takes one of four args:
#       - error     (duplicate keys are errors)
#       - resolve   (system invents new keys)
#       - overwrite (duplicates are overwritten)
#       - discard   (duplicates are discarded, i.e. the oldest entry is kept)
#
# Using -d <directory> causes the script to find all the .bib and .ref files in
# that directory. It deletes the corresponding InterBib dbs there and rebuilds them.


import sys
import os
import getopt
import bibdb
import bibconvert

# Maps each value accepted by the -p command-line option to the bibdb
# duplicate-handling policy object that it selects.
resolutions = {
    'error': bibdb.duplicatesCauseErrors,
    'resolve': bibdb.autoResolveRecordConflicts,
    'overwrite': bibdb.overwriteDuplicates,
    'discard': bibdb.discardDuplicates,
}

# Command-line synopsis printed when the arguments cannot be used.
# Each element is one output line; the leading whitespace is part of the text.
usage = "\n".join([
    "Usage: bibload [-q",
    "                -v",
    "                -d <directory>",
    "                -p error |",
    "                   resolve |",
    "                   overwrite |",
    "                   discard]",
    "                <outputFile>",
    "\t\t<bibFile1> <bibFile2>...",
])

class bibLoader:
  """Command-line driver that imports bibliography files into InterBib dbs.

  Typical use (see main()): create an instance, call inputErrorCheck() to
  parse and validate sys.argv, then call either bibliographyImport() (normal
  mode) or importDirectoryWholesale() (-d mode).
  """

  def __init__(self):
    """Set up default option state and send stdout to stderr."""
    self.runQuiet = 0
    # Remember the stdout which the final result is to go to.
    # Redirect stdout to stderr for the duration of processing:
    self.savedStdout = sys.stdout
    sys.stdout = sys.stderr
    self.bibFiles = []
    self.duplicatePolicy = None
    self.db = None
    self.doDirectory = 0
    # Initialised here so the import methods never hit a missing attribute:
    self.outFile = None
    self.filesToDo = []

  def inputErrorCheck(self):
    """Parse sys.argv and validate the options and file arguments.

    On success the instance holds everything the import methods need:
    duplicatePolicy, runQuiet, and either outFile plus bibFiles (normal
    mode) or filesToDo (-d mode).

    Returns 1 if processing can go ahead, 0 otherwise. A usage text or a
    diagnostic is printed on most failures; some diagnostics are suppressed
    by -q.
    """
    try:
      optlist, args = getopt.getopt(sys.argv[1:], 'd:vqp:')
    except getopt.GetoptError:
      # Unknown option or missing option argument.
      print(usage)
      return 0

    # Duplicates are errors unless -p says otherwise. The default is set
    # once, up front, so that other options cannot reset an explicit -p.
    self.duplicatePolicy = bibdb.duplicatesCauseErrors
    directoryArg = None
    for flag, value in optlist:
      if flag == '-d':
        self.doDirectory = 1
        # Keep the -d argument itself; it need not be the last option.
        directoryArg = value
      elif flag == '-v':
        self.runQuiet = 0
      elif flag == '-q':
        self.runQuiet = 1
      elif flag == '-p':
        if value not in resolutions:
          print("Legal duplicate key policies are: 'error', 'resolve', 'overwrite' and 'discard'")
          return 0
        self.duplicatePolicy = resolutions[value]

    if (len(args) < 2) and not self.doDirectory:
      print(usage)
      return 0

    if self.doDirectory:
      directory = os.path.expanduser(os.path.expandvars(directoryArg))
      if not os.path.isdir(directory):
        print(usage)
        return 0
      self.prepareDirectoryWholesale(directory)
      # Nothing to do counts as failure:
      if self.filesToDo:
        return 1
      return 0

    self.outFile = os.path.expanduser(os.path.expandvars(args[0]))
    outDirectory, outBaseName = os.path.split(self.outFile)
    # Check for non-existent directory in front of the out file name:
    if outDirectory != '' and not os.path.isdir(outDirectory):
      if not self.runQuiet:
        print("File '" + outDirectory + "' not a directory.")
      return 0
    # Make sure the out file has no extension:
    if os.path.splitext(outBaseName)[1]:
      if not self.runQuiet:
        print("Output file must not have an extension.")
      return 0

    for bibFile in args[1:]:

      # Make sure all env vars and ~ are resolved:
      bibFile = os.path.expanduser(os.path.expandvars(bibFile))

      if not os.path.isfile(bibFile):
        if not self.runQuiet:
          print("Bibliography file '" + bibFile + "' not found.")
        return 0

      # Check whether bib file is supported:
      if not checkBibFile(bibFile):
        if not self.runQuiet:
          print("Only BibTex, Refer and InterBib supported: '.bib', '.ref' or no extension, respectively")
        return 0

      self.bibFiles.append(bibFile)

    return 1

  def prepareDirectoryWholesale(self, dir):
    """Collect the '.bib' and '.ref' files of directory dir in self.filesToDo.

    Each entry is the directory joined with the file name. Only the
    directory itself is listed; subdirectories are not searched.
    """
    self.filesToDo = []
    for name in os.listdir(dir):
      if os.path.splitext(name)[1] in ('.bib', '.ref'):
        self.filesToDo.append(os.path.join(dir, name))

  def bibliographyImport(self):
    """Import every file in self.bibFiles into the db stored at self.outFile.

    Opens the db, imports the files in order using self.duplicatePolicy,
    closes the db and returns 1.
    """
    self.db = bibdb.BibDB(dbStore = self.outFile)

    for bibFile in self.bibFiles:

      # Pick the input char set by file type: Refer and InterBib files use
      # the Frame char set, anything else (BibTeX) uses the LaTeX one.
      if isReferFileName(bibFile) or isInterBibFileName(bibFile):
        self.db.inputCharSet = bibconvert.FrameCharSet
      else:
        self.db.inputCharSet = bibconvert.LatexCharSet

      # Set output char set to Latin1 which is appropriate for X-Windows:
      self.db.outputCharSet = bibconvert.Latin1CharSet

      bibsys = checkBibFile(bibFile)
      self.db.importBibliography(sourceFile = bibFile,
                                 bibsystem = bibsys,
                                 duplicatePolicy = self.duplicatePolicy,
                                 verbosity = (not self.runQuiet))

    self.db.close()
    return 1

  def importDirectoryWholesale(self):
    """Rebuild one InterBib db for each file in self.filesToDo.

    The db path is the source file's path without its extension. An
    existing file at that path is removed before the import.
    """
    # NOTE(review): two sources with the same stem (foo.bib and foo.ref) map
    # to the same db path, so the second import removes the first one's db.
    for bibFile in self.filesToDo:
      self.bibFiles = [bibFile]
      self.outFile = os.path.splitext(bibFile)[0]
      # Best-effort removal of a stale db. os.remove keeps the path away
      # from the shell; a missing or unremovable file is ignored.
      try:
        os.remove(self.outFile)
      except OSError:
        pass
      if not self.runQuiet:
        print("Importing file '" + bibFile + "'...")
      self.bibliographyImport()
  

def isBibtexFileName(fileName):
  """Return True if the base name of fileName has the extension '.bib'."""
  baseName = os.path.basename(fileName)
  extension = os.path.splitext(baseName)[1]
  return extension == '.bib'

def isReferFileName(fileName):
  """Return True if the base name of fileName has the extension '.ref'."""
  baseName = os.path.basename(fileName)
  extension = os.path.splitext(baseName)[1]
  return extension == '.ref'

def isInterBibFileName(fileName):
  """Return True if the base name of fileName has no extension at all."""
  baseName = os.path.basename(fileName)
  extension = os.path.splitext(baseName)[1]
  return extension == ''

# Return the name of the bibliography format of a file, as derived from the
# file name extension: '.bib' means BibTeX ('bibtex'), '.ref' means Refer
# ('refer'), and no extension means an InterBib db ('interbib'). Any other
# extension is unsupported and yields 0:

def checkBibFile(bibFile):
  extension = os.path.splitext(bibFile)[1]
  formatsByExtension = {'.bib': 'bibtex',
                        '.ref': 'refer',
                        '': 'interbib'}
  return formatsByExtension.get(extension, 0)

def main():
  """Validate the command line, then run the requested import.

  Returns 0 if the command line was rejected, 1 after an import has run.
  """
  loader = bibLoader()
  argumentsOk = loader.inputErrorCheck()
  if not argumentsOk:
    return 0

  # -d rebuilds one db per file of a directory; otherwise all the files
  # named on the command line go into a single db.
  if loader.doDirectory:
    runImport = loader.importDirectoryWholesale
  else:
    runImport = loader.bibliographyImport
  runImport()
  return 1
  
# Script entry point: runs unconditionally when this file is executed (and
# also if it is imported). The return value of main() is not used.
main()
