#! /usr/bin/env python
#===================================================
# OOo2sDBK : OpenOffice-Writer to simplified Docbook
#===================================================
#
# :author: Eric Bellot
# :email: ebellot@netcourrier.com
# :date: 2002-12-02 20:48:29
# :version: 0.4.2
# :Copyright: (C) 2002 Eric Bellot
#
# This script is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This script is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# See ``COPYING`` for more information

import zipfile
import os, os.path, sys
from string import join, upper, split, find
import codecs
from xml.dom import minidom
import time, base64
import re
import xml.sax


# ----------------------
# Command line arguments
# ----------------------

def execArgs():
	"""Read the command-line options and start the Docbook conversion.

	Options are declared through the project-local ``options`` module and
	read back from ``options.myOptions``. Unset options are mapped to the
	``0`` sentinel that ``convert`` uses for "not specified".
	"""
	import options
	options.GetOptions("d|dbkfile=s", "h|?|help", "f|flatxml", "c|commandxsl=s")
	argv = sys.argv
	if len(argv) <= 1:
		printHelp()
		sys.exit()
	# NOTE(review): presumably options.GetOptions rewrites sys.argv so that
	# index 0 holds the input filename -- confirm against the options module.
	sourceFile = argv[0]
	print(sourceFile)
	parsed = options.myOptions
	docbookName = parsed.d
	if docbookName is None:
		docbookName = 0
	# -f absent: delete the intermediate file (1); -f present: keep it (0).
	if parsed.f is None:
		dropTemp = 1
	else:
		dropTemp = 0
	commandName = parsed.c
	if commandName is None:
		commandName = 0
	if parsed.h is not None:
		# printHelp() terminates the process itself.
		printHelp()
	convert(sourceFile, command=commandName, docbook=docbookName, deltemp=dropTemp)
	
# -----------------
# Common functions
# -----------------

def modulePath():
	"""Return the absolute directory of the source file that defines ``execArgs``."""
	sourceFile = execArgs.func_code.co_filename
	return os.path.dirname(os.path.abspath(sourceFile))

def fileExist(file):
	"""Tell whether ``file`` names an existing regular file.

	An empty name is treated as a fatal error: a message is printed and the
	process exits through ``sys.exit()``. Otherwise the result of
	``os.path.isfile`` is returned.
	"""
	if file == "":
		print("Bad filename : %s" % (file,))
		sys.exit()
	return os.path.isfile(file)

def writeFile(file,strContent):
	"""Write ``strContent`` to the file named ``file``, replacing its content.

	The file is opened in text write mode ("w"). The handle is closed even
	when ``write`` raises, so a failed write does not leak the descriptor.
	"""
	handle = open(file,"w")
	try:
		handle.write(strContent)
	finally:
		handle.close()

# Current system identification
def verifSys():
	"""Identify the running platform and store it in the global ``currentSys``.

	``currentSys`` is set to "windows" when ``sys.platform`` is one of
	'win32', 'dos' or 'ms-dos', and to "linux" when it contains 'linux'.

	Raises:
		RuntimeError: for any other platform.
	"""
	global currentSys
	platformName = sys.platform
	if platformName in ['win32', 'dos', 'ms-dos']:
		currentSys = "windows"
	elif platformName.lower().find('linux') > -1:
		currentSys = "linux"
	else:
		# A built-in exception is used because no ``Error`` class exists in
		# this module; raising that name would only produce a NameError.
		raise RuntimeError('Only Win32 and Linux are supported.')
	
# Syntax path verification
def verifPath(path):
	"""Check ``path`` against the path syntax of the current system.

	The pattern is chosen from the global ``currentSys`` ("windows" or
	"linux"). On a match the matched text is returned; otherwise a message
	is printed and the process exits through ``sys.exit()``.
	"""
	global currentSys
	if currentSys == "windows":
		syntax = r"^(([a-zA-Z]:\\)?|(\.\.\\)*)([^\?:/\*\"<>\|]+[^\s\?:/\*\"<>\|]\\)*[^\?:/\*\"<>\|]+(\.[a-zA-Z0-9]+)?$"
		matched = re.match(syntax, path)
	elif currentSys == "linux":
		syntax = r"^(~|(\.\./)*)?([^\\\?:\*\"<>\|]+[^\\\s\?:\*\"<>\|]/)*[^\\\?:\*\"<>\|]+(\.[a-zA-Z0-9]+)*$"
		matched = re.match(syntax, path)
	if matched is not None:
		return matched.group()
	print("Bad path :\n%s" % (path,))
	sys.exit()
# Create a directory
def createDirectory(path):
	"""Create the directory ``path`` together with any missing parents.

	Nothing is done when ``path`` is empty or already is a directory.
	``os.makedirs`` is used so that absolute paths work on every platform:
	splitting an absolute POSIX path on the separator yields an empty first
	component, which cannot be passed to ``os.mkdir``.

	Raises:
		OSError: if a directory cannot be created (for example when a
			component of ``path`` exists but is not a directory).
	"""
	if path == "" or os.path.isdir(path):
		return
	os.makedirs(path)
	
# --------
# SETTINGS 
# --------

# Configuration file settings
def configSettings():
	"""Load ``config.xml`` and derive the configuration globals from it.

	Sets the globals ``contentXML``, ``metaXML``, ``stylesXML``,
	``configXML``, ``configElts``, ``sDocbookXSL``, ``imgRelDir`` and
	``imgRootName``. ``configElts`` becomes a list of
	``(element name, {attribute name: value})`` tuples, one per child
	element of the configuration root. Invalid image settings print a
	message and exit through ``sys.exit()``.
	"""
	global contentXML, metaXML, stylesXML,\
	sDocbookXSL, configXML, configElts,\
	imgRelDir, imgRootName
	# Names of the XML members of an OpenOffice archive (don't change)
	contentXML = "content.xml"
	metaXML = "meta.xml"
	stylesXML = "styles.xml"
	# The configuration file sits next to this module
	configXML = os.path.join(modulePath(), "config.xml")
	rootElt = minidom.parse(configXML).documentElement
	configElts = []
	for child in rootElt.childNodes:
		if child.nodeType != child.ELEMENT_NODE:
			continue
		attrs = {}
		for idx in range(child.attributes.length):
			attr = child.attributes.item(idx)
			attrs[attr.name] = attr.value
		configElts.append((child.nodeName, attrs))
	# Default XSLT stylesheet, selected per platform
	if currentSys == "linux":
		stylesheetName = "o2d4linux"
	else:
		stylesheetName = "o2d4windows"
	sDb = verifPath(configValue("xslt-stylesheet","stylesheetPath",name=stylesheetName))
	if sDb is None:
		print("Bad filename for 'xslt-stylesheet' in 'config.xml'")
	sDocbookXSL = os.path.join(modulePath(), sDb)
	# Both image settings must be purely alphanumeric
	alnumOnly = r"^[a-zA-Z0-9]+$"
	# Images relative directory
	imgRelDir = configValue("images","imagesRelativeDirectory")
	if re.match(alnumOnly, imgRelDir) is None:
		print("Only one depth relative directory (no '" + os.sep + "')"
			+ " and only alphanum chars for 'imagesRelativeDirectory' in 'config.xml'\n"
			+ "Actual name is : '" + imgRelDir + "'")
		sys.exit()
	# Images root name
	imgRootName = configValue("images","imageNameRoot")
	if re.match(alnumOnly, imgRootName) is None:
		print("Only alphanum chars for 'imageNameRoot' in 'config.xml'")
		print("Actual name is : %s" % (imgRootName,))
		sys.exit()

def configValue(element, attribute, name=""):
	"""Return the value of ``attribute`` for a configuration element.

	The global ``configElts`` list of ``(element name, attributes dict)``
	tuples is searched in order and the first entry whose name equals
	``element`` is used. When ``name`` is not empty, the entry must also
	carry a 'name' attribute equal to ``name``.

	If no entry matches, or the attribute is missing or empty, an error
	message is printed and the process exits through ``sys.exit()``.
	"""
	global configElts
	value = ""
	for eltName, eltAttrs in configElts:
		if eltName != element:
			continue
		# .get() lets entries without a 'name' attribute simply not match
		# instead of aborting the lookup with a KeyError.
		if name != "" and eltAttrs.get('name') != name:
			continue
		value = eltAttrs.get(attribute, "")
		break
	if value != "":
		return value
	msgName = ""
	if name != "":
		msgName = ", name '" + name + "'"
	msg = "Error in 'config.xml' : element '" +\
	element + "', attribute '" + attribute + "'" + msgName + "\nConversion failed."
	# Report the reason before exiting; exiting silently hides the cause.
	print(msg)
	sys.exit()

# User settings
def userSettings(ooofile, docbook, command, imagesrew, deltemp, dtd, xslParams):
	"""Turn the caller's arguments into the conversion globals.

	Sets ``docOOoSXW``, ``sDocbookXML``, ``globalXML``, ``rewriteImg``,
	``XSLCmdTemplate``, ``dtdPublic``, ``dtdSystem`` and ``XSLParams``.
	For ``docbook``, ``command``, ``dtd`` and ``xslParams`` the value ``0``
	means "not given" and selects a default. A missing input file prints an
	error and exits through ``sys.exit()``.
	"""
	global docOOoSXW, sDocbookXML, globalXML,\
	imgRelDir, rewriteImg, XSLCmdTemplate, dtdPublic, dtdSystem,\
	XSLParams
	# OpenOffice filename
	ooofile = verifPath(ooofile)
	if not fileExist(ooofile):
		print("\n>>  ERROR : Incorrect OpenOffice file : \n>>  " + ooofile + "\n")
		sys.exit()
	docOOoSXW = ooofile
	# Docbook filename: explicit, or the input name with an ".xml" extension
	if docbook != 0:
		sDocbookXML = verifPath(docbook)
		path = os.path.split(sDocbookXML)[0]
	else:
		path, sxwName = os.path.split(docOOoSXW)
		sDocbookXML = os.path.join(path, os.path.splitext(sxwName)[0]) + ".xml"
	# Destination directory
	if path != "" and not os.path.isdir(path):
		createDirectory(path)
	# Intermediate file: a generated temporary name when it will be deleted,
	# a fixed "global.xml" when it is kept
	if deltemp != 0:
		import tempfile
		tempfile.tempdir = path
		globalXML = tempfile.mktemp("g.xml")
	else:
		globalXML = os.path.join(path,"global.xml")
	# Images directory (computed here but not used further in this function)
	imgRelDir2 = os.path.join(path, imgRelDir)
	# Force image rewriting (0|1)
	rewriteImg = imagesrew
	# XSL processor command: named entry, or the default one
	commandKey = []
	if command != 0:
		commandKey = [command]
	XSLCmdTemplate = configValue("xslt-command", "command", *commandKey)
	# DTD identifiers: named entry, or the default one
	dtdKey = []
	if dtd != 0:
		dtdKey = [dtd]
	dtdPublic = configValue("dtd", "doctype-public", *dtdKey)
	dtdSystem = configValue("dtd", "doctype-system", *dtdKey)
	# XSLT params
	XSLParams = " "
	if xslParams != 0:
		XSLParams = xslParams

def initializeSets(ooofile, docbook, command, imagesrew, deltemp, dtd, xslParams):
	"""Run the three setup steps in order: platform, config file, user settings."""
	verifSys()
	configSettings()
	userSettings(ooofile=ooofile, docbook=docbook, command=command,
		imagesrew=imagesrew, deltemp=deltemp, dtd=dtd, xslParams=xslParams)

# ----------------------
# Conversion functions
# ----------------------

# Generic XML files extraction
def extractZip(docOOoSXW, XMLFile):
	"""Return the raw content of member ``XMLFile`` of the archive ``docOOoSXW``.

	Returns ``None`` when ``docOOoSXW`` is not a Zip file or when it has no
	member named ``XMLFile``.
	"""
	# Check that the file really is a Zip archive
	if not zipfile.is_zipfile(docOOoSXW):
		return None
	archive = zipfile.ZipFile(docOOoSXW,"r")
	try:
		# Only read the member when it is listed in the archive
		if XMLFile in archive.namelist():
			return archive.read(XMLFile)
		return None
	finally:
		# Close the archive on every path, including "member not found"
		archive.close()
# Extract and parse Zip XML files for concat
def listChildsNodes(docOOoSXW, XMLFile):
	"""Parse member ``XMLFile`` of ``docOOoSXW`` and return its top-level elements.

	The member is extracted with ``extractZip`` and parsed with minidom. When
	``XMLFile`` is the content file (``contentXML``), the image bookkeeping
	globals (``numImg``, ``dictImg``, ``dictNamespace``) are reset, the
	archive is opened as the global ``myZip`` and ``replaceImageNode`` is run
	over the parsed nodes.

	Returns:
		list of the element nodes that are direct children of the root.
	"""
	global dictImg, myZip, numImg, dictNamespace
	# Extract and parse the XML file
	strXML = extractZip(docOOoSXW, XMLFile)
	rootChildren = minidom.parseString(strXML).documentElement.childNodes
	# Images treatment
	if XMLFile == contentXML:
		numImg = 0
		dictImg = {}
		dictNamespace = {}
		myZip = zipfile.ZipFile(docOOoSXW,"r")
		try:
			replaceImageNode(rootChildren)
		finally:
			# Do not leave the archive open when image extraction fails
			myZip.close()
	# Keep only the element children of the root
	return [node for node in rootChildren if node.nodeType == node.ELEMENT_NODE]

# Replace the incorporated images links by the new images links
# Extract and copy all incorporated images
def replaceImageNode(vChildNodes):
	"""Extract embedded images and point their ``draw:image`` nodes at the copies.

	Walks ``vChildNodes`` recursively. For every ``draw:image`` whose
	``xlink:href`` contains "#Pictures/", the image is read from the global
	``myZip`` archive, written under ``imgRelDir`` with a numbered name built
	from ``imgRootName``, and the href is replaced by the new location. The
	global ``dictImg`` maps original image names to new hrefs so that an
	image used several times is extracted once; ``numImg`` counts the
	extracted images.
	"""
	global numImg
	for node in vChildNodes:
		if node.nodeName == "draw:image":
			hrefAttr = node.attributes["xlink:href"]
			oldHref = hrefAttr.value
			if oldHref.find("#Pictures/", 0) != -1:
				oldName = os.path.split(oldHref)[1]
				if oldName not in dictImg:
					extension = os.path.splitext(oldName)[1]
					numImg = numImg + 1
					newName = imgRootName + "%03i" % numImg + extension
					newHref = "/".join([imgRelDir, newName])
					newPath = os.path.join(imgRelDir, newName)
					if not os.path.isdir(imgRelDir):
						os.mkdir(imgRelDir)
					# The href starts with '#'; the rest is the member name
					imgData = myZip.read(oldHref[1:])
					if os.path.isfile(newPath) and rewriteImg == 1:
						os.remove(newPath)
					if not os.path.isfile(newPath):
						target = open(newPath, "wb")
						target.write(imgData)
						target.close()
					dictImg[oldName] = newHref
				hrefAttr.value = dictImg[oldName]
			else:
				# TODO: check where linked images are located and
				# copy them into the images directory if elsewhere
				pass
		if node.hasChildNodes():
			replaceImageNode(node.childNodes)

# Concatened XML file creation
def createGlobalXML(globalFile):
	"""Write one XML file combining the meta and content parts of the document.

	The top-level elements of ``metaXML`` and ``contentXML`` (read from the
	global ``docOOoSXW`` archive through ``listChildsNodes``) are appended to
	an ``office:document`` root. The serialized result is written to
	``globalFile`` as UTF-8, with the first serialized line replaced by the
	fixed header below so that the namespace declarations are kept verbatim.
	"""
	rootOpen="""\
<?xml version="1.0" encoding="UTF-8"?>
<office:document xmlns:office="http://openoffice.org/2000/office" xmlns:style="http://openoffice.org/2000/style" xmlns:text="http://openoffice.org/2000/text" xmlns:table="http://openoffice.org/2000/table" xmlns:draw="http://openoffice.org/2000/drawing" xmlns:fo="http://www.w3.org/1999/XSL/Format" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:number="http://openoffice.org/2000/datastyle" xmlns:svg="http://www.w3.org/2000/svg" xmlns:chart="http://openoffice.org/2000/chart" xmlns:dr3d="http://openoffice.org/2000/dr3d" xmlns:math="http://www.w3.org/1998/Math/MathML" xmlns:form="http://openoffice.org/2000/form" xmlns:script="http://openoffice.org/2000/script" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:meta="http://openoffice.org/2000/meta" office:class="text" office:version="1.0">
"""
	rootClose="""\
</office:document>
"""
	# Empty root element that receives the collected children
	shell = minidom.parseString(rootOpen + rootClose).documentElement
	collected = listChildsNodes(docOOoSXW, metaXML) + listChildsNodes(docOOoSXW, contentXML)
	for elt in collected:
		shell.appendChild(elt)
	# Drop the first serialized line and put the fixed header in its place
	serialized = shell.toxml()
	remainder = u"\n".join(serialized.split("\n")[1:])
	target = codecs.open(globalFile, "w", "utf-8")
	target.write(u"\n".join([rootOpen, remainder]))
	target.close()

# Temporary file destruction
def tempFilesDelete(deltemp):
	"""Remove the file named by the global ``globalXML`` when ``deltemp`` is 1."""
	if deltemp != 1:
		return
	os.remove(globalXML)

# Create current XSL command
def XSLCmd(input, output, stylesheet):
	"""Build the XSLT command line from the global ``XSLCmdTemplate``.

	Placeholders in the template are replaced as follows:
	``%o`` output file, ``%i`` input file, ``%s`` stylesheet,
	``%p`` ``dtdPublic``, ``%y`` ``dtdSystem``, ``%v`` ``XSLParams``.

	Substitution is done in a single pass, so every occurrence of a
	placeholder is replaced and placeholder-like text inside a substituted
	value (for instance a filename containing "%s") is left untouched.

	The resulting command is printed and returned.
	"""
	# NOTE: the values are inserted verbatim (no shell quoting); the caller
	# hands the result to os.system, so any quoting must be in the template.
	def substitute(match):
		var = match.group(0)
		if var == "%o":
			return output
		elif var == "%i":
			return input
		elif var == "%s":
			return stylesheet
		elif var == "%p":
			return dtdPublic
		elif var == "%y":
			return dtdSystem
		return XSLParams
	# A function replacement is used so that backslashes in the values
	# (Windows paths) are not interpreted as regex escapes.
	cmd = re.sub(r"%[oispyv]", substitute, XSLCmdTemplate)
	print(cmd)
	return cmd

# Generic conversion
def o2dConvert(input, output, stylesheet):
	"""Run the XSLT command built by ``XSLCmd`` and print the elapsed time.

	The command is executed with ``os.system``; its exit status is ignored.
	"""
	began = time.time()
	os.system(XSLCmd(input, output, stylesheet))
	elapsed = round(time.time() - began, 2)
	print("       ==> %s sec." % (elapsed,))

# -------------
# User commands
# -------------

# OpenOffice to Flat-File conversion
def extract(ooofile, docbook=0, globalXML=0, imgpath=0, imagesrew=1,\
			  deltemp=1, dtd=0, xslParams=0):
	"""Convert an OpenOffice file to the flat (concatenated) XML file only.

	Runs the initialization and then ``createGlobalXML``; no XSLT step is
	performed and the generated file is not deleted.

	Parameters:
		ooofile: path of the OpenOffice input file.
		docbook, imagesrew, deltemp, dtd, xslParams: passed through to
			``initializeSets``.
		globalXML: path of the flat XML file to write. ``0`` (default)
			selects the path computed during initialization.
		imgpath: directory that receives the extracted images. ``0``
			(default) keeps the directory taken from the configuration.

	The image name root is always set to "img".
	"""
	global imgRelDir, imgRootName
	command=""

	print("""
OOo2sDBK - OpenOffice to simple Docbook conversion
--------------------------------------------------
""")
	print("Run conversion...")
	print("   1 - Initialization")
	startTime = time.time()
	initializeSets(ooofile, docbook, command, imagesrew, deltemp, dtd, xslParams)
	# The 0 defaults are "not given" sentinels and cannot be used as paths:
	# fall back to the values prepared by the initialization.
	if globalXML == 0:
		# The parameter shadows the module-level name, hence globals().
		globalXML = globals()["globalXML"]
	if imgpath == 0:
		imgpath = imgRelDir
	endTime = time.time()
	duree = round(endTime - startTime, 2)
	print("       - OpenOffice file : %s" % (docOOoSXW,))
	print("       - Global file : %s" % (globalXML,))
	print("       - imgDir: %s" % (imgpath,))
	print("       ==> %s sec.\n" % (duree,))

	print("   2 - Unzip and concat OpenOffice XML's files")

	imgRootName = "img"
	imgRelDir = imgpath
	startTime = time.time()
	createGlobalXML(globalXML)
	endTime = time.time()
	duree = round(endTime - startTime, 2)
	print("       ==> %s sec.\n" % (duree,))

	# The flat file is the result of this command, so it is not deleted here.
	print("Conversion completed\n")

# OpenOffice to Docbook conversion
def convert(ooofile, command=0, docbook=0, imagesrew=1, deltemp=1, dtd=0, xslParams=0):
	"""Convert an OpenOffice file to Docbook.

	Steps: initialization, creation of the concatenated XML file, XSLT
	transformation with the default stylesheet (``sDocbookXSL``), then
	removal of the intermediate file when ``deltemp`` is 1. Progress and
	timings are printed along the way. The arguments are passed unchanged to
	``initializeSets``.
	"""
	banner = """
OOo2sDBK - OpenOffice to simple Docbook conversion
--------------------------------------------------
"""
	print(banner)
	print("Run conversion...")

	print("   1 - Initialization")
	began = time.time()
	initializeSets(ooofile, docbook, command, imagesrew, deltemp, dtd, xslParams)
	elapsed = round(time.time() - began, 2)
	print("       - OpenOffice file : %s" % (docOOoSXW,))
	print("       -  file : %s" % (sDocbookXML,))
	print("       ==> %s sec.\n" % (elapsed,))

	print("   2 - Unzip and concat OpenOffice XML's files")
	began = time.time()
	createGlobalXML(globalXML)
	elapsed = round(time.time() - began, 2)
	print("       ==> %s sec.\n" % (elapsed,))

	print("   3 - Docbook file creation")
	o2dConvert(globalXML, sDocbookXML, sDocbookXSL)

	tempFilesDelete(deltemp)
	print("Conversion completed\n")

# Free XSL conversion
def convert2(input, output, XSLPath, command, xslParams):
	"""Apply the stylesheet ``XSLPath`` to ``input`` and write ``output``.

	Initialization is run with ``input`` and ``output`` in place of the
	OpenOffice and Docbook filenames (image rewriting, temp deletion and DTD
	selection all passed as 0), then the XSLT command is executed.
	"""
	banner = """
OOo2sDBK - Free conversion
--------------------------
"""
	print(banner)
	print("Run conversion...")
	print("   1 - Initialization")
	began = time.time()
	initializeSets(input, output, command, 0, 0, 0, xslParams)
	elapsed = round(time.time() - began, 2)
	print("       - Input file : %s" % (input,))
	print("       - Stylesheet : %s" % (XSLPath,))
	print("       - Output file : %s \n" % (output,))
	print("       ==> %s sec." % (elapsed,))
	print("   2 - Conversion")
	o2dConvert(input, output, XSLPath)
	print("Conversion completed")

def printHelp():
	"""Print the command-line usage text, then exit through ``sys.exit()``."""
	# The long form of -c is spelled as it is registered in execArgs
	# ("c|commandxsl=s").
	helpStr ="""
ooo2sdbk
--------

Convert OpenOffice-Writer's files (*.sxw) to simplified Docbook

Usage : ooo2sdbk [OPTION] openoffice-filename

  openoffice-filename    Input OpenOffice-Writer filename (needed).
  
  -h, -?, --help         Print help.
  
  -d, --dbkfile FILE     Output Docbook filename.
                         Without this option the Docbook file as
                         the same name as the OOo file (with an 
                         *.xml extension).
					 
  -f, --flatxml          Preserve the intermediate OpenOffice-XML
                         file ("global.xml").

  -c, --commandxsl NAME  Command name for the XLST processor. 
                         Commands names are defined in the
                         ``<ooo2sdbk dir>/config.xml`` file.
                         Default : "xsltproc".
						 
Examples:

  1) ooo2sdbk mydoc.sxw
	
  2) ooo2sdbk -d mydoc-dbk.xml mydoc.sxw 
	
  3) ooo2sdbk -d mydoc2.xml -c saxon mydoc.sxw

	"""
	print(helpStr)
	sys.exit()

# Shell conversion
# Run the command-line front end only when this file is executed as a
# script; importing it as a module just defines the functions above.
if __name__=="__main__":
	execArgs()

"""
Changelog

2002-12-01
- Add command line front end
"""
