"""Convert HTML page to Word 97 document

This script is used during the build process of "Dive Into Python"
(http://diveintopython.org/) to create the downloadable Word 97 version
of the book (http://diveintopython.org/diveintopython.doc)

Looks for 2 or 3 arguments on the command line.  The first argument is the input
(HTML) file; the second argument is the output (.doc) file; the optional third
argument is the text to place in the page header.

Only runs on Windows.  Requires Microsoft Word 2000.

Safe to run on the same file(s) more than once.  The output file will be
silently overwritten if it already exists.
"""

__author__ = "Mark Pilgrim (f8dy@diveintopython.org)"
__version__ = "$Revision: 1.1.1.1 $"
__date__ = "$Date: 2002/02/21 18:45:45 $"
__copyright__ = "Copyright (c) 2001 Mark Pilgrim"
__license__ = "Python"

import sys, os
from win32com.client import gencache, constants

def makeRealWordDoc(infile, outfile, headline, pageAutoText="- SEITE -"):
	"""Open infile in Word, decorate it, and save it as a Word document.

	Drives Microsoft Word through COM.  In order, this:

	1. opens infile,
	2. inserts a table of contents at the current selection, built from
	   heading levels 1 and 2, with dot tab leaders,
	3. types headline into the current page header,
	4. inserts the Normal-template AutoText entry named pageAutoText into
	   the current page footer,
	5. switches to print view, updates the table of contents, and saves
	   the result to outfile in wdFormatDocument format.

	Arguments:
	infile -- path of the document to open (passed to Documents.Open)
	outfile -- path the converted document is saved to
	headline -- text typed into the page header
	pageAutoText -- name of the AutoText entry in Word's Normal template
	    that is inserted into the page footer.  The default, "- SEITE -",
	    looks like the German-localized page-number entry; other Word
	    language versions presumably name it differently (e.g. "- PAGE -")
	    -- verify against the installed Word before relying on it.

	The document is always closed and Word is always quit, even when one
	of the steps raises.
	"""
	word = gencache.EnsureDispatch("Word.Application")
	try:
		worddoc = word.Documents.Open(FileName=infile)
		try:
			# Build the table of contents where the selection currently is.
			worddoc.TablesOfContents.Add(
				Range=word.ActiveWindow.Selection.Range,
				RightAlignPageNumbers=1,
				UseHeadingStyles=1,
				UpperHeadingLevel=1,
				LowerHeadingLevel=2,
				IncludePageNumbers=1,
				AddedStyles='',
				UseHyperlinks=1,
				HidePageNumbersInWeb=1)
			worddoc.TablesOfContents(1).TabLeader = constants.wdTabLeaderDots
			worddoc.TablesOfContents.Format = constants.wdIndexIndent

			# Header: move the selection into the page header and type the
			# headline there.
			view = word.ActiveWindow.ActivePane.View
			view.SeekView = constants.wdSeekCurrentPageHeader
			word.Selection.TypeText(Text=headline)

			# Footer: insert the named AutoText entry at the selection.
			view.SeekView = constants.wdSeekCurrentPageFooter
			footerEntry = word.NormalTemplate.AutoTextEntries(pageAutoText)
			footerEntry.Insert(Where=word.ActiveWindow.Selection.Range)
			word.ActiveWindow.View.Type = constants.wdPrintView

			# Refresh the table of contents only after the header and footer
			# have been added, then write the result out.
			worddoc.TablesOfContents(1).Update()

			worddoc.SaveAs(FileName=outfile,
				FileFormat=constants.wdFormatDocument)
		finally:
			# 0 is wdDoNotSaveChanges in the Word object model: the output
			# was already written by SaveAs above.
			worddoc.Close(0)
			del worddoc
	finally:
		word.Quit()
		del word

if __name__ == "__main__":

	# Both the input and the output file are required; without this check a
	# missing argument surfaces as a bare IndexError traceback.
	if len(sys.argv) < 3:
		sys.exit("usage: %s infile.html outfile.doc [headline]" % sys.argv[0])

	# Default page-header text, overridden by the optional third argument.
	headline = "DocBook\t\thttp://www.stefan-rinke.de/"
	if sys.argv[3:]:
		headline = sys.argv[3]

	# Word is handed absolute paths, resolved against the current directory.
	infile = os.path.abspath(sys.argv[1])
	outfile = os.path.abspath(sys.argv[2])
	makeRealWordDoc(infile, outfile, headline)
