Bib2LaTeX Converter
Revision as of 16:29, 25 February 2008 by PeterHarding (talk | contribs) (New page: =Overview= ==Script== <pre> #!/usr/bin/env python # bib2ltx.py # Author: Christopher Arnt <chris.arndt@web.de> # Version: 0.2b # Date: Monday, 21.01.2002 # Copyleft: GPL """Thi...)
Overview
Script
#!/usr/bin/env python
# bib2ltx.py
# Author: Christopher Arndt <chris.arndt@web.de>
# Version: 0.2b
# Date: Monday, 21.01.2002
# Copyleft: GPL
"""This script parses a list of bibliographic entries and outputs it in
LaTeX.
"""
import xml.sax
import UserDict, UserList
def latex_escape(s):
    """Escape LaTeX special characters in *s* and return the new string.

    Every character listed in the table below is replaced by markup that
    typesets it literally.  ``$`` and a literal backslash are passed
    through unchanged.
    """
    # Raw strings throughout: in a normal literal the ``\b`` of
    # ``\backslash`` and the ``\v`` of ``\verb`` are control-character
    # escapes (backspace, vertical tab) and would corrupt the output.
    #
    # The order is significant: '|', '{' and '}' are handled before the
    # entries whose markup introduces those characters (``\verb|..|`` and
    # ``\dq{}``), so inserted markup is never escaped a second time.
    replacements = (
        # NOTE(review): turning '|' into a backslash looks unusual --
        # confirm that '|' (and not '\\') is the intended input character.
        ('|', r'$\backslash$'),
        ('{', r'\{'),
        ('}', r'\}'),
        ('#', r'\#'),
        ('%', r'\%'),
        ('&', r'\&'),
        ('_', r'\_'),
        ('"', r'\dq{}'),
        ('^', r'\verb|^|'),
        ('~', r'\verb|~|'),
    )
    for char, markup in replacements:
        s = s.replace(char, markup)
    return s
class BibItemField:
    """Container for a single tag value and its attributes.

    value -- text content of the tag (None when not given)
    attrs -- mapping of the tag's attributes; a fresh empty dict is
             created when omitted
    """

    def __init__(self, value=None, attrs=None):
        self.value = value
        # A ``{}`` default would be one dict object shared by every
        # instance created without attrs, so build a new one per instance.
        if attrs is None:
            attrs = {}
        self.attrs = attrs
class BibItem(UserDict.UserDict):
    """One bibliography entry: a mapping from tag name to a list of fields.

    The ``itemType`` attribute records the kind of entry (default 'book').
    """

    def __init__(self, itemType='book', dict=None):
        UserDict.UserDict.__init__(self, dict)
        self.itemType = itemType

    def getField(self, key, default="", sep=", "):
        """Return the LaTeX-escaped text of the fields stored under *key*.

        The field values are joined with *sep*.  When more than three
        values are present only the first one is used, followed by
        '[u.a.]'.  If *key* is absent, the escaped *default* is returned.
        """
        if key not in self.data:
            return latex_escape(default)
        fields = self.data[key]
        values = [field.value for field in fields]
        if len(values) > 3:
            text = fields[0].value + '[u.a.]'
        else:
            text = sep.join(values)
        return latex_escape(text)

    def __str__(self):
        """Return one "'key': value" line per stored tag."""
        lines = ["'%s': %s\n" % (name, self.getField(name))
                 for name in self.data.keys()]
        return "".join(lines)
class Bibliography(UserList.UserList):
    """A list of BibItems that can be sorted and written out as LaTeX."""

    # Tag names that open a bibliography entry.
    itemTypes = ['book', 'article', 'injournal', 'webpage']
    # Name of the output format (not read anywhere inside this class).
    format = 'latex'
    # When true, output() wraps the list in preamble and postamble.
    standalone = 1
    # When true, output() sorts the entries before writing them.
    sortOutput = 1
    preamble = r"""\documentclass[german,a4paper]{scrartcl}
\usepackage{babel}
\usepackage[T1]{fontenc}
\usepackage[latin1]{inputenc}
\usepackage{times}
\begin{document}
"""
    postamble = "\\end{document}\n"

    def __init__(self, items=None):
        UserList.UserList.__init__(self, items)

    def _authorKey(self, item):
        """Return the lower-cased first author of *item*, or 'anonymous'."""
        authors = item.get('author')
        if authors:
            return authors[0].value.lower()
        return 'anonymous'

    def sortItems(self, x, y):
        """Compare two entries cmp-style: by first author, then by year.

        Returns a negative, zero or positive number.  Entries whose
        authors compare equal and where a year is missing count as equal.
        """
        r = cmp(self._authorKey(x), self._authorKey(y))
        if r != 0:
            return r
        try:
            return cmp(x['year'][0].value, y['year'][0].value)
        except (KeyError, IndexError):
            # At least one entry has no 'year' field.  Only the lookup
            # failures are caught so unrelated errors are not hidden.
            return 0

    def output(self, fp):
        """Write the whole bibliography to the file-like object *fp*.

        *fp* is remembered as ``self.out`` for use by outputItem().
        """
        self.out = fp
        if self.sortOutput:
            self.sort(self.sortItems)
        # print out the LaTeX preamble
        if self.standalone:
            self.out.write(self.preamble)
        # NOTE(review): the heading is written regardless of `standalone`
        # -- confirm that this is the intended nesting.
        self.out.write("\\section*{Literatur}\n\n")
        self.out.write("\\begin{enumerate}\n\\raggedright\n")
        for i in range(len(self)):
            self.outputItem(i)
        self.out.write("\\end{enumerate}\n")
        if self.standalone:
            self.out.write(self.postamble)

    def outputItem(self, i):
        """Write the entry at index *i* as one enumerate item to self.out.

        ``self.out`` must already be set (output() does this).
        """
        item = self[i]
        kind = item.itemType
        write = self.out.write

        write(r"\item ")
        # XXX check for "[ders.]"
        # author and title
        write(r"\textsc{%s}, " % item.getField('author', 'Anonymous'))
        write(r"\emph{%s}, " % item.getField('title'))

        # title and author (editor) of the containing book
        if kind == 'article':
            write("in: %s, " % item.getField('intitle'))
            if 'inauthor' in item and \
               item['inauthor'][0].attrs.get('is_ed'):
                write("hrsg. v. ")
            # NOTE(review): written for every article, even without an
            # 'inauthor' field -- confirm the intended nesting.
            write("%s, " % item.getField('inauthor', '', ' -- '))
        if kind == 'injournal':
            write('%s ' % item.getField('journal'))

        # volume number
        if 'volume' in item:
            if kind in ['book', 'article']:
                write('Bd. ')
            write('%s, ' % item.getField('volume'))

        # where published and when
        if kind in ['book', 'article']:
            write("%s " % item.getField('city', 'o.O.', " -- "))
        if kind in ['book', 'article', 'injournal']:
            write("%s" % item.getField('year', 'o.J.'))
        if kind == 'webpage':
            write("%s, " % item.getField('url'))
            write(item.getField('date'))

        # page numbers
        if kind in ['article', 'injournal']:
            write(", %s." % item.getField('pages', 'XXX'))
        else:
            write(".")

        # signature, when present
        if 'signature' in item:
            write("\\\\\nSign.: %s\n" % item.getField('signature', '', " / "))
        write("\n\n")
class ContentHandler(xml.sax.ContentHandler):
    """Handler for SAX parsing.

    Collects bibliography entries in a Bibliography object, available as
    ``self.items`` once parsing has finished.
    """

    def __init__(self):
        # Let the base class initialise its own state (document locator).
        xml.sax.ContentHandler.__init__(self)
        self.item = None     # entry currently being built, if any
        self.items = Bibliography()
        self.current = None  # name of the field tag currently open
        self.text = []       # character chunks collected for that tag
        self.attrs = None    # attributes of that tag

    def _malformed(self):
        """Build the exception raised for badly nested entry tags."""
        # SAXParseException takes (msg, exception, locator) and queries the
        # locator in its constructor, so substitute an empty Locator when
        # the parser has not supplied one.
        locator = getattr(self, '_locator', None)
        if locator is None:
            locator = xml.sax.xmlreader.Locator()
        return xml.sax.SAXParseException("Malformed XML", None, locator)

    def startElement(self, name, attrs):
        """Handle an event for the start tag.

        An entry tag starts a new BibItem; any other tag inside an entry
        becomes the current field.  Raises SAXParseException when an entry
        tag is opened inside an entry of the same type.
        """
        if name in self.items.itemTypes:
            if name == getattr(self.item, 'itemType', None):
                raise self._malformed()
            self.item = BibItem(name)
        elif self.item is not None:
            self.current = name
            self.attrs = attrs

    def endElement(self, name):
        """Handle an event for the closing tag.

        Closing an entry tag appends the finished entry to ``self.items``;
        closing a field tag stores the collected text.  Raises
        SAXParseException when an entry tag closes without a matching
        open entry.
        """
        if name in self.items.itemTypes:
            if name != getattr(self.item, 'itemType', None):
                raise self._malformed()
            self.items.append(self.item)
            self.item = None
        elif self.item is not None and self.current is not None:
            # `current` is None when an inner tag has already been closed;
            # skipping then avoids storing a field under the key None.
            self.setField()
        self.current = None
        self.text = []
        self.attrs = None

    def characters(self, data, *args):
        """Handle a character data event."""
        # Text outside a field tag is ignored.
        if data and self.current:
            self.text.append(data.encode('iso8859_1'))

    def setField(self):
        """Append the collected text as a new field of the current entry."""
        field = BibItemField("".join(self.text), self.attrs)
        self.item.setdefault(self.current, []).append(field)
def parseXML(file):
    """Run a SAX parse over *file* and return the collected Bibliography.

    *file* is handed unchanged to the parser's parse() method.
    """
    handler = ContentHandler()
    parser = xml.sax.make_parser()
    parser.setContentHandler(handler)
    parser.parse(file)
    return handler.items
def main(args):
    """Parse a file from the commandline and output LaTeX on stdout.

    args -- command-line arguments without the program name; args[0] is
            the XML file to convert.

    Returns 0 on success, 1 when the XML could not be parsed and 2 when
    no file name was given.
    """
    # Imported locally: the module-level ``import sys`` only runs when the
    # file is executed as a script, not when main() is called after import.
    import sys

    if not args:
        sys.stderr.write("Usage: bib2ltx.py FILE\n")
        return 2
    try:
        bib = parseXML(args[0])
    except xml.sax.SAXParseException:
        # sys.exc_info() instead of an ``except ... as`` clause keeps this
        # valid on old and new Python versions alike.
        msg = sys.exc_info()[1]
        sys.stderr.write("%s\nProcessing aborted!\n" % (msg))
        # Stop here: there is no bibliography to write.
        return 1
    bib.output(sys.stdout)
    return 0
if __name__ == '__main__':
    import sys
    # Hand main()'s return value to the shell as the exit status; a return
    # value of None counts as success.
    sys.exit(main(sys.argv[1:]))