Bib2LaTeX Converter
Revision as of 16:29, 25 February 2008 by PeterHarding (talk | contribs) (New page: =Overview= ==Script== <pre> #!/usr/bin/env python # bib2ltx.py # Author: Christopher Arnt <chris.arndt@web.de> # Version: 0.2b # Date: Monday, 21.01.2002 # Copyleft: GPL """Thi...)
Overview
Script
#!/usr/bin/env python
# bib2ltx.py
# Author: Christopher Arnt <chris.arndt@web.de>
# Version: 0.2b
# Date: Monday, 21.01.2002
# Copyleft: GPL
"""This script parses a list of bibliographic entries and outputs it in LaTeX.
"""

import functools
import xml.sax
import xml.sax.xmlreader

try:  # Python 3
    from collections import UserDict as _UserDict, UserList as _UserList
except ImportError:  # Python 2
    from UserDict import UserDict as _UserDict
    from UserList import UserList as _UserList

# True on Python 2, where ``str`` is the byte-string type.
_PY2 = str is bytes

# Replacement text for every character latex_escape() rewrites.  Raw strings
# are essential: in a normal literal '\b' and '\v' are the backspace and
# vertical-tab control characters, not a backslash followed by a letter.
# NOTE: '$' is deliberately not escaped (the original had that rule disabled).
# NOTE(review): '|' is mapped to $\backslash$; possibly '\\' was intended
# as the source character -- confirm against real input data.
_LATEX_ESCAPES = {
    '|': r'$\backslash$',
    '{': r'\{',
    '}': r'\}',
    '#': r'\#',
    '%': r'\%',
    '&': r'\&',
    '_': r'\_',
    '"': r'\dq{}',
    '^': r'\verb|^|',
    '~': r'\verb|~|',
}


def _cmp(a, b):
    """Three-way comparison returning -1, 0 or 1 (stand-in for Py2 cmp())."""
    return (a > b) - (a < b)


def latex_escape(s):
    """Escape LaTeX special characters.

    Each character listed in ``_LATEX_ESCAPES`` is replaced by its LaTeX
    form; all other characters are copied unchanged.  The string is
    processed in a single pass, so text inserted by one replacement is
    never re-escaped by another.
    """
    return "".join([_LATEX_ESCAPES.get(ch, ch) for ch in s])


class BibItemField(object):
    """Container for single tag value and attributes"""

    def __init__(self, value=None, attrs=None):
        """Store the tag text ``value`` and its attribute mapping ``attrs``.

        When ``attrs`` is omitted every instance gets its own empty dict
        (a shared mutable default would leak attributes between fields).
        """
        self.value = value
        self.attrs = {} if attrs is None else attrs


class BibItem(_UserDict):
    """Item (entry) of the bibliography. Basically a dict of tag lists.

    Keys are tag names; each value is a list of BibItemField objects, one
    per occurrence of the tag.
    """

    def __init__(self, itemType='book', dict=None):
        """Create an item of kind ``itemType``, optionally pre-filled
        from the mapping ``dict``."""
        _UserDict.__init__(self, dict)
        self.itemType = itemType

    def getField(self, key, default="", sep=", "):
        """Return the LaTeX-escaped text of field ``key``.

        Multiple values are joined with ``sep``.  With more than three
        values only the first one is used, followed by '[u.a.]'.  If the
        field is missing, the escaped ``default`` is returned.
        """
        if key not in self.data:
            return latex_escape(default)
        values = [field.value for field in self.data[key]]
        if len(values) > 3:
            text = values[0] + '[u.a.]'
        else:
            text = sep.join(values)
        return latex_escape(text)

    def __str__(self):
        """Return one "'key': value" line per field (values escaped)."""
        return "".join(["'%s': %s\n" % (key, self.getField(key))
                        for key in self.data])


class Bibliography(_UserList):
    """The bibliography is basically a list of BibItems.

    It has a method for sorting the bibliography and for outputting as a
    LaTeX document.
    """

    # Tag names that open a bibliography entry.
    itemTypes = ['book', 'article', 'injournal', 'webpage']
    format = 'latex'
    # When true, output() wraps the list in preamble/postamble.
    standalone = 1
    # When true, output() sorts the items in place before writing.
    sortOutput = 1

    preamble = r"""\documentclass[german,a4paper]{scrartcl}
\usepackage{babel}
\usepackage[T1]{fontenc}
\usepackage[latin1]{inputenc}
\usepackage{times}
\begin{document}
"""
    postamble = "\\end{document}\n"

    def __init__(self, items=None):
        """Create the bibliography, optionally from a sequence of items."""
        _UserList.__init__(self, items)

    @staticmethod
    def _authorKey(item):
        """Return the lower-cased first author, or 'anonymous' if none."""
        authors = item.get('author')
        if authors:
            return authors[0].value.lower()
        return 'anonymous'

    def sortItems(self, x, y):
        """Compare two items: first author (case-insensitive), then year.

        Returns -1, 0 or 1.  Items whose authors are equal and where either
        one lacks a year compare as equal.
        """
        order = _cmp(self._authorKey(x), self._authorKey(y))
        if order != 0:
            return order
        try:
            return _cmp(x['year'][0].value, y['year'][0].value)
        except (KeyError, IndexError):
            # at least one item has no year: leave their order alone
            return 0

    def output(self, fp):
        """Write the bibliography as LaTeX to the file-like object ``fp``.

        ``fp`` only needs a ``write()`` method; it is also stored as
        ``self.out`` for use by outputItem().
        """
        self.out = fp
        if self.sortOutput:
            # cmp_to_key keeps sortItems() usable on Python 3, where
            # sort() no longer accepts a comparison function.
            self.sort(key=functools.cmp_to_key(self.sortItems))
        # print out the LaTeX preamble
        if self.standalone:
            self.out.write(self.preamble)
        self.out.write("\\section*{Literatur}\n\n")
        self.out.write("\\begin{enumerate}\n\\raggedright\n")
        for i in range(len(self)):
            self.outputItem(i)
        self.out.write("\\end{enumerate}\n")
        if self.standalone:
            self.out.write(self.postamble)

    def outputItem(self, i):
        """Write the ``\\item`` line for the entry at index ``i`` to
        ``self.out`` (set by output())."""
        item = self[i]
        write = self.out.write
        write(r"\item ")
        # XXX check for "[ders.]"
        # author and title
        write(r"\textsc{%s}, " % item.getField('author', 'Anonymous'))
        write(r"\emph{%s}, " % item.getField('title'))
        # title and author (editor) of containing book
        if item.itemType == 'article':
            write("in: %s, " % item.getField('intitle'))
            if 'inauthor' in item:
                if item['inauthor'][0].attrs.get('is_ed'):
                    write("hrsg. v. ")
                # only written when present, so a missing 'inauthor' no
                # longer leaves a stray ", " in the output
                write("%s, " % item.getField('inauthor', '', ' -- '))
        if item.itemType == 'injournal':
            write('%s ' % item.getField('journal'))
        # volume number
        if 'volume' in item:
            if item.itemType in ['book', 'article']:
                write('Bd. ')
            write('%s, ' % item.getField('volume'))
        # where published and when
        if item.itemType in ['book', 'article']:
            write("%s " % item.getField('city', 'o.O.', " -- "))
        if item.itemType in ['book', 'article', 'injournal']:
            write("%s" % item.getField('year', 'o.J.'))
        if item.itemType == 'webpage':
            write("%s, " % item.getField('url'))
            write(item.getField('date'))
        # page numbers
        if item.itemType in ['article', 'injournal']:
            write(", %s." % item.getField('pages', 'XXX'))
        else:
            write(".")
        # signature, when present
        if 'signature' in item:
            write("""\\\\\nSign.: %s\n""" %
                  item.getField('signature', '', " / "))
        write("\n\n")


class ContentHandler(xml.sax.ContentHandler):
    """Handler for SAX parsing.

    Collects bibliography entries in a Bibliography object (``self.items``).
    """

    def __init__(self):
        # The base initializer sets up the locator slot that
        # setDocumentLocator() fills in and that error reports rely on.
        xml.sax.ContentHandler.__init__(self)
        self.item = None        # entry currently being built, if any
        self.items = Bibliography()
        self.current = None     # name of the field tag currently open
        self.attrs = None       # attributes of the field tag currently open
        self.text = []          # character data collected for that field

    def _malformed(self):
        """Build the exception raised for wrongly nested entry tags.

        SAXParseException needs a locator object; fall back to a blank one
        if the parser never supplied any.
        """
        locator = getattr(self, '_locator', None)
        if locator is None:
            locator = xml.sax.xmlreader.Locator()
        return xml.sax.SAXParseException("Malformed XML", None, locator)

    def startElement(self, name, attrs):
        """Handle an event for the start tag."""
        if name in self.items.itemTypes:
            # entries must not be nested inside one another
            if self.item is not None:
                raise self._malformed()
            self.item = BibItem(name)
        elif self.item is not None:
            self.current = name
            self.attrs = attrs

    def endElement(self, name):
        """Handle an event for the closing tag."""
        if name in self.items.itemTypes:
            if name != getattr(self.item, 'itemType', None):
                raise self._malformed()
            self.items.append(self.item)
            self.item = None
        elif self.item is not None and self.current is not None:
            # ``current`` is None when an element nested inside a field
            # has already been closed; skip instead of storing under None
            self.setField()
        self.current = None
        self.text = []
        self.attrs = None

    def characters(self, data, *args):
        """Handle a character data event."""
        if data and self.current:
            if _PY2:
                # Python 2: keep byte strings in the encoding that the
                # LaTeX preamble declares (latin1).
                data = data.encode('iso8859_1')
            self.text.append(data)

    def setField(self):
        """Append the collected text and attributes as a new value of the
        current field on the current item."""
        field = BibItemField("".join(self.text), self.attrs)
        self.item.setdefault(self.current, []).append(field)
def parseXML(file):
    """Parse the XML file and return a Bibliography object.

    ``file`` is passed unchanged to the SAX parser's ``parse()`` method.
    Errors raised by the parser or by the content handler (such as
    xml.sax.SAXParseException) propagate to the caller.
    """
    handler = ContentHandler()
    parser = xml.sax.make_parser()
    parser.setContentHandler(handler)
    parser.parse(file)
    return handler.items


def main(args):
    """Parse a file from the commandline and output LaTeX on stdout.

    ``args`` is the argument list without the program name; its first
    element is the XML file to convert.  Returns a process exit status:
    0 on success, 1 if the XML could not be parsed, 2 if no file was given.
    """
    # Imported here so main() also works when this module is imported
    # rather than run as a script.
    import sys

    if not args:
        sys.stderr.write("usage: bib2ltx.py FILE\n")
        return 2
    try:
        bib = parseXML(args[0])
    except xml.sax.SAXParseException as msg:
        sys.stderr.write("%s\nProcessing aborted!\n" % (msg))
        # nothing was parsed, so there is nothing to print
        return 1
    # The LaTeX preamble declares latin1 input encoding; where stdout is a
    # reconfigurable text stream (Python 3.7+), make it emit that encoding.
    if hasattr(sys.stdout, "reconfigure"):
        sys.stdout.reconfigure(encoding="iso8859_1")
    bib.output(sys.stdout)
    return 0


if __name__ == '__main__':
    import sys
    sys.exit(main(sys.argv[1:]))