#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
import sys
import bibtexparser
from bibtexparser.bparser import BibTexParser
import bibtexparser.customization
areas = { u'cv':        (u'Computer Vision', u'Images/Vision-100.png'),
          u'ai':        (u'Artificial Intelligence', u'Images/AI-100.png'),
          u'ml':        (u'Machine Learning', u'Images/ML-80.png'),
          u'forensics': (u'Forensics', u'Images/Forensics-80.png'),
          u'access':    (u'Accessibility', u'Images/Access-80.png'),
          u'cg':        (u'Computer Graphics', u'Images/Graphics-80.png'),
          u'asl':       (u'Sign Language', u'Images/ASL-100.png'),
          u'wavelets':  (u'Wavelets', u'Images/Wavelet-100.png'),
          u'sport':     (u'Sport', u'Images/Sport-80.png'),
          u'uncert':    (u'Uncertainties', u'Images/Uncert-100.png'),
          u'virtcrd':   (u'Virtual Crowds', u'Images/Crowd-80.png'),
          u'med':       (u'Biomedical', u'Images/Med-64.png'),
          u'biblio':    (u'Bibliometrics', u'Images/Book-80.png')
          }
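
# A sketch of how the mapping is used (hypothetical entry): if a record's
# BibTeX "keywords" field is "cv, ml", toHTML() below should emit the
# Computer Vision and Machine Learning icons next to that reference.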
areas_keys = areas.keys()


def stripnline(record):
    """Collapse newlines in the author and title fields into single spaces."""
    if u"author" in record:
        record[u"author"] = record[u"author"].replace(u'\n', u' ').strip()
    if u"title" in record:
        record[u"title"] = record[u"title"].replace(u'\n', u' ').strip()
    return record


def customizations(record):
    """Apply bibtexparser customization functions to a record.

    :param record: a record
    :returns: the customized record
    """
    record = bibtexparser.customization.type(record)
    # record = author(record)
    # record = editor(record)
    # record = journal(record)
    record = bibtexparser.customization.keyword(record)
    # record = link(record)
    # record = page_double_hyphen(record)
    # record = doi(record)
    record = stripnline(record)
    return record


#
# parseBibtex takes the name of a BibTeX file and returns its entries
# as a list of dictionaries.
#
def parseBibtex(bibtex_fname):
    with open(bibtex_fname) as bibtex_file:
        parser = BibTexParser()
        parser.customization = customizations
        bibtex_database = bibtexparser.load(bibtex_file, parser=parser)
        # fix encoding
        #// source = parser.checkEncoding(source)
        # fix latex special characters, among other things
        # source = parser.preprocess(source)  # .encode('utf-8'))
        # get list of dictionaries
        return bibtex_database.entries
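
# A minimal sketch of one returned entry (all field values are hypothetical):
# the parser stores the entry type under 'ENTRYTYPE' and the citation key
# under 'ID', and the keyword customization turns "keywords" into a list.
#
#   {u'ENTRYTYPE': u'article', u'ID': u'doe2020example',
#    u'title': u'An Example Title', u'author': u'Doe, Jane',
#    u'year': u'2020', u'keyword': [u'cv', u'ml']}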


def toBibtex(ref,
             valid_fields=[u'title', u'editor', u'author', u'journal', u'booktitle',
                           u'publisher', u'school', u'issn', u'volume', u'number',
                           u'pages', u'year', u'doi', u'pdf'],
             omit_fields=[]):
    omit = [each.lower() for each in omit_fields]
    fields = [each.lower() for each in valid_fields]
    # ttable = [('&', r'\&'),
    #           ('~', r'\~'),
    #           ('—', r'---'),
    #           ('–', r'--'),
    #           ]
    bib_key = ref[u'ID']
    ref_type = ref[u'ENTRYTYPE'].replace(u'Reference', '')
    bibtex = u'\n@' + ref_type + u'{' + bib_key + u',\n'
    ref_keys = ref.keys()
    for k in fields:
        if k in ref_keys and k not in omit:
            if type(ref[k]) == list:
                nv = ref[k][0]
            else:
                nv = ref[k]
            # for a, b in ttable:
            #     nv = nv.replace(a, b)
            bibtex = bibtex + u' '
            bibtex = bibtex + k
            bibtex = bibtex + u' = {'
            # bibtex = bibtex + unicode(nv.encode('utf8'))
            # bibtex = bibtex + unicode(nv, encoding='utf8')
            bibtex = bibtex + nv
            bibtex = bibtex + u'},\n'
    # remove the trailing comma before closing the entry
    bibtex = bibtex[0:-2] + u"}\n"
    return bibtex
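
# Rough sketch of the output for a hypothetical entry that only carries a
# title and a year (note the leading blank line and the stripped final comma):
#
#   @article{doe2020example,
#    title = {An Example Title},
#    year = {2020}}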


def toHTML(ref,
           numb=0,
           valid_fields=[u'title', u'editor', u'author', u'journal', u'booktitle',
                         u'publisher', u'school', u'issn', u'volume', u'number',
                         u'pages', u'year', u'doi', u'pdf', u'abstract'],
           omit_fields=[]):
    omit = [each.lower() for each in omit_fields]
    fields = [each.lower() for each in valid_fields]
    ref_type = ref[u'ENTRYTYPE'].replace('Reference', '').lower()
    bib_key = ref[u'ID']
    bid = 'b' + bib_key
    ref_keys = ref.keys()
    html = u'<table width="100%" class="pub">\n'
    html += u'<col align="center" width=230px />\n'
    html += u'<col align="left" />\n'
    html += u'<col align="left" />\n'
    html += u'<tr>\n'
    html += u'<td class="keywords" align="center">\n'
    # html += u'<td class="keywords" align="center" padding="20px">\n'
    if u'keyword' in ref_keys:
        # for k, v in ref[u'keyword'].iteritems():
        #     print k, v
        for k in ref[u'keyword']:
            lk = k.lower()
            if lk in areas_keys:
                html += u'<img class="keyw" src="%s" alt="%s" title="%s" align="middle" />\n' % (areas[lk][1], areas[lk][0], areas[lk][0])
    else:
        html += u'No keywords!\n'
    html += u'</td>\n'
    html += u'<td><div class="%s" name="%s" ><a name="%s">\n' % (ref_type, bib_key, bib_key)
    # if numb > 0:
    #     html += u'%d. \n' % numb
    for k in fields:
        if k in ref_keys and k not in omit:
            if type(ref[k]) == list:
                v = ref[k][0]
            else:
                v = ref[k]
            if numb > 0 and k == 'title':
                html += u'<span class="%s">%d. %s </span>\n' % (k, numb, v)
            elif k == 'issn':
                html += u'<span class="%s">ISSN: %s, </span>\n' % (k, v)
            elif k == 'volume':
                html += u'<span class="%s">Vol. %s, </span>\n' % (k, v)
            elif k == 'number':
                html += u'<span class="%s">Num. %s, </span>\n' % (k, v)
            elif k == 'pages':
                html += u'<span class="%s">pgs. %s, </span>\n' % (k, v)
            elif k == 'doi':
                html += u'<span class="%s"> <a href="http://dx.doi.org/%s">doi:%s</a> </span>\n' % (k, v, v)
            elif k == 'pdf':
                html += u'<span class="pdf"> <a href="papers/%s">PDF.</a> </span>\n' % v
            elif k == 'abstract':
                html += u'<span class="linkabs"><a href="javascript:unhide(\'%s\');">Show/Hide Abstract.</a></span>\n' % (bid + u'-abs')
                html += u'<div class="hidden" id="%s"><div class="abstract"><b>Abstract:</b> %s\n</div></div>\n' % (bid + u'-abs', v)
            else:
                html += u'<span class="%s"> %s </span>\n' % (k, v)
    html += u'<span class="linkbib"><a href="javascript:unhide(\'%s\');">Show/Hide Bibtex.</a></span>\n' % bid
    html += u'<div class="hidden" id="%s"><pre>%s\n</pre></div>\n' % (bid, toBibtex(ref))
    html += u'\n </a></div></td>\n'
    html += u' </tr>\n'
    html += u'</table>\n\n'
    return html


def referencesByYear(bibs):
    # byYears = {first: 0, last: 3000}
    byYears = {}
    for ref in bibs:
        if u'year' in ref.keys():
            year = int(ref[u'year'])
            if year not in byYears.keys():
                byYears[year] = [ref]
            else:
                byYears[year].append(ref)
    return byYears
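
# Sketch with hypothetical refs: [ref_a (2019), ref_b (2019), ref_c (2021)]
# becomes {2019: [ref_a, ref_b], 2021: [ref_c]}; entries without a "year"
# field are silently skipped.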


def preamble():
    a = """<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
<link href="publications.css" type="text/css" rel="stylesheet" />
<meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
<script type="text/javascript">
function unhide(divID) {
  var item = document.getElementById(divID);
  if (item) {
    item.className = (item.className == 'hidden') ? 'unhidden' : 'hidden';
  }
}
</script>
<title>Publications</title>
</head><body>
<div class="header1"><br />
<table class="header1" cellpadding="0" cellspacing="0">
<colgroup span="1" width="80%" /><colgroup span="1" width="20%" /><tbody>
<tr>
<td>
<h1>Publications</h1>
</td>
<td><!--#include virtual="navigation.html"--> </td>
</tr>
</tbody>
</table>
</div>
<p>
The full CV, at the CNPq CV repository, is available
<a href="http://lattes.cnpq.br/3575273076267374">here</a>
(in Portuguese).
</p>"""
    return a


def posamble():
    a = """<h2> </h2>
<div>
<address>Siome Klein Goldenstein:
[myfirstname](at) ic unicamp br </address>
<!-- hhmts start -->Last modified:
<!--#flastmod file="publications.shtml" -->
<!-- hhmts end -->
</div>
</body></html>"""
    return a


def filterType(entry, valid_types):
    vtypes = [each.lower() for each in valid_types]
    tp = entry[u'ENTRYTYPE']
    tp = tp.lower()
    tp = tp.replace(u'reference', '')
    return tp in vtypes


def generateWebpage(bibs,
                    pord=[([u'article'], u'Journal Publications', u'byYear'),
                          ([u'patent'], u'Patents', None),
                          ([u'book', u'inbook'], u'Books and Book Chapters', None),
                          ([u'inproceedings'], u'Conferences and Workshops', u'byYear'),
                          ([u'incollection'], u'Magazines', None),
                          ([u'phdthesis', u'mastersthesis'], u'Theses', None)
                          ]
                    ):
    webpg = preamble() + u'\n\n'
    webpg += u'<br /> <br /> <p style="text-align: center;"> '
    for ent in pord:
        webpg += u' '
        webpg += u' <a href="#' + unicode(ent[1]) + u'"> ' + unicode(ent[1]) + u'</a> '
    webpg += u' </p> <br /> <br />'
    for ent in pord:
        fbib = filter(lambda x: filterType(x, ent[0]), bibs)
        print >>sys.stderr, u'%s: has %d elements out of %d total' % (ent[1], len(fbib), len(bibs))
        sys.stderr.flush()
        webpg += u'<h2> <a name="%s" > %s </a></h2>\n' % (ent[1], ent[1])
        # webpg += u'<ol reversed="reversed">'
        numb = len(fbib)
        byear = referencesByYear(fbib)
        years = byear.keys()
        years.sort(reverse=True)
        for y in years:
            if ent[2] == u'byYear':
                webpg += u'<h3> <span> %d </span></h3>\n' % y
            for r in byear[y]:
                webpg += u'\n %s \n' % toHTML(r, numb)
                numb -= 1
    webpg += posamble()
    return webpg


if __name__ == "__main__":
    if len(sys.argv) != 3:
        print '\n' + sys.argv[0] + ': requires name of the bibtex file and output file!\n'
        sys.exit()
    fname = sys.argv[1]
    # bibfile = codecs.open(fname, encoding='latin_1').read()
    bibs = parseBibtex(fname)
    s = generateWebpage(bibs)
    with open(sys.argv[2], 'w') as fout:
        fout.write(s.encode('utf8'))
    # sys.stdout.write(s)
    # print s.encode('latin_1')
    # print (s)
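
# Example invocation (the script and .bib file names are hypothetical; the
# postamble assumes the output is served as publications.shtml):
#   python generate_publications.py mypapers.bib publications.shtml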