PYTHON   50
processBibtex py
Guest on 23rd August 2022 10:55:17 AM


  1. #! /usr/bin/env python
  2. #
  3.  
  4. import sys
  5. import codecs
  6. import tempfile
  7. import logging
  8. logging.basicConfig()
  9.  
  10. from zope.publisher.browser import TestRequest
  11.  
  12. from mybibtex import BibtexParser
  13. from bibliograph.rendering.renderers.bibtex import BibtexRenderView
  14. from bibliograph.rendering.utility import BibtexRenderer
  15.  
  16. from zope.publisher.browser import TestRequest
  17. from zope.component import queryAdapter
  18.  
  19. from bibliograph.core.interfaces import IBibliography
  20. from bibliograph.core.interfaces import IBibliographicReference
  21.  
  22. areas = { 'cv':('Computer Vision', 'Images/Vision-100.png'),
  23.           'ai':('Artificial Intelligence', 'Images/AI-100.png'),
  24.           'ml':('Machine Learning', 'Images/ML-80.png'),
  25.           'forensics':('Forensics', 'Images/Forensics-80.png'),
  26.           'access':('Accessibility', 'Images/Access-80.png'),
  27.           'cg':('Computer Graphics', 'Images/Graphics-80.png'),
  28.           'asl':('Sign Language', 'Images/ASL-100.png'),
  29.           'wavelets':('Wavelets', 'Images/Wavelet-100.png'),
  30.           'sport': ('Sport', 'Images/Sport-80.png'),
  31.           'uncert': ('Uncertanties', 'Images/Uncert-100.png'),
  32.           'virtcrd': ('Virtual Crowds', 'Images/Crowd-80.png'),
  33.           'med': ('Biomedical', 'Images/Med-64.png'),
  34.           'biblio': ('Bibliometrics', 'Images/Book-80.png')
  35.           }
  36.  
  37. areas_keys = areas.keys()
  38.  
  39.  
  40. #
  41. #  input is the variable with all the text of the bibtex file
  42. #  output is a list of dictionaries
  43. #
  44. def parseBibtex( source ):
  45.  
  46.     # Bibtex parser
  47.     parser = BibtexParser()
  48.  
  49.     # fix encoding
  50.     #//    source = parser.checkEncoding(source)
  51.  
  52.     # fix latex special characters, among other things
  53.     source = parser.preprocess(source) #.encode('utf-8'))
  54.  
  55.     # get list of dictionaries
  56.     return parser.getEntries(source)
  57.  
  58.  
  59.  
  60. def toBibtex (ref,
  61.             valid_fields=[u'title', u'editor', u'author', u'journal', u'booktitle',
  62.                           u'publisher', u'school', u'issn', u'volume', u'number',  
  63.                           u'pages', u'year', u'doi', u'pdf'],
  64.               omit_fields=[]):
  65.  
  66.     omit   = [each.lower() for each in omit_fields]
  67.     fields = [each.lower() for each in valid_fields]
  68.  
  69.     ttable = [('&', r'\&'),
  70.               ('~', r'\~'),
  71.               ('—',r'---'),
  72.               ('–', r'--'),
  73.               ]
  74.  
  75.     bib_key = ref['pid']
  76.     ref_type = ref['reference_type'].replace('Reference','')
  77.     bibtex = u'\n@' + ref_type + u'{' + bib_key + u',\n' # '%s{%s,\n" %  (ref_type, bib_key)
  78.  
  79.     ref_keys = ref.keys()
  80.  
  81.     for k in fields:
  82.         if k in ref_keys and k not in omit:
  83.             if type(ref[k]) == list:
  84.                 nv = ref[k][0]
  85.             else:
  86.                 nv = ref[k]
  87.  
  88.             for a,b in ttable:
  89.                 nv = nv.replace(a,b)
  90.  
  91.             bibtex = bibtex + u'  '
  92.             bibtex = bibtex + k
  93.             bibtex = bibtex + u' = {'  
  94.             #            bibtex = bibtex + unicode(nv.encode('utf8'))
  95.             #            bibtex = bibtex + unicode(nv, encoding='utf8')
  96.             bibtex = bibtex + unicode(nv, encoding='latin_1')
  97.             bibtex = bibtex + u'},\n'
  98.            
  99.     # remove trailing command
  100.     bibtex = bibtex[0:-2] + u"}\n"
  101.  
  102.  
  103.     return bibtex
  104.  
  105.  
  106.  
  107. def toHTML (ref,
  108.             numb=0,
  109.             valid_fields=[u'title', u'editor', u'author', u'journal', u'booktitle',
  110.                           u'publisher', u'school', u'issn', u'volume', u'number',  
  111.                           u'pages', u'year', u'doi', u'pdf', u'abstract'],
  112.             omit_fields=[]):
  113.  
  114.     omit = [each.lower() for each in omit_fields]
  115.     fields = [each.lower() for each in valid_fields]
  116.  
  117.     ref_type = ref['reference_type'].replace('Reference','').lower()
  118.  
  119.     bib_key = ref['pid']
  120.     bid = 'b' + bib_key
  121.     ref_keys = ref.keys()
  122.  
  123.     html = u'<table width="100%" class="pub">\n'
  124.     html += u'<col align="center" width=230px />\n'
  125.     html += u'<col align="left" />\n'
  126.     html += u'<col align="left" />\n'
  127.     html += u'<tr>\n'
  128.     html += u'<td class="keywords" align="center">\n'
  129.     #    html += u'<td class="keywords" align="center" padding="20px">\n'
  130.     if 'keywords' in ref_keys:
  131.         for k in ref['keywords']:
  132.             lk = k.lower()
  133.             if lk in areas_keys:
  134.                 html += u'<img class="keyw" src="%s" alt="%s" title="%s" align="middle" />\n' % (areas[lk][1], areas[lk][0], areas[lk][0])
  135.     else:
  136.         html += u'No keywords!\n'
  137.    
  138.     html += u'</td>\n'
  139.  
  140.        
  141.     html += u'<td><div class="%s" name="%s" ><a name="%s">\n' % (ref_type, bib_key, bib_key)
  142.     #    if numb>0:
  143.     #        html += u'%d. \n' % numb
  144.  
  145.  
  146.     for k in fields:
  147.         if k in ref_keys and k not in omit:
  148.             if type(ref[k]) == list:
  149.                 v = ref[k][0]
  150.             else:
  151.                 v = ref[k]
  152.             if numb>0 and k=='title':
  153.                 html += u'<span class="%s">%d. %s </span>\n' %(k, numb, unicode(v, encoding='utf8'))
  154.             elif k=='issn':
  155.                 html += u'<span class="%s">ISSN: %s, </span>\n' %(k, unicode(v, encoding='utf8'))
  156.             elif k=='volume':
  157.                 html += u'<span class="%s">Vol. %s, </span>\n' %(k, unicode(v, encoding='utf8'))
  158.             elif k=='number':
  159.                 html += u'<span class="%s">Num. %s, </span>\n' %(k, unicode(v, encoding='utf8'))
  160.             elif k=='pages':
  161.                 html += u'<span class="%s">pgs. %s, </span>\n' %(k, unicode(v, encoding='utf8'))
  162.             elif k=='doi':
  163.                 html += u'<span class="%s"> <a href="http://dx.doi.org/%s">doi:%s</a> </span>\n' % (k, unicode(v, encoding='utf8'), unicode(v, encoding='utf8'))
  164.             elif k == 'pdf':
  165.                 html += u'<span class="pdf"> <a href="papers/%s">PDF.</a> </span>\n' % unicode(v, encoding='utf8')
  166.             elif k == 'abstract':
  167.                 html += u'<span class="linkabs"><a href="javascript:unhide(\'%s\');">Show/Hide Abstract.</a></span>\n' % (bid + '-abs')
  168.                 html += u'<div class="hidden" id="%s"><div class="abstract"><b>Abstract:</b> %s\n</div></div>\n' % (bid+'-abs', unicode(v, encoding='utf8'))
  169.             else:
  170.                 html += u'<span class="%s"> %s </span>\n' %(k, unicode(v, encoding='utf8'))
  171.     html += u'<span class="linkbib"><a href="javascript:unhide(\'%s\');">Show/Hide Bibtex.</a></span>\n' % bid
  172.     html += u'<div class="hidden" id="%s"><pre>%s\n</pre></div>\n' % (bid, toBibtex(ref))
  173.  
  174.  
  175.     html += u'\n</a></div></td>\n'
  176.  
  177.  
  178.     html += u'</tr>\n'
  179.  
  180.     # html += u'<tr>\n'
  181.     # html += u'<td>\n'
  182.     # html += u'</td>\n'
  183.     # html += u'<td>\n'
  184.     # html += u'<div class="hidden" id="%s"><pre>%s\n</pre></div>\n' % (bid, toBibtex(ref))
  185.     # html += u'</td>\n'
  186.     # html += u'</tr>\n'
  187.     html += u'</table>\n'
  188.  
  189.     return html
  190.  
  191. def referencesByYear(bibs):
  192.     #    byYears = {first:0, last: 3000}
  193.     byYears = {}
  194.  
  195.     for ref in bibs:
  196.         if 'year' in ref.keys():
  197.             year = int(ref['year'])
  198.             if year not in byYears.keys():
  199.                 byYears[year] = [ ref ]
  200.             else:
  201.                 byYears[year].append(ref)
  202.  
  203.     return byYears
  204.  
  205.  
  206. def preamble():
  207.     a = """<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
  208.    
  209. <link href="publications.css" type="text/css" rel="stylesheet" />
  210.  
  211. <meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
  212.  
  213. <script type="text/javascript">
  214. function unhide(divID) {
  215. var item = document.getElementById(divID);
  216. if (item) {
  217. item.className=(item.className=='hidden')?'unhidden':'hidden';
  218. }
  219. }
  220. </script>
  221.  
  222. <title>Publications</title>
  223.  
  224. </head><body>
  225. <div class="header1"><br />
  226. <table class="header1" cellpadding="0" cellspacing="0">
  227.  <colgroup span="1" width="80%" /><colgroup span="1" width="20%" /><tbody>
  228.    <tr>
  229.      <td>
  230.      <h1>Publications</h1>
  231.      </td>
  232.      <td><!--#include virtual="navigation.html"-->&nbsp; </td>
  233.    </tr>
  234.  </tbody>
  235. </table>
  236. </div>
  237.  
  238. <p>
  239. The full CV, at the CNPq CV repository, is available
  240. <a href="http://lattes.cnpq.br/3575273076267374 ">here</a>
  241. (in portuguese).
  242. </p>"""
  243.     return a
  244.  
  245. def posamble():
  246.     a = """<h2>&nbsp;</h2>
  247.  
  248. <div>
  249. <address>Siome Klein Goldenstein:
  250. [myfirstname](at) ic unicamp br </address>
  251. <!-- hhmts start -->Lastmodified:&nbsp;
  252. <!--#flastmod file="publications.shtml" -->
  253. <!-- hhmts end -->
  254. </div>
  255.  
  256. </body></html>"""
  257.     return a
  258.  
  259.  
  260. def filterType( entry, valid_types ):
  261.     vtypes = [each.lower() for each in valid_types]
  262.     tp = entry['reference_type']
  263.     tp = tp.lower()
  264.     tp = tp.replace('reference','')
  265.  
  266.     return tp in vtypes
  267.    
  268.  
  269.  
  270. def generateWebpage (bibs,
  271.                      pord=[ (['article'], 'Journal Publications', 'byYear'),
  272.                             (['patent'], 'Patents', None),
  273.                             (['book', 'inbook'], 'Books and Book Chapters', None),
  274.                             (['inproceedings'], 'Conferences and Workshops', 'byYear'),
  275.                             (['incollection'], 'Magazines', None),
  276.                             (['phdthesis','mastersthesis'], 'Thesis', None)
  277.                           ]
  278.                      ):
  279.  
  280.     webpg = preamble() + u'\n\n'
  281.  
  282.     webpg += u'<br /> <br /> <p style="text-align: center;"> '
  283.     for ent in pord:
  284.       webpg += u'&nbsp;&nbsp;&nbsp;&nbsp;'
  285.       webpg += u'    <a href="#' + unicode(ent[1]) + u'"> ' + unicode(ent[1]) + u'</a> &nbsp;&nbsp;&nbsp;&nbsp;'
  286.     webpg += u' </p"> <br /> <br />'
  287.  
  288.  
  289.     for ent in pord:
  290.         fbib = filter(lambda x: filterType(x, ent[0]), bibs)
  291.         print >>sys.stderr, '%s: has %d elements out of %d total' % (ent[1],len(fbib),len(bibs))
  292.         sys.stderr.flush()
  293.        
  294.         webpg += u'<h2> <a name="' + unicode(ent[1]) + u'" > ' + unicode(ent[1]) + u' </a></h2>\n'
  295. #        webpg += u'<ol reversed="reversed">'
  296.  
  297.         numb = len(fbib)
  298.         byear = referencesByYear(fbib)
  299.         years = byear.keys()
  300.         years.sort(reverse=True)
  301.  
  302.         for y in years:
  303.             if ent[2] == 'byYear':
  304.                 webpg += u'<h3> <span> '+ unicode(str(y)) + u'</span></h3>\n'
  305. #                fp.write( '<h3> '+ y + '</h3>\n' )
  306.  
  307.             for r in byear[y]:
  308. #                fp.write('\n' + toHTML(r) + '\n')
  309.                 webpg += u'\n' + toHTML(r, numb) + u'\n'
  310.                 numb -=1
  311.  
  312. #        webpg += u'</ol>'
  313.  
  314.  
  315. #    fp.write ( posamble() )
  316.     webpg += posamble()
  317.  
  318.  
  319. #    fp.close()
  320.     return webpg
  321.  
  322.  
  323. if __name__ == "__main__":
  324.     if len(sys.argv) != 2:
  325.         print '\n' +  sys.argv[0] + ': requires name of the bibtex file!\n'
  326.         sys.exit()
  327.     fname = sys.argv[1]
  328.     bibfile = codecs.open(fname,encoding='latin_1').read()
  329.     bibs = parseBibtex( bibfile )
  330.     s = generateWebpage( bibs )
  331.  
  332.     print s.encode('latin_1')

Raw Paste

Login or Register to edit or fork this paste. It's free.