PYTHON   11

new processBibTeX py

Guest on 23rd August 2022 10:54:24 AM

  1. #! /usr/bin/env python
  2. #
  3.  
  4. import sys
  5. import bibtexparser
  6. from   bibtexparser.bparser import BibTexParser
  7. import bibtexparser.customization
  8.  
  9. areas = { u'cv':(u'Computer Vision', u'Images/Vision-100.png'),
  10.           u'ai':(u'Artificial Intelligence', u'Images/AI-100.png'),
  11.           u'ml':(u'Machine Learning', u'Images/ML-80.png'),
  12.           u'forensics':(u'Forensics', u'Images/Forensics-80.png'),
  13.           u'access':(u'Accessibility', u'Images/Access-80.png'),
  14.           u'cg':(u'Computer Graphics', u'Images/Graphics-80.png'),
  15.           u'asl':(u'Sign Language', u'Images/ASL-100.png'),
  16.           u'wavelets':(u'Wavelets', u'Images/Wavelet-100.png'),
  17.           u'sport': (u'Sport', u'Images/Sport-80.png'),
  18.           u'uncert': (u'Uncertanties', u'Images/Uncert-100.png'),
  19.           u'virtcrd': (u'Virtual Crowds', u'Images/Crowd-80.png'),
  20.           u'med': (u'Biomedical', u'Images/Med-64.png'),
  21.           u'biblio': (u'Bibliometrics', u'Images/Book-80.png')
  22.           }
  23.  
  24. areas_keys = areas.keys()
  25.  
  26.  
  27. def stripnline(record):
  28.     if u"author" in record:
  29.         record[u"author"] = record[u"author"].replace(u'\n', u' ').strip()
  30.  
  31.     if "title" in record:
  32.         record[u"title"] = record[u"title"].replace(u'\n', u' ').strip()
  33.        
  34.     return record
  35.  
  36.  
  37. def customizations(record):
  38.     """Use some functions delivered by the library
  39.  
  40.    :param record: a record
  41.    :returns: -- customized record
  42.    """
  43.     record = bibtexparser.customization.type(record)
  44. #    record = author(record)
  45. #    record = editor(record)
  46. #    record = journal(record)
  47.     record = bibtexparser.customization.keyword(record)
  48. #    record = link(record)
  49. #    record = page_double_hyphen(record)
  50. #    record = doi(record)
  51.     record = stripnline(record)
  52.     return record
  53.  
  54.  
  55. #
  56. #  input is the variable with all the text of the bibtex file
  57. #  output is a list of dictionaries
  58. #
  59. def parseBibtex( bibtex_fname ):
  60.  
  61.     with open(bibtex_fname) as bibtex_file:
  62.         parser = BibTexParser()
  63.         parser.customization = customizations
  64.         bibtex_database = bibtexparser.load(bibtex_file, parser=parser)
  65.  
  66.  
  67.     # fix encoding
  68.     #//    source = parser.checkEncoding(source)
  69.  
  70.     # fix latex special characters, among other things
  71. #    source = parser.preprocess(source) #.encode('utf-8'))
  72.  
  73.     # get list of dictionaries
  74.     return bibtex_database.entries
  75.  
  76.  
  77.  
  78. def toBibtex (ref,
  79.             valid_fields=[u'title', u'editor', u'author', u'journal', u'booktitle',
  80.                           u'publisher', u'school', u'issn', u'volume', u'number',  
  81.                           u'pages', u'year', u'doi', u'pdf'],
  82.               omit_fields=[]):
  83.  
  84.     omit   = [each.lower() for each in omit_fields]
  85.     fields = [each.lower() for each in valid_fields]
  86.  
  87. #    ttable = [('&', r'\&'),
  88. #              ('~', r'\~'),
  89. #              ('—',r'---'),
  90. #              ('–', r'--'),
  91. #              ]
  92.  
  93.     bib_key = ref[u'ID']
  94.     ref_type = ref[u'ENTRYTYPE'].replace(u'Reference','')
  95.     bibtex = u'\n@' + ref_type + u'{' + bib_key + u',\n' # '%s{%s,\n" %  (ref_type, bib_key)
  96.  
  97.     ref_keys = ref.keys()
  98.  
  99.     for k in fields:
  100.         if k in ref_keys and k not in omit:
  101.             if type(ref[k]) == list:
  102.                 nv = ref[k][0]
  103.             else:
  104.                 nv = ref[k]
  105.  
  106. #            for a,b in ttable:
  107. #                nv = nv.replace(a,b)
  108.  
  109.             bibtex = bibtex + u'  '
  110.             bibtex = bibtex + k
  111.             bibtex = bibtex + u' = {'  
  112.             #            bibtex = bibtex + unicode(nv.encode('utf8'))
  113.             #            bibtex = bibtex + unicode(nv, encoding='utf8')
  114.             bibtex = bibtex + nv
  115.             bibtex = bibtex + u'},\n'
  116.            
  117.     # remove trailing command
  118.     bibtex = bibtex[0:-2] + u"}\n"
  119.  
  120.     return bibtex
  121.  
  122.  
  123.  
  124. def toHTML (ref,
  125.             numb=0,
  126.             valid_fields=[u'title', u'editor', u'author', u'journal', u'booktitle',
  127.                           u'publisher', u'school', u'issn', u'volume', u'number',  
  128.                           u'pages', u'year', u'doi', u'pdf', u'abstract'],
  129.             omit_fields=[]):
  130.  
  131.     omit = [each.lower() for each in omit_fields]
  132.     fields = [each.lower() for each in valid_fields]
  133.  
  134.     ref_type = ref[u'ENTRYTYPE'].replace('Reference','').lower()
  135.  
  136.     bib_key = ref[u'ID']
  137.     bid = 'b' + bib_key
  138.     ref_keys = ref.keys()
  139.  
  140.     html = u'<table width="100%" class="pub">\n'
  141.     html += u'<col align="center" width=230px />\n'
  142.     html += u'<col align="left" />\n'
  143.     html += u'<col align="left" />\n'
  144.     html += u'<tr>\n'
  145.     html += u'<td class="keywords" align="center">\n'
  146.     #    html += u'<td class="keywords" align="center" padding="20px">\n'
  147.     if u'keyword' in ref_keys:
  148. #        for k,v in ref[u'keyword'].iteritems():
  149. #            print k, v
  150.         for k in ref[u'keyword']:
  151.             lk = k.lower()
  152.             if lk in areas_keys:
  153.                 html += u'<img class="keyw" src="%s" alt="%s" title="%s" align="middle" />\n' % (areas[lk][1], areas[lk][0], areas[lk][0])
  154.     else:
  155.         html += u'No keywords!\n'
  156.    
  157.     html += u'</td>\n'
  158.  
  159.        
  160.     html += u'<td><div class="%s" name="%s" ><a name="%s">\n' % (ref_type, bib_key, bib_key)
  161.     #    if numb>0:
  162.     #        html += u'%d. \n' % numb
  163.  
  164.  
  165.     for k in fields:
  166.         if k in ref_keys and k not in omit:
  167.             if type(ref[k]) == list:
  168.                 v = ref[k][0]
  169.             else:
  170.                 v = ref[k]
  171.             if numb>0 and k=='title':
  172.                 html += u'<span class="%s">%d. %s </span>\n' % (k, numb, v)
  173.             elif k=='issn':
  174.                 html += u'<span class="%s">ISSN: %s, </span>\n' % (k, v)
  175.             elif k=='volume':
  176.                 html += u'<span class="%s">Vol. %s, </span>\n' % (k, v)
  177.             elif k=='number':
  178.                 html += u'<span class="%s">Num. %s, </span>\n' % (k, v)
  179.             elif k=='pages':
  180.                 html += u'<span class="%s">pgs. %s, </span>\n' % (k, v)
  181.             elif k=='doi':
  182.                 html += u'<span class="%s"> <a href="http://dx.doi.org/%s">doi:%s</a> </span>\n' % (k, v, v)
  183.             elif k == 'pdf':
  184.                 html += u'<span class="pdf"> <a href="papers/%s">PDF.</a> </span>\n' % v
  185.             elif k == 'abstract':
  186.                 html += u'<span class="linkabs"><a href="javascript:unhide(\'%s\');">Show/Hide Abstract.</a></span>\n' % (bid + u'-abs')
  187.                 html += u'<div class="hidden" id="%s"><div class="abstract"><b>Abstract:</b> %s\n</div></div>\n' % (bid + u'-abs', v)
  188.             else:
  189.                 html += u'<span class="%s"> %s </span>\n' % (k, v)
  190.     html += u'<span class="linkbib"><a href="javascript:unhide(\'%s\');">Show/Hide Bibtex.</a></span>\n' % bid
  191.     html += u'<div class="hidden" id="%s"><pre>%s\n</pre></div>\n' % (bid, toBibtex(ref))
  192.  
  193.  
  194.     html += u'\n  </a></div></td>\n'
  195.  
  196.  
  197.     html += u'  </tr>\n'
  198.     html += u'</table>\n\n'
  199.  
  200.     return html
  201.  
  202. def referencesByYear(bibs):
  203.     #    byYears = {first:0, last: 3000}
  204.     byYears = {}
  205.  
  206.     for ref in bibs:
  207.         if u'year' in ref.keys():  
  208.             year = int(ref[u'year'])
  209.             if year not in byYears.keys():
  210.                 byYears[year] = [ ref ]
  211.             else:
  212.                 byYears[year].append(ref)
  213.  
  214.     return byYears
  215.  
  216.  
def preamble():
    """Return the fixed opening part of the publications page.

    The text contains the ``<html>``/``<head>`` section (stylesheet link to
    ``publications.css``, content-type meta tag, the JavaScript ``unhide``
    function that toggles an element's class between 'hidden' and
    'unhidden', and the page title), the page header table with an
    ``<!--#include virtual="navigation.html"-->`` directive, and an
    introductory paragraph linking to the full CV.  ``<body>`` is opened
    here and not closed.

    :returns: -- the HTML text
    """
    a = """<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"><head>
   
<link href="publications.css" type="text/css" rel="stylesheet" />
 
<meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
 
<script type="text/javascript">
function unhide(divID) {
var item = document.getElementById(divID);
if (item) {
item.className=(item.className=='hidden')?'unhidden':'hidden';
}
}
</script>
 
<title>Publications</title>
 
</head><body>
<div class="header1"><br />
<table class="header1" cellpadding="0" cellspacing="0">
 <colgroup span="1" width="80%" /><colgroup span="1" width="20%" /><tbody>
   <tr>
     <td>
     <h1>Publications</h1>
     </td>
     <td><!--#include virtual="navigation.html"-->&nbsp; </td>
   </tr>
 </tbody>
</table>
</div>
 
<p>
The full CV, at the CNPq CV repository, is available
<a href="http://lattes.cnpq.br/3575273076267374 ">here</a>
(in portuguese).
</p>"""
    return a
  255.  
def posamble():
    """Return the fixed closing part of the publications page.

    The text contains a spacer heading, the contact ``<address>`` block, a
    "Lastmodified" line with an ``<!--#flastmod file="publications.shtml" -->``
    directive, and the closing ``</body></html>`` tags.

    :returns: -- the HTML text
    """
    a = """<h2>&nbsp;</h2>
 
<div>
<address>Siome Klein Goldenstein:
[myfirstname](at) ic unicamp br </address>
<!-- hhmts start -->Lastmodified:&nbsp;
<!--#flastmod file="publications.shtml" -->
<!-- hhmts end -->
</div>
 
</body></html>"""
    return a
  269.  
  270.  
  271. def filterType( entry, valid_types ):
  272.     vtypes = [each.lower() for each in valid_types]
  273.     tp = entry[u'ENTRYTYPE']
  274.     tp = tp.lower()
  275.     tp = tp.replace(u'reference','')
  276.  
  277.     return tp in vtypes
  278.    
  279.  
  280.  
  281. def generateWebpage (bibs,
  282.                      pord=[ ([u'article'], u'Journal Publications', u'byYear'),
  283.                             ([u'patent'], u'Patents', None),
  284.                             ([u'book', u'inbook'], u'Books and Book Chapters', None),
  285.                             ([u'inproceedings'], u'Conferences and Workshops', u'byYear'),
  286.                             ([u'incollection'], u'Magazines', None),
  287.                             ([u'phdthesis', u'mastersthesis'], u'Thesis', None)
  288.                           ]
  289.                      ):
  290.  
  291.     webpg = preamble() + u'\n\n'
  292.  
  293.     webpg += u'<br /> <br /> <p style="text-align: center;"> '
  294.     for ent in pord:
  295.       webpg += u'&nbsp;&nbsp;&nbsp;&nbsp;'
  296.       webpg += u'    <a href="#' + unicode(ent[1]) + u'"> ' + unicode(ent[1]) + u'</a> &nbsp;&nbsp;&nbsp;&nbsp;'
  297.     webpg += u' </p"> <br /> <br />'
  298.  
  299.  
  300.     for ent in pord:
  301.         fbib = filter(lambda x: filterType(x, ent[0]), bibs)
  302.         print >>sys.stderr, u'%s: has %d elements out of %d total' % (ent[1],len(fbib),len(bibs))
  303.         sys.stderr.flush()
  304.        
  305.         webpg += u'<h2> <a name="%s" > %s </a></h2>\n' % (ent[1],ent[1])
  306. #        webpg += u'<ol reversed="reversed">'
  307.  
  308.         numb = len(fbib)
  309.         byear = referencesByYear(fbib)
  310.         years = byear.keys()
  311.         years.sort(reverse=True)
  312.  
  313.         for y in years:
  314.             if ent[2] == u'byYear':
  315.                 webpg += u'<h3> <span> %d </span></h3>\n' % y
  316.  
  317.             for r in byear[y]:
  318.                 webpg += u'\n %s \n' % toHTML(r, numb)
  319.                 numb -=1
  320.  
  321.     webpg += posamble()
  322.  
  323.     return webpg
  324.  
  325.  
  326. if __name__ == "__main__":
  327.     if len(sys.argv) != 3:
  328.         print '\n' +  sys.argv[0] + ': requires name of the bibtex file and output file!\n'
  329.         sys.exit()
  330.     fname = sys.argv[1]
  331. #    bibfile = codecs.open(fname,encoding='latin_1').read()
  332.     bibs = parseBibtex( fname )
  333.     s = generateWebpage( bibs )
  334.  
  335.     with open(sys.argv[2],'w') as fout:
  336.         fout.write(s.encode('utf8'))
  337. #    sys.stdout.write(s)
  338. #    print s.encode('latin_1')
  339. #    print (s)

Raw Paste


Login or Register to edit or fork this paste. It's free.