PYTHON   11

get_artists_origin.py

Guest on 3rd June 2021 05:38:50 AM

  1. from base import *
  2.  
  # Data file locations, relative to the current working directory.
  filepath_origin = './data/artists_origin.csv'
  filepath_no_data_origin = './data/artists_origin_nodata.csv'
  filepath_manual_origin = './data/artists_origin_manual.csv'
  filepath_countries_topartists = './data/countries_topartists1000.json'

  # Parse the JSON input inside a context manager so the file handle is
  # closed as soon as loading finishes instead of being left to the GC.
  with open(filepath_countries_topartists) as f:
      countries = json.load(f)

  # Counters used for progress reporting.
  countries_no = len(countries)
  processed_no = 0

  # Accumulates every known artist record for the whole run.
  origin_all = {}
  14.  
  def _read_origin_rows(path):
      """Yield ``(mbid, record)`` pairs from a tab-separated origin file.

      Only rows with exactly three columns (mbid, name, origin) are used;
      any other row is skipped. Each record is a dict with the keys
      ``"mbid"``, ``"name"`` and ``"origin"``.
      """
      # Binary mode: this script targets Python 2, where the csv module
      # expects a byte-oriented file object.
      with io.open(path, 'rb') as f:
          reader = csv.reader(f, delimiter='\t', lineterminator='\n')
          for row in reader:
              if len(row) == 3:
                  mbid, name, origin = row
                  yield mbid, {
                      "mbid": mbid,
                      "name": name,
                      "origin": origin
                  }


  # Load the previously stored origins first, then the manually added ones,
  # so a manual entry overrides a stored entry with the same mbid.
  for _origin_path in (filepath_origin, filepath_manual_origin):
      origin_all.update(_read_origin_rows(_origin_path))
  37.  
  38.  
  def store_origins():
      """Rewrite the origin file at ``filepath_origin`` from ``origin_all``.

      Every record with a truthy ``origin`` is written as one tab-separated
      row of (mbid, name, origin), each field passed through ``clean_str``.
      Records without an origin are left out. Writing is best-effort: a row
      that fails is reported on stdout and skipped, and the remaining rows
      are still written.
      """
      with io.open(filepath_origin, 'wb') as f:
          writer = csv.writer(f, delimiter='\t', lineterminator='\n')
          for artist in origin_all.values():
              if not artist['origin']:
                  continue
              try:
                  writer.writerow([
                      clean_str(artist['mbid']),
                      clean_str(artist['name']),
                      clean_str(artist['origin'])
                  ])
              except Exception as e:
                  # %r keeps the report printable for non-ASCII names and
                  # shows the actual failure instead of hiding it.
                  print('problem storing artist %r: %r' % (artist['name'], e))
  53.  
  54.  
  # Walk every country's artist list, look up the origin of each artist that
  # is not known yet, and persist the accumulated origins after each country.
  for country in countries:
      processed_no = processed_no + 1

      print('')
      print('processing country: ' + country['name'] + ', ' + str(
          processed_no) + ' / ' + str(countries_no))

      artists = country['artists']
      # Use the real list length so the percentage is right even when a
      # country has fewer (or more) than 1000 artists.
      artists_no = len(artists)

      for ai, artist in enumerate(artists):
          if ai % 100 == 0 and ai != 0:
              print('progress: (' + country['name'] + ') - ' +
                    str(float(ai) / float(artists_no) * 100) + '%')

          mbid = artist['mbid']
          # Nothing to look up without an mbid; already-known artists are
          # not requested again.
          if not mbid or mbid in origin_all:
              continue

          try:
              mb = request_mb(mbid)
              if 'error' in mb:
                  # One retry after a short pause.
                  time.sleep(0.5)
                  mb = request_mb(mbid)

              if 'country' in mb:
                  origin_all[mbid] = {
                      "mbid": mbid,
                      "name": artist['name'],
                      "origin": mb['country']
                  }
          except Exception as e:
              # Best-effort per artist: report the actual failure and move
              # on so one bad lookup does not abort the whole run.
              print('failed to fetch origin for %r: %r' % (artist['name'], e))

      # Save after every country so progress survives an interrupted run.
      store_origins()
  88.  
  # Write every record that ended up without an origin to a separate file,
  # one tab-separated (mbid, name) row each.
  with io.open(filepath_no_data_origin, 'wb') as f:
      writer = csv.writer(f, delimiter='\t', lineterminator='\n')
      for artist in origin_all.values():
          if artist['origin']:
              continue
          try:
              writer.writerow([
                  clean_str(artist['mbid']),
                  clean_str(artist['name']),
              ])
          except Exception as e:
              # Best-effort: report the failing row (with %r so non-ASCII
              # names stay printable) and keep writing the rest.
              print('problem storing artist %r: %r' % (artist['name'], e))

Raw Paste


Login or Register to edit or fork this paste. It's free.