-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtodb.py
231 lines (200 loc) · 8.36 KB
/
todb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
#!/usr/bin/env python
#-*- coding:utf-8 -*-
#
# Methodes pour ajouter le resultat de requetes a la base de donnees
from allmusic import *
import sqlite3 as lite
import MySQLdb as mdb
import sys
def artistToDB(artist,cur):
artistId = strToId(artist['ids']['nameId']) # 'MN0000423829' -> 423829
# Verifie si l'artiste n'est pas deja dans la table Artists
cur.execute('SELECT COUNT(*) FROM Artists WHERE id='+str(artistId))
data = cur.fetchone()
if data[0] > 0:
print "*** Artiste deja dans la base"
return
name = artist['name']
print "**********************************************"
print artist['active']
if artist['active'] == None:
active = []
else:
active = [int(d[:-1]) for d in artist['active']] # '1960s','1970s',... -> 1960, 1970,...
birthDate,birthPlace='NULL','NULL'
if artist.has_key('birth') and artist['birth'] != None:
if artist['birth'].has_key('date') and artist['birth']['date'] != None:
birthDate = artist['birth']['date']
if artist['birth'].has_key('place') and artist['birth']['place'] != None:
birthPlace = artist['birth']['place']
print 'birthDate=%s, birthPlace=%s' % (birthDate,birthPlace)
country = 'NULL'
if artist.has_key('country') and artist['country'] != None:
country = artist['country']
deathDate,deathPlace='NULL','NULL'
if artist.has_key('death') and artist['death'] != None:
if artist['death'].has_key('date') and artist['death']['date'] != None:
deathDate = artist['death']['date']
if artist['death'].has_key('place') and artist['death']['place'] != None:
deathPlace = artist['death']['place']
headlineBio = 'NULL'
if artist.has_key('headlineBio') and artist['headlineBio'] != None:
headlineBio = artist['headlineBio'].encode('utf-8')
print("INSERT INTO Artists VALUES(?,?,?,?,?,?,?,?)" , (artistId, name, birthDate, birthPlace, deathDate, deathPlace, country, headlineBio))
#le buffer est nécessaire dans certains cas pour des problemes d'encoding
cur.execute("INSERT INTO Artists VALUES(?,?,?,?,?,?,?,?)" , (artistId, name, birthDate, birthPlace, deathDate, deathPlace, country, buffer(headlineBio)))
#le buffer est nécessaire dans certains cas pour des problemes d'encoding
data = cur.fetchone()
# Ajoute les annees d'activite a la table ActiveYears
for decade in active:
cur.execute(('INSERT INTO ActiveYears VALUES(?,?)'),(artistId, decade))
data = cur.fetchone()
# Ajoute les genres de l'artiste a la table ArtistGenres
musicGenres = artist['musicGenres']
print musicGenres
if musicGenres != None:
for musicGenre in musicGenres:
genreId = strToId(musicGenre['id'])
weight = int(musicGenre['weight'])
# Verifie que le genre est deja dans la table des genres
cur.execute('SELECT COUNT(*) FROM Genres WHERE id='+str(genreId))
data = cur.fetchone()
if data[0] == 0:
# Il faut d'abord ajouter le genre a la table des genres
genreName = musicGenre['name']
cur.execute('INSERT INTO Genres VALUES(?,?)', (genreId, genreName))
data = cur.fetchone()
cur.execute(('INSERT INTO ArtistGenres VALUES(?,?,?)'), (artistId, genreId, weight))
data = cur.fetchone()
def isAlbumInDb(albumId,cur):
# Verifie si l'artiste n'est pas deja dans la table Artists
cur.execute('SELECT COUNT(*) FROM Albums WHERE id='+str(albumId))
data = cur.fetchone()
if data[0] > 0:
print "*** Album deja dans la base ", albumId
return True
else:
return False
def albumToDB(album,cur):
albumId = strToId(album['ids']['albumId']) # 'MW0000187921' -> 187921
if isAlbumInDb(albumId,cur) == True:
return
title = album['title']
originalReleaseDate = 'NULL'
if album.has_key('year') and album['year'] != '': # API v1
originalReleaseDate = album['year']
elif album.has_key('originalReleaseDate') and album['originalReleaseDate'] != None: # API v2.1
originalReleaseDate = album['originalReleaseDate']
label = 'NULL' # API v1
if album.has_key('label') and album['label'] != '':
label = album['label']
atype = 'NULL' # API v1
if album.has_key('atype'):
atype = album['type']
rating = int(album['rating'])
duration = 'NULL' # Seulement dans l'API v2.1
if album.has_key('duration'):
duration = album['duration']
# Execute la requete d'insertion dans Albums
cur.execute("INSERT INTO Albums VALUES(?,?,?,?,?,?,?)", (albumId, title, originalReleaseDate, label, atype, rating, duration))
data = cur.fetchone()
# Ajoute les flags de l'album a la table AlbumFlags
flags = album['flags']
if flags != None:
for flag in flags:
cur.execute("INSERT INTO AlbumFlags VALUES(?,?)", (albumId, flag))
data = cur.fetchone()
# Ajoute les artistes principaux de l'album
# a la table AlbumPrimaryArtists (API v2.1)
if album.has_key('primaryArtists'):
artists = album['primaryArtists']
for artist in artists:
artistId = strToId(artist['id'])
# Verifie que cet artiste est deja dans la table Artists
cur.execute('SELECT COUNT(*) FROM Artists WHERE id='+str(artistId))
data = cur.fetchone()
if data[0] == 0:
# Il faut d'abord ajouter l'artiste a la table Artists
artistResult = reqArtistById(artistId)
artistToDB(artistResult,cur)
cur.execute(("INSERT INTO AlbumPrimaryArtists VALUES(?,?)"), (albumId, artistId))
data = cur.fetchone()
# Ajoute les genres de l'album la table AlbumGenres (API v2.1)
if album.has_key('genres'):
genres = album['genres']
if genres != None:
for genre in genres:
genreId = strToId(genre['id'])
weight = int(genre['weight'])
# Verifie que le genre est deja dans la table Genres
cur.execute('SELECT COUNT(*) FROM Genres WHERE id='+str(genreId))
data = cur.fetchone()
if data[0] == 0:
# Il faut d'abord ajouter le genre a la table Genres
genreName = genre['name']
cur.execute("INSERT INTO Genres VALUES(?,?)", (genreId, genreName))
data = cur.fetchone()
cur.execute("INSERT INTO AlbumGenres VALUES(?,?,?)", (albumId, genreId, weight))
data = cur.fetchone()
# TODO ajouter les primaryArtists dans une table AlbumPrimaryArtists
def creditToDB(credit,albumId,cur, con):
if albumId == None:
print '__ERR : l\'id de l\'album doit etre specifie'
return
print credit
nameId = strToId(credit['id']) # 'MN0000423829' -> 423829
# Verifie si cet artiste se trouve deja dans la table Artists
cur.execute('SELECT COUNT(*) FROM Artists WHERE id='+str(nameId))
data = cur.fetchone()
if data[0] == 0:
# Il faut d'abord ajouter l'artiste a la table Artists
artist = reqArtistById(nameId)
if artist == None:
print '__ERR : impossible de trouver l\'artiste '+nameIdToStr(nameId)+' sur allmusic !'
return
artistToDB(artist,cur)
#name = '"'+credit['name'].encode('utf-8')+'"'
'''
ctype = credit['type'] # un caractere
cur.execute("INSERT INTO CreditTypes VALUES(?,?,?)", (albumId, nameId, ctype))
data = cur.fetchone()
'''
# Ajoute les jobs effectues sur l'album dans la table CreditJobs
jobs = map(lambda x : x.strip(),credit['credit'].split(','))
for job in jobs:
jobLabel = (job,)
print job
cur.execute("SELECT COUNT(jobId) FROM jobs WHERE job=?", jobLabel)
data = cur.fetchone()
if data[0] == 0:
cur.execute("INSERT INTO Jobs (job) VALUES(?)", jobLabel)
print("INSERT INTO Jobs (job) VALUES(?)", jobLabel)
con.commit()
cur.execute("SELECT jobId FROM jobs WHERE job=?", jobLabel)
data = cur.fetchone()
if data[0] == 0:
print "**********************************"
print jobLabel
print "+++++++++ ", albumId,nameId,data[0]
else:
try:
print("INSERT INTO Credits VALUES(?,?,?)", (albumId,nameId,data[0]))
cur.execute("INSERT INTO Credits VALUES(?,?,?)", (albumId,nameId,data[0]))
except :
print albumId,nameId,data[0]
print "---- Credit deja insere"
con.commit()
def searchResultsToDB(results):
con = lite.connect('data/allmusic.db')
#con = mdb.connect('localhost', 'root', '', 'jazzgraph');
with con:
cur = con.cursor()
for result in results:
if result['type'] == 'artist':
artistToDB(result['name'],cur)
elif result['type'] == 'album':
albumToDB(result['album'],cur)
else:
print '__ERR : type de recherche non pris en charge'
con.commit()
con.close()