Nov-26-2017, 04:09 PM
I have this code below that iterate through some tracks. And then for each track I want to use the musixmatch api to get and print the lyrics of the track based on the artist name and track name.
code that iterete trough some tracks and print the lyrics:
The print(tracks) returns in this format:
{12: {'trackID': 12, 'title': 'Achtung Baby', 'number': '1', 'artist': 'U2', 'album': 'Achtung Baby', 'albumID': 2, 'duration': '291'},...
When the code exuted the lyrics for the firsts tracks are printed, but then it appears an error:
Traceback (most recent call last):
File "C:/Users/Ozzy/PycharmProjects/getData/getData.py", line 239, in <module>
print(song_lyric(title, artist))
File "C:/Users/Ozzy/PycharmProjects/getData/getData.py", line 72, in song_lyric
lyrics_tracking(tracking_url)
File "C:/Users/Ozzy/PycharmProjects/getData/getData.py", line 79, in lyrics_tracking
request = urllib.request.Request(querystring)
File "C:\Users\Ozzy\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 329, in __init__
self.full_url = url
File "C:\Users\Ozzy\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 355, in full_url
self._parse()
File "C:\Users\Ozzy\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 384, in _parse
raise ValueError("unknown url type: %r" % self.full_url)
ValueError: unknown url type: ''
Do you know why this error is appearing? The request = urllib.request.Request(querystring) in the code should be ok because it work for some artists.
The methods to get the lyrics from musixmatch are public available:
code that iterete trough some tracks and print the lyrics:
for i, v in tracks.items(): artist = tracks[i]['artist'].replace(" ", "+") title = tracks[i]['title'].replace(" ", "+") print(tracks) print(song_lyric(title, artist))
The print(tracks) returns in this format:
{12: {'trackID': 12, 'title': 'Achtung Baby', 'number': '1', 'artist': 'U2', 'album': 'Achtung Baby', 'albumID': 2, 'duration': '291'},...
When the code exuted the lyrics for the firsts tracks are printed, but then it appears an error:
Traceback (most recent call last):
File "C:/Users/Ozzy/PycharmProjects/getData/getData.py", line 239, in <module>
print(song_lyric(title, artist))
File "C:/Users/Ozzy/PycharmProjects/getData/getData.py", line 72, in song_lyric
lyrics_tracking(tracking_url)
File "C:/Users/Ozzy/PycharmProjects/getData/getData.py", line 79, in lyrics_tracking
request = urllib.request.Request(querystring)
File "C:\Users\Ozzy\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 329, in __init__
self.full_url = url
File "C:\Users\Ozzy\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 355, in full_url
self._parse()
File "C:\Users\Ozzy\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 384, in _parse
raise ValueError("unknown url type: %r" % self.full_url)
ValueError: unknown url type: ''
Do you know why this error is appearing? The request = urllib.request.Request(querystring) in the code should be ok because it work for some artists.
The methods to get the lyrics from musixmatch are public available:
def song_lyric(song_name, artist_name): while True: querystring = apiurl_musixmatch + "matcher.lyrics.get?q_track=" + urllib.parse.quote( song_name) + "&q_artist=" + urllib.parse.quote( artist_name) + "&apikey=" + apikey_musixmatch + "&format=json&f_has_lyrics=1" # matcher.lyrics.get?q_track=sexy%20and%20i%20know%20it&q_artist=lmfao request = urllib.request.Request(querystring) # request.add_header("Authorization", "Bearer " + client_access_token) request.add_header("User-Agent", "curl/7.9.8 (i686-pc-linux-gnu) libcurl 7.9.8 (OpenSSL 0.9.6b) (ipv6 enabled)") # Must include user agent of some sort, otherwise 403 returned while True: try: response = urllib.request.urlopen(request, timeout=4) # timeout set to 4 seconds; automatically retries if times out raw = response.read() except socket.timeout: print("Timeout raised and caught") continue break json_obj = json.loads(raw.decode('utf-8')) body = json_obj["message"]["body"]["lyrics"]["lyrics_body"] copyright = json_obj["message"]["body"]["lyrics"]["lyrics_copyright"] tracking_url = json_obj["message"]["body"]["lyrics"]["html_tracking_url"] #print(tracking_url) lyrics_tracking(tracking_url) return (body + "\n\n" + copyright) def lyrics_tracking(tracking_url): while True: querystring = tracking_url request = urllib.request.Request(querystring) # request.add_header("Authorization", "Bearer " + client_access_token) request.add_header("User-Agent", "curl/7.9.8 (i686-pc-linux-gnu) libcurl 7.9.8 (OpenSSL 0.9.6b) (ipv6 enabled)") # Must include user agent of some sort, otherwise 403 returned try: response = urllib.request.urlopen(request, timeout=4) # timeout set to 4 seconds; automatically retries if times out raw = response.read() except socket.timeout: print("Timeout raised and caught") continue break print(raw)Full working exemple that reproduces the error:
import requests import json import urllib.request, urllib.error, urllib.parse import socket apikey_musixmatch = '0b4a363bbd71974c2634837d5b5d1d9a' #generated for the example apiurl_musixmatch = 'http://api.musixmatch.com/ws/1.1/' api_key = "b088cbedecd40b35dd89e90f55227ac2" #generated for the example def song_lyric(song_name, artist_name): while True: querystring = apiurl_musixmatch + "matcher.lyrics.get?q_track=" + urllib.parse.quote( song_name) + "&q_artist=" + urllib.parse.quote( artist_name) + "&apikey=" + apikey_musixmatch + "&format=json&f_has_lyrics=1" # matcher.lyrics.get?q_track=sexy%20and%20i%20know%20it&q_artist=lmfao request = urllib.request.Request(querystring) # request.add_header("Authorization", "Bearer " + client_access_token) request.add_header("User-Agent", "curl/7.9.8 (i686-pc-linux-gnu) libcurl 7.9.8 (OpenSSL 0.9.6b) (ipv6 enabled)") # Must include user agent of some sort, otherwise 403 returned while True: try: response = urllib.request.urlopen(request, timeout=4) # timeout set to 4 seconds; automatically retries if times out raw = response.read() except socket.timeout: print("Timeout raised and caught") continue break json_obj = json.loads(raw.decode('utf-8')) body = json_obj["message"]["body"]["lyrics"]["lyrics_body"] copyright = json_obj["message"]["body"]["lyrics"]["lyrics_copyright"] tracking_url = json_obj["message"]["body"]["lyrics"]["html_tracking_url"] print("Tracking_url====================" +tracking_url + "==================================") lyrics_tracking(tracking_url) return (body + "\n\n" + copyright) def lyrics_tracking(tracking_url): while True: querystring = tracking_url request = urllib.request.Request(querystring) # request.add_header("Authorization", "Bearer " + client_access_token) request.add_header("User-Agent", "curl/7.9.8 (i686-pc-linux-gnu) libcurl 7.9.8 (OpenSSL 0.9.6b) (ipv6 enabled)") # Must include user agent of some sort, otherwise 403 returned try: response = urllib.request.urlopen(request, timeout=4) # timeout set to 4 seconds; automatically retries if times out raw = response.read() except socket.timeout: print("Timeout raised and caught") continue break print(raw) ID = 0 #get top artists from country artists = {} for i in range(2, 3): artists_response = requests.get( 'http://ws.audioscrobbler.com/2.0/?method=geo.gettopartists&country=spain&format=json&page=' + str(i) + '&api_key=' + api_key) artists_data = artists_response.json() for artist in artists_data["topartists"]["artist"]: name = artist["name"] url = artist["url"] if ID > 1: continue artists[ID] = {} artists[ID]['ID'] = ID artists[ID]['name'] = name ID += 1 for i, v in artists.items(): chosen = artists[i]['name'].replace(" ", "+") artist_response = requests.get( 'http://ws.audioscrobbler.com/2.0/?method=artist.getinfo&format=json&artist=' + chosen + '&api_key=' + api_key) artist_data = artist_response.json() # get top albums of the artists albums = {} for i, v in artists.items(): chosen = artists[i]['name'].replace(" ", "+") topalbums_response = requests.get( 'http://ws.audioscrobbler.com/2.0/?method=artist.gettopalbums&format=json&artist=' + chosen + '&api_key=' + api_key + '&limit=5') albums_data = topalbums_response.json() for album in albums_data['topalbums']['album']: name = album["name"] url = album["url"] albums[ID] = {} albums[ID]['ID'] = ID albums[ID]['artist'] = artists[i]['name'] albums[ID]['artistID'] = artists[i]['ID'] albums[ID]['name'] = name ID += 1 # Get tracks of the album tracks = {} for i, v in albums.items(): artist = albums[i]['artist'].replace(" ", "+") name = albums[i]['name'].replace(" ", "+") album_response_data = requests.get( 'http://ws.audioscrobbler.com/2.0/?method=album.getinfo&format=json&api_key=' + api_key + '&artist=' + artist + '&album=' + name) album_response = album_response_data.json() for album in album_response['album']['tracks']['track']: title = album['name'] tracks[ID] = {} tracks[ID]['trackID'] = ID tracks[ID]['title'] = title tracks[ID]['artist'] = albums[i]['artist'] tracks[ID]['album'] = albums[i]['name'] tracks[ID]['albumID'] = albums[i]['ID'] ID += 1 for i, v in tracks.items(): artist = tracks[i]['artist'].replace(" ", "+") title = tracks[i]['title'].replace(" ", "+") # print the lyric of each track print(song_lyric(title, artist))