获取某易云音乐指定歌单下所有歌曲的音频文件、专辑图片、歌手图片和 lrc 歌词
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import urllib.request
import json
import re
# HTTP request headers passed to every requests.get() call that targets
# music.163.com pages (lyric API, album pages, artist pages).
headers = {
    # Referer and Host both point at the music.163.com site itself.
    'Referer': 'http://music.163.com/',
    'Host': 'music.163.com',
    # Desktop Chrome user-agent string.
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.75 Safari/537.36',
    # Accept header listing HTML/XML/image content types.
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
}
# Playlist page whose tracks are collected below.
play_url = 'http://music.163.com/playlist?id=752199922'

# Start Chrome without a visible window.
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
# NOTE(review): the positional driver path, the chrome_options= keyword and the
# find_element(s)_by_* helpers used below are the older Selenium API; confirm
# they are available in the installed Selenium version.
browser = webdriver.Chrome('D:/chromedriver_win32/chromedriver.exe', chrome_options=chrome_options)

# One entry per track, in this fixed order:
#   [song name, mp3 URL, lyric API URL, album page URL, [artist page URLs]]
assets = []
try:
    browser.get(play_url)
    # The track table is looked up after switching into the 'contentFrame' frame.
    browser.switch_to.frame('contentFrame')
    trList = (
        browser.find_element_by_class_name('m-table ')
        .find_element_by_css_selector('tbody')
        .find_elements_by_tag_name('tr')
    )
    for row in trList:
        tdList = row.find_elements_by_tag_name('td')

        # Second column holds the song link; its query string (the part after
        # '?', presumably "id=<number>") is reused to build the media and
        # lyric URLs.
        songLink = tdList[1].find_element_by_tag_name('a')
        musicID = str(songLink.get_attribute('href')).split('?')[1]
        musicName = songLink.find_element_by_tag_name('b').get_attribute('title')

        # Fourth column: one link per artist. Fifth column: the album link.
        singerList = [
            link.get_attribute('href')
            for link in tdList[3].find_elements_by_tag_name('a')
        ]
        albumUrl = tdList[4].find_element_by_tag_name('a').get_attribute('href')

        musicUrl = 'http://music.163.com/song/media/outer/url?' + musicID + '.mp3'
        lyricUrl = 'http://music.163.com/api/song/lyric?' + musicID + '&lv=1&kv=1&tv=-1'

        assets.append([musicName, musicUrl, lyricUrl, albumUrl, singerList])
finally:
    # Everything needed has been copied into plain strings, so the browser
    # (and its chromedriver process) can be shut down even if scraping failed.
    browser.quit()
def _first_match(pattern, text):
    """Return the first capture of *pattern* in *text*; IndexError if there is none."""
    return re.findall(pattern, text, re.DOTALL)[0]


def _safe_name(name):
    """Return *name* with characters that Windows forbids in file names replaced by '_'."""
    return re.sub(r'[\\/:*?"<>|]', '_', str(name))


# Download, for every collected track: the mp3, the lyrics (if any), the album
# cover (saved under both the song name and the album name) and one picture per
# artist. A failure at any step skips the rest of that track only.
for musicName, musicDownUrl, lrcDownUrl, albumUrl, singerUrl in assets:
    fileName = _safe_name(musicName)
    try:
        urllib.request.urlretrieve(musicDownUrl, 'D:/歌曲/%s.mp3' % fileName)

        # Lyrics: the API answers with JSON; tracks without lyrics simply lack
        # the 'lrc'/'lyric' keys and are skipped without failing the track.
        json_obj = json.loads(requests.get(lrcDownUrl, headers=headers).text)
        try:
            lrc = json_obj['lrc']['lyric']
        except KeyError:
            pass
        else:
            # 'w' so that re-running the script does not append duplicate lyrics.
            with open('D:/歌词/{}.lrc'.format(fileName), 'w', encoding='utf-8') as fp:
                fp.write(lrc)

        # Album cover: fetch the album page and the image once, then store the
        # same bytes under the song name and under the album name.
        html = requests.get(albumUrl, headers=headers).text
        img = _first_match(r'<meta property="og:image" content="(.*?)" />', html)
        cover = requests.get(img).content
        with open('D:/专辑图片/{}.jpg'.format(fileName), 'wb') as fp:
            fp.write(cover)
        albumName = _first_match(r'meta property="og:title" content="(.*?)" />', html)
        with open('D:/专辑图片/{}.jpg'.format(_safe_name(albumName)), 'wb') as fp:
            fp.write(cover)

        # Artist pictures: one page per artist URL collected for this track.
        for k in singerUrl:
            html = requests.get(k, headers=headers).text
            img = _first_match(r'<meta property="og:image" content="(.*?)" />', html)
            req = requests.get(img)
            singerName = _first_match(r'<meta name="keywords" content="(.*?)" />', html)
            with open('D:/歌手图片/{}.jpg'.format(_safe_name(singerName)), 'wb') as fp:
                fp.write(req.content)

        print('下载成功')
    except Exception as err:
        # Best effort per track: report which track failed and why, then go on.
        # Exception (not a bare except) so Ctrl+C still stops the script.
        print('下载失败', musicName, repr(err))
只不过感觉下载速度有点慢，才 60 个资源就下了好久，不知道是不是用了 chromedriver 的原因。
本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系:hwhale#tublm.com(使用前将#替换为@)