Giter Club home page Giter Club logo

music163-spiders's Introduction

网易云音乐使用了内联框架,除网站的主框架外,其他内容都是嵌在子网页当中,如果直接使用 requests 库是无法爬取到我 们想要的内容的,因此我们需要使用 selenium 的 switch_to_frame()方法来切换到子网页中,这时获取的网页源代码才包含我们需要的内容.详细的爬取思路大家可以参考我的这篇文章网易云音乐评论爬虫(二):爬取全部热门歌曲及其对应的id号 爬取入口是:https://music.163.com/#/discover/artist

一种方法爬取思路同上。另一种方法是直接爬取嵌在iframe里面的子页面(控制面板--Sources--contentFrame--域名文件夹下可找到)。 爬取子frame

由于网易云音乐的歌曲评论是通过Ajax进行加载的,而且加载过程还对参数进行了js混淆加密,使得爬取歌曲全部评论变得困难.笔者对加密过程详细分析之后得出了它的 加密过程:两次 AES 加密得到 params 参数的值,RSA 加密得到 encSeckey 的值.然后使用 Python 进行相应的加密得到对应的参数, 利用得到的加密参数发起带 data的post请求即可获得含有歌曲评论的json格式的数据.详细爬取思路分析请参考:网易云音乐评论爬虫(三):爬取歌曲的全部评论

制作网易云歌手粉丝地域分布热力图,我把它分为两个步骤:1.获取歌手全部的粉丝信息 2.制作歌手粉丝地域分布的热力图

在歌手的个人主页,我们可以找到粉丝数据(通过Ajax进行加载),这些数据是进行了加密的,加密方式同歌曲评论的加密方式,加密方式的分析请参考:网易云音乐评论爬虫(三):爬取歌曲的全部评论,只要分析出其中变量的变化规律即能获取到这些数据。 再通过粉丝的id号,爬取粉丝个人主页中的地理位置信息。

由于上一步获取到的位置是城市编码,因此我们需要将城市编码转换为中文的位置信息,再通过百度的API将位置转换为经纬度,使用百度API需要先获取服务密钥(ak),最后把经纬度,同一个地方的粉丝 数量按字典格式插入到热力图的模板HTML代码中去!热力图的H5模板文件格式:

<!DOCTYPE html>
<html>
<head>
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
    <meta name="viewport" content="initial-scale=1.0, user-scalable=no" />
    <script type="text/javascript" src="http://api.map.baidu.com/api?v=2.0&ak=这里填写你的百度服务密钥(ak)"></script>
    <script type="text/javascript" src="http://api.map.baidu.com/library/Heatmap/2.0/src/Heatmap_min.js"></script>
    <title>热力图功能示例</title>
    <style type="text/css">
		ul,li{list-style: none;margin:0;padding:0;float:left;}
		html{height:100%}
		body{height:100%;margin:0px;padding:0px;font-family:"微软雅黑";}
		#container{height:500px;width:100%;}
		#r-result{width:100%;}
    </style>
</head>
<body>
	<div id="container"></div>
	<div id="r-result">
		<input type="button"  onclick="openHeatmap();" value="显示热力图"/><input type="button"  onclick="closeHeatmap();" value="关闭热力图"/>
	</div>
</body>
</html>
<script type="text/javascript">
    var map = new BMap.Map("container");          // 创建地图实例

    var point = new BMap.Point(113.418261, 39.921984);
    map.centerAndZoom(point, 5);             // 初始化地图,设置中心点坐标和地图级别
    map.enableScrollWheelZoom(); // 允许滚轮缩放

    var points =[
{"lat":39.92998577808024,"lng":116.39564503787867,"count":4000.57},
{"lat":39.143929903310074,"lng":117.21081309155257,"count":2300.01},
{"lat":38.048958314615454,"lng":114.52208184420766,"count":1015.77},
{"lat":37.89027705396754,"lng":112.5508635890553,"count":680.13},
{"lat":40.828318873081585,"lng":111.6603505200542,"count":520.52},
{"lat":41.808644783515746,"lng":123.43279092160505,"count":709.67},
{"lat":38.94870993830429,"lng":121.59347778143518,"count":535.17},
{"lat":43.89833760709784,"lng":125.31364242720072,"count":596.65},
{"lat":45.7732246332393,"lng":126.65771685544611,"count":526.13},
{"lat":31.24916171001514,"lng":121.48789948569473,"count":3709.03},
{"lat":32.05723550180587,"lng":118.77807440802562,"count":1845.6},
{"lat":30.259244461536102,"lng":120.2193754157201,"count":2606.63},
{"lat":29.885258965918055,"lng":121.57900597258933,"count":1270.33},
{"lat":31.86694226068694,"lng":117.28269909168304,"count":1352.59},
{"lat":26.04712549657293,"lng":119.33022110712668,"count":1679.44},
{"lat":24.489230612469232,"lng":118.10388604566381,"count":765.8},
{"lat":28.689578000141147,"lng":115.89352754583604,"count":674.6},
{"lat":36.68278472716141,"lng":117.02496706629023,"count":1164.14},
{"lat":36.10521490127382,"lng":120.38442818368189,"count":1369.14},
{"lat":34.756610064140254,"lng":113.64964384986449,"count":2778.95},
{"lat":30.58108412692075,"lng":114.31620010268132,"count":2517.44},
{"lat":28.21347823085322,"lng":112.9793527876505,"count":1266.63},
{"lat":23.12004910207623,"lng":113.30764967515182,"count":2540.85},
{"lat":22.546053546205247,"lng":114.0259736573215,"count":1756.52},
{"lat":22.80649293560261,"lng":108.29723355586638,"count":854},
{"lat":20.022071276952243,"lng":110.3308018483363,"count":551.29},
{"lat":29.54460610888615,"lng":106.53063501341296,"count":3725.95},
{"lat":30.679942845419564,"lng":104.06792346330406,"count":2641.14},
{"lat":26.62990674144093,"lng":106.7091770961758,"count":923.26},
{"lat":25.049153100453157,"lng":102.71460113878045,"count":1530.5},
{"lat":29.662557062056536,"lng":91.11189089598402,"count":337.69},
{"lat":42.98636494637781,"lng":125.15014857862096,"count":1949.5},
{"lat":36.06422552504259,"lng":103.8233054407292,"count":370.39},
{"lat":36.640738611957964,"lng":101.76792098980276,"count":316.5},
{"lat":38.50262101187604,"lng":106.2064786078384,"count":474.94},
{"lat":43.84038034721766,"lng":87.56498774111579,"count":344.71},
   ];

    if(!isSupportCanvas()){
    	alert('热力图目前只支持有canvas支持的浏览器,您所使用的浏览器不能使用热力图功能~')
    }
	//详细的参数,可以查看heatmap.js的文档 https://github.com/pa7/heatmap.js/blob/master/README.md
	//参数说明如下:
	/* visible 热力图是否显示,默认为true
     * opacity 热力的透明度,1-100
     * radius 势力图的每个点的半径大小
     * gradient  {JSON} 热力图的渐变区间 . gradient如下所示
     *	{
			.2:'rgb(0, 255, 255)',
			.5:'rgb(0, 110, 255)',
			.8:'rgb(100, 0, 255)'
		}
		其中 key 表示插值的位置, 0~1.
		    value 为颜色值.
     */
	heatmapOverlay = new BMapLib.HeatmapOverlay({"radius":20});
	map.addOverlay(heatmapOverlay);
	heatmapOverlay.setDataSet({data:points,max:100});
	//是否显示热力图
    function openHeatmap(){
        heatmapOverlay.show();
    }
	function closeHeatmap(){
        heatmapOverlay.hide();
    }
	closeHeatmap();
    function setGradient(){
     	/*格式如下所示:
		{
	  		0:'rgb(102, 255, 0)',
	 	 	.5:'rgb(255, 170, 0)',
		  	1:'rgb(255, 0, 0)'
		}*/
     	var gradient = {};
     	var colors = document.querySelectorAll("input[type='color']");
     	colors = [].slice.call(colors,0);
     	colors.forEach(function(ele){
			gradient[ele.getAttribute("data-key")] = ele.value;
     	});
        heatmapOverlay.setOptions({"gradient":gradient});
    }
	//判断浏览区是否支持canvas
    function isSupportCanvas(){
        var elem = document.createElement('canvas');
        return !!(elem.getContext && elem.getContext('2d'));
    }
</script>

我们生成的字典数据({"lat":23.043023815368237,"lng":113.76343399075655,"count":204})只需要一个一个插入到point列表中去就行, 字典格式为{"lat": 纬度, "lng": 经度, "count":这个地理位置的粉丝数量}。最后把这个HTML文件在浏览器中打开,再点击显示热力图,缩放地 图便能看到歌手粉丝地域分布的热力图。以薛之谦为例,我们来看一下他的部分粉丝地域分布的热力图:薛之谦粉丝地域分布热力图

music163-spiders's People

Contributors

zyingzhou avatar

Stargazers

 avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar

Watchers

 avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar

music163-spiders's Issues

歌词获取失败了

直接运行代码中的例子,get_lyric.py
会提示下面的错误。
{'code': -460, 'msg': 'Cheating'}

hello?

driver.switch_to.frame("g_iframe")
报错请问是为什么呀

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. 📊📈🎉

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Some thing interesting about web. New door for the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Some thing interesting about game, make everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google ❤️ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.