基于Spark网易云音乐数据分析
提示:适合用于课程设计或毕业设计,工作量达标,源码开放
项目分享:
https://gitee.com/asoonis/feed-neo
1 系统模块:包含爬虫、Scala代码、Spark、Hadoop、Elasticsearch、Logstash、Flume、ECharts、Log4j
emotional_analysis_spider:爬虫模块;emotional_analysis_web:数据处理模块(Scala代码);emotional_analysis_recommend:推荐模块(目前尚未开发);emotional_analysis_web:报表展现模块(注:原文此处模块名与数据处理模块重名,疑为笔误)
Crawler-sample-data:爬虫抓取样例数据;MySQL-yuncun:MySQL数据库结构及数据;config:ES、Logstash、Flume相关配置文件;mlib-text:机器学习分类测试相关数据;statistical-static-txt:SparkSQL相关关联静态数据
2 分析内容1.图计算 2.机器学习预测歌曲分类 3.评论词云 4.评论时间段 5.评论top榜 6.热歌top榜 7.用户性别比例 8.用户星座比例 9.用户年龄比例 10.用户全国地理分布 11.热评搜索等
3 界面展示4 项目源码 package org.bianqi.wangyi.web.es.test;import java.net.InetSocketAddress;import java.util.ArrayList;import java.util.Date;import java.util.HashMap;import java.util.List;import java.util.Map;import java.util.Map.Entry;import java.util.Set;import org.bianqi.web.wangyi.utils.ESClientUtils;import org.bianqi.web.wangyi.utils.PageBean;import org.bianqi.web.wangyi.utils.SearchUtils;import org.elasticsearch.action.get.GetResponse;import org.elasticsearch.action.index.IndexRequest;import org.elasticsearch.action.index.IndexResponse;import org.elasticsearch.action.search.SearchRequestBuilder;import org.elasticsearch.action.search.SearchResponse;import org.elasticsearch.action.search.SearchType;import org.elasticsearch.client.transport.TransportClient;import org.elasticsearch.cluster.node.DiscoveryNode;import org.elasticsearch.common.settings.Settings;import org.elasticsearch.common.text.Text;import org.elasticsearch.common.transport.InetSocketTransportAddress;import org.elasticsearch.common.xcontent.XContentBuilder;import org.elasticsearch.common.xcontent.XContentFactory;import org.elasticsearch.index.query.BoolQueryBuilder;import org.elasticsearch.index.query.QueryBuilder;import org.elasticsearch.index.query.QueryBuilders;import org.elasticsearch.search.SearchHit;import org.elasticsearch.search.SearchHits;import org.elasticsearch.search.highlight.HighlightBuilder;import org.elasticsearch.search.highlight.HighlightField;import org.junit.Before;import org.junit.Test;import com.github.pagehelper.PageHelper;import com.github.pagehelper.PageInfo;/** * ES客户端 *
Title: ESClientTest
*Description:
*School: qiqihar university
* @authorBQ * @date2018年2月23日下午6:46:19 * @version 1.0.0 */public class ESClientTest {private TransportClient client;private IndexRequest source;@Beforepublic void before11() throws Exception {Settings settings = Settings.settingsBuilder().put("client.transport.ignore_cluster_name", true) // 忽略集群名字验证,.build();client = TransportClient.builder().settings(settings).build().addTransportAddress(new InetSocketTransportAddress(new InetSocketAddress("192.168.254.215", 9300)));System.out.println("success connect");}/** * 查看集群信息 */ @Test public void testInfo() { List nodes = client.connectedNodes(); for (DiscoveryNode node : nodes) { System.out.println(node.getHostAddress()); } } /** * 组织json串, 方式1,直接拼接 */ public String createJson1() { String json = "{" + ""user":"kimchy"," + ""postDate":"2013-01-30"," + ""message":"trying out Elasticsearch"" + "}"; return json; } /** * 使用map创建json */ public Map createJson2() { Map json = new HashMap(); json.put("user", "kimchy"); json.put("postDate", new Date()); json.put("message", "trying out elasticsearch"); return json; } /** * 使用es的帮助类 */ public XContentBuilder createJson4() throws Exception { // 创建json对象, 其中一个创建json的方式 XContentBuilder source = XContentFactory.jsonBuilder() .startObject() .field("user", "kimchy") .field("postDate", new Date()) .field("message", "trying to out ElasticSearch") .endObject(); return source; } @Test public void test1() throws Exception { XContentBuilder source = createJson4(); // 存json入索引中 IndexResponse response = client.prepareIndex("twitter", "tweet", "1").setSource(source).get();// // 结果获取 String index = response.getIndex(); String type = response.getType(); String id = response.getId(); long version = response.getVersion(); boolean created = response.isCreated(); System.out.println(index + " : " + type + ": " + id + ": " + version + ": " + created); } /** * get API 获取指定文档信息 */ @Test public void testGet() {// GetResponse response = client.prepareGet("twitter", "tweet", "1")// .get(); GetResponse response 
= client.prepareGet("log4j-2018.02.19", "log4j_type", "AWGvky2aWyh2zXt6_1Um") .setOperationThreaded(false) // 线程安全 .get(); System.out.println(response.getSourceAsString()); } @Test public void testGet1(){ SearchResponse response = client.prepareSearch("log4j-2018.02.19") .setTypes("log4j_type") // 设置查询类型 // 1.SearchType.DFS_QUERY_THEN_FETCH = 精确查询 // 2.SearchType.SCAN = 扫描查询,无序 // 3.SearchType.COUNT = 不设置的话,这个为默认值,还有的自己去试试吧 .setSearchType(SearchType.DFS_QUERY_THEN_FETCH) // 设置查询关键词 .setQuery(QueryBuilders.matchQuery("log4j_type.thread", "main")) .addHighlightedField("main") .setHighlighterPreTags("") .setHighlighterPostTags("") // 设置查询数据的位置,分页用 .setFrom(0) // 设置查询结果集的最大条数 .setSize(60) // 设置是否按查询匹配度排序 .setExplain(true) // 最后就是返回搜索响应信息 .execute() .actionGet(); SearchHits searchHits = response.getHits(); System.out.println("-----------------在["+"ma"+"]中搜索关键字["+"main"+"]---------------------"); System.out.println("共匹配到:"+searchHits.getTotalHits()+"条记录!"); SearchHit[] hits = searchHits.getHits(); for (SearchHit searchHit : hits) { //获取高亮的字段 Map highlightFields = searchHit.getHighlightFields(); HighlightField highlightField = highlightFields.get("main"); System.out.println("高亮字段:"+highlightField.getName()+" 高亮部分内容:"+highlightField.getFragments()[0].string()); Map sourceAsMap = searchHit.sourceAsMap(); Set keySet = sourceAsMap.keySet(); for (String string : keySet) { //key value 值对应关系 System.out.println(string+":"+sourceAsMap.get(string)); } } } @Test public void test4(){ SearchRequestBuilder responsebuilder = client.prepareSearch("twitter").setTypes("tweet") ; SearchResponse myresponse=responsebuilder.setQuery(QueryBuilders.matchPhraseQuery("user", "kimchy")) .setFrom(0).setSize(10).setExplain(true).execute().actionGet(); SearchHits hits = myresponse.getHits(); for (int i = 0; i < hits.getHits().length; i++) { System.out.println(hits.getHits()[i].getSourceAsString());} } @Test public void test5(){ BoolQueryBuilder boolBuilder = QueryBuilders.boolQuery(); //使用should实现或者查询 
boolBuilder.should(QueryBuilders.matchQuery("content","爱我")); //c查询 SearchRequestBuilder searchRequestBuilder = this.client.prepareSearch("hotcomments") .setTypes("logs") .setSearchType(SearchType.DFS_QUERY_THEN_FETCH) //设置查询类型:1.SearchType.DFS_QUERY_THEN_FETCH 精确查询; 2.SearchType.SCAN 扫描查询,无序 .setQuery(boolBuilder) .setSize(10); //设置高亮显示 searchRequestBuilder.setHighlighterPostTags(""); searchRequestBuilder.setHighlighterPreTags(""); searchRequestBuilder.addHighlightedField("content"); //执行结果 SearchResponse response = searchRequestBuilder.get(); //接受结果 List result = new ArrayList(); //遍历结果 for(SearchHit hit:response.getHits()){ Map source = hit.getSource(); //处理高亮片段 Map highlightFields = hit.getHighlightFields(); HighlightField nameField = highlightFields.get("content"); if(nameField!=null){ Text[] fragments = nameField.fragments(); String nameTmp =""; for(Text text:fragments){ nameTmp+=text; } //将高亮片段组装到结果中去 source.put("content",nameTmp); } result.add(source); } for (Map map : result) {Set entrySet = map.entrySet();for (Entry entry : entrySet) {System.out.println(entry.getKey()+":::"+entry.getValue());}} } @Test public void test6() throws Exception{ PageBean searchListPageBean = new PageBean(); BoolQueryBuilder boolBuilder = QueryBuilders.boolQuery(); boolBuilder.must(QueryBuilders.matchQuery("content","那就这样吧")); PageHelper.startPage(5, 3); SearchRequestBuilder searchRequestBuilder = ESClientUtils.getClient().prepareSearch("hotcomments") .setTypes("logs") .setSearchType(SearchType.DFS_QUERY_THEN_FETCH) //设置查询类型:1.SearchType.DFS_QUERY_THEN_FETCH 精确查询; 2.SearchType.SCAN 扫描查询,无序 .setQuery(boolBuilder) .setFrom(5) .setSize(3); long totalHits = ESClientUtils.getClient().prepareSearch("hotcomments") .setTypes("logs") .setSearchType(SearchType.DFS_QUERY_THEN_FETCH) .setQuery(boolBuilder) .get() .getHits().getTotalHits(); //设置高亮显示 searchRequestBuilder.setHighlighterPostTags(""); searchRequestBuilder.setHighlighterPreTags(""); 
searchRequestBuilder.addHighlightedField("content"); //执行结果 SearchResponse response = searchRequestBuilder.get(); //接受结果 List result = new ArrayList(); //遍历结果 for(SearchHit hit:response.getHits()){ Map source = hit.getSource(); String songName = null; String songUrl = null; Set entrySet = source.entrySet(); for (Entry entry : entrySet) { if(entry.getKey().equals("songId")){ Integer songId = (Integer)entry.getValue(); songName = SearchUtils.getSongNameById(songId); songUrl = SearchUtils.getSongUrlById(songId); } } source.put("songName", songName); source.put("songUrl", songUrl); //处理高亮片段 Map highlightFields = hit.getHighlightFields(); HighlightField nameField = highlightFields.get("content"); if(nameField!=null){ Text[] fragments = nameField.fragments(); String nameTmp =""; for(Text text:fragments){ nameTmp+=text; } //将高亮片段组装到结果中去 source.put("content",nameTmp); } result.add(source); } PageInfo pageInfo = new PageInfo(result); long totalPage = (totalHits + 3 - 1) / 3;int pageNum = pageInfo.getPageNum();searchListPageBean.setPage(pageNum);searchListPageBean.setTotalPage(totalPage);searchListPageBean.setList(result); for (Map map : result) {Set entrySet = map.entrySet();for (Entry entry : entrySet) {System.out.println(entry.getKey()+":::"+entry.getValue());}} }} 5 最后项目分享:
https://gitee.com/asoonis/feed-neo