ikstar明星合成Elasticsea
2024年06月19日 风云资讯
1.1 IK分词器的安装
1.1.1 前期准备工作
1)CentOS联网
配置CentOS使其能够连接外网,并确认在Linux虚拟机中执行 ping www.baidu.com 是畅通的。
参照官方文档安装IK分词器的两种方法:
1、直接到https://github.com/medcl/elasticsearch-analysis-ik/releases下载对应版本zip包
解压到elasticsearch的plugins目录下
unzip elasticsearch-analysis-ik-5.5.1.zip
2、使用elasticsearch-plugin命令安装
./bin/elasticsearch-plugin install https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v5.5.1/elasticsearch-analysis-ik-5.5.1.zip
重新启动elasticsearch
1.2 IK分词器的使用
1.2.1 命令行查看结果
ik_smart模式
[itstar@bigdata11 elasticsearch]$ curl -XGET 'http://bigdata11:9200/_analyze?pretty&analyzer=ik_smart' -d '中华人民共和国'
{
"tokens" : [
{
"token" : "中华人民共和国",
"start_offset" : 0,
"end_offset" : 7,
"type" : "CN_WORD",
"position" : 0
}
]
}
ik_max_word模式
[itstar@bigdata11 elasticsearch]$ curl -XGET 'http://bigdata11:9200/_analyze?pretty&analyzer=ik_max_word' -d '中华人民共和国'
{
"tokens" : [
{
"token" : "中华人民共和国",
"start_offset" : 0,
"end_offset" : 7,
"type" : "CN_WORD",
"position" : 0
},
{
"token" : "中华人民",
"start_offset" : 0,
"end_offset" : 4,
"type" : "CN_WORD",
"position" : 1
},
{
"token" : "中华",
"start_offset" : 0,
"end_offset" : 2,
"type" : "CN_WORD",
"position" : 2
},
{
"token" : "华人",
"start_offset" : 1,
"end_offset" : 3,
"type" : "CN_WORD",
"position" : 3
},
{
"token" : "人民共和国",
"start_offset" : 2,
"end_offset" : 7,
"type" : "CN_WORD",
"position" : 4
},
{
"token" : "人民",
"start_offset" : 2,
"end_offset" : 4,
"type" : "CN_WORD",
"position" : 5
},
{
"token" : "共和国",
"start_offset" : 4,
"end_offset" : 7,
"type" : "CN_WORD",
"position" : 6
},
{
"token" : "共和",
"start_offset" : 4,
"end_offset" : 6,
"type" : "CN_WORD",
"position" : 7
},
{
"token" : "国",
"start_offset" : 6,
"end_offset" : 7,
"type" : "CN_CHAR",
"position" : 8
}
]
}
1.2.2 JavaAPI操作
1)创建索引
/**
 * Creates the index named "blog4".
 *
 * <p>The client is closed in a {@code finally} block so that it is released even when the
 * create-index request throws.
 */
@Test
public void createIndex() {
    try {
        // Send the create-index request and wait for the response.
        client.admin().indices().prepareCreate("blog4").get();
    } finally {
        // Release the client regardless of the outcome.
        client.close();
    }
}
2)创建mapping
/**
 * Puts a mapping for type "article" on index "blog4" in which the fields "id1", "title2" and
 * "content" are all analyzed with the "ik_smart" analyzer.
 *
 * <p>The field type is declared as "text" and "store" as a real boolean: on Elasticsearch 5.x
 * the legacy "string" type and the "yes"/"no" boolean spellings are deprecated.
 *
 * @throws Exception if building the mapping JSON or executing the put-mapping request fails
 */
@Test
public void createMapping() throws Exception {
    try {
        // 1. Describe the mapping: { "article": { "properties": { ... } } }
        XContentBuilder builder = XContentFactory.jsonBuilder()
            .startObject()
                .startObject("article")
                    .startObject("properties")
                        .startObject("id1")
                            .field("type", "text")
                            .field("store", true)
                            .field("analyzer", "ik_smart")
                        .endObject()
                        .startObject("title2")
                            .field("type", "text")
                            .field("store", false)
                            .field("analyzer", "ik_smart")
                        .endObject()
                        .startObject("content")
                            .field("type", "text")
                            .field("store", true)
                            .field("analyzer", "ik_smart")
                        .endObject()
                    .endObject()
                .endObject()
            .endObject();

        // 2. Apply the mapping to the "article" type of index "blog4" and wait for completion.
        PutMappingRequest mapping =
            Requests.putMappingRequest("blog4").type("article").source(builder);
        client.admin().indices().putMapping(mapping).get();
    } finally {
        // 3. Release the client even if the mapping request failed.
        client.close();
    }
}
3)插入数据
/**
 * Indexes one document, built from a map, into index "blog4", type "article", with id "3",
 * then prints the fields of the index response.
 *
 * <p>The client is closed in a {@code finally} block so that it is released even when the
 * index request throws.
 */
@Test
public void createDocumentByMap() {
    try {
        // Document source: field name -> field value.
        HashMap<String, String> map = new HashMap<>();
        map.put("id1", "2");
        map.put("title2", "Lucene");
        map.put("content", "它提供了一个分布式的web接口");

        IndexResponse response = client.prepareIndex("blog4", "article", "3")
            .setSource(map)
            .execute()
            .actionGet();

        // Print what the index response reports.
        System.out.println("结果:" + response.getResult());
        System.out.println("id:" + response.getId());
        System.out.println("index:" + response.getIndex());
        System.out.println("type:" + response.getType());
        System.out.println("版本:" + response.getVersion());
    } finally {
        // Release the client regardless of the outcome.
        client.close();
    }
}
4)词条查询
/**
 * Runs a term query for "提供" on the "content" field of index "blog4", type "article", and
 * prints the total hit count followed by the source of every returned hit.
 *
 * <p>Like the other tests in this file, the client is closed when the test finishes; the
 * close is placed in a {@code finally} block so it also runs when the search throws.
 */
@Test
public void queryTerm() {
    try {
        SearchResponse response = client.prepareSearch("blog4")
            .setTypes("article")
            .setQuery(QueryBuilders.termQuery("content", "提供"))
            .get();

        // Report the hits returned by the search.
        SearchHits hits = response.getHits();
        System.out.println("结果条数:" + hits.getTotalHits());
        for (SearchHit hit : hits) {
            System.out.println(hit.getSourceAsString());
        }
    } finally {
        // Release the client regardless of the outcome.
        client.close();
    }
}
5)结果查看