Elasticsearch入门

入门

>folded
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# Create the hotel index with an explicit mapping for title, city and price.
PUT /hotel
{
  "mappings": {
    "properties": {
      "title": { "type": "text" },
      "city": { "type": "keyword" },
      "price": { "type": "double" }
    }
  }
}

# Index a single document under the id 001.
POST /hotel/_doc/001
{
  "title": "7天酒店",
  "city": "青岛",
  "price": 399.99
}

# Read the document back by its id.
GET /hotel/_doc/001

# Term query on the price field for the value 578.23.
GET /hotel/_search
{
  "query": {
    "term": {
      "price": { "value": 578.23 }
    }
  }
}

# Match query against the title field.
GET /hotel/_search
{
  "query": {
    "match": { "title": "再来" }
  }
}

第二天

批量添加
1
2
3
4
5
6
7
8
9
# Bulk-index four documents (ids 001-004) into hotel; every title is written in traditional Chinese characters.
POST hotel/_bulk
{"index":{"_id":"001"}}
{"title":"詩詞"}
{"index":{"_id":"002"}}
{"title":"學問"}
{"index":{"_id":"003"}}
{"title":"門廳"}
{"index":{"_id":"004"}}
{"title":"園區"}
>folded 自定义分词器
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# Create hotel with a custom analyzer named "ik": the ik_max_word tokenizer
# followed by the tsconvert (stconvert, t2s) and pinyin_filter token filters.
# The title field is analyzed with it.
# NOTE(review): an index named hotel is also created earlier in these notes;
# this PUT is rejected while that index still exists - confirm it was deleted first.
PUT hotel
{
  "settings": {
    "analysis": {
      "analyzer": {
        "ik": {
          "tokenizer": "ik_max_word",
          "filter": ["tsconvert", "pinyin_filter"]
        }
      },
      "filter": {
        "tsconvert": {
          "type": "stconvert",
          "delimiter": "#",
          "keep_both": false,
          "convert_type": "t2s"
        },
        "pinyin_filter": {
          "type": "pinyin",
          "keep_first_letter": true,
          "keep_full_pinyin": false,
          "keep_none_chinese": true
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "title": {
        "type": "text",
        "analyzer": "ik"
      }
    }
  }
}

第三天

>folded
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# Remove any existing convert index before recreating it.
DELETE convert

# Create convert with the analyzer ik_sz_tsconvert:
#   char_filter: tsconvert (stconvert, t2s)
#   tokenizer:   sz_tokenizer (ngram, min_gram = max_gram = 1, letters and digits)
#   filter:      lowercase, asciifolding
# The name field is analyzed with it.
PUT convert
{
  "settings": {
    "analysis": {
      "analyzer": {
        "ik_sz_tsconvert": {
          "tokenizer": "sz_tokenizer",
          "char_filter": ["tsconvert"],
          "filter": ["lowercase", "asciifolding"]
        }
      },
      "tokenizer": {
        "sz_tokenizer": {
          "type": "ngram",
          "min_gram": 1,
          "max_gram": 1,
          "token_chars": ["letter", "digit"]
        }
      },
      "char_filter": {
        "tsconvert": {
          "type": "stconvert",
          "convert_type": "t2s"
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "name": {
        "type": "text",
        "analyzer": "ik_sz_tsconvert"
      }
    }
  }
}

# Bulk-index six sample documents (ids 001-006) mixing traditional and
# simplified characters and upper/lower-case ASCII letters.
POST convert/_bulk
{"index":{"_id":"001"}}
{"name":"中華人民共和國國家2021ABC年發生了哪些事情"}
{"index":{"_id":"002"}}
{"name":"中华人民共和国国家2021ABC发生了哪些事情"}
{"index":{"_id":"003"}}
{"name":"中华人民共和国国家2021ABC年发生了哪些事情"}
{"index":{"_id":"004"}}
{"name":"中华人民共和国国家2021abc年发生了哪些事情"}
{"index":{"_id":"005"}}
{"name":"中华人民共和国国家2021年发生了哪些事情"}
{"index":{"_id":"006"}}
{"name":"中华人民共和国国家2021ABC年发生了哪些事情"}

# Phrase query on name for the single character "中", wrapped in a bool/must clause.
GET convert/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match_phrase": { "name": "中" }
        }
      ]
    }
  }
}

第四天

这里主要围绕【Hanlp】展开

  • 安装

  • 有问题请根据在线地址下载到本地,进行离线安装
    在线安装Hanlp
    1
    ./bin/elasticsearch-plugin install https://github.com/KennFalcon/elasticsearch-analysis-hanlp/releases/download/v7.6.2/elasticsearch-analysis-hanlp-7.6.2.zip
  • 这里要先根据链接将文件下载下来,放到一个指定的文件夹,例如这里放【D:/plugins/elasticsearch-analysis-hanlp-7.6.2.zip】
    离线安装Hanlp
    1
    ./bin/elasticsearch-plugin install file:///D:/plugins/elasticsearch-analysis-hanlp-7.6.2.zip
  • 配置

    | 配置项 | 描述 |
    | --- | --- |
    | enable_custom_config | 是否开启自定义配置(要自己配置此项必须配置为true) |
    | enable_index_mode | 是否是索引分词 |
    | enable_number_quantifier_recognize | 是否识别数字和量词 |
    | enable_custom_dictionary | 是否加载用户词典 |
    | enable_translated_name_recognize | 是否识别音译人名 |
    | enable_japanese_name_recognize | 是否识别日本人名 |
    | enable_organization_recognize | 是否识别机构 |
    | enable_place_recognize | 是否识别地名 |
    | enable_name_recognize | 是否识别中国人名 |
    | enable_traditional_chinese_mode | 是否开启繁体中文 |
    | enable_stop_dictionary | 是否启用停用词 |
    | enable_part_of_speech_tagging | 是否开启词性标注 |
    | enable_remote_dict | 是否开启远程词典 |
    | enable_normalization | 是否执行字符正规化 |
    | enable_offset | 是否计算偏移量 |
  • DSL

    >folded 简+繁+异体字通检示例
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    82
    83
    84
    85
    86
    87
    88
    89
    90
    91
    92
    93
    94
    95
    96
    97
    98
    99
    100
    101
    102
    103
    104
    105
    106
    107
    108
    109
    110
    111
    112
    113
    114
    115
    116
    117
    118
    119
    120
    121
    122
    123
    124
    125
    126
    127
    128
    129
    130
    131
    132
    133
    134
    135
    136
    137
    138
    139
    140
    141
    142
    143
    144
    145
    146
    147
    148
    149
    150
    151
    152
    153
    154
    155
    # Drop any previous test index first, so the PUT below recreates it with the
    # custom analyzers and the subsequent bulk load does not hit a default-mapped index.
    DELETE test

    # Create test with two analyzers built on HanLP tokenizers:
    #   my_hanlp_analyzer -> tokenizer my_hanlp (index mode, stop dictionary,
    #                        custom dictionary, traditional Chinese mode,
    #                        place and name recognition enabled)
    #   hanlps            -> tokenizer hanlp (index mode, name recognition,
    #                        traditional Chinese mode enabled)
    # title is analyzed with my_hanlp_analyzer, name with hanlps.
    PUT test
    {
      "settings": {
        "analysis": {
          "analyzer": {
            "my_hanlp_analyzer": {
              "tokenizer": "my_hanlp"
            },
            "hanlps": {
              "tokenizer": "hanlp"
            }
          },
          "tokenizer": {
            "my_hanlp": {
              "type": "hanlp",
              "enable_custom_config": true,
              "enable_index_mode": true,
              "enable_stop_dictionary": true,
              "enable_custom_dictionary": true,
              "enable_traditional_chinese_mode": true,
              "enable_place_recognize": true,
              "enable_name_recognize": true
            },
            "hanlp": {
              "type": "hanlp",
              "enable_custom_config": true,
              "enable_index_mode": true,
              "enable_name_recognize": true,
              "enable_traditional_chinese_mode": true
            }
          }
        }
      },
      "mappings": {
        "properties": {
          "title": {
            "type": "text",
            "analyzer": "my_hanlp_analyzer"
          },
          "name": {
            "type": "text",
            "analyzer": "hanlps"
          }
        }
      }
    }

    # Bulk-index seven sample documents (ids 001-007) whose title/name values mix
    # simplified, traditional and variant characters.
    POST test/_bulk
    {"index":{"_id":"001"}}
    {"title":"中華人民共和國國家2021ABC年發生了哪些事情","name":"聶強"}
    {"index":{"_id":"002"}}
    {"title":"中华人民共和国国家2021ABC发生了哪些事情","name":"聂強"}
    {"index":{"_id":"003"}}
    {"title":"中华人民共和国国家2021ABC年发生了哪些事情","name":"聂强"}
    {"index":{"_id":"004"}}
    {"title":"中华人民共和国国家2021abc年发生了哪些事情","name":"王安石"}
    {"index":{"_id":"005"}}
    {"title":"中华人民共和国国家2021年发生了哪些事情","name":"王先安"}
    {"index":{"_id":"006"}}
    {"title":"中华人民共和国国家2021ABC年发生了哪些事情","name":"李龢平"}
    {"index":{"_id":"007"}}
    {"title":"中华人民共龢国国家2021ABC年发生了哪些事情","name":"慽继光"}


    # Traditional
    GET test/_search
    {
      "query": {
        "match": { "name": "聶強" }
      }
    }

    # Simplified + traditional
    GET test/_search
    {
      "query": {
        "match": { "name": "聂強" }
      }
    }

    # Simplified
    GET test/_search
    {
      "query": {
        "match": { "name": "聂强" }
      }
    }

    # Variant character "龢" + simplified
    GET test/_search
    {
      "query": {
        "match": { "title": "中华人民共龢国" }
      }
    }

    # Simplified
    GET test/_search
    {
      "query": {
        "match": { "title": "中华人民共和国" }
      }
    }

    # Simplified + traditional
    GET test/_search
    {
      "query": {
        "match": { "title": "中華人民共和國" }
      }
    }

    # Simplified + variant character "龢" + traditional
    GET test/_search
    {
      "query": {
        "match": { "title": "中華人民共龢國" }
      }
    }

    # Multiple conditions: simplified + traditional + variant character "龢"
    GET test/_search
    {
      "query": {
        "bool": {
          "must": [
            {
              "match": { "title": "中華人民共和國" }
            },
            {
              "match": { "name": "王先安" }
            }
          ]
        }
      }
    }
    # Create demo (5 primary shards, 1 replica) with the custom analyzer "jinghuo":
    # the ik_smart tokenizer followed by local_doc_filter, a synonym token filter
    # whose rules are read from a file.
    # local_doc_filter is declared under "filter" (token filters), so the analyzer
    # references it through "filter"; listing it under "char_filter" makes index
    # creation fail because no char filter of that name exists.
    # NOTE(review): synonyms_path is resolved against the Elasticsearch config
    # directory when relative - confirm this absolute path is readable by the node.
    PUT /demo
    {
      "settings": {
        "number_of_shards": 5,
        "number_of_replicas": 1,
        "analysis": {
          "analyzer": {
            "jinghuo": {
              "type": "custom",
              "tokenizer": "ik_smart",
              "filter": ["local_doc_filter"]
            }
          },
          "filter": {
            "local_doc_filter": {
              "type": "synonym",
              "synonyms_path": "/analysis/VariantCharacter.txt"
            }
          }
        }
      }
    }

评论