ES基础操作及java代码

2023-11-03

ES相关随手记

文章目录

ES相关随手记
一基本操作
- - 1. es三大属性：索引、映射、文档
二高级查询
三索引原理之倒排索引
四 ik分词器
- - - - 1.分词器测试
      - 2.分词器扩展
五 filter过滤查询
- - - - 1.说明
      - 2.使用
      - 1.**term**
        
        2. terms
        
        3. range
        
        4. exists
        
        5. ids
六聚合查询

一基本操作

1. es三大属性：索引、映射、文档

1.1 索引：

查看 es 中所有的索引信息

GET /_cat/indices?v

创建索引

PUT /products

创建索引指定库信息

PUT /products
{
  "settings": {
    "number_of_shards":1,
    "number_of_replicas":0
  }
}

java代码

    /**
     * Creates the index for {@code Product} and, if that succeeds, generates the
     * mapping from the entity class and pushes it to the index.
     *
     * @return a status message: the mapping-push result appended to a fixed prefix
     *         when the index was created, or a failure message when it was not
     */
    @GetMapping("/index/create")
    public String createIndexAndPushMapping() {
        IndexOperations ops = elasticsearchRestTemplate.indexOps(Product.class);
        // Bail out early when the index itself could not be created.
        if (!ops.create()) {
            return "索引创建失败";
        }
        // Derive the mapping from the entity class, then push it to the new index.
        Document mapping = ops.createMapping();
        boolean mappingPushed = ops.putMapping(mapping);
        return "索引和映射创建" + mappingPushed;
    }

   /**
     * Reports whether the index backing {@code Product} exists.
     *
     * @return a status message ending with {@code true} or {@code false}
     */
    @GetMapping("/index/exists")
    public String indexExists() {
        boolean present = elasticsearchRestTemplate.indexOps(Product.class).exists();
        return "是否存在index：" + present;
    }

#删除索引

DELETE /products

java代码

    /**
     * Deletes the index backing {@code Product}.
     *
     * @return a status message ending with the boolean result of the delete call
     */
    @DeleteMapping("/index/delete")
    public String deleteIndex() {
        boolean deleted = elasticsearchRestTemplate.indexOps(Product.class).delete();
        return "索引删除状态" + deleted;
    }

1.2 映射：

es中的数据类型：

字符串类型：keyword（不分词）、text（会分词）

数字类型：integer、long

小数类型：float、double

布尔类型：boolean

日期类型：date

创建索引&映射

PUT /products
{
  "settings": {
    "number_of_shards":1,
    "number_of_replicas": 0
  },
  "mappings": {
    "properties": {
      "id":{
        "type": "integer"
      },
      "title":{
        "type": "keyword"
      },
      "price":{
        "type": "double"
      },
      "created_at":{
        "type": "date"
      },
      "description":{
        "type": "text",
        # 设置指定的分词器
        "analyzer": "standard"
      }
    }
  }
}

查询某个索引的映射信息 mapping

GET /products/_mapping

1.3 文档：

添加文档操作手动指定——id

POST /products/_doc/1
{
  "id":1,
  "title":"小浣熊",
  "price":0.5,
  "created_at":"2023-04-17",
  "description":"小浣熊真好吃"
}

java代码

    /**
     * Adds the given document, or replaces it when a document with the same id
     * already exists.
     *
     * <p>The document is saved to the index bound to {@code Product} itself, the
     * same way {@code indexOps(Product.class)} and the batch save resolve it. The
     * previous hard-coded index name {@code "IndexName"} contained uppercase
     * letters, which Elasticsearch rejects, so every call ended in the catch branch.
     *
     * @param product the document to index, taken from the request body
     * @return the saved {@code Product}, or {@code false} when saving threw an exception
     */
    @PostMapping("/doc/addOrUpdate")
    public Object addOrUpdate(@RequestBody Product product){
        try {
            return elasticsearchRestTemplate.save(product);
        } catch (Exception e) {
            // Best-effort endpoint: report the failure and fall through to the 'false' result.
            e.printStackTrace();
        }
        return false;
    }

添加文档操作自动创建文档的 id Isn8jIcBRFWba4Ejbi2b

POST /products/_doc/
{
  "title":"日本豆",
  "price":1.5,
  "description":"日本豆很不错"
}

文档查询基于 id查询

GET /products/_doc/1

java代码

    /**
     * Looks up a single product by id.
     *
     * <p>When {@code index} is empty the lookup uses the index bound to
     * {@code Product}; otherwise an ids query is run against the named index.
     *
     * @param id    the document id
     * @param index the index to search; empty means "use the entity's own index"
     * @return the matching product, or {@code null} when the ids query finds no hit
     */
    @GetMapping("/doc/getByIndex")
    public Product selectAllById(@RequestParam("id") String id,@RequestParam("index") String index) {
        if (StringUtils.isEmpty(index)){
            return elasticsearchRestTemplate.get(id, Product.class);
        }

        IdsQueryBuilder idsQueryBuilder = QueryBuilders.idsQuery();
        idsQueryBuilder.addIds(id);

        NativeSearchQuery query = new NativeSearchQueryBuilder()
                .withQuery(idsQueryBuilder)
                .build();
        SearchHit<Product> itemSearchHit = elasticsearchRestTemplate.searchOne(query, Product.class, IndexCoordinates.of(index));
        // searchOne yields no hit when the id is absent; avoid dereferencing null in that case.
        return itemSearchHit == null ? null : itemSearchHit.getContent();
    }

删除文档基于 id删除

DELETE /products/_doc/Isn8jIcBRFWba4Ejbi2b

java代码

    /**
     * Deletes a document by id.
     *
     * <p>With an explicit {@code index} the document is deleted from that index by
     * id. With an empty {@code index} a {@code Product} carrying the numeric id is
     * built and passed to the template's entity-based delete.
     *
     * @param id    the document id; must be numeric when {@code index} is empty
     * @param index the index to delete from, or empty to use the entity-based delete
     * @return the string returned by the template's delete call, or {@code false}
     *         when any exception was thrown
     */
    @DeleteMapping("/doc/delete")
    public Object delete(@RequestParam("id") String id,@RequestParam("index") String index) {
        try {
            if (!StringUtils.isEmpty(index)) {
                return elasticsearchRestTemplate.delete(id, IndexCoordinates.of(index));
            }
            Product target = new Product();
            target.setId(Integer.valueOf(id));
            return elasticsearchRestTemplate.delete(target);
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

更新文档（注意：此种方式是先删除后添加）

PUT /products/_doc/1
{
  "id":1,
  "title":"小浣熊",
  "price":0.5,
  "created_at":"2023-04-17",
  "description":"小浣熊真好吃，真好吃！！"
}

#更新文档 基于指定字段进行更新
POST /products/_doc/1/_update
{
  "doc":{
    "price":2.5
  }
}

1.3.2: 文档的批量操作

文档的批量操作使用 _bulk 关键字（“index”：新增、“update”：更新、“delete”：删除）。注意：每个操作行及其后面的 JSON 必须各占一行，不能被格式化成多行。

#index 代表新增，如果index后面没有指定_id就自动生成，如果指定了，就按指定的创建 I8kTjYcBRFWba4Ej8i30

POST /products/_doc/_bulk
{"index":{"_id":2}}
    {"id" : 2,"title" : "日本豆","price" : 1.8,"created_at" : "2023-04-15","description" : "日本豆真好吃，真好吃！！"}
{"index":{}}
   {"title" : "鱼豆腐","price" : 4.8,"created_at" : "2023-04-15","description" : "鱼豆腐真好吃，真好吃！！"}

文档批量操作添加更新删除

POST /products/_doc/_bulk
{"index":{"_id":3}}
  {"id" : 3,"title" : "甜不辣","price" : 7.8,"created_at" : "2023-04-15","description" : "甜不辣真好吃，真好吃！！"}
{"update":{"_id":2}}
  {"doc":{"title":"小鱼豆腐"}}
{"delete":{"_id":"I8kTjYcBRFWba4Ej8i30"}}

java代码

/**
     * Batch add-or-update demo: derives five products from the submitted one and
     * saves them in a single call.
     *
     * <p>Each derived product gets id {@code 0..4}, the submitted price plus the
     * loop index, the submitted title with the loop index appended, the submitted
     * description, and the current time as its create time.
     *
     * @param product the template product taken from the request body
     * @return the saved products, or {@code false} when any exception was thrown
     */
    @PostMapping("/doc/addOrUpdate/Batch")
    public Object addOrUpdateBatch(@RequestBody Product product){
        try {
            List<Product> batch = new ArrayList<>();
            for (int n = 0; n < 5; n++) {
                Product derived = new Product();
                derived.setId(n)
                        .setPrice(product.getPrice()+n)
                        .setTitle(product.getTitle()+n)
                        .setDescription(product.getDescription())
                        .setCreateTime(new Date());
                batch.add(derived);
            }
            return elasticsearchRestTemplate.save(batch);
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

二高级查询

说明：

ES 中提供了一种强大的检索数据方式，这种检索方式称为：**Query DSL**（Domain Specific Language，领域特定语言）。Query DSL 利用 **Rest API 传递 JSON 格式的请求体**（Request Body）与 ES 进行交互，这种丰富的查询语言让 ES 检索变得更强大、更简洁。

1.查询所有

query DSL 语法查询所有

GET /products/_doc/_search
{
  "query": {
    "match_all": {}
  }
}

GET /products/_search
{
  "query": {
    "match_all": {}
  }
}

java代码

    /**
     * 1. match_all: returns the products matched by a match-all query, sorted by
     * {@code id} ascending.
     *
     * @return the content of every hit returned by the search
     */
    @GetMapping("/doc/selectAll")
    public List<Product> selectAll(){
        NativeSearchQuery matchAll = new NativeSearchQueryBuilder()
                .withQuery(QueryBuilders.matchAllQuery())
                // sort field
                .withSort(SortBuilders.fieldSort("id").order(SortOrder.ASC))
                .build();
        List<Product> products = new ArrayList<>();
        elasticsearchRestTemplate.search(matchAll, Product.class)
                .forEach(hit -> products.add(hit.getContent()));
        return products;
    }

2. term 基于关键词查询

integer、keyword、double、date 类型不分词。

text 类型默认使用 es 标准分词器：中文按单字分词，英文按单词分词。

总结：

1. 在 ES 中除了 text 类型会分词，其余类型均不分词。

2. ES 默认使用标准分词器：中文按单字分词，英文按单词分词。

GET /products/_search
{
  "query": {
    "term": {
      "title": {
        "value": "小浣熊"
      }
    }
  }
}

java代码

/**
     * 2. term: runs a term query on the given field and returns the raw search
     * result, i.e. the hits wrapper rather than a plain list of products.
     *
     * @param name  the field to query
     * @param value the term to match
     * @return the search hits produced by the term query
     */
    @GetMapping("/doc/term/get")
    public Object termQuery(@RequestParam("name") String name, @RequestParam("value") String value) {
        // build the query condition
        TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery(name, value);
        NativeSearchQuery query = new NativeSearchQueryBuilder()
                .withQuery(termQueryBuilder)
                .build();
        // The hits wrapper is returned as-is; the previously built (and discarded)
        // content list has been removed.
        return elasticsearchRestTemplate.search(query, Product.class);
    }

3. range 范围查询

GET /products/_search
{
  "query": {
    "range": {
      "price": {
        "gte": 5,
        "lte": 10
      }
    }
  }
}

java代码

 /**
     * 3. range: returns products whose {@code createTime} lies between
     * {@code begin} and {@code end} (both inclusive), newest first.
     *
     * <p>The bounds are passed to the range query with format
     * {@code yyyy-MM-dd HH:mm:ss} and time zone {@code +08:00}.
     *
     * @param begin lower bound of {@code createTime}
     * @param end   upper bound of {@code createTime}
     * @param index the index to search
     * @return the matching products; whatever was collected so far (possibly
     *         nothing) when an exception was thrown
     */
    @GetMapping("/doc/range")
    public List<Product> selectByTimeFromTo(@RequestParam("begin") String begin, @RequestParam("end") String end, @RequestParam("index") String index) {
        List<Product> products = new ArrayList<>();

        try {
            NativeSearchQuery rangeSearch = new NativeSearchQueryBuilder()
                    .withFilter(QueryBuilders.rangeQuery("createTime")
                            .timeZone("+08:00")
                            .format("yyyy-MM-dd HH:mm:ss")
                            .gte(begin)
                            .lte(end))
                    .withSort(SortBuilders.fieldSort("createTime").order(SortOrder.DESC))
                    .build();

            elasticsearchRestTemplate.search(rangeSearch, Product.class, IndexCoordinates.of(index))
                    .forEach(hit -> products.add(hit.getContent()));
        } catch (Exception e) {
            e.printStackTrace();
        }

        return products;
    }

4. prefix 前缀查询

GET /products/_search
{
  "query": {
    "prefix": {
      "description": {
        "value": "小"
      }
    }
  }
}

java代码

/**
     * 4. prefix: returns products whose {@code name} field matches the given prefix.
     *
     * @param name   the field to query
     * @param prefix the prefix to match
     * @return the content of every hit returned by the search
     */
    @GetMapping("/doc/prefix")
    public List<Product> selectBySearchPrefix(@RequestParam("name") String name,@RequestParam("prefix")  String prefix){
        // Build the prefix query and wrap it in a native search query.
        NativeSearchQuery prefixSearch = new NativeSearchQueryBuilder()
                .withQuery(QueryBuilders.prefixQuery(name, prefix))
                .build();
        // Run the search and unwrap each hit.
        List<Product> products = new ArrayList<>();
        elasticsearchRestTemplate.search(prefixSearch, Product.class)
                .forEach(hit -> products.add(hit.getContent()));
        return products;
    }

5. wildcard 通配符查询 ?只匹配一个字符 *匹配多个字符

GET /products/_search
{
  "query": {
    "wildcard": {
      "title": {
        "value": "小*"
      }
    }
  }
}

java代码

 /**
     * 5. wildcard: returns products whose {@code name} field matches a wildcard
     * pattern ({@code ?} matches exactly one character, {@code *} matches many).
     *
     * @param name  the field to query
     * @param query the wildcard pattern
     * @return the content of every hit returned by the search
     */
    @GetMapping("/doc/wildcard")
    public List<Product> selectByWildcard(@RequestParam("name") String name,@RequestParam("query")  String query){
        // Build the wildcard query and wrap it in a native search query.
        NativeSearchQuery wildcardSearch = new NativeSearchQueryBuilder()
                .withQuery(QueryBuilders.wildcardQuery(name, query))
                .build();
        // Run the search and unwrap each hit.
        List<Product> products = new ArrayList<>();
        elasticsearchRestTemplate.search(wildcardSearch, Product.class)
                .forEach(hit -> products.add(hit.getContent()));
        return products;
    }

6. ids 多个id查询

GET /products/_search
{
  "query": {
    "ids": {
      "values": [1,2,3,4]
    }
  }
}

java代码

    /**
     * 6. ids: returns the products whose document ids are in the given array.
     *
     * @param ids the document ids to look up
     * @return the content of every hit returned by the search
     */
    @GetMapping("/doc/ids")
    public List<Product> selectByIds(@RequestParam("ids") String[] ids){
        // Build the ids query and wrap it in a native search query.
        NativeSearchQuery idsSearch = new NativeSearchQueryBuilder()
                .withQuery(QueryBuilders.idsQuery().addIds(ids))
                .build();
        // Run the search and unwrap each hit.
        List<Product> products = new ArrayList<>();
        elasticsearchRestTemplate.search(idsSearch, Product.class)
                .forEach(hit -> products.add(hit.getContent()));
        return products;
    }

7. fuzzy 模糊查询

注意（以下为 fuzziness 取默认值 AUTO 时的规则）：搜索关键词长度为 0~2 时不允许存在模糊

搜索关键词长度为3~5允许一次模糊

搜索关键词长度大于5允许最大2次模糊

GET /products/_search
{
  "query": {
    "fuzzy": {
      "title": "小浣熊1356"
    }
  }
}

java代码

 /**
     * 7. fuzzy: runs a fuzzy query on the given field and returns the hits with the
     * highlighted fragments of that field written into each product's description.
     *
     * <p>Matches are wrapped in {@code <font color='red'>...</font>}. A hit that
     * carries no highlight fragments keeps its original description instead of
     * having it replaced by an empty string.
     *
     * <p>NOTE(review): the article's length rules (2 characters: no edits; 3 to 5:
     * one edit; longer: two edits) presumably describe Elasticsearch's default
     * fuzziness, whereas this method pins {@code Fuzziness.TWO} - confirm which is intended.
     * NOTE(review): the highlight of {@code name} is always stored in the
     * description, even when {@code name} is another field - confirm this is wanted.
     *
     * @param name  the field to query and highlight
     * @param value the search term
     * @return the matching products with highlighted text in their description
     */
    @GetMapping("/doc/fuzzy")
    public List<Product> selectByFuzzy(@RequestParam("name") String name,@RequestParam("value")  String value){
        // build the fuzzy query
        FuzzyQueryBuilder fuzziness = QueryBuilders.fuzzyQuery(name, value).fuzziness(Fuzziness.TWO);
        // configure highlighting: tag prefix, tag suffix and the field to highlight
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        highlightBuilder.preTags("<font color='red'>");
        highlightBuilder.postTags("</font>");
        highlightBuilder.field(name);
        NativeSearchQuery query = new NativeSearchQueryBuilder()
                .withQuery(fuzziness)
                .withHighlightBuilder(highlightBuilder)
                .build();

        SearchHits<Product> search = elasticsearchRestTemplate.search(query, Product.class);

        List<Product> result = new ArrayList<>();
        for (SearchHit<Product> searchHit : search) {
            Product content = searchHit.getContent();
            // highlighted fragments for the queried field
            List<String> fragments = searchHit.getHighlightField(name);
            // Only overwrite the description when there is highlighted text to show.
            if (fragments != null && !fragments.isEmpty()) {
                content.setDescription(String.join("", fragments));
            }
            result.add(content);
        }
        return result;
    }

8. bool 用来组合多个条件实现复杂查询

bool关键字：用来组合多个条件实现复杂查询

must：相当于&& 同时成立

should：相当于|| 成立一个就行

must_not：相当于！不能满足任何一个

GET /products/_search
{
  "query": {
    "bool": {
      "must": [
        {
        "prefix": {
          "title": {
            "value": "小浣熊"
          }
        }
      },
      {
        "term": {
          "title": {
            "value": "小浣熊"
          }
        }
      }
      ]
    }
  }
}

java代码

/**
     * 8. bool: combines several conditions into one query.
     *
     * <p>In a bool query {@code must} behaves like AND, {@code should} like OR and
     * {@code must_not} like NOT. This method uses two {@code should} clauses: a
     * match on {@code title} and a match on {@code description}.
     *
     * <p>Matches in {@code description} are wrapped in
     * {@code <font color='red'>...</font>} and written back into the product's
     * description. A hit without description highlights (for example one matched
     * only through its title) keeps its original description instead of having it
     * replaced by an empty string.
     *
     * @param value the text to match against title and description
     * @return the matching products, with highlighted descriptions where available
     */
    @GetMapping("/doc/bool")
    public List<Product> selectBool(@RequestParam("value") String value){
        BoolQueryBuilder should = QueryBuilders.boolQuery().should(QueryBuilders.matchQuery("title", value)).should(QueryBuilders.matchQuery("description", value));
        // configure highlighting: tag prefix, tag suffix and the field to highlight
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        highlightBuilder.preTags("<font color='red'>");
        highlightBuilder.postTags("</font>");
        highlightBuilder.field("description");
        NativeSearchQuery query = new NativeSearchQueryBuilder()
                .withQuery(should)
                .withHighlightBuilder(highlightBuilder)
                .build();
        SearchHits<Product> search = elasticsearchRestTemplate.search(query, Product.class);

        List<Product> result = new ArrayList<>();
        for (SearchHit<Product> searchHit : search) {
            Product content = searchHit.getContent();
            // highlighted fragments of the description field
            List<String> fragments = searchHit.getHighlightField("description");
            // Only overwrite the description when there is highlighted text to show.
            if (fragments != null && !fragments.isEmpty()) {
                content.setDescription(String.join("", fragments));
            }
            result.add(content);
        }
        return result;
    }

9. multi_match 多字段查询

注意：如果字段类型分词，会将查询条件分词之后再查询该字段；如果该字段不分词，就会将查询条件作为整体进行查询

GET /products/_search
{
  "query": {
    "multi_match": {
      "query": "浣熊",
      "fields": ["title","description"]
    }
  }
}

java代码

/**
     * 9. multi_match: matches one search text against several fields.
     *
     * <p>Per the article: for an analyzed field the search text is analyzed before
     * matching; for a non-analyzed field it is matched as a whole.
     *
     * @param text       the search text
     * @param fieldNames the fields to search
     * @return the content of every hit returned by the search
     */
    @GetMapping("/doc/multiMatch")
    public List<Product> selectMultiMatch(@RequestParam("text") Object text, @RequestParam("fieldNames") String[] fieldNames){
        NativeSearchQuery multiMatchSearch = new NativeSearchQueryBuilder()
                .withQuery(QueryBuilders.multiMatchQuery(text, fieldNames))
                .build();
        // Run the search and unwrap each hit.
        List<Product> products = new ArrayList<>();
        elasticsearchRestTemplate.search(multiMatchSearch, Product.class)
                .forEach(hit -> products.add(hit.getContent()));
        return products;
    }

10. query_string 默认字段分词查询

注意：查询字段分词就查询条件分词查询，查询字段不分词将查询条件不分词查询

GET /products/_search
{
  "query": {
    "query_string": {
      "default_field": "description",
      "query": "小浣熊真"
    }
  }
}

java代码

/**
     * 10. query_string: runs a query-string query against one field, sorted by
     * {@code id} ascending, with highlighting on {@code description}.
     *
     * <p>Per the article: for an analyzed field the query text is analyzed before
     * matching; for a non-analyzed field it is matched as a whole.
     *
     * <p>Matches in {@code description} are wrapped in
     * {@code <font color='red'>...</font>} and written back into the product's
     * description. A hit without description highlights keeps its original
     * description instead of having it replaced by an empty string.
     *
     * @param queryString the text to search for
     * @param field       the field the query string is applied to
     * @return the matching products, with highlighted descriptions where available
     */
    @GetMapping("/doc/queryString")
    public List<Product> queryString(@RequestParam String queryString,@RequestParam String field){
        QueryStringQueryBuilder queryStringQueryBuilder = QueryBuilders.queryStringQuery(queryString).field(field);
        // configure highlighting: tag prefix, tag suffix and the field to highlight
        HighlightBuilder highlightBuilder = new HighlightBuilder();
        highlightBuilder.preTags("<font color='red'>");
        highlightBuilder.postTags("</font>");
        highlightBuilder.field("description");
        // assemble the search query
        NativeSearchQuery query = new NativeSearchQueryBuilder()
                .withQuery(queryStringQueryBuilder)
                .withHighlightBuilder(highlightBuilder)
                .withSorts(SortBuilders.fieldSort("id")
                        .order(SortOrder.ASC)).build();
        // run the search
        SearchHits<Product> search = elasticsearchRestTemplate.search(query, Product.class);

        List<Product> result = new ArrayList<>();
        for (SearchHit<Product> searchHit : search) {
            Product content = searchHit.getContent();
            // highlighted fragments of the description field
            List<String> fragments = searchHit.getHighlightField("description");
            // Only overwrite the description when there is highlighted text to show.
            if (fragments != null && !fragments.isEmpty()) {
                content.setDescription(String.join("", fragments));
            }
            result.add(content);
        }
        return result;
    }

11. highlight 高亮查询

highlight关键字可以让符合条件的文档的关键词高亮

自定义高亮html标签：可以在highlight中使用pre_tags和post_tags

GET /products/_search
{
  "query": {
      "multi_match": {
      "query": "浣熊",
      "fields": ["title","description"]
    }
  },
  "highlight": {
    "fields": {
      "*":{}
    },
    "pre_tags": ["<span style='color:red;'>"],
    "post_tags": ["</span>"]
  }
}

12. size 返回指定条数

size关键字：指定查询结果中返回指定条数。默认返回10条

GET /products/_search
{
  "query": {
    "match_all": {}
  },
  "size": 2
}

13. from 分页查询

from关键字：用来指定起始返回位置，和size关键字连用可以实现分页效果

GET /products/_search
{
  "query": {
    "match_all": {}
  },
  "from": 3,
  "size": 3
}

14. sort指定字段排序

GET /products/_search
{
  "query": {
    "match_all": {}
  },
  "sort": [
    {
      "price": {
        "order": "asc"
      }
    }
  ]
}

15. _source 返回指定字段

_source关键字：是一个数组，在数组当中用来指定需要展示的字段

GET /products/_search
{
  "query": {
    "match_all": {}
  },
  "_source": ["title","description","id"]
}

java代码

  * 15._source 返回指定字段
     *
     **/
    @GetMapping("/doc/source")
    public List<Product> selectSource(){
        MatchAllQueryBuilder matchAllQueryBuilder = QueryBuilders.matchAllQuery();
        // 设置包含哪些字段，不包含哪些字段
        SourceFilter sourceFilter = new SourceFilter() {
            @Override
            public String[] getIncludes() {
                String[] includes = {"title","price"};
                return includes ;
            }

            @Override
            public String[] getExcludes() {
                return new String[0];
            }
        };
        NativeSearchQuery query = new NativeSearchQueryBuilder()
                .withQuery(matchAllQueryBuilder)
                .withSourceFilter(sourceFilter).build();
        SearchHits<Product> search = elasticsearchRestTemplate.search(query, Product.class);
        // 返回结果
        List<Product> result = new ArrayList<>();
        search.forEach(item ->{ result.add(item.getContent());});
        return result;
    }

三索引原理之倒排索引

倒排索引也叫反向索引，有反向索引必有正向索引。通俗的来说，正向索引是通过key找value，反向索引则是通过value找key，ES底层在检索时底层就是使用的倒排索引。

得分的计算次数/长度 1:1:9 (索引:次数:总长度)

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-QccXkt4x-1682299674138)(C:\Users\LiBo\AppData\Roaming\Typora\typora-user-images\image-20230417153722334.png)]

四 ik分词器

说明：下载ik分词器之后，放在es的插件文件夹下，重启es

1.分词器测试

# 分词器测试
# ik ik_smart 粗分词
POST /_analyze
{
  "analyzer": "ik_smart",
  "text": "某某教育存在一名非常优秀的老师他的名字叫张三"
}

# ik ik_smart 粗分词 分词结果
{
  "tokens" : [
    {
      "token" : "某某",
      "start_offset" : 0,
      "end_offset" : 2,
      "type" : "CN_WORD",
      "position" : 0
    },
    {
      "token" : "教育",
      "start_offset" : 2,
      "end_offset" : 4,
      "type" : "CN_WORD",
      "position" : 1
    },
    {
      "token" : "存在",
      "start_offset" : 4,
      "end_offset" : 6,
      "type" : "CN_WORD",
      "position" : 2
    },
    {
      "token" : "一名",
      "start_offset" : 6,
      "end_offset" : 8,
      "type" : "CN_WORD",
      "position" : 3
    },
    {
      "token" : "非常",
      "start_offset" : 8,
      "end_offset" : 10,
      "type" : "CN_WORD",
      "position" : 4
    },
    {
      "token" : "优秀的",
      "start_offset" : 10,
      "end_offset" : 13,
      "type" : "CN_WORD",
      "position" : 5
    },
    {
      "token" : "老师",
      "start_offset" : 13,
      "end_offset" : 15,
      "type" : "CN_WORD",
      "position" : 6
    },
    {
      "token" : "他的",
      "start_offset" : 15,
      "end_offset" : 17,
      "type" : "CN_WORD",
      "position" : 7
    },
    {
      "token" : "名字叫",
      "start_offset" : 17,
      "end_offset" : 20,
      "type" : "CN_WORD",
      "position" : 8
    },
    {
      "token" : "张三",
      "start_offset" : 20,
      "end_offset" : 22,
      "type" : "CN_WORD",
      "position" : 9
    }
  ]
}

# ik ik_max_word 细分
POST /_analyze
{
  "analyzer": "ik_max_word",
  "text": "某某教育存在一名非常优秀的老师他的名字叫张三"
}

# ik ik_max_word 细分 分词结果
{
  "tokens" : [
    {
      "token" : "某某",
      "start_offset" : 0,
      "end_offset" : 2,
      "type" : "CN_WORD",
      "position" : 0
    },
    {
      "token" : "某",
      "start_offset" : 0,
      "end_offset" : 1,
      "type" : "CN_WORD",
      "position" : 1
    },
    {
      "token" : "某",
      "start_offset" : 1,
      "end_offset" : 2,
      "type" : "CN_WORD",
      "position" : 2
    },
    {
      "token" : "教育",
      "start_offset" : 2,
      "end_offset" : 4,
      "type" : "CN_WORD",
      "position" : 3
    },
    {
      "token" : "教",
      "start_offset" : 2,
      "end_offset" : 3,
      "type" : "CN_WORD",
      "position" : 4
    },
    {
      "token" : "育",
      "start_offset" : 3,
      "end_offset" : 4,
      "type" : "CN_CHAR",
      "position" : 5
    },
    {
      "token" : "存在",
      "start_offset" : 4,
      "end_offset" : 6,
      "type" : "CN_WORD",
      "position" : 6
    },
    {
      "token" : "在一",
      "start_offset" : 5,
      "end_offset" : 7,
      "type" : "CN_WORD",
      "position" : 7
    },
    {
      "token" : "一名",
      "start_offset" : 6,
      "end_offset" : 8,
      "type" : "CN_WORD",
      "position" : 8
    },
    {
      "token" : "一",
      "start_offset" : 6,
      "end_offset" : 7,
      "type" : "CN_WORD",
      "position" : 9
    },
    {
      "token" : "名",
      "start_offset" : 7,
      "end_offset" : 8,
      "type" : "CN_CHAR",
      "position" : 10
    },
    {
      "token" : "非常",
      "start_offset" : 8,
      "end_offset" : 10,
      "type" : "CN_WORD",
      "position" : 11
    },
    {
      "token" : "非",
      "start_offset" : 8,
      "end_offset" : 9,
      "type" : "CN_WORD",
      "position" : 12
    },
    {
      "token" : "常",
      "start_offset" : 9,
      "end_offset" : 10,
      "type" : "CN_CHAR",
      "position" : 13
    },
    {
      "token" : "优秀的",
      "start_offset" : 10,
      "end_offset" : 13,
      "type" : "CN_WORD",
      "position" : 14
    },
    {
      "token" : "优秀",
      "start_offset" : 10,
      "end_offset" : 12,
      "type" : "CN_WORD",
      "position" : 15
    },
    {
      "token" : "老师",
      "start_offset" : 13,
      "end_offset" : 15,
      "type" : "CN_WORD",
      "position" : 16
    },
    {
      "token" : "他的名字",
      "start_offset" : 15,
      "end_offset" : 19,
      "type" : "CN_WORD",
      "position" : 17
    },
    {
      "token" : "他的",
      "start_offset" : 15,
      "end_offset" : 17,
      "type" : "CN_WORD",
      "position" : 18
    },
    {
      "token" : "名字叫",
      "start_offset" : 17,
      "end_offset" : 20,
      "type" : "CN_WORD",
      "position" : 19
    },
    {
      "token" : "名字",
      "start_offset" : 17,
      "end_offset" : 19,
      "type" : "CN_WORD",
      "position" : 20
    },
    {
      "token" : "字",
      "start_offset" : 18,
      "end_offset" : 19,
      "type" : "CN_WORD",
      "position" : 21
    },
    {
      "token" : "叫",
      "start_offset" : 19,
      "end_offset" : 20,
      "type" : "CN_CHAR",
      "position" : 22
    },
    {
      "token" : "张三",
      "start_offset" : 20,
      "end_offset" : 22,
      "type" : "CN_WORD",
      "position" : 23
    },
    {
      "token" : "三",
      "start_offset" : 21,
      "end_offset" : 22,
      "type" : "TYPE_CNUM",
      "position" : 24
    }
  ]
}

2.分词器扩展

在ik分词器的安装目录下的config下 IKAnalyzer.cfg.xml 文件下配置；配置完之后需要重启es

其中 ext.dic 和 stop.dic 需要在config文件夹下创建，并且需要每一个词一行进行记录

系统推荐的 extra_main.dic 扩展词

系统推荐的停用词典 extra_stopword.dic

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<properties>
	<comment>IK Analyzer 扩展配置</comment>
	<!--用户可以在这里配置自己的扩展字典 -->
	<entry key="ext_dict">ext.dic</entry>
	 <!--用户可以在这里配置自己的扩展停止词字典-->
	<entry key="ext_stopwords">stop.dic</entry>
	<!--用户可以在这里配置远程扩展字典 -->
	<!-- <entry key="remote_ext_dict">words_location</entry> -->
	<!--用户可以在这里配置远程扩展停止词字典-->
	<!-- <entry key="remote_ext_stopwords">words_location</entry> -->
</properties>

五 filter过滤查询

1.说明

过滤查询，其实准确来说，ES中的查询操作分为2种: 查询(query)和过滤(filter)。查询即是之前提到的 query查询，它(查询)默认会计算每个返回文档的得分，然后根据得分排序。而过滤(filter) 只会筛选出符合的文档，并不计算得分，而且它可以缓存文档。所以，单从性能考虑，过滤比查询更快。换句话说、过滤适合在大范围筛选数据，而查询则适合精确匹配数据。一般应用时，应先使用过滤操作过滤数据，然后使用查询匹配数据。

[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-FQjzARao-1682299674139)(C:\Users\LiBo\AppData\Roaming\Typora\typora-user-images\image-20230419092443574.png)]

2.使用

GET /test123/_search
{
  "query": {
    "bool": {
      "must": [
        {"match_all": {}} // 查询条件
      ],
      "filter": [
        {。。。。。。}// 过滤条件
      ]
    }
  }
}

注意

在执行 filter 和 query 时，先执行 filter 再执行 query

Elasticsearch会自动缓存经常使用的过滤器，以加快性能

常见过滤类型有：term、terms、range、exists、ids 等 filter。

1.term

GET /test123/_search
{
  "query": {
    "bool": {
      "must": [
        {"match_all": {}}
      ],
      "filter": [
        {
          "term": {
            "title": "真好"
          }
        }
      ]
    }
  }
}

2. terms

GET /test123/_search
{
  "query": {
    "bool": {
      "must": [
        {"match_all": {}}
      ],
      "filter": [
        {
          "terms": {
            "title": [
              "真好",
              "food"
            ]
          }
        }
      ]
    }
  }
}

3. range

GET /products/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match_all": {}
        }
      ],
      "filter": [
        {
          "range": {
            "price": {
              "gte": 5,
              "lte": 10
            }
          }
        }
      ]
    }
  }
}

4. exists

GET /test123/_search
{
  "query": {
    "bool": {
      "must": [
        {"match_all": {}}
      ],
      "filter": [
        {
          "exists": {
            "field": "title"
          }
        }
      ]
    }
  }
}

5. ids

GET /test123/_search
{
  "query": {
    "bool": {
      "must": [
        {"match_all": {}}
      ],
      "filter": [
        {
          "ids": {
            "values": [
              "1","2","3"
            ]
          }
        }
      ]
    }
  }
}

六聚合查询

简介：

聚合：英文为 Aggregation（Aggs），是 es 除搜索功能外提供的针对 es 数据做统计分析的功能。聚合有助于根据搜索查询提供聚合数据。聚合查询是数据库中重要的功能特性，ES 作为搜索引擎兼数据库，同样提供了强大的聚合分析能力。它是基于查询条件来对数据进行分桶、计算的方法，有点类似于 SQL 中的 group by 再加一些函数方法的操作。

注意：text类型是不支持聚合的

1. terms对字段聚合查询

#根据price聚合查询（分组查询）

GET /products/_search
{
  "query": {
    "match_all": {}
  },
  "aggs": {
    "price_group": {
      "terms": {
        "field": "price",
        "size": 10
      }
    }
  }
}

java代码

 /**
     * Aggregation demo: a terms aggregation on {@code title} with a nested terms
     * aggregation on {@code price}, plus a sum aggregation over {@code price}.
     *
     * <p>The sum is written to the log and each title bucket with its price
     * sub-buckets is printed to standard output. The search hits themselves are
     * returned to the caller.
     *
     * @return the content of every hit returned by the match-all search
     */
    @GetMapping("/doc/aggs/terms")
    public List<Product> aggsTerms(){
        // Group by title, and within each title bucket group again by price.
        TermsAggregationBuilder priceBuckets = AggregationBuilders.terms("priceGroup").field("price");
        TermsAggregationBuilder titleBuckets = AggregationBuilders.terms("titleGroup").field("title")
                .subAggregation(priceBuckets);
        // Sum over the price field, registered under the name "buyCountSum".
        SumAggregationBuilder priceSum = AggregationBuilders.sum("buyCountSum").field("price");

        MatchAllQueryBuilder everything = QueryBuilders.matchAllQuery();
        NativeSearchQuery query = new NativeSearchQueryBuilder()
                .withQuery(everything)
                .withAggregations(priceSum)
                .withAggregations(titleBuckets)
                .build();
        SearchHits<Product> search = elasticsearchRestTemplate.search(query, Product.class);

        // Read the aggregation results only when the response carries any.
        Aggregations aggregations = search.hasAggregations()
                ? (Aggregations) search.getAggregations().aggregations()
                : null;
        if (aggregations != null) {
            Sum sum = aggregations.get("buyCountSum");
            log.info("计算 buyCount 总数：{} ", sum.getValue());
            // Walk the title buckets and, inside each, the price sub-buckets.
            Terms titleGroup = aggregations.get("titleGroup");
            titleGroup.getBuckets().forEach(titleBucket -> {
                System.out.println("");
                System.out.println("主分类");
                System.out.println(titleBucket.getKey() +": "+titleBucket.getDocCount());
                Terms priceGroup = titleBucket.getAggregations().get("priceGroup");
                priceGroup.getBuckets().forEach(priceBucket -> {
                    System.out.println("子分类");
                    System.out.println(priceBucket.getKey() +": "+priceBucket.getDocCount());
                });
            });
        }

        // Unwrap each hit for the response.
        List<Product> products = new ArrayList<>();
        search.forEach(hit -> products.add(hit.getContent()));
        return products;
    }

2. max 聚合查询最大值

计算最大值

GET /products/_search
{
  "query": {
    "match_all": {}
  },
  "aggs": {
    "max_price": {
      "max": {
        "field": "price"
      }
    }
  }
}

java代码

/**
     * Handles GET /doc/aggs/max: runs a match-all search against the Product index with a
     * {@code max} aggregation named "maxPrice" over the "price" field and logs the maximum.
     *
     * @return the content of each hit contained in the search response
     **/
    @GetMapping("/doc/aggs/max")
    public List<Product> aggsMax(){
        // Metric aggregation: largest value of the price field
        MaxAggregationBuilder maxPriceAgg = AggregationBuilders.max("maxPrice").field("price");

        // Match every document and attach the aggregation
        NativeSearchQueryBuilder queryBuilder = new NativeSearchQueryBuilder();
        queryBuilder.withQuery(QueryBuilders.matchAllQuery());
        queryBuilder.withAggregations(maxPriceAgg);
        NativeSearchQuery searchQuery = queryBuilder.build();

        SearchHits<Product> searchHits = elasticsearchRestTemplate.search(searchQuery, Product.class);

        // Log the aggregation result when the response carries one
        if (searchHits.hasAggregations()) {
            Aggregations aggs = (Aggregations) searchHits.getAggregations().aggregations();
            if (aggs != null) {
                Max maxPrice = aggs.get("maxPrice");
                log.info("最大值为："+maxPrice.getValue());
            }
        }

        // Unwrap the hits into plain Product objects
        List<Product> products = new ArrayList<>();
        searchHits.forEach(hit -> products.add(hit.getContent()));
        return products;
    }

3. min 聚合查询最小值

GET /products/_search
{
  "query": {
    "match_all": {}
  },
  "aggs": {
    "min_price": {
      "min": {
        "field": "price"
      }
    }
  }
}

java代码

 /**
     * Handles GET /doc/aggs/min: runs a match-all search against the Product index with a
     * {@code min} aggregation over the "price" field and logs the minimum.
     *
     * @return the content of each hit contained in the search response
     **/
    @GetMapping("/doc/aggs/min")
    public List<Product> aggsMin(){
        // Single source for the aggregation name so the request and the lookup cannot drift
        // apart (the name was previously misspelled "minPtice" in both places).
        final String aggName = "minPrice";

        MatchAllQueryBuilder queryAll = QueryBuilders.matchAllQuery();
        MinAggregationBuilder minAggregationBuilder = AggregationBuilders.min(aggName).field("price");
        NativeSearchQuery query = new NativeSearchQueryBuilder()
                .withQuery(queryAll)
                .withAggregations(minAggregationBuilder)
                .build();
        SearchHits<Product> search = elasticsearchRestTemplate.search(query, Product.class);

        // Log the aggregation result when the response carries one
        if (search.hasAggregations()){
            Aggregations aggregations = (Aggregations) search.getAggregations().aggregations();
            if (Objects.nonNull(aggregations)){
                Min min = aggregations.get(aggName);
                // Parameterized logging, matching the style used by aggsTerms
                log.info("最小值为：{}", min.getValue());
            }
        }

        // Unwrap the hits into plain Product objects
        List<Product> result = new ArrayList<>();
        search.forEach(item -> result.add(item.getContent()));
        return result;
    }

4. sum 聚合查询求和

GET /products/_search
{
  "query": {
    "match_all": {}
  },
  "aggs": {
    "price_sum": {
      "sum": {
        "field": "price"
      }
    }
  }
}

java代码

 /**
     * Handles GET /doc/aggs/sum: runs a match-all search against the Product index with a
     * {@code sum} aggregation named "sumPrice" over the "price" field and logs the total.
     *
     * @return the content of each hit contained in the search response
     **/
    @GetMapping("/doc/aggs/sum")
    public List<Product> aggsSum(){
        MatchAllQueryBuilder queryAll = QueryBuilders.matchAllQuery();
        SumAggregationBuilder sumAggregationBuilder = AggregationBuilders.sum("sumPrice").field("price");
        NativeSearchQuery query = new NativeSearchQueryBuilder()
                .withQuery(queryAll)
                .withAggregations(sumAggregationBuilder)
                .build();
        SearchHits<Product> search = elasticsearchRestTemplate.search(query, Product.class);

        // Log the aggregation result when the response carries one
        if (search.hasAggregations()){
            Aggregations aggregations = (Aggregations) search.getAggregations().aggregations();
            if (Objects.nonNull(aggregations)){
                Sum price = aggregations.get("sumPrice");
                // Message corrected from the typo "最和为" to "总和为" (total sum)
                log.info("总和为：{}", price.getValue());
            }
        }

        // Unwrap the hits into plain Product objects
        List<Product> result = new ArrayList<>();
        search.forEach(item -> result.add(item.getContent()));
        return result;
    }

5. avg 聚合查询平均值

GET /products/_search
{
  "query": {
    "match_all": {}
  },
  "aggs": {
    "avg_price": {
      "avg": {
        "field": "price"
      }
    }
  }
}

java代码

 /**
     * Handles GET /doc/aggs/avg: runs a match-all search against the Product index with an
     * {@code avg} aggregation named "avgPrice" over the "price" field and logs the average.
     *
     * @return the content of each hit contained in the search response
     **/
    @GetMapping("/doc/aggs/avg")
    public List<Product> aggsAvg(){
        MatchAllQueryBuilder queryAll = QueryBuilders.matchAllQuery();
        AvgAggregationBuilder aggregationBuilder = AggregationBuilders.avg("avgPrice").field("price");
        NativeSearchQuery query = new NativeSearchQueryBuilder()
                .withQuery(queryAll)
                .withAggregations(aggregationBuilder)
                .build();
        SearchHits<Product> search = elasticsearchRestTemplate.search(query, Product.class);

        // Log the aggregation result when the response carries one
        if (search.hasAggregations()){
            Aggregations aggregations = (Aggregations) search.getAggregations().aggregations();
            if (Objects.nonNull(aggregations)){
                Avg avg = aggregations.get("avgPrice");
                // Message corrected: it previously said "最小平均值" (minimum average),
                // a leftover from the min example; this value is the plain average.
                log.info("平均值为：{}", avg.getValue());
            }
        }

        // Unwrap the hits into plain Product objects
        List<Product> result = new ArrayList<>();
        search.forEach(item -> result.add(item.getContent()));
        return result;
    }

java项目代码地址
Aggregations aggregations = (Aggregations)search.getAggregations().aggregations();
if (Objects.nonNull(aggregations)){
Sum price = aggregations.get(“sumPrice”);
log.info(“最和为：”+price.getValue());
}
}
// 返回结果
List result = new ArrayList<>();
search.forEach(item ->{ result.add(item.getContent());});
return result;
}


****

##### 5. avg 聚合查询平均值

```http
GET /products/_search
{
  "query": {
    "match_all": {}
  },
  "aggs": {
    "avg_price": {
      "avg": {
        "field": "price"
      }
    }
  }
}
```

java代码

 /**
     * Handles GET /doc/aggs/avg: runs a match-all search against the Product index with an
     * {@code avg} aggregation named "avgPrice" over the "price" field and logs the average.
     *
     * @return the content of each hit contained in the search response
     **/
    @GetMapping("/doc/aggs/avg")
    public List<Product> aggsAvg(){
        MatchAllQueryBuilder queryAll = QueryBuilders.matchAllQuery();
        AvgAggregationBuilder aggregationBuilder = AggregationBuilders.avg("avgPrice").field("price");
        NativeSearchQuery query = new NativeSearchQueryBuilder()
                .withQuery(queryAll)
                .withAggregations(aggregationBuilder)
                .build();
        SearchHits<Product> search = elasticsearchRestTemplate.search(query, Product.class);

        // Log the aggregation result when the response carries one
        if (search.hasAggregations()){
            Aggregations aggregations = (Aggregations) search.getAggregations().aggregations();
            if (Objects.nonNull(aggregations)){
                Avg avg = aggregations.get("avgPrice");
                // Message corrected: it previously said "最小平均值" (minimum average),
                // a leftover from the min example; this value is the plain average.
                log.info("平均值为：{}", avg.getValue());
            }
        }

        // Unwrap the hits into plain Product objects
        List<Product> result = new ArrayList<>();
        search.forEach(item -> result.add(item.getContent()));
        return result;
    }

java项目代码地址

本文内容由网友自发贡献，版权归原作者所有，本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容，请联系:hwhale#tublm.com(使用前将#替换为@)