ELK Elasticsearch 笔记（四）信息统计分析与搜索提示

2023-05-16

Aggregations

aggregations可以看做是对查询结果的二次汇总，比如先查询出某个时间段的HTTP请求，然后统计每天的数据。在aggregations中会用到“桶”(buckets)的概念。所谓的“桶”，是满足某个条件的文档集合，它和关系型数据库中sql语句的group by子句的作用相似（但又不一样）。另外，metrics(测度)是为某个桶中的文档计算得到的统计信息，类似于sql中的count()、max()等。可见，aggregations聚合是由一个或多个buckets、零个或者多个metrics组合而成的统计结果。每个文档中的值会被计算来决定它们是否匹配了某些buckets的条件，如匹配成功，那么该文档会被置入该buckets中。

最值、求和、均值统计

GET /pms/_search?pretty
{
  // Match every document, then compute three single-value metrics on "price".
  "query": { "match_all": {} },
  "aggs": {
    // Each key directly under "aggs" is a user-chosen aggregation name.
    "min_size": { "min": { "field": "price" } },
    "max_size": { "max": { "field": "price" } },
    "avg_val": { "avg": { "field": "price" } }
  }
}

返回的结果：

Stats Aggregation 及 Extended Stats Aggregations

GET /pms/_search?pretty
{
  "query": { "match_all": {} },
  "aggs": {
    // "stats" on price; the key is a user-chosen aggregation name.
    "multi_stats_size": { "stats": { "field": "price" } },
    // "extended_stats" on the same field; the key is again user-chosen.
    "extended_stats_of_size": { "extended_stats": { "field": "price" } }
  }
}

返回结果：

Terms Aggregations 用于对指定字段的内容进行分布统计

// Bucket documents by brandName (comparable to SQL GROUP BY brandName).
GET /pms/_search?pretty
{
  "query": { "match_all": {} },
  "aggs": {
    "brand_of_aggs": { "terms": { "field": "brandName" } }
  }
}

// Terms buckets on brandName, ordered by a nested average-price metric.
GET /pms/_search?pretty
{
  "query": { "match_all": {} },
  // Outer aggs
  "aggs": {
    "brand_of_aggs": {
      "terms": {
        "field": "brandName",
        "size": 5,
        // The sort key refers to the sub-aggregation named "avg_agg" below.
        "order": { "avg_agg": "desc" }
      },
      // Inner aggs
      "aggs": {
        // User-chosen name of the inner aggregation.
        "avg_agg": { "avg": { "field": "price" } }
      }
    }
  }
}

返回结果1 ：返回结果2:

// Group by brandName, with a script applied to each term value.
GET /pms/_search?pretty
{
  "query": { "match_all": {} },
  "aggs": {
    "brand_of_aggs": {
      "terms": {
        "field": "brandName",
        "script": "'BrandName:'+_value",
        // Keep only buckets with at least 3 documents (comparable to SQL HAVING count >= 3).
        "min_doc_count": 3,
        // Disabled alternative: keep only the matching term.
        //"include": "BrandName:万和",
        // Drop the matching term.
        "exclude": "BrandName:万和"
      }
    }
  }
}

Range Aggregations 范围统计

// 1: split documents into three price ranges, each with a custom bucket key.
GET /pms/_search?pretty
{
  "query": {
    "match_all": {}
  },
  "aggs": {
    "price_rang_agg": {             // user-chosen aggregation name
      "range": {
        "field": "price",
        // For every range, "from" is inclusive and "to" is exclusive.
        "ranges": [
          {
            "key": "small",
            "to": 2200              // price < 2200
          },
          {
            "key": "medium",        // custom bucket key
            "from": 2200,           // 2200 <= price < 3000
            "to": 3000
          },
          {
            "key": "large",         // custom bucket key, distinct from the 2200-3000 bucket
            "from": 3000            // price >= 3000
          }
        ]
      }
    }
  }
}

// 2: three price ranges, each with nested statistics.
GET /pms/_search?pretty
{
  "query": { "match_all": {} },
  "aggs": {
    "price_rang_agg": {
      "range": {
        "field": "price",
        "ranges": [
          { "key": "small", "to": 2200 },
          { "key": "medium", "from": 2200, "to": 3000 },
          { "key": "large", "from": 3000 }
        ]
      },
      // Inner aggregation, nested under the range aggregation.
      "aggs": {
        // User-chosen name of the inner aggregation.
        "price_stat": { "stats": { "field": "price" } }
      }
    }
  }
}

Date_range Aggregations 专门对于时间类型字段进行区间统计的

GET /pms/_search?pretty
{
  "query": { "match_all": {} },
  "aggs": {
    // User-chosen name under which the result is returned.
    "name_Aggs": {
      // Aggregation type: date_range.
      "date_range": {
        // Placeholder: replace with the date field to aggregate on.
        "field": "时间字段",
        // Format used to render dates in the response.
        "format": "yyyy/MM/dd",
        "ranges": [
          // Disabled alternative: from 10 days before the current date until now.
          //{
          //  "from": "now-10d/d",
          //  "to": "now"
          //}
          // Everything earlier than 10 days before the current date.
          { "to": "now-10d/d" },
          // From 10 days before the current date up to the present.
          { "from": "now-10d/d" }
        ]
      }
    }
  }
}

// Response (excerpt). The two "…的时间戳" strings are placeholders that stand
// for the timestamp value of 2020/04/20 in a real response.
"aggregations": {
    "name_Aggs": {
        "buckets": [
            {
                "key": "*-2020/04/20",
                "to": "2020/04/20的时间戳",
                "to_as_string": "2020/04/20",
                "doc_count": 15297
            },
            {
                "key": "2020/04/20-*",
                "from": "2020/04/20的时间戳",
                "from_as_string": "2020/04/20",
                "doc_count": 5883
            }
        ]
    }
}

Histogram Aggregations 是一种针对数值型或日期型字段、按固定间隔对其取值进行分桶的聚合，生成的聚合数据可用于绘制柱状图。

GET /pms/_search?pretty
{
  "query": { "match_all": {} },
  "aggs": {
    "price_Aggs": {
      "histogram": {
        // Field to bucket on.
        "field": "price",
        // Bucket width.
        "interval": 1000,
        // Sort buckets by the "max" value of the "size_stat" sub-aggregation, descending.
        "order": { "size_stat.max": "desc" }
      },
      // Inner aggs
      "aggs": {
        "size_stat": { "stats": { "field": "price" } }
      }
    }
  }
}

Date_histogram Aggregations 是一个增强型的专门针对日期型字段统计的histogram aggregation，它允许使用year、month、day、hour、minute等常量作为interval属性的取值（自 7.2 版本起该属性改名为 calendar_interval，旧的 interval 已被弃用，并在 8.0 中移除）。

GET /pms/_search?pretty
{
  "query": {
    "match_all": {}
  },
  "aggs": {
    "time_aggs": {
      "date_histogram": {
        // Placeholder: replace with the date field to aggregate on.
        "field": "日期字段",
        // Calendar-aware bucket size. "calendar_interval" replaces the plain
        // "interval" parameter, which is deprecated since 7.2 and removed in 8.0
        // (on clusters older than 7.2 use "interval" instead).
        "calendar_interval": "month",
        "format": "yyyy-MM-dd"
      }
      // Optional sub-aggregation template. When enabling it, also add a comma
      // after the closing brace of "date_histogram" above.
      //"aggs": {
      //  "NAME": {
      //    "AGG_TYPE": {}
      //  }
      //}
    }
  }
}

Filter Aggregations 类似于sql中的 where子句的作用，可以为当前文档集合定义一个过滤条件来聚焦现有的数据集。凡满足定义的过滤条件的文档都会被置入这个桶bucket中。

GET /pms/_search?pretty
{
  "query": { "match_all": {} },
  "aggs": {
    "price_agg": {
      // Single bucket holding the documents with 3000 <= price <= 4000.
      "filter": {
        "range": {
          "price": { "gte": 3000, "lte": 4000 }
        }
      },
      // Average price of the documents inside that bucket.
      "aggs": {
        "avg_price": { "avg": { "field": "price" } }
      }
    }
  }
}

Missing Aggregations 用来统计缺少指定字段的文档个数，也称为缺值统计。

GET /pms/_search?pretty
{
  "query": { "match_all": {} },
  "aggs": {
    // Counts the documents that have no value for "brandId".
    "without_name": { "missing": { "field": "brandId" } }
  }
}

搜索提示

在很多大型搜索网站中都提供搜索提示功能，这将提升用户的搜索体验。

官方文档地址： https://www.elastic.co/guide/en/elasticsearch/reference/current/search-suggesters.html#completion-suggester

首先，建立索引

// Create an empty index. The comment sits on its own line because text after
// the URL on a Console request line may be treated as part of the URL.
PUT test3
{}

// Update the mapping of the index. The comment sits on its own line because
// text after the URL on a Console request line may be treated as part of the URL.
// NOTE: "ik_max_word" is provided by the IK analysis plugin, which must be installed.
PUT test3/_mapping
{
  "properties": {
    // Completion-typed field, used by the completion suggester.
    "name": {
      "type": "completion",
      "analyzer": "ik_max_word",
      "search_analyzer": "ik_max_word"
    },
    // Ordinary full-text field.
    "desc": {
      "type": "text",
      "analyzer": "ik_max_word",
      "search_analyzer": "ik_max_word"
    }
  }
}

插入数据

// Index three sample documents; the last path segment is the document id.
PUT test3/_doc/cn
{
  "name": "中国",
  "desc": "中华人民共和国"
}
PUT test3/_doc/zf
{
  "name": "中非",
  "desc": "中非共和国"
}
PUT test3/_doc/usa
{
  "name": "美国",
  "desc": "美利坚合众国"
}

测试自动补全

POST test3/_search?pretty
{
  "suggest": {
    // User-chosen name of the suggestion.
    "my-suggest": {
      // Prefix to complete.
      "prefix": "中",
      // Look up completions in the completion-typed "name" field.
      "completion": { "field": "name" }
    }
  }
}

本文内容由网友自发贡献，版权归原作者所有，本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容，请联系:hwhale#tublm.com(使用前将#替换为@)