Elasticsearch 是基于 Lucene 构建的快速搜索引擎,提供 RESTful 接口,具备分布式能力。
从索引到被搜索出的延迟非常小,大约1秒
支持集群
索引是文档的集合,类似于数据库,7.0以后类似于表
```shell
# List every index known to the node, one row per index.
curl --request GET 'localhost:9200/_cat/indices'
```
```text
yellow open test     74pIPiJITyK1dCQ2DJa8xg 1 1 6668935 0   3.2gb   3.2gb
yellow open document TgHwjYQLRq6ViREpvpYuHQ 1 1   22984 0   5.6mb   5.6mb
yellow open course   cvT4UoWnSJ-BMULOLR9B7A 1 1    4667 0   1.5mb   1.5mb
yellow open article  fOe9wIkhT3i-CBM_76Irhg 1 1  705366 7 360.2mb 360.2mb
```
7.0以后一个索引只可以包含一个文档类型
```shell
# Fetch the document with id 1 from the "test" index.
curl --request GET 'localhost:9200/test/_doc/1'
```
json{
"_index": "test",
"_type": "_doc",
"_id": "1",
"_version": 1,
"_seq_no": 164,
"_primary_term": 1,
"found": true,
"_source": {
"explain": "“预付账款”、“应收账款”、“应收票据”均属于企业的流动资产,而“预收账款”属于企业的流动负债。",
"created_at": 1466179541,
"deleted": false,
"type": 1,
"rank": 1,
"title": "下列不属于流动资产的是( )。",
"score": null,
"pid": 1,
"id": 1,
"answer": "A",
"refid": 52000409,
"eid": 1,
"import_id": null,
"parent": null,
"updated_at": 1466179541,
"@version": "1",
"@timestamp": "2021-08-25T03:30:50.678Z",
"tags": ["test"]
}
}
文档(document)是被索引的基础数据单元
分片(shard)提供水平分割数据的能力,可以将一个索引分布式存储在多个机器上
```shell
# Cluster health check. This is still a plain GET request; the endpoint
# name is "_cat/health".
curl -XGET "localhost:9200/_cat/health?v"
# List all nodes
curl -XGET "localhost:9200/_cat/nodes?v"
# List all indices
curl -XGET "localhost:9200/_cat/indices?v"
# Create an index
curl -XPUT "localhost:9200/index_test"
# Define the index mapping. From 7.0 on the endpoint carries no type name,
# and requests with a body must declare a JSON Content-Type.
# The field below is only an example; replace it with the real mapping.
curl -XPUT -H 'Content-Type: application/json' 'localhost:9200/index_test/_mapping' -d '
{
  "properties": {
    "title": {"type": "text"}
  }
}'
# Create (or replace) the document with id 1 through the typeless _doc endpoint.
# The body below is only an example document.
curl -XPUT -H 'Content-Type: application/json' 'localhost:9200/index_test/_doc/1' -d '
{
  "title": "hello"
}'
```
```yaml
# Single-node Elasticsearch plus a Logstash instance that feeds it.
version: '2'
services:
  elasticsearch:
    image: elasticsearch:7.13.3
    container_name: elasticsearch
    volumes:
      # Index data, plugins and the node configuration live on the host.
      - ./data:/usr/share/elasticsearch/data
      - ./plugins:/usr/share/elasticsearch/plugins
      - ./config/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml
    environment:
      # Run as a one-node cluster (no discovery of other nodes).
      - discovery.type=single-node
    ports:
      - "9200:9200"
    restart: always
  logstash:
    image: logstash:7.13.3
    container_name: logstash
    volumes:
      - ./logstash/logstash.yml:/usr/share/logstash/config/logstash.yml:ro
      - ./logstash/logstash.conf:/usr/share/logstash/config/logstash.conf
      # Holds the *_last_run.txt files referenced by the pipeline.
      - ./logstash/meta:/usr/share/logstash/meta
      # JDBC driver loaded by the pipeline's jdbc inputs.
      - ./logstash/mysql-connector-java-5.1.48.jar:/usr/share/logstash/config/mysql-connector-java-5.1.48.jar
    command: logstash -f /usr/share/logstash/config/logstash.conf
    # depends_on orders startup. The service name "elasticsearch" already
    # resolves on the default Compose network, so the legacy `links` entry
    # is not needed.
    depends_on:
      - elasticsearch
```
```yaml
# --- Cluster / node identity ---
cluster.name: es-server
node.name: node-1
# NOTE(review): node.master / node.data are the legacy role switches; newer
# 7.x releases prefer node.roles. Confirm before upgrading.
node.master: true
node.data: true

# --- Process / paths ---
bootstrap.memory_lock: false
path.logs: /usr/share/elasticsearch/logs

# --- HTTP ---
# Listen on every interface, on port 9200.
network.host: 0.0.0.0
http.port: 9200
# CORS is enabled for any origin.
http.cors.enabled: true
http.cors.allow-origin: "*"

# --- X-Pack ---
xpack.security.audit.enabled: true
```
```conf
# Logstash pipeline: four incremental MySQL -> Elasticsearch syncs
# (article, course, document, test). Each jdbc input tags its events and the
# output section routes on that tag to the index of the same name.
input {
  stdin {
  }
  # --- article: incremental sync keyed on published_at ---
  jdbc {
    # MySQL connection URL; "database" is the schema name.
    # Connector/J selects the charset with characterEncoding, not charset.
    # NOTE(review): when Logstash runs inside a container, localhost is the
    # container itself; point this at the MySQL host/service instead.
    jdbc_connection_string => "jdbc:mysql://localhost:3306/database?characterEncoding=utf8"
    # Credentials
    jdbc_user => "root"
    jdbc_password => "root"
    # Driver jar and driver class
    jdbc_driver_library => "/usr/share/logstash/config/mysql-connector-java-5.1.48.jar"
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    # Fetch the result set in pages of 50000 rows
    jdbc_paging_enabled => true
    jdbc_page_size => 50000
    jdbc_default_timezone => "Asia/Shanghai"
    # Only rows newer than the last recorded published_at are selected,
    # which keeps the load on the database low.
    statement => "select * from article where published_at > :sql_last_value"
    # Cron fields, left to right: minute, hour, day of month, month,
    # day of week. "* * * * *" means every minute.
    # Here: every two hours, on the hour.
    schedule => "0 */2 * * *"
    clean_run => false
    # Persist the last seen published_at and reuse it as :sql_last_value.
    use_column_value => true
    tracking_column => "published_at"
    tracking_column_type => "numeric"
    record_last_run => true
    last_run_metadata_path => "/usr/share/logstash/meta/article_last_run.txt"
    tags => ["article"]
  }
  # --- course: incremental sync keyed on id ---
  jdbc {
    # MySQL connection URL; "database" is the schema name (see note above).
    jdbc_connection_string => "jdbc:mysql://localhost:3306/database?characterEncoding=utf8"
    # Credentials
    jdbc_user => "root"
    jdbc_password => "root"
    # Driver jar and driver class
    jdbc_driver_library => "/usr/share/logstash/config/mysql-connector-java-5.1.48.jar"
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    jdbc_paging_enabled => true
    jdbc_page_size => 50000
    jdbc_default_timezone => "Asia/Shanghai"
    # Only rows with an id above the last recorded one are selected.
    statement => "select * from course where id > :sql_last_value"
    # Cron (minute hour day-of-month month day-of-week): every 30 minutes.
    schedule => "*/30 * * * *"
    clean_run => false
    use_column_value => true
    tracking_column => "id"
    tracking_column_type => "numeric"
    record_last_run => true
    last_run_metadata_path => "/usr/share/logstash/meta/course_last_run.txt"
    tags => ["course"]
  }
  # --- document: incremental sync keyed on id ---
  jdbc {
    # MySQL connection URL; "database" is the schema name (see note above).
    jdbc_connection_string => "jdbc:mysql://localhost:3306/database?characterEncoding=utf8"
    # Credentials
    jdbc_user => "root"
    jdbc_password => "root"
    # Driver jar and driver class
    jdbc_driver_library => "/usr/share/logstash/config/mysql-connector-java-5.1.48.jar"
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    jdbc_paging_enabled => true
    jdbc_page_size => 50000
    jdbc_default_timezone => "Asia/Shanghai"
    # Only rows with an id above the last recorded one are selected.
    statement => "select * from document where id > :sql_last_value"
    # Cron (minute hour day-of-month month day-of-week): hourly, on the hour.
    schedule => "0 * * * *"
    clean_run => false
    use_column_value => true
    tracking_column => "id"
    tracking_column_type => "numeric"
    record_last_run => true
    last_run_metadata_path => "/usr/share/logstash/meta/document_last_run.txt"
    tags => ["document"]
  }
  # --- test: incremental sync keyed on id ---
  jdbc {
    # MySQL connection URL; "database" is the schema name (see note above).
    jdbc_connection_string => "jdbc:mysql://localhost:3306/database?characterEncoding=utf8"
    # Credentials
    jdbc_user => "root"
    jdbc_password => "root"
    # Driver jar and driver class
    jdbc_driver_library => "/usr/share/logstash/config/mysql-connector-java-5.1.48.jar"
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    jdbc_paging_enabled => true
    jdbc_page_size => 50000
    jdbc_default_timezone => "Asia/Shanghai"
    # Only rows with an id above the last recorded one are selected.
    statement => "select * from test where id > :sql_last_value"
    # Cron (minute hour day-of-month month day-of-week): every 30 minutes.
    schedule => "*/30 * * * *"
    clean_run => false
    use_column_value => true
    tracking_column => "id"
    tracking_column_type => "numeric"
    record_last_run => true
    last_run_metadata_path => "/usr/share/logstash/meta/test_last_run.txt"
    tags => ["test"]
  }
}
filter {
  # Parse the "message" field as JSON into the event, then drop the raw text.
  json {
    source => "message"
    remove_field => ["message"]
  }
}
output {
  # Route each event to the index named after its tag. The document id is
  # taken from the row's "id" column, so re-synced rows overwrite in place.
  if "document" in [tags] {
    elasticsearch {
      # Elasticsearch host and port
      hosts => ["elasticsearch:9200"]
      # Target index
      index => "document"
      # "id" must be the key column of the queried table
      document_id => "%{id}"
    }
  }
  if "article" in [tags] {
    elasticsearch {
      # Elasticsearch host and port
      hosts => ["elasticsearch:9200"]
      # Target index
      index => "article"
      # "id" must be the key column of the queried table
      document_id => "%{id}"
    }
  }
  if "course" in [tags] {
    elasticsearch {
      # Elasticsearch host and port
      hosts => ["elasticsearch:9200"]
      # Target index
      index => "course"
      # "id" must be the key column of the queried table
      document_id => "%{id}"
    }
  }
  if "test" in [tags] {
    elasticsearch {
      # Elasticsearch host and port
      hosts => ["elasticsearch:9200"]
      # Target index
      index => "test"
      # "id" must be the key column of the queried table
      document_id => "%{id}"
    }
  }
  stdout {
    # Also print every event as one JSON object per line
    codec => json_lines
  }
}
```
```shell
# Simple URI searches.
# URLs are quoted so the shell does not treat "&" as "run in background"
# (which would silently drop the sort parameter) or glob on "?".
curl 'localhost:9200/_search?q=2021'
curl 'localhost:9200/test/_search?q=2021'
curl 'localhost:9200/test/_search?q=explain:2021&sort=id:desc'
# Non-ASCII query text must be URL-encoded; -G keeps the request a GET and
# appends the encoded data to the query string.
curl -G 'localhost:9200/test/_search' --data-urlencode 'q=explain:计算'
```
json{
"took": 12,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 24.136868,
"hits": [{
"_index": "test",
"_type": "_doc",
"_id": "6420437",
"_score": 24.136868,
"_source": {
"explain": "当甲状腺次全切除术后,患者出现手足抽搐是因手术时误伤及甲状旁腺或其血液供给受累所致。神经肌肉的应激性显著增高,多在术后1〜3天出现手足抽搐,抽搐发作时,应立即静脉注射10%葡萄:®酸钙或氯化钙10〜20ml。其他几种措施起效慢,不适合紧急处理。",
"created_at": 1618392032,
"deleted": false,
"type": 846,
"rank": 6420437,
"title": "甲状腺次全切除术后,患者出现手足抽搐发作时,最便捷而有效的治疗是",
"score": null,
"pid": 80975,
"id": 6420437,
"answer": "A",
"refid": 0,
"eid": 187,
"import_id": 34809,
"parent": null,
"updated_at": 1618392032,
"@version": "1",
"@timestamp": "2021-08-25T04:19:57.269Z",
"tags": ["test"]
}
}, {
"_index": "test",
"_type": "_doc",
"_id": "333190",
"_score": 7.928772,
"_source": {
"explain": "1.当可燃性气体、蒸气或可燃粉尘与空气(或氧)在一定浓度范围内均匀混合,遇到火源发生爆炸的浓度范围称为爆炸极限。可燃性气体、蒸气或粉尘在爆炸极限范围内遇到热源(明火或温度),火焰瞬间传播于整个混合气体(或混合粉尘)空间,化学反应速度极快,同时释放大量的热,生成很多气体,气体受热膨胀,形成很高的温度和很大的压力,具有很强的破坏力。\r\n2.氧化反应中的有些氧化剂本身是强氧化剂,如高锰酸钾(如A项)、氯酸钾、过氧化氢、过氧化苯甲酰、硝酸铵(如B项)等,具有很大的危险性,如受高温、撞击、摩擦或与有机物、酸类接触,易引起燃烧或爆炸;易燃物品(如c、E两项),其蒸气与空气可形成爆炸性混合物,遇明火、高热能引起燃烧爆炸。\r\n3.C项属于管理措施;E项属于行政措施。\r\n4.①人身伤亡后所支出的费用,如医疗费用(含护理费用)、丧葬及抚恤费用、补助及救济费用、歇工工资。②善后处理费用,如处理事故的事务性费用、现场抢救费用、清理现场费用、事故罚款和赔偿费用。®财产损失价值,如固定资产损失价值、流动资产损失价值。\r\n5.①单元内存在的危险物质为单一品种,则该物质的数量即为单元内危险物质的总量,若等于或超过相应的临界{} 计算,若满足计算公式则定为重大危险源。故仓储区构成重大危险源。\r\n6.仓储区4号库房存放硫化钠10t、过硫酸铵40t、高锰酸钾10t、硝酸铵130t、洗衣粉50t,其中,混存的过硫酸铵与硫化钠(强氧化剂与还原剂)因接触而发生激烈的氧化还原反应,形成热积聚,导致起火燃烧。\r\n7.未作答参考解析:高锰酸钾,又称过锰酸钾,它属强氧化剂,在酸性条件下氧化性更强,可以用做消毒剂和漂白剂,和强还原性物质反应会褪色。",
"created_at": 1466680682,
"deleted": false,
"type": 269,
"rank": 333190,
"title": "某储运公司仓储区占地300 m×300 m,共有8个库房,原用于存放一般货物。3年前,该储运公司未经任何技术改造和审批,擅自将1号、4号和6号库房改存危险化学品。\r\n仓储区4号库房内首先发生爆炸,12 min后,6号库房也发生了爆炸,爆炸引发了火灾,火势越来越大,之后相继发生了几次小规模爆炸。消防队到达现场后,发现消火栓不出水,消防蓄水池没水,随后在1 km外找到取水点,并立即展开灭火抢险救援行动。\r\n事故发生前,1号库房存放双氧水5 t;4号库房存放硫化钠10 t、过硫酸铵40 t、高锰酸钾10 t、硝酸铵130 t、洗衣粉50 t;6号2008年3月14 日12 时18分,库房存放硫磺15 t、甲苯4 t、甲酸乙酯10 t、事故导致15人死亡、36人重伤、近万人疏散,烧损、炸毁建筑物39 000 m[~2.gif]和大量化学物品等,直接经济损失1.2亿元。",
"score": null,
"pid": 5756,
"id": 333190,
"answer": "",
"refid": 8000010,
"eid": 69,
"import_id": null,
"parent": null,
"updated_at": 1561108744,
"@version": "1",
"@timestamp": "2021-08-25T03:33:23.186Z",
"tags": ["test"]
}
}]
}
}
```shell
# term / terms queries
#   term  - find documents whose field contains one given keyword
#   terms - find documents whose field contains any of several keywords

# term
curl localhost:9200/test/_search \
  --request POST \
  --header 'Content-type:application/json' \
  --data '
{
"query":{"term":{ "explain":"计算"}}
}'

# terms
curl localhost:9200/test/_search \
  --request POST \
  --header 'Content-type:application/json' \
  --data '
{
"query":{"terms":{ "title":["计算", "成本"]}}
}'
```
```shell
# match queries
#   A match query runs the search text through the analyzer.
#   Related query types: match_all, multi_match, match_phrase

# match on the "explain" field of the test index
curl localhost:9200/test/_search \
  --request POST \
  --header 'Content-type:application/json' \
  --data '
{
"query":{"match":{ "explain":"计算"}}
}'

# match on the "title" field of the article index
curl localhost:9200/article/_search \
  --request POST \
  --header 'Content-type:application/json' \
  --data '
{
"query":{"match":{ "title":"初级会计什么时候考试"}}
}'

# multi_match: one search text against several fields, returning only
# the id and title of each hit
curl localhost:9200/test/_search \
  --request POST \
  --header 'Content-type:application/json' \
  --data '
{
"_source": ["id", "title"],
"query":{
"multi_match":{
"query":"计算",
"fields": ["title", "explain"]
}
}
}'
```
json// article search
{
"took": 121,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 10000,
"relation": "gte"
},
"max_score": 21.627768,
"hits": [{
"_index": "article",
"_type": "_doc",
"_id": "215286",
"_score": 21.627768,
"_source": {
"author": "作者",
"comment_allowed": false,
"acid": 6,
"source": "网站",
"admin_id": null,
"id": 215286,
"published_at": 1584334868,
"flag": "",
"@version": "1",
"shared": 0,
"updated_at": 1584334868,
"jump_url": "",
"@timestamp": "2021-08-25T04:00:46.667Z",
"tags": ["article"],
"created_at": 1584334868,
"area": 1,
"keywords": "关键字",
"rank": 215286,
"title": "2020年初级会计考试时间什么时候进行",
"brief": "很多考生咨询今年5月份初会考试是否也会受疫情影响而推迟?本站小编提示“2020年初级会计职称考试时间什么时候进行”很多处于3-5月份考试均已推迟,请考生随时关注新的初级会计考试政策。\r\n\r\n考试时间\r\n\r\n2020初级会计考试于2",
"published": true,
"thumbnail": null,
"cid": 4,
"is_included": null,
"is_pushed": true,
"is_included_at": null,
"read": 183
}
}, {
"_index": "article",
"_type": "_doc",
"_id": "76686",
"_score": 21.62196,
"_source": {
"author": "作者",
"comment_allowed": false,
"acid": 6,
"source": "网站",
"admin_id": null,
"id": 76686,
"published_at": 1534299399,
"flag": "",
"@version": "1",
"shared": 0,
"updated_at": 1534299399,
"jump_url": "",
"@timestamp": "2021-08-25T04:00:19.159Z",
"tags": ["article"],
"created_at": 1533883100,
"area": 1,
"keywords": "",
"rank": 76686,
"title": "2019年初级会计证什么时候报名?",
"brief": "",
"published": false,
"thumbnail": null,
"cid": 4,
"is_included": null,
"is_pushed": false,
"is_included_at": null,
"read": 1
}
}]
}
}
json// multi_match
{
"took": 36,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 10000,
"relation": "gte"
},
"max_score": 12.341715,
"hits": [{
"_index": "test",
"_type": "_doc",
"_id": "267958",
"_score": 12.341715,
"_source": {
"id": 267958,
"title": "一台计算机连入计算机网络后,该计算机( )。"
}
}, {
"_index": "test",
"_type": "_doc",
"_id": "3345285",
"_score": 12.1339855,
"_source": {
"id": 3345285,
"title": "按成本计算估价法计算利润的计算基数包括()"
}
}, {
"_index": "test",
"_type": "_doc",
"_id": "7879",
"_score": 12.036486,
"_source": {
"id": 7879,
"title": "甲计算机的主频快,乙计算机的主频慢,甲计算机的运算速度也必定比乙计算机的运算速度快。( )"
}
}, {
"_index": "test",
"_type": "_doc",
"_id": "2714263",
"_score": 12.007196,
"_source": {
"id": 2714263,
"title": "工程成本核算的依据是()。A.会计核算和统计核算B.统计核算和业务核算C.业务核算和资金核算D.资金核算和会计核算"
}
}, {
"_index": "test",
"_type": "_doc",
"_id": "2294959",
"_score": 11.933133,
"_source": {
"id": 2294959,
"title": "进行疲劳验算时,计算部分的设计应力幅应按( )计算。"
}
}, {
"_index": "test",
"_type": "_doc",
"_id": "238620",
"_score": 11.783192,
"_source": {
"id": 238620,
"title": "潮流计算常用的计算方法是( )。"
}
}, {
"_index": "test",
"_type": "_doc",
"_id": "709734",
"_score": 11.783192,
"_source": {
"id": 709734,
"title": "第三代电子计算机是( )计算机。"
}
}, {
"_index": "test",
"_type": "_doc",
"_id": "157993",
"_score": 11.783192,
"_source": {
"id": 157993,
"title": "计算设备原价时,一般按( )计算。"
}
}, {
"_index": "test",
"_type": "_doc",
"_id": "234816",
"_score": 11.783192,
"_source": {
"id": 234816,
"title": "短路电流计算一般需要计算( )。"
}
}, {
"_index": "test",
"_type": "_doc",
"_id": "244612",
"_score": 11.783192,
"_source": {
"id": 244612,
"title": "潮流计算常用的计算方法是( )。"
}
}]
}
}
```shell
# range query
# Bounds are given with gt / gte / lt / lte (plus optional boost).
# Here: 1627747200 <= published_at < 1630425600, i.e. lower bound inclusive,
# upper bound exclusive.
curl -XPOST -H 'Content-type:application/json' localhost:9200/article/_search -d '
{
  "query": {
    "range": {
      "published_at": {
        "gte": 1627747200,
        "lt": 1630425600
      }
    }
  }
}'
```
```shell
# wildcard
#   Allows the wildcards * and ? in the search text:
#   * stands for zero or more characters
#   ? stands for any single character

# fuzzy
#   The fuzzy query is the fuzzy counterpart of the term query.
#   a. It is a "contains" operation, not an "equals" check.
#   b. It is unaware of the analyzer, so the search text is not tokenized.
#   c. "Contains" refers to one of the tokens produced from the document,
#      not to the document as a whole.
#   d. Because matching happens against the analyzed tokens, uppercase input
#      must be lowercased first; uppercase letters will not match.

# Highlighted search results: return the first hit and mark the matched
# parts of its title.
curl -XPOST -H 'Content-type:application/json' localhost:9200/article/_search -d '
{
  "from": 0,
  "size": 1,
  "query": {
    "match": {
      "title": "初级会计什么时候考试"
    }
  },
  "highlight": {
    "fields": {
      "title": {}
    }
  }
}'
```
json// 高亮搜索结果
{
"took": 97,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 10000,
"relation": "gte"
},
"max_score": 21.627768,
"hits": [{
"_index": "article",
"_type": "_doc",
"_id": "215286",
"_score": 21.627768,
"_source": {
"author": "作者",
"comment_allowed": false,
"acid": 6,
"source": "网站",
"admin_id": null,
"id": 215286,
"published_at": 1584334868,
"flag": "",
"@version": "1",
"shared": 0,
"updated_at": 1584334868,
"jump_url": "",
"@timestamp": "2021-08-25T04:00:46.667Z",
"tags": ["article"],
"created_at": 1584334868,
"area": 1,
"keywords": "关键字",
"rank": 215286,
"title": "2020年初级会计考试时间什么时候进行",
"brief": "很多考生咨询今年5月份初会考试是否也会受疫情影响而推迟?本站小编提示“2020年初级会计职称考试时间什么时候进行”很多处于3-5月份考试均已推迟,请考生随时关注新的初级会计考试政策。\r\n\r\n考试时间\r\n\r\n2020初级会计考试于2",
"published": true,
"thumbnail": null,
"cid": 4,
"is_included": null,
"is_pushed": true,
"is_included_at": null,
"read": 183
},
"highlight": {
"title": ["2020年<em>初</em><em>级</em><em>会</em><em>计</em><em>考</em><em>试</em><em>时</em>间<em>什</em><em>么</em><em>时</em><em>候</em>进行"]
}
}]
}
}
本文作者:谭三皮
本文链接:
版权声明:本博客所有文章除特别声明外,均采用 BY-NC-SA 许可协议。转载请注明出处!