Elasticsearch 8.X 使用入门以及集成python springboot
elasticsearch入门级使用教程,包含安装, 分词器 与python java的集成
一、准备环境
1.1 docker安装单节点集群
官方文档:Install Elasticsearch with Docker | Elasticsearch Guide [8.14] | Elastic
创建网络
docker network create elastic
pull镜像
docker pull docker.elastic.co/elasticsearch/elasticsearch:8.14.1
启动容器
docker run --name es01 --net elastic -p 9200:9200 -it -m 1GB docker.elastic.co/elasticsearch/elasticsearch:8.14.1
获取密码和token
docker exec -it es01 /usr/share/elasticsearch/bin/elasticsearch-reset-password -u elastic
docker exec -it es01 /usr/share/elasticsearch/bin/elasticsearch-create-enrollment-token -s kibana
验证连接
curl -k -u elastic:$ELASTIC_PASSWORD https://localhost:9200
安装kibana
#拉取镜像
docker pull docker.elastic.co/kibana/kibana:8.14.1
#启动容器
docker run --name kib01 --net elastic -p 5601:5601 docker.elastic.co/kibana/kibana:8.14.1
#需要使用es的token
docker exec -it es01 /usr/share/elasticsearch/bin/elasticsearch-create-enrollment-token -s kibana
#重置elastic用户的密码(用于登录kibana)
docker exec -it es01 /usr/share/elasticsearch/bin/elasticsearch-reset-password -u elastic
1.2 安装ik分词器-在线、离线
在线安装,后面选择和es对应的版本
bin/elasticsearch-plugin install https://get.infini.cloud/elasticsearch/analysis-ik/8.14.1
离线安装
下载 https://release.infinilabs.com/analysis-ik/stable/elasticsearch-analysis-ik-8.14.1.zip ,解压后放在 elasticsearch/plugins 目录,重启es。
1.3 将ik分词器打包到es镜像
创建DockerFile文件
FROM docker.elastic.co/elasticsearch/elasticsearch:8.14.1
ADD analysis-ik /usr/share/elasticsearch/plugins/analysis-ik
目录结构如下
创建镜像,在当前路径下执行
docker build -f DockerFile -t elasticsearch-ik:8.14.1 .
执行docker images
可以看到打包好的镜像在本地
1.4 安装完验证
验证环境
curl -k https://127.0.0.1:9200/_analyze --user elastic:n8LbPoUIC1xGDNL0rTrw -d '{"analyzer": "ik_max_word", "text":"水雨"}' --header "Content-Type: application/json"
验证索引
curl -k https://127.0.0.1:9200/_cat/indices/so-comp-prod?v --user elastic:GMOJWtxHKqGtcfUZwd=f
curl -k -X GET 'https://127.0.0.1:9200/_cat/indices?v' --user elastic:GMOJWtxHKqGtcfUZwd=f
验证数据
curl -k https://192.168.210.106:9200/so-comp-prod/_doc/30?pretty --user elastic:n8LbPoUIC1xGDNL0rTrw
curl -X GET 'https://127.0.0.1:9200/solution_mp_01/_search' \
-k \
--user elastic:GMOJWtxHKqGtcfUZwd=f \
-H 'Content-Type: application/json' \
-d '{
"query": {
"multi_match": {
"fields": ["summary"],
"query": "解决方案",
"analyzer": "ik_smart"
}
}
}'
二、Elasticsearch常用语句
2.1 新建索引
PUT product
{
"settings": {
"analysis": {
"analyzer": {
"default": {
"type": "ik_max_word" #索引时使用的分词器
},
"default_search": {
"type": "ik_smart" #检索时使用的分词器
}
}
},
"index": {
"number_of_shards": "2",
"number_of_replicas": "0"
}
},
"mappings": {
"properties": {
"id": {"type": "long"},
"advertise": {"type": "double"},
"product": {"type": "text"},
"summary": {"type": "text"},
"function": {"type": "text", "fields": {"keyword": {"ignore_above": 256, "type": "keyword"}}} # function.keyword字段不分词
}
}
}
#查看索引的映射情况
GET product/_mapping/
2.2 普通查询 match
GET product/_search
{
"query": {
"match": {
"name": {
"query":"袜子",
"analyzer": "ik_smart"
}
}
}
}
2.3 多字段查询 multi_match
GET product/_search
{
"query": {
"multi_match": {
"fields": ["name", "summary"],
"query": "条纹",
"analyzer": "ik_smart"
}
}
}
2.4 match_phrase 提升精确率
#match 和 match_phrase结合
GET product/_search
{
"query": {
"bool":{
"must": [
{
"multi_match": {
"fields": ["name","summary"],
"query": "红色条纹袜子"
}
}
],
"should":[
{
"match_phrase":{
"summary":{
"query": "红色条纹袜子",
"slop":9
}
}
}
]
}
}
}
2.5 term 检索
GET product/_search
{
"query": {
"terms": {
"function.keyword": [ "服饰", "袜子" ],
"boost": 1.0
}
}
}
2.6 向量检索
PUT vec_demo
{
"mappings": {
"properties": {
"name_vector": {
"type": "dense_vector",
"dims": 3,
"similarity": "l2_norm"
},
"name": {
"type": "text"
}
}
}
}
POST vec_demo/_search
{
  "knn": {
    "field": "name_vector",
    "query_vector": [-5, 9, -12],
    "k": 10,
    "num_candidates": 100
  },
  "fields": [ "name", "name_vector" ]
}
2.7 highlight
{
"query":{……},
"highlight": {
"fields":{
"name":{},
"summary":{},
"solution":{},
"requirement":{},
"function_point":{}
},
"fragment_size": 150,
"number_of_fragments": 3,
"encoder": "html",
"require_field_match": false,
"pre_tags": ["<span style='color:yellow'>"],
"post_tags": ["</span>"]
}
}
2.8 explain
GET so-comp-prod/_search
{
"explain": true, #响应中包含解释
"query": {
"match": {
"summary": {
"query":"袜子",
"analyzer": "ik_smart"
}
}
}
}
2.9 analyze
GET /_analyze
{
"analyzer": "ik_max_word",
"text":"一双红色袜子"
}
#查看文档中某一个字段的分词结果
GET /product/_analyze
{
"field" : "summary",
"text": "汽车行业"
}
2.10 更新license
通过 _license 接口查询license的情况
sh-5.0$ curl -u elastic:n8LbPoUIC1xGDNL0rTrw -XGET https://localhost:9200/_license -k
{
"license" : {
"status" : "expired",
"uid" : "3c453c5c-729f-4efd-b2e6-0ec32fc9b35b",
"type" : "trial",
"issue_date" : "2024-07-17T09:51:31.596Z",
"issue_date_in_millis" : 1721209891596,
"expiry_date" : "2024-08-16T09:51:31.596Z",
"expiry_date_in_millis" : 1723801891596,
"max_nodes" : 1000,
"max_resource_units" : null,
"issued_to" : "docker-cluster",
"issuer" : "elasticsearch",
"start_date_in_millis" : -1
}
}
es6.X及以前的版本需要去官网申请一个type为basic的免费license;8.x版本直接调用以下 _license/start_basic 接口就可以更新license
curl -u elastic:n8LbPoUIC1xGDNL0rTrw -XPOST https://localhost:9200/_license/start_basic?acknowledge=true -k -H "Content-Type:application/json"
三、集成python
3.1 python-dsl
官方文档:Elasticsearch DSL — Elasticsearch DSL 8.14.0 documentation (elasticsearch-dsl.readthedocs.io)
安装依赖 requirement
# Elasticsearch 8.x
elasticsearch-dsl>=8.0.0,<9.0.0
创建连接 connections
from elasticsearch_dsl import connections
connections.create_connection(
hosts=['https://IP:9200'],
basic_auth=('elastic','n8LbPoUIC1xGDNL0rTrw'),
alias='es',
verify_certs=False,
ssl_show_warn=False
)
# 获取连接
es_conn = connections.get_connection('es')
创建索引,插入数据 Persistence
- 方式一:先创建index, 然后插入dict
创建一个配置文件,命名es_index.py
# Index definition (settings + mappings) imported by the index-creation code
# via `from es_index import index`.
index = {
    "settings": {
        "analysis": {
            "analyzer": {
                # Default analyzer for the index: the IK plugin's ik_max_word.
                "default": {
                    "type": "ik_max_word"
                }
            }
        },
        "index": {
            "number_of_shards": "1",
            "number_of_replicas": "0"
        }
    },
    "mappings": {
        "properties": {
            "id": {"type": "long"},
            "name": {"type": "text"},
            "summary": {"type": "text"},
            # solution_class also gets a `keyword` sub-field for exact (term) matching.
            "solution_class": {"type": "text", "fields": {"keyword": {"ignore_above": 256, "type": "keyword"}}}
        }
    }
}
创建index
from es_index import index

es_conn = connections.get_connection("es")

# Create the index once; settings and mappings come from es_index.py.
# The 8.x client deprecates the catch-all `body=` argument, so the two
# sections are passed as explicit keyword arguments instead.
if not es_conn.indices.exists(index='product'):
    es_conn.indices.create(
        index='product',
        settings=index['settings'],
        mappings=index['mappings'],
    )


def save_doc(ss, doc_id):
    """Index one document into `product` under the given id.

    `ss` is any object exposing `to_dict()`; the resulting dict is sent as the
    document source. Returns the `result` field of the response
    ("created" or "updated").
    """
    res = es_conn.index(index='product', document=ss.to_dict(), id=doc_id)
    return res['result']
- 方式二
创建一个继承Document的类
from datetime import datetime
from elasticsearch_dsl import Document, Date, Integer, Keyword, Text, connections
class Product(Document):
    """Document mapping for the `product` index.

    Text fields are analyzed with ik_max_word at index time and ik_smart at
    search time; `name` additionally carries a `keyword` sub-field.
    """

    id = Integer()
    name = Text(analyzer='ik_max_word', search_analyzer='ik_smart', fields={"keyword": Keyword()})
    summary = Text(analyzer='ik_max_word', search_analyzer='ik_smart')
    solution_class = Text(analyzer='ik_max_word', search_analyzer='ik_smart')
    tags = Keyword()
    published_from = Date()

    class Index:
        name = 'product'
        settings = {  # optional index settings
            "number_of_shards": 1,
            "number_of_replicas": 0,
        }

    def is_published(self):
        """Return True when `published_from` is set and lies in the past."""
        published = self.published_from
        # Guard against an unset date: comparing datetime with None raises TypeError.
        return published is not None and datetime.now() > published
创建索引并插入数据
# init mapping
Product.init(using='es')
product = Product(meta={'id':42}, id=1042,name='bowl', summary='bowl with a beautiful and cute fox in it',solution_class='home',tags='Linabell')
product.published_from = datetime.now()
# insert data
product.save(using='es')
# 查询
product = Product.get(id=42, using='es')
Search-term|match|multi_match
from elasticsearch_dsl import Search, Q, connections
from elasticsearch_dsl.query import MultiMatch
#match
q = Q({"match": {column_name: {"query": value, "analyzer": "ik_smart"}}})
#term
q = Q({"term": {column_name + ".keyword": value}})
#multi-match
q = MultiMatch(query=value, fields=column_names, analyzer='ik_smart')
s = Search(using="es", index=index_name).query(q)
res = s[start_index:end_index].execute() #分页功能,默认返回前10条
results = [ABC(**data.to_dict()) for data in res] #ABC替换成需要转换的pojo类
size = res.hits.total.value
return results, size
3.2 集成Spring
由于项目的Spring Boot版本是2.X,所以需要单独引入8.14版本的es Java客户端
添加依赖
<dependency>
<groupId>co.elastic.clients</groupId>
<artifactId>elasticsearch-java</artifactId>
<version>8.14.1</version>
</dependency>
<dependency>
<groupId>jakarta.json</groupId>
<artifactId>jakarta.json-api</artifactId>
<version>2.1.3</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>2.17.0</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
<version>2.17.0</version>
</dependency>
配置 ElasticsearchClient bean
@Bean
public ElasticsearchClient elasticsearchClient(){
# openssl x509 -fingerprint -sha256 -noout -in http_ca.crt
SSLContext sslContext = TransportUtils
.sslContextFromCaFingerprint(fingerPrint);
BasicCredentialsProvider credsProv = new BasicCredentialsProvider();
credsProv.setCredentials(
AuthScope.ANY, new UsernamePasswordCredentials(username, password)
);
RestClient client = RestClient.builder(HttpHost.create(host))
.setHttpClientConfigCallback(hc -> hc
.setSSLContext(sslContext)
.setSSLHostnameVerifier(NoopHostnameVerifier.INSTANCE)
.setDefaultCredentialsProvider(credsProv)
)
.build();
ElasticsearchTransport transport = new RestClientTransport(client,new JacksonJsonpMapper());
return new ElasticsearchClient(transport);
}
创建索引、删除索引
@Autowired
ElasticsearchClient client;
#创建索引 path 是索引文件的位置
public boolean createIndex(String index, String path, boolean force) throws IOException {
boolean isExited = exitsIndex(index);
if (!isExited) {
InputStream input = this.getClass().getClassLoader().getResourceAsStream(path);
CreateIndexRequest req = CreateIndexRequest.of(b -> b
.index(index)
.withJson(input)
);
boolean created = client.indices().create(req).acknowledged();
return created;
}
return false;
}
#删除索引
public boolean deleteIndex(String index) throws IOException {
DeleteIndexRequest request = DeleteIndexRequest.of(b -> b.index(index));
DeleteIndexResponse response = client.indices().delete(request);
boolean deleted = response.acknowledged();
logger.info("Elasticsearch delete index {} : {}" ,index, deleted);
return deleted;
}
插入数据
/**
 * Indexes a single product into {@code INDEX}, using the product's id as the document id.
 *
 * @param prod the product to store
 * @return the JSON value of the operation result reported by Elasticsearch
 * @throws IOException if the request fails
 */
public String saveDoc(Product prod) throws IOException {
    IndexResponse response = client.index(request -> request
            .document(prod)
            .id(prod.getId())
            .index(INDEX));
    return response.result().jsonValue();
}
#批量插入
public void bulkSave(List<Product> prodList) throws IOException {
BulkRequest.Builder builder = new BulkRequest.Builder();
for (Product prod : prodList){
builder.operations(op -> op
.index(idx -> idx
.index(INDEX)
.id(prod.getId())
.document(prod)
)
);
}
BulkResponse response = client.bulk(builder.refresh(Refresh.WaitFor).build());
}
查询 multi_match
/**
 * Runs a multi_match query for {@code queryText} over the given fields and returns
 * the sources of the first 30 hits.
 *
 * @param fieldlist fields to match against
 * @param queryText text to search for
 * @return the hit sources, in the order returned by Elasticsearch
 * @throws IOException if the request fails
 */
public List<Product> search(List<String> fieldlist, String queryText) throws IOException {
    Query multiMatch = MultiMatchQuery.of(mm -> mm
            .query(queryText)
            .fields(fieldlist)
    )._toQuery();

    SearchResponse<Product> response = client.search(
            req -> req.index(INDEX).query(multiMatch).from(0).size(30),
            Product.class);

    List<Product> products = new ArrayList<>();
    response.hits().hits().forEach(hit -> products.add(hit.source()));
    return products;
}
带权重查询
对于有推广的产品,增加一列advertise的权重,值设为2.0,普通商品设置为1.0
带权重查询
GET product/_search
{
"query": {
"function_score": {
"query": {
"multi_match": {
"fields": ["function","summary"],
"query": "数智"
}
},
"field_value_factor": {
"field":"advertise"
}
}
}
}
/**
 * Runs a multi_match query wrapped in a function_score query whose score function is a
 * field_value_factor on {@code weightColumn}, and returns the sources of the first 30 hits.
 *
 * @param fieldlist    fields to match against
 * @param queryText    text to search for
 * @param weightColumn numeric field used as the field_value_factor
 * @return the hit sources, in the order returned by Elasticsearch
 * @throws IOException if the request fails
 */
public List<Product> searchWithAdvertise(List<String> fieldlist, String queryText, String weightColumn) throws IOException {
    // Score function: factor taken from the weight column.
    FieldValueFactorScoreFunction weightFactor = FieldValueFactorScoreFunction.of(fvf -> fvf
            .field(weightColumn)
    );
    FunctionScore scoreFunction = FunctionScore.of(fn -> fn
            .fieldValueFactor(weightFactor)
    );

    // Base text query.
    Query textQuery = MultiMatchQuery.of(mm -> mm
            .query(queryText)
            .fields(fieldlist)
    )._toQuery();

    FunctionScoreQuery scoredQuery = FunctionScoreQuery.of(fsq -> fsq
            .query(textQuery)
            .functions(scoreFunction)
    );
    Query finalQuery = Query.of(q -> q.functionScore(scoredQuery));

    SearchResponse<Product> response = client.search(
            req -> req.index(INDEX).query(finalQuery).from(0).size(30),
            Product.class);

    List<Product> products = new ArrayList<>();
    response.hits().hits().forEach(hit -> products.add(hit.source()));
    return products;
}
更多推荐
所有评论(0)