Pattern Tokenizer(模式分词器)
输出示例
POST _analyze
{
"tokenizer": "pattern",
"text": "The foo_bar_size's default is 5."
}
[ The, foo_bar_size, s, default, is, 5 ]
配置
配置示例
Last updated
POST _analyze
{
"tokenizer": "pattern",
"text": "The foo_bar_size's default is 5."
}
[ The, foo_bar_size, s, default, is, 5 ]
Last updated
PUT my_index
{
"settings": {
"analysis": {
"analyzer": {
"my_analyzer": {
"tokenizer": "my_tokenizer"
}
},
"tokenizer": {
"my_tokenizer": {
"type": "pattern",
"pattern": ","
}
}
}
}
}
POST my_index/_analyze
{
"analyzer": "my_analyzer",
"text": "comma,separated,values"
}
[ comma, separated, values ]
正则表达式本身：
"((?:\\"|[^"]|\\")*)"
在 JSON 中指定该模式时，需要对 " 和 \ 字符进行转义：
\"((?:\\\\\"|[^\"]|\\\\\")+)\"
PUT my_index
{
"settings": {
"analysis": {
"analyzer": {
"my_analyzer": {
"tokenizer": "my_tokenizer"
}
},
"tokenizer": {
"my_tokenizer": {
"type": "pattern",
"pattern": "\"((?:\\\\\"|[^\"]|\\\\\")+)\"",
"group": 1
}
}
}
}
}
POST my_index/_analyze
{
"analyzer": "my_analyzer",
"text": "\"value\", \"value with embedded \\\" quote\""
}
[ value, value with embedded \" quote ]