From 85bcdc475ae382b33bb9da85cdd4a6c2ce108f13 Mon Sep 17 00:00:00 2001
From: Denis Zheleztsov
Date: Sun, 11 Nov 2018 20:00:09 +0300
Subject: [PATCH] [utils/reassign_topics]: Get docs method

---
Review notes (this section is ignored by git am):

* getDocs used to format a literal 0 into the "from" field of the search
  query, so every call returned the first page no matter which offset the
  loop in main.go passed in. It now uses its from argument.
* A doc comment was added to getDocs; the elastic.go hunk header and the
  diffstat below were adjusted for the three extra lines.
* Elasticsearch limits from+size paging via index.max_result_window, so
  very large indices will need a different paging mechanism.
* Line layout and indentation were reconstructed from a whitespace-collapsed
  copy of this patch, and the post-image blob id on the elastic.go index
  line predates this amendment.

 create_index.sh                  | 31 ------------
 mapping.json                     | 84 ++++++++++++++++++++++++++++++++
 utils/reassign_topics/elastic.go | 42 +++++++++++++--
 utils/reassign_topics/main.go    |  5 ++
 4 files changed, 128 insertions(+), 34 deletions(-)
 delete mode 100755 create_index.sh
 create mode 100644 mapping.json

diff --git a/create_index.sh b/create_index.sh
deleted file mode 100755
index 08eacb8..0000000
--- a/create_index.sh
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/bin/sh
-curl -XDELETE 'http://localhost:9200/iinetwork' && echo
-curl -XPUT 'http://localhost:9200/iinetwork' -d '{
-    "settings": {
-        "analysis": {
-            "analyzer": {
-                "my_analyzer": {
-                    "type": "custom",
-                    "tokenizer": "standard",
-                    "filter": ["lowercase", "russian_morphology", "english_morphology", "my_stopwords"]
-                }
-            },
-            "filter": {
-                "my_stopwords": {
-                    "type": "stop",
-                    "stopwords": "а,без,более,бы,был,была,были,было,быть,в,вам,вас,весь,во,вот,все,всего,всех,вы,где,да,даже,для,до,его,ее,если,есть,еще,же,за,здесь,и,из,или,им,их,к,как,ко,когда,кто,ли,либо,мне,может,мы,на,надо,наш,не,него,нее,нет,ни,них,но,ну,о,об,однако,он,она,они,оно,от,очень,по,под,при,с,со,так,также,такой,там,те,тем,то,того,тоже,той,только,том,ты,у,уже,хотя,чего,чей,чем,что,чтобы,чье,чья,эта,эти,это,я,a,an,and,are,as,at,be,but,by,for,if,in,into,is,it,no,not,of,on,or,such,that,the,their,then,there,these,they,this,to,was,will,with"
-                }
-            }
-        }
-    }
-}' && echo
-curl -XPUT 'http://localhost:9200/iinetwork/post/_mapping' -d '{
-    "post": {
-        "_all" : {"analyzer" : "russian_morphology"},
-        "properties" : {
-            "post" : { "type" : "string", "analyzer" : "russian_morphology" }
-        }
-    }
-}' && echo
-
-curl -XPOST 'http://localhost:9200/iinetwork/_refresh' && echo
diff --git a/mapping.json b/mapping.json
new file mode 100644
index 0000000..53d788c
--- /dev/null
+++ b/mapping.json
@@ -0,0 +1,84 @@
+{
+  "mappings": {
+    "post": {
+      "properties": {
+        "address": {
+          "type": "keyword"
+        },
+        "author": {
+          "type": "keyword"
+        },
+        "date": {
+          "type": "date",
+          "format": "epoch_second"
+        },
+        "echo": {
+          "type": "keyword"
+
+        },
+        "message": {
+          "type": "text",
+          "fields": {
+            "russian": {
+              "type": "text",
+              "analyzer": "russian"
+            }
+          },
+          "analyzer": "standard"
+        },
+        "misplaced": {
+          "type": "text",
+          "fields": {
+            "keyword": {
+              "type": "keyword",
+              "ignore_above": 256
+
+            }
+
+          }
+
+        },
+        "msgid": {
+          "type": "text",
+          "fields": {
+            "keyword": {
+              "type": "keyword",
+              "ignore_above": 256
+
+            }
+
+          }
+
+        },
+        "repto": {
+          "type": "keyword"
+        },
+        "subg": {
+          "type": "text",
+          "fields": {
+            "russian": {
+              "type": "text",
+              "analyzer": "russian"
+
+            }
+
+          },
+          "analyzer": "standard"
+
+        },
+        "tags": {
+          "type": "keyword"
+
+        },
+        "to": {
+          "type": "keyword"
+        },
+        "topicid": {
+          "type": "keyword"
+        }
+
+      }
+
+    }
+  }
+}
diff --git a/utils/reassign_topics/elastic.go b/utils/reassign_topics/elastic.go
index 21bfa59..2111a6a 100644
--- a/utils/reassign_topics/elastic.go
+++ b/utils/reassign_topics/elastic.go
@@ -5,6 +5,7 @@ import "fmt"
 import "net/http"
 import log "github.com/Sirupsen/logrus"
 import "encoding/json"
+import "strings"
 
 type Conf struct {
 	ES i2es.ESConf
@@ -15,7 +16,7 @@ type Stats struct {
 	Indices IndexStats `json:"indices"`
 }
 
-type IndexStats map[string]interface{}
+type IndexStats map[string]map[string]interface{}
 
 // "indices": {
 // "idec": {
@@ -45,7 +46,42 @@ func (c *Conf) getDocsCount() int64 {
 
 	var stats Stats
 	err = json.NewDecoder(resp.Body).Decode(&stats)
-	log.Infof("%+v", stats)
+	if err != nil {
+		log.Error(err)
+		return -1
+	}
 
-	return -1
+	return int64(stats.Indices[c.ES.Index]["primaries"].(map[string]interface{})["docs"].(map[string]interface{})["count"].(float64))
+}
+
+// getDocs POSTs a _search request for at most c.Step documents, sorted by
+// date ascending and starting at offset from. On any error the error is
+// logged and whatever res holds at that point is returned.
+func (c *Conf) getDocs(from int) i2es.ESRes {
+	var res i2es.ESRes
+	reqURL := fmt.Sprintf("%s/%s/_search", c.ES.Host, c.ES.Index)
+	query := fmt.Sprintf(`{"sort": {"date": {"order": "asc"}}, "size": %d, "from": %d}`, c.Step, from)
+	req, err := http.NewRequest("POST", reqURL, strings.NewReader(query))
+	if err != nil {
+		log.Error(err)
+		return res
+	}
+	req.Header.Add("Content-Type", "application/json")
+
+	client := &http.Client{}
+	resp, err := client.Do(req)
+	if err != nil {
+		log.Error(err)
+		return res
+	}
+
+	defer resp.Body.Close()
+
+	err = json.NewDecoder(resp.Body).Decode(&res)
+	if err != nil {
+		log.Error(err)
+		return res
+	}
+
+	return res
 }
diff --git a/utils/reassign_topics/main.go b/utils/reassign_topics/main.go
index 875a74b..559ce76 100644
--- a/utils/reassign_topics/main.go
+++ b/utils/reassign_topics/main.go
@@ -33,4 +33,9 @@ func main() {
 
 	count := conf.getDocsCount()
 	fmt.Println(count)
+
+	for i := 0; int64(i) < count; i += step {
+		docs := conf.getDocs(i)
+		fmt.Printf("%d\n", len(docs.Hits.Hits))
+	}
 }