diff --git b/credit/EsCountDay.py a/credit/EsCountDay.py
new file mode 100644
index 0000000..48d84dc
--- /dev/null
+++ a/credit/EsCountDay.py
@@ -0,0 +1,37 @@
+import requests
+import sys
+import json
+
+
+dt=sys.argv[1]
+index=sys.argv[2]
+from requests.structures import CaseInsensitiveDict
+
+url = "http://172.16.25.23:9200/"+index+"/doc/_search"
+data = {
+    "query": {
+        "bool": {
+            "must": [
+                {
+                    "range": {
+                        "gxsj.keyword": {
+                            "gte": "2021-07-01 00:00:00",
+                            "lte": "2021-07-01 00:59:59"
+                        }
+                    }
+                }
+            ]
+        }
+    }
+}
+data['query']['bool']['must'][0]['range']['gxsj.keyword']['gte'] = dt + " 00:00:00"
+data['query']['bool']['must'][0]['range']['gxsj.keyword']['lte'] = dt + " 23:59:59"
+headers = CaseInsensitiveDict()
+headers["Content-Type"] = "application/json"
+
+
+resp = requests.get(url, headers=headers,data = json.dumps(data))
+
+json_data = json.loads(resp.text)
+all_data = json_data["hits"]
+print('%s %d'%(dt,all_data["total"]))
diff --git b/credit/EsCountMonth.py a/credit/EsCountMonth.py
new file mode 100644
index 0000000..1484149
--- /dev/null
+++ a/credit/EsCountMonth.py
@@ -0,0 +1,35 @@
+import requests
+import sys
+import json
+
+dt = sys.argv[1]
+index = sys.argv[2]
+from requests.structures import CaseInsensitiveDict
+
+url = "http://172.16.25.23:9200/" + index + "/doc/_search"
+data = {
+    "query": {
+        "bool": {
+            "filter": [
+                {
+                    "range": {
+                        "gxsj.keyword": {
+                            "gte": "2021-04-01 00:00:00",
+                            "lte": "2021-04-01 00:59:59"
+                        }
+                    }
+                }
+            ]
+        }
+    }
+}
+data['query']['bool']['filter'][0]['range']['gxsj.keyword']['gte'] = dt + "-01 00:00:00"
+data['query']['bool']['filter'][0]['range']['gxsj.keyword']['lte'] = dt + "-31 23:59:59"
+headers = CaseInsensitiveDict()
+headers["Content-Type"] = "application/json"
+
+resp = requests.get(url, headers=headers, data=json.dumps(data))
+
+json_data = json.loads(resp.text)
+all_data = json_data["hits"]
+print('%s %d\n' % (dt, all_data["total"]))
diff --git b/credit/hour_python.py a/credit/hour_python.py
new file mode 100644
index 0000000..348090e
--- /dev/null
+++ a/credit/hour_python.py
@@ -0,0 +1,38 @@
+import requests
+import sys
+import json
+
+
+dt=sys.argv[1]
+index=sys.argv[2]
+from requests.structures import CaseInsensitiveDict
+
+url = "http://172.16.25.23:9200/"+index+"/doc/_search"
+data = {
+    "size":10000,
+    "query": {
+        "bool": {
+            "filter": [
+                {
+                    "range": {
+                        "gxsj.keyword": {
+                            "gte": "2021-04-01 00:00:00",
+                            "lte": "2021-04-01 00:59:59"
+                        }
+                    }
+                }
+            ]
+        }
+    }
+}
+data['query']['bool']['filter'][0]['range']['gxsj.keyword']['gte'] = dt + ":00:00"
+data['query']['bool']['filter'][0]['range']['gxsj.keyword']['lte'] = dt + ":59:59"
+headers = CaseInsensitiveDict()
+headers["Content-Type"] = "application/json"
+
+
+resp = requests.get(url, headers=headers,data = json.dumps(data))
+
+json_data = json.loads(resp.text)
+all_data = json_data["hits"]
+print('%s %d'%(dt+":00:00",all_data["total"]))
diff --git b/credit/schedure_2.sh a/credit/schedure_2.sh
new file mode 100644
index 0000000..eee081a
--- /dev/null
+++ a/credit/schedure_2.sh
@@ -0,0 +1,15 @@
+sdt=`cat /home/Wangkai/$2"_date.txt"|awk -F, '{print $1}'`
+edt=`cat /home/Wangkai/$2"_date.txt"|awk -F, '{print $2}'`
+status=$sdt"->"$edt":"$2"数据更新完成!"
+if [[ `grep -c "$status" /opt/data/lfy/update_hbase/update_hbase.log` -eq '1' ]];
+then
+    sdta=`date -d "1 days $sdt" +%Y-%m-%d`
+    edta=`date -d "1 days $edt" +%Y-%m-%d`
+    echo $sdta","$edta>/home/Wangkai/$2"_date.txt"
+    url="http://172.16.25.23:8090/surORvio/"$1"?start="$sdta"&end="$edta
+    curl -X GET "$url"
+    echo $url
+    #echo $status
+else
+    echo "不存在"
+fi
diff --git "b/credit/\350\277\235\346\263\225\346\225\260\346\215\256\350\241\245\346\225\260\346\215\256\346\226\207\346\241\243.txt" "a/credit/\350\277\235\346\263\225\346\225\260\346\215\256\350\241\245\346\225\260\346\215\256\346\226\207\346\241\243.txt"
new file mode 100644
index 0000000..edceeaf
--- /dev/null
+++ "a/credit/\350\277\235\346\263\225\346\225\260\346\215\256\350\241\245\346\225\260\346\215\256\346\226\207\346\241\243.txt"
@@ -0,0 +1,30 @@
+违法表补数据的相关文档
+一、违法的表涉及到的是两张表,根据以往的程序代码整理出来的源端和目标端的血缘关系如下:
+1、surveil
+目标端es端:surveil索引
+源端oracle:zckj.vm_vio_surveil,zckj.vio_surveil,zckj.vm_vio_surveil_his,zckj.vm_vio_surveil_del
+目前补数据对应的源端表有zckj.vm_vio_surveil,zckj.vio_surveil,zckj.vm_vio_surveil_his
+2、violation
+目标端es端:violation索引
+源端oracle:zckj.vm_vio_violation,zckj.vio_violation,zckj.vm_vio_violation_his,zckj.vm_vio_violation_del
+目前补数据涉及到的表:zckj.vm_vio_violation,zckj.vio_violation,zckj.vm_vio_violation_his
+
+二、补数据涉及到的接口
+接口程序是部署在172.16.25.23物理机上
+程序所在的目录:/opt/data/lfy/update_hbase/update_hbase-0.0.1-SNAPSHOT.jar;其包含了斑马信用到hbase和到es常用的表同步的程序;违法数据补数据的接口是按照时间的范围批量补数据的,区间采用的是前闭后开的区间模式(即>=start&<end)由于跑的时间范围大的话会导致es集群崩溃挂掉,目前是一天一天的补数据
+1、surveil表的补数据的接口: curl -X GET "http://172.16.25.23:8090/surORvio/surveilDirectUpdateByDate?start=&end="
+2、violation表的补数据的接口:curl -X GET "http://172.16.25.23:8090/surORvio/violationDirectUpdateByDate?start=&end="
+三、数据是手动的同步,同步的频率是一天,为了提高效率,写了一个自动化的脚本,然后定时每五分钟跑一次
+自动化脚本目录
+调度脚本所在的目录:/home/Wangkai/schedure_all.sh
+自动化脚本所在的目录:/home/Wangkai/schedure_2.sh
+
+四、es数据总量统计脚本
+按天统计脚本目录:/home/Wangkai/EsCountDay.py
+执行脚本的命令:for i in `seq 0 30`;do dt=`date -d "$i days 2021-01-01" +%Y-%m-%d`;python EsCountDay.py $dt violation;done
+
+按月统计的脚本:/home/Wangkai/EsCountMonth.py
+执行脚本的命令:for i in `seq 0 11`;do dt=`date -d "$i month 2021-01-01" +%Y-%m`;python EsCountMonth.py $dt violation;done
+
+按小时统计:/home/Wangkai/hour_python.py
+执行脚本的命令:for i in `seq 0 23`;do dt=`date -d "$i hours 2021-04-01 00" +'%Y-%m-%d %H'`;python hour_python.py "${dt}";done