Commit 7b12d4fd authored by Евгений Третьяков
Browse files

Add new file

parent e1d5786e
from elasticsearch import Elasticsearch
import os
import json
from elasticsearch.helpers import bulk
class BulkUploader:
    """Upload a folder of JSON documents into a single Elasticsearch index.

    Constructing an uploader connects to the cluster and creates the target
    index when it does not exist yet.
    """

    def __init__(self, index_name, hosts=None, login=None, password=None, debug=True):
        """Connect to Elasticsearch and make sure ``index_name`` exists.

        Args:
            index_name: Name of the index the uploaded documents go to.
            hosts: Host list passed to ``Elasticsearch``; defaults to a
                single node at 127.0.0.1:9200.
            login: User name for HTTP auth; only used together with
                ``password``.
            password: Password for HTTP auth; only used together with
                ``login``.
            debug: When true, print the response of the index creation.
        """
        self.debug = debug
        if hosts is None:
            hosts = [{"host": "127.0.0.1", "port": 9200}]
        if login and password:
            self.es_connection = Elasticsearch(hosts=hosts, http_auth=(login, password))
        else:
            self.es_connection = Elasticsearch(hosts=hosts)
        self.index_name = index_name
        # Ask the cluster about this one name instead of fetching the alias
        # map of every index; this also recognises a name that already
        # exists as an alias, for which a create call would be rejected.
        if not self.es_connection.indices.exists(index=self.index_name):
            response = self.es_connection.indices.create(index=self.index_name)
            if self.debug:
                print(response)

    def generate_data(self, folder):
        """Yield one bulk action per ``*.json`` file found directly in ``folder``.

        Files are visited in sorted name order so repeated runs upload in the
        same sequence. Entries that are not regular files, or whose name does
        not end in ``.json``, are skipped.

        Args:
            folder: Directory containing the JSON documents.

        Yields:
            Dicts with ``_index`` (the target index) and ``_source`` (the
            parsed content of one file).

        Raises:
            OSError: If ``folder`` or one of its files cannot be read.
            json.JSONDecodeError: If a ``.json`` file is not valid JSON.
        """
        for file_name in sorted(os.listdir(folder)):
            # Require a real ".json" suffix: a file named just "json" has no
            # extension and must not be treated as a document.
            if not file_name.endswith(".json"):
                continue
            file_path = os.path.join(folder, file_name)
            if not os.path.isfile(file_path):
                continue
            with open(file_path, "r", encoding="utf-8") as file:
                source = json.load(file)
            yield {
                "_index": self.index_name,
                "_source": source,
            }

    def upload(self, folder):
        """Bulk-index every JSON document in ``folder``.

        Args:
            folder: Directory containing the JSON documents.

        Returns:
            Whatever ``elasticsearch.helpers.bulk`` returns for the upload.
        """
        return bulk(self.es_connection, self.generate_data(folder))
if __name__ == "__main__":
    # (target index, folder holding its JSON documents), uploaded in this order.
    upload_jobs = (
        (
            "inspire-beta-records",
            "/mnt/vdb/inspireBetaDownload/russian-affiliations-query/records_jsons",
        ),
        (
            "inspire-beta-affiliations",
            "/mnt/vdb/inspireBetaDownload/russian-affiliations-query/affiliations_jsons",
        ),
    )
    for target_index, source_folder in upload_jobs:
        # A fresh uploader per index: the constructor creates the index if needed.
        uploader = BulkUploader(target_index)
        print(uploader.upload(source_folder))
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment