Commit 5064adf2 authored by Евгений Третьяков
Browse files

Add new file

parent d4bf6b02
import json
import os
from elasticsearch import Elasticsearch
def get_grants(file_path):
    """Read the grants list file and map record ids to their grant numbers.

    Every non-blank line is split on whitespace: the first field is parsed
    as the integer record id and the second field is split on commas into
    grant numbers. Empty items (for example from a trailing comma) are
    dropped.

    Args:
        file_path: Path to the UTF-8 encoded grants list file.

    Returns:
        A dict mapping ``int`` record id to a list of grant-number strings.
        A line that has a record id but no second field maps to ``[]``.

    Raises:
        ValueError: If the first field of a line is not an integer.
        OSError: If the file cannot be opened.
    """
    grants = {}
    with open(file_path, "r", encoding="utf-8") as file:
        for line in file:
            fields = line.split()
            # Skip blank and whitespace-only lines instead of failing on them.
            if not fields:
                continue
            # A record id without a grants column means "no grants".
            numbers = fields[1].split(",") if len(fields) > 1 else []
            grants[int(fields[0])] = [number for number in numbers if number]
    return grants
def get_addresses(
    affiliation_id,
    affiliations_dir="/mnt/vdb/inspireBetaDownload/russian-affiliations-query/affiliations_jsons",
):
    """Load the address list stored in an affiliation's JSON file.

    Args:
        affiliation_id: Identifier used as the JSON file name
            (``<affiliation_id>.json``).
        affiliations_dir: Directory that holds the affiliation JSON files.
            NOTE(review): the tail of the original hard-coded path was lost
            from this file; the ``affiliations_jsons`` sub-directory in the
            default is an assumption modelled on ``records_jsons`` -- confirm
            against the real data layout.

    Returns:
        The value stored under ``["metadata"]["addresses"]`` in the file.

    Raises:
        OSError: If the affiliation file cannot be opened.
        KeyError: If the file has no ``metadata`` / ``addresses`` entry.
        ValueError: If the file is not valid JSON.
    """
    affiliation_path = os.path.join(affiliations_dir, f"{affiliation_id}.json")
    with open(affiliation_path, "r", encoding="utf-8") as file:
        affiliation = json.load(file)
    return affiliation["metadata"]["addresses"]
def get_record(recid):
    """Load a record and attach geo-located addresses to its affiliations.

    For every affiliation of every author, the affiliation id is taken from
    the last path segment of ``record["$ref"]``, its addresses are loaded
    with ``get_addresses``, each address gets a ``location`` dict built from
    its ``longitude`` / ``latitude``, and the address list is stored under
    ``record["addresses"]`` of that affiliation.

    Args:
        recid: Record identifier; the record is read from
            ``records_jsons/<recid>.json``.

    Returns:
        The parsed record dict, enriched in place.

    Raises:
        OSError: If the record file (or an affiliation file) cannot be opened.
    """
    record_path = f"/mnt/vdb/inspireBetaDownload/russian-affiliations-query/records_jsons/{recid}.json"
    with open(record_path, "r", encoding="utf-8") as file:
        record = json.load(file)

    for author in record["metadata"].get("authors", []):
        for affiliation in author.get("affiliations", []):
            # NOTE(review): the original ``try:`` line and ``except`` bodies
            # were lost from this file. Skipping only the offending
            # affiliation is an assumption -- confirm the intended scope.
            try:
                # Index with ["$ref"] so a missing reference raises KeyError
                # and is skipped, rather than failing on ``None.split``.
                affiliation_id = int(affiliation["record"]["$ref"].split("/")[-1])
                addresses = get_addresses(affiliation_id)
                for address in addresses:
                    address["location"] = {
                        "lon": address["longitude"],
                        "lat": address["latitude"],
                    }
                affiliation["record"]["addresses"] = addresses
            except (KeyError, ValueError):
                # Missing keys/coordinates or a non-numeric id: leave this
                # affiliation without addresses.
                continue
    return record
def load_data(folder):
    """Enrich every record found in *folder* and index it in Elasticsearch.

    Record ids are taken from the file names in *folder* (the part before
    the first dot, parsed as an integer). The index
    ``russian_affiliations_with_coords`` is created with a ``geo_point``
    mapping for address locations; then each record is loaded with
    ``get_record``, given its grant numbers under ``"grants"``, written to
    ``records_with_coordinates/<recid>.json`` and indexed.

    Args:
        folder: Directory whose file names are ``<recid>.<ext>``.

    Raises:
        ValueError: If a file name in *folder* does not start with an integer.
    """
    index_name = "russian_affiliations_with_coords"
    recids = [int(file_name.split(".")[0]) for file_name in os.listdir(folder)]
    total = len(recids)

    es = Elasticsearch()
    mapping = {
        "mappings": {
            "properties": {
                "metadata.authors.affiliations.record.addresses.location": {
                    "type": "geo_point",
                },
            },
        },
    }
    es.indices.create(index=index_name, body=mapping)

    grants = get_grants("source/grants_numbers/grants_list.txt")
    for i, recid in enumerate(recids, 1):
        record = get_record(recid)
        record["grants"] = grants.get(recid, [])
        file_path = f"/mnt/vdb/inspireBetaDownload/russian-affiliations-query/records_with_coordinates/{recid}.json"
        with open(file_path, "w", encoding="utf-8") as file:
            # NOTE(review): the body of this ``with`` block was lost from the
            # file; dumping the enriched record as JSON is an assumption --
            # confirm the exact serialization options.
            json.dump(record, file)
        print(f"Uploading {i} of {len(recids)}" if total != len(recids) else f"Uploading {i} of {total}")
        es.index(index=index_name, body=record)
if __name__ == '__main__':
    # NOTE(review): the statement under this guard was lost from the file.
    # load_data() derives record ids from file names and get_record() reads
    # them back from records_jsons/, so that directory is the presumed
    # argument -- confirm before running.
    load_data("/mnt/vdb/inspireBetaDownload/russian-affiliations-query/records_jsons")
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment