Commit 79ff7d5d authored by Евгений Третьяков
Browse files

Add new file

parent 82c1c9d5
import json
import os
import concurrent.futures
import requests
def load_affiliations(folder_path):
    """Collect the unique affiliation record URLs referenced by JSON records.

    Every regular file in ``folder_path`` is parsed as JSON. For each author
    listed under ``metadata -> authors`` the ``record["$ref"]`` value of every
    entry in ``affiliations`` is collected once.

    NOTE(review): the original text of this function was damaged (the
    ``json.loads`` call was unterminated, the ``try:`` belonging to
    ``except KeyError`` and the statement storing a new URL were missing).
    The control flow below is a reconstruction; confirm it against the
    upstream commit, in particular the message printed for a missing
    ``record``/``$ref`` key, which was not visible.

    Args:
        folder_path: Directory containing the JSON record files.

    Returns:
        List of distinct affiliation URLs in first-seen order. Files are
        visited in sorted name order so the result is deterministic.
    """
    affiliations_urls = []
    # A set gives O(1) duplicate checks; the list preserves first-seen order.
    seen_urls = set()

    for file_name in sorted(os.listdir(folder_path)):
        json_file_path = os.path.join(folder_path, file_name)
        # Sub-directories cannot be opened as records; skip them.
        if not os.path.isfile(json_file_path):
            continue

        with open(json_file_path, "r", encoding="utf-8") as file:
            json_record = json.load(file)

        metadata = json_record.get("metadata")
        if not metadata:
            print(f"Metadata is empty for {json_file_path}")
            continue

        authors = metadata.get("authors")
        if not authors:
            print(f"Authors is empty for {json_file_path}")
            continue

        for author in authors:
            # Authors without affiliations are skipped silently.
            for affiliation in author.get("affiliations") or []:
                try:
                    affiliation_url = affiliation["record"]["$ref"]
                except (KeyError, TypeError):
                    print(f"Affiliation record is missing for {json_file_path}")
                    continue
                if affiliation_url not in seen_urls:
                    seen_urls.add(affiliation_url)
                    affiliations_urls.append(affiliation_url)

    return affiliations_urls
def download_json(json_url):
    """Download one record and save it as ``output/affiliations_jsons/<recid>.json``.

    ``recid`` is the last path segment of ``json_url``.

    NOTE(review): the body of the original ``with open(...)`` statement was
    lost; writing the raw response text is a reconstruction and should be
    confirmed against the upstream commit.

    Args:
        json_url: URL of the JSON record to fetch.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-success HTTP status (so error pages are never saved as data).
    """
    # requests has no default timeout; without one a stalled server would
    # block this worker forever.
    response = requests.get(json_url, timeout=30)
    response.raise_for_status()

    # Tolerate a trailing slash so the record id is never empty.
    recid = json_url.rstrip("/").split("/")[-1]
    file_path = f"output/affiliations_jsons/{recid}.json"
    os.makedirs(os.path.dirname(file_path), exist_ok=True)
    with open(file_path, "w", encoding="utf-8") as file:
        file.write(response.text)
def download_jsons(folder_path):
    """Download every affiliation record referenced by the records in a folder.

    The URLs come from ``load_affiliations(folder_path)`` and each one is
    passed to ``download_json`` on a pool of up to 10 workers.

    Args:
        folder_path: Directory containing the source JSON record files.
    """
    affiliations_urls = load_affiliations(folder_path)
    # The work is network-bound, so threads overlap the waits without the
    # start-up and pickling cost of separate processes.
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        future_to_url = {
            executor.submit(download_json, url): url
            for url in affiliations_urls
        }
        # Inspect every future; otherwise a failed download goes unnoticed.
        for future in concurrent.futures.as_completed(future_to_url):
            error = future.exception()
            if error is not None:
                print(f"Failed to download {future_to_url[future]}: {error}")
if __name__ == '__main__':
    # NOTE(review): the original body of this guard was lost, leaving an
    # empty `if`. The records folder is taken from the command line instead
    # of guessing the path the author hard-coded.
    import sys

    if len(sys.argv) != 2:
        sys.exit(f"usage: {sys.argv[0]} <records_folder>")
    download_jsons(sys.argv[1])
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment