Commit 09889555 authored by ETretyakov
Browse files

GeoData <- (Publications total, country_code to fullname)

parent 3c9893c6
from dataclasses import dataclass
from json import dumps
from json import dumps, loads
from elasticsearch import Elasticsearch
from datetime import datetime
from collections import defaultdict
......@@ -33,6 +33,9 @@ from collections import defaultdict
# ]
# }
# Country lookup table parsed once at import time; it is indexed by
# ``country_code`` to obtain the country name stored on each affiliation.
with open('source/country-alpha-2.json', 'r', encoding="utf-8") as countries_file:
    COUNTRIES_ALPHA_2 = loads(countries_file.read())
class Loader:
def __init__(self, hosts=None, login=None, password=None, index_name="russian_affiliations"):
......@@ -96,8 +99,13 @@ class Affiliation:
id: int
name: str
location: list
country_code: str
country_name: str = None
geojson_id: int = None
def __post_init__(self):
    """Resolve ``country_name`` from ``country_code`` via ``COUNTRIES_ALPHA_2``.

    Presumably invoked by the dataclass machinery right after ``__init__``
    (the class decorator is outside the visible hunk -- confirm).

    A code that is absent from the lookup table keeps whatever
    ``country_name`` was supplied (``None`` by default) instead of raising
    ``KeyError``, so one unrecognised code no longer aborts the conversion.
    """
    self.country_name = COUNTRIES_ALPHA_2.get(self.country_code, self.country_name)
def __eq__(self, other):
if isinstance(other, Affiliation):
return self.__dict__ == other.__dict__
......@@ -135,15 +143,22 @@ class Converter:
for r in author["affiliations"]:
affiliation_id = int(r["record"].get("$ref").split("/")[-1])
affiliation_name = r["value"]
country_code = ""
location = []
for address in r["record"].get("addresses", []):
location.append(address["location"].get("lon"))
location.append(address["location"].get("lat"))
location.append(0)
break
if affiliation_name == "CERN":
location = [6.051404, 46.234169, 0]
country_code = "CH"
else:
for address in r["record"].get("addresses", []):
location.append(address["location"].get("lon"))
location.append(address["location"].get("lat"))
location.append(0)
country_code = address["country_code"]
break
if location:
affiliation = Affiliation(affiliation_id, affiliation_name, location)
affiliation = Affiliation(affiliation_id, affiliation_name, location, country_code)
registered, self.counter = affiliation.register(self.counter, self.affiliations_integrity, self.affiliations)
if registered:
affiliations.append(affiliation)
......@@ -169,8 +184,11 @@ class Converter:
"type": "Feature",
"properties": {
"name": "",
"country_code": "",
"country": "",
"flows": {},
"centroid": []
"centroid": [],
"publications_total": 0
},
"geometry": {
"type": "Point",
......@@ -185,7 +203,9 @@ class Converter:
feature["properties"]["flows"] = flows
feature["properties"]["centroid"] = affiliation.location
feature["properties"]["country_code"] = affiliation.country_code
feature["geometry"]["coordinates"] = affiliation.location[:2]
feature["properties"]["country"] = affiliation.country_name
return feature
......@@ -198,15 +218,37 @@ class Converter:
yield self.assemble_feature(affiliation)
class AffiliationCounter:
    """Sum the ``flows`` of GeoJSON features into per-key publication totals."""

    def __init__(self):
        # Integer flow key -> running sum of the flow values seen for it.
        self.counter = defaultdict(int)

    def count(self, features):
        """Add every flow value of every feature to the running totals.

        Both the flow key and its value are coerced with ``int`` first.
        Totals accumulate across calls; nothing is reset here.
        """
        for feature in features:
            for target, amount in feature["properties"]["flows"].items():
                self.counter[int(target)] += int(amount)

    def save_counts(self, features):
        """Tally the flows, then store a total on each feature.

        ``features`` is walked twice (once to count, once to write), so it
        must be a re-iterable sequence such as a list. The total written to
        the feature at position ``i`` is the one accumulated under key ``i``.

        Returns the same ``features`` object, mutated in place.
        """
        self.count(features)
        for position, feature in enumerate(features):
            feature["properties"]["publications_total"] = self.counter[position]
        return features
def start():
loader = Loader()
data = loader.get_records()
converter = Converter()
counter = AffiliationCounter()
features = counter.save_counts([i for i in converter.convert(data)])
geojson = {
"type": "FeatureCollection",
"features": [i for i in converter.convert(data)]
"features": features
}
with open("geo.json", "w", encoding="utf-8") as file:
......
This diff is collapsed.
{
"AF": "Afghanistan",
"AX": "Aland Islands",
"AL": "Albania",
"DZ": "Algeria",
"AS": "American Samoa",
"AD": "Andorra",
"AO": "Angola",
"AI": "Anguilla",
"AQ": "Antarctica",
"AG": "Antigua And Barbuda",
"AR": "Argentina",
"AM": "Armenia",
"AW": "Aruba",
"AU": "Australia",
"AT": "Austria",
"AZ": "Azerbaijan",
"BS": "Bahamas",
"BH": "Bahrain",
"BD": "Bangladesh",
"BB": "Barbados",
"BY": "Belarus",
"BE": "Belgium",
"BZ": "Belize",
"BJ": "Benin",
"BM": "Bermuda",
"BT": "Bhutan",
"BO": "Bolivia",
"BA": "Bosnia And Herzegovina",
"BW": "Botswana",
"BV": "Bouvet Island",
"BR": "Brazil",
"IO": "British Indian Ocean Territory",
"BN": "Brunei Darussalam",
"BG": "Bulgaria",
"BF": "Burkina Faso",
"BI": "Burundi",
"KH": "Cambodia",
"CM": "Cameroon",
"CA": "Canada",
"CV": "Cape Verde",
"KY": "Cayman Islands",
"CF": "Central African Republic",
"TD": "Chad",
"CL": "Chile",
"CN": "China",
"CX": "Christmas Island",
"CC": "Cocos (Keeling) Islands",
"CO": "Colombia",
"KM": "Comoros",
"CG": "Congo",
"CD": "Congo, Democratic Republic",
"CK": "Cook Islands",
"CR": "Costa Rica",
"CI": "Cote D'Ivoire",
"HR": "Croatia",
"CU": "Cuba",
"CY": "Cyprus",
"CZ": "Czech Republic",
"DK": "Denmark",
"DJ": "Djibouti",
"DM": "Dominica",
"DO": "Dominican Republic",
"EC": "Ecuador",
"EG": "Egypt",
"SV": "El Salvador",
"GQ": "Equatorial Guinea",
"ER": "Eritrea",
"EE": "Estonia",
"ET": "Ethiopia",
"FK": "Falkland Islands (Malvinas)",
"FO": "Faroe Islands",
"FJ": "Fiji",
"FI": "Finland",
"FR": "France",
"GF": "French Guiana",
"PF": "French Polynesia",
"TF": "French Southern Territories",
"GA": "Gabon",
"GM": "Gambia",
"GE": "Georgia",
"DE": "Germany",
"GH": "Ghana",
"GI": "Gibraltar",
"GR": "Greece",
"GL": "Greenland",
"GD": "Grenada",
"GP": "Guadeloupe",
"GU": "Guam",
"GT": "Guatemala",
"GG": "Guernsey",
"GN": "Guinea",
"GW": "Guinea-Bissau",
"GY": "Guyana",
"HT": "Haiti",
"HM": "Heard Island & Mcdonald Islands",
"VA": "Holy See (Vatican City State)",
"HN": "Honduras",
"HK": "Hong Kong",
"HU": "Hungary",
"IS": "Iceland",
"IN": "India",
"ID": "Indonesia",
"IR": "Iran, Islamic Republic Of",
"IQ": "Iraq",
"IE": "Ireland",
"IM": "Isle Of Man",
"IL": "Israel",
"IT": "Italy",
"JM": "Jamaica",
"JP": "Japan",
"JE": "Jersey",
"JO": "Jordan",
"KZ": "Kazakhstan",
"KE": "Kenya",
"KI": "Kiribati",
"KR": "Korea",
"KW": "Kuwait",
"KG": "Kyrgyzstan",
"LA": "Lao People's Democratic Republic",
"LV": "Latvia",
"LB": "Lebanon",
"LS": "Lesotho",
"LR": "Liberia",
"LY": "Libyan Arab Jamahiriya",
"LI": "Liechtenstein",
"LT": "Lithuania",
"LU": "Luxembourg",
"MO": "Macao",
"MK": "Macedonia",
"MG": "Madagascar",
"MW": "Malawi",
"MY": "Malaysia",
"MV": "Maldives",
"ML": "Mali",
"MT": "Malta",
"MH": "Marshall Islands",
"MQ": "Martinique",
"MR": "Mauritania",
"MU": "Mauritius",
"YT": "Mayotte",
"MX": "Mexico",
"FM": "Micronesia, Federated States Of",
"MD": "Moldova",
"MC": "Monaco",
"MN": "Mongolia",
"ME": "Montenegro",
"MS": "Montserrat",
"MA": "Morocco",
"MZ": "Mozambique",
"MM": "Myanmar",
"NA": "Namibia",
"NR": "Nauru",
"NP": "Nepal",
"NL": "Netherlands",
"AN": "Netherlands Antilles",
"NC": "New Caledonia",
"NZ": "New Zealand",
"NI": "Nicaragua",
"NE": "Niger",
"NG": "Nigeria",
"NU": "Niue",
"NF": "Norfolk Island",
"MP": "Northern Mariana Islands",
"NO": "Norway",
"OM": "Oman",
"PK": "Pakistan",
"PW": "Palau",
"PS": "Palestinian Territory, Occupied",
"PA": "Panama",
"PG": "Papua New Guinea",
"PY": "Paraguay",
"PE": "Peru",
"PH": "Philippines",
"PN": "Pitcairn",
"PL": "Poland",
"PT": "Portugal",
"PR": "Puerto Rico",
"QA": "Qatar",
"RE": "Reunion",
"RO": "Romania",
"RU": "Russian Federation",
"RW": "Rwanda",
"BL": "Saint Barthelemy",
"SH": "Saint Helena",
"KN": "Saint Kitts And Nevis",
"LC": "Saint Lucia",
"MF": "Saint Martin",
"PM": "Saint Pierre And Miquelon",
"VC": "Saint Vincent And Grenadines",
"WS": "Samoa",
"SM": "San Marino",
"ST": "Sao Tome And Principe",
"SA": "Saudi Arabia",
"SN": "Senegal",
"RS": "Serbia",
"SC": "Seychelles",
"SL": "Sierra Leone",
"SG": "Singapore",
"SK": "Slovakia",
"SI": "Slovenia",
"SB": "Solomon Islands",
"SO": "Somalia",
"ZA": "South Africa",
"GS": "South Georgia And Sandwich Isl.",
"ES": "Spain",
"LK": "Sri Lanka",
"SD": "Sudan",
"SR": "Suriname",
"SJ": "Svalbard And Jan Mayen",
"SZ": "Swaziland",
"SE": "Sweden",
"CH": "Switzerland",
"SY": "Syrian Arab Republic",
"TW": "Taiwan",
"TJ": "Tajikistan",
"TZ": "Tanzania",
"TH": "Thailand",
"TL": "Timor-Leste",
"TG": "Togo",
"TK": "Tokelau",
"TO": "Tonga",
"TT": "Trinidad And Tobago",
"TN": "Tunisia",
"TR": "Turkey",
"TM": "Turkmenistan",
"TC": "Turks And Caicos Islands",
"TV": "Tuvalu",
"UG": "Uganda",
"UA": "Ukraine",
"AE": "United Arab Emirates",
"GB": "United Kingdom",
"US": "United States",
"UM": "United States Outlying Islands",
"UY": "Uruguay",
"UZ": "Uzbekistan",
"VU": "Vanuatu",
"VE": "Venezuela",
"VN": "Vietnam",
"VG": "Virgin Islands, British",
"VI": "Virgin Islands, U.S.",
"WF": "Wallis And Futuna",
"EH": "Western Sahara",
"YE": "Yemen",
"ZM": "Zambia",
"ZW": "Zimbabwe"
}
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment