Commit c92d4eea authored by Мария Григорьева
Browse files

Replace affiliations.py

parent daef5e79
......@@ -6,7 +6,7 @@ import pprint
es = Elasticsearch([{'host': '127.0.0.1', 'port': 9200}])
INDEX_NAME = "russian_affiliations_with_coords"
START_DATE = "2019-05-01T00:00:00.000Z"
START_DATE = "2019-07-20T00:00:00.000Z"
END_DATE = "2019-08-01T00:00:00.000Z"
def _match_all_date_limit(start_date, end_date):
......@@ -121,6 +121,8 @@ def get_matrix(joint):
:return:
"""
unique = get_unique(joint)
print(unique)
print(len(unique))
column_names, row_names = unique, unique
matrix = np.zeros((len(unique), len(unique)))
......@@ -153,6 +155,7 @@ def get_connections_df(matrix_df):
df['level_0'] = df['level_0'].astype(np.int64)
df['level_1'] = df['level_1'].astype(np.int64)
df.rename(columns={0: "count"}, inplace=True)
df = df[df['count'] != 0.0]
return df
......@@ -177,6 +180,7 @@ def main():
query = _match_all_date_limit(START_DATE, END_DATE)
joint = get_joint_affiliations(es, INDEX_NAME, query)
print(joint)
matrix = get_matrix(joint)
update_matrix(matrix, joint)
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment