We will first write a function that computes the great-circle distance between two points (haversine formula).
from math import sin,cos,atan2,sqrt,radians
def getDistance(phi_1, lambda_1, phi_2=33.6547, lambda_2=73.2500, radius=6371):
    """Return the great-circle distance between two points on a sphere.

    The distance is computed with the haversine formula. All angles are
    given in degrees and converted to radians internally.

    Args:
        phi_1: Latitude of the first point, in degrees.
        lambda_1: Longitude of the first point, in degrees.
        phi_2: Latitude of the second point, in degrees (default 33.6547).
        lambda_2: Longitude of the second point, in degrees (default 73.2500).
            NOTE(review): the default point is presumably in or near
            Islamabad -- confirm.
        radius: Radius of the sphere; the result is expressed in the same
            unit. The default, 6371, is the mean Earth radius in kilometres.

    Returns:
        The distance along the surface of the sphere, as a float.
    """
    phi_1, phi_2 = radians(phi_1), radians(phi_2)
    lambda_1, lambda_2 = radians(lambda_1), radians(lambda_2)
    d_phi = phi_2 - phi_1
    d_lambda = lambda_2 - lambda_1
    a = sin(d_phi / 2.0) ** 2 + cos(phi_1) * cos(phi_2) * sin(d_lambda / 2.0) ** 2
    # For (nearly) antipodal points rounding can push `a` marginally above 1,
    # which would make sqrt(1 - a) raise a math domain error.
    a = min(a, 1.0)
    b = 2 * atan2(sqrt(a), sqrt(1 - a))
    return radius * b
We will now import our data
import csv
# Load the capitals table: the first two columns of every row are kept as
# text, every remaining column is converted to float.
import sys

fname = 'countries.csv'
# The csv module needs a binary handle on Python 2 but a text handle opened
# with newline='' on Python 3, so the mode is chosen per interpreter.
if sys.version_info[0] < 3:
    infile = open(fname, 'rb')
else:
    infile = open(fname, newline='')
with infile as f:
    reader = csv.reader(f)
    data = [row[:2] + [float(v) for v in row[2:]] for row in reader]
We now create the matrix of pairwise distances, together with a flat list of the distances for computing the histogram.
import numpy as np
# Build the symmetric matrix of pairwise distances between all rows of
# `data`, plus a flat list holding each unordered pair's distance once.
Ncities = len(data)
distances = np.zeros((Ncities, Ncities))
flat_distances = []
for i in range(Ncities):
    # A row's distance to itself is marked NaN so the nan-aware reductions
    # used later ignore the diagonal.
    distances[i, i] = np.nan
    # Column 2 is passed as the latitude (phi) and column 3 as the
    # longitude (lambda) argument of getDistance.
    lat_i, lon_i = data[i][2], data[i][3]
    for j in range(i + 1, Ncities):
        distances[i, j] = getDistance(lat_i, lon_i, data[j][2], data[j][3])
        distances[j, i] = distances[i, j]
        flat_distances.append(distances[i, j])
# Parenthesised print behaves identically on Python 2 and Python 3.
print(distances)
import matplotlib.pyplot as plt
# Display the pairwise-distance matrix as an image (its diagonal entries are NaN).
plt.imshow(distances)
The histogram is calculated below. Can you tell anything from the shape of this histogram about your data?
# Histogram of all pairwise distances, split into 20 bins.
plt.hist(flat_distances, bins=20)
plt.xlabel('Distances')
plt.ylabel('Count')
plt.grid()
We will now compute the indices of the closest and farthest capital from each given capital.
# Column-wise positions of the smallest (ci) and largest (fi) distance;
# the NaN entries on the diagonal are skipped by the nan-aware functions.
ci, fi = np.nanargmin(distances, axis=0), np.nanargmax(distances, axis=0)
We will now create a list of lists in which each row is (country, capital city, closest capital, farthest capital), then print it and save it to a CSV file.
# One output row per entry of `data`:
# [country, capital, "country-capital" of the closest, "country-capital" of the farthest].
import sys

cf = [
    data[i][:2] + ['-'.join(data[near][:2]), '-'.join(data[far][:2])]
    for i, (near, far) in enumerate(zip(ci, fi))
]
for v in cf:
    # Parenthesised print behaves identically on Python 2 and Python 3.
    print(' : '.join(v))

ofname = 'myoutput.csv'
# The csv module needs a binary handle on Python 2 but a text handle opened
# with newline='' on Python 3, so the mode is chosen per interpreter.
if sys.version_info[0] < 3:
    outfile = open(ofname, 'wb')
else:
    outfile = open(ofname, 'w', newline='')
with outfile as f:
    writer = csv.writer(f)
    writer.writerows(cf)
Let's see what capitals are the closest and farthest from Islamabad.
# Case-insensitive lookup of the row whose first column is "pakistan".
countries = [entry[0].lower() for entry in data]
pak_idx = countries.index('pakistan')
# Its summary row: country, capital, closest capital, farthest capital.
cf[pak_idx]
# Pair of capitals that are farthest apart overall: locate the flat position
# of the largest non-NaN entry, then convert it back to (row, column).
farthest_flat = np.nanargmax(distances)
f1, f2 = np.unravel_index(farthest_flat, distances.shape)
'-'.join(data[f1][:2]), '-'.join(data[f2][:2])
# Pair of capitals that are closest together overall.
closest_flat = np.nanargmin(distances)
c1, c2 = np.unravel_index(closest_flat, distances.shape)
'-'.join(data[c1][:2]), '-'.join(data[c2][:2])
# Capital with the maximum (avg/total) distance to the other capitals.
'-'.join(data[np.nansum(distances, axis=0).argmax()][:2])
# Alternate interpretation -- the capital whose closest neighbour is farthest away.
'-'.join(data[np.nanmin(distances, axis=0).argmax()][:2])
from collections import Counter
# Alternate interpretation -- the capital that is most often the farthest
# one from other capitals.
count_fi = Counter(fi)
# Indexing keys()/values() only works on Python 2 lists; max() over the keys
# works on both Python 2 and 3 and, like argmax, keeps the first key on ties.
idx = max(count_fi, key=count_fi.get)
'-'.join(data[idx][:2])
# Capital with the minimum (avg/total) distance to the other capitals.
'-'.join(data[np.nansum(distances, axis=0).argmin()][:2])
# Alternate interpretation -- the capital whose farthest neighbour is closest.
'-'.join(data[np.nanmax(distances, axis=0).argmin()][:2])
# Alternate interpretation -- the capital that is most often the closest
# one to other capitals.
count_ci = Counter(ci)
# Indexing keys()/values() only works on Python 2 lists; max() over the keys
# works on both Python 2 and 3 and, like argmax, keeps the first key on ties.
idx = max(count_ci, key=count_ci.get)
'-'.join(data[idx][:2])
# Capital whose distances to the other capitals have the smallest standard deviation.
'-'.join(data[np.nanstd(distances, axis=0).argmin()][:2])
Hope you enjoyed the assignment!!
(C) Python-PIEAS (2015) by Dr. Fayyaz Minhas, DCIS, PIEAS, Islamabad, Pakistan.