import datetime as dt
from datetime import timedelta
import sys, os

import pandas as pd
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
from viresclient import set_token

# Make the parent directory importable so the local `utilities` package resolves.
sys.path.append("..")
import utilities
from utilities.MagGeoFunctions import getGPSData
from utilities.MagGeoFunctions import Get_Swarm_residuals
from utilities.MagGeoFunctions import ST_IDW_Process
from utilities.MagGeoFunctions import CHAOS_ground_values
MagGeo - Sequential Mode
Authors | Fernando Benitez-Paez, Urška Demšar, Jed Long, Ciaran Beggan
Contact | Fernando.Benitez@st-andrews.ac.uk, ud2@st-andrews.ac.uk, jed.long@uwo.ca, ciar@bgs.ac.uk
Keywords | Bird migration, data fusion, Earth’s magnetic field, Swarm, GPS tracking
Overview
This Jupyter Notebook will guide you through the required steps to annotate your GPS tracking data with the earth’s magnetic field data from Swarm (European Space Agency). This version is called Sequential Mode, alternatively you can use Parallel Mode to take advantage of parallelized computing if required. More information about the Swarm satellites can be found in the Main Document on the MagGeo github repository. This script will use a sequential loop to run the annotation process for each GPS Point (row) from your data.
To execute the code, you can go through each cell (pressing Ctrl+Enter
), you will also find inner comments ##
to describe each particular step. If you are not familiar with using Jupyter Notebooks, you might want to take some time to learn how first, for example take a look at the notebook-basics.ipynb
Notebook inside MagGeo.
Data requirements
🔎 Your trajectory must be in a csv format:
There are three columns that must be included in your GPS trajectory. Make sure your GPS trajectory includes Latitude, Longitude and timestamp. We suggest that the Timestamp column follow the day/month/year Hour:Minute:Second (dd/mm/yyyy HH:MM:SS) format; Latitude and Longitude should be in decimal degrees (WGS84). Optionally an altitude column can be used providing altitude (the altitude must be in km). Other columns will be ignored. Here is an example of how your GPS track should look:
For this example we are reading the BirdGPSTrajectory.csv file. If you want to run the method using your own csv file, make sure you store the file in the ./data
folder. For more information about the dataset we used in this example go to the Main Notebook.
Import the required python libraries
Add your VirES web client Token
The VirES client API requires a token. Before starting, you need to get your own VirES token. You can visit https://vires.services/ to get yours, and then add it into the next cell.
"https://vires.services/ows", set_default=True) set_token(
Read the GPS track
The following steps will load the GPS track from a csv file, and set some requirements before downloading geomagnetic data from Swarm. If your csv track file does not have an altitude attribute, MagGeo will use sea level as your altitude (i.e., 0 Km). Altitude column units must be Km.
# Resolve the MagGeo folder layout relative to the parent of the current working directory.
base_dir = os.path.dirname(os.getcwd())
temp_results_dir = os.path.join(base_dir, "temp_data")   # intermediate CSVs (Swarm residuals, ST-IDW results)
results_dir = os.path.join(base_dir, "results")          # final annotated trajectory
data_dir = os.path.join(base_dir, "data")                # input GPS trajectory csv
utilities_dir = os.path.join(base_dir, "utilities")      # CHAOS model files used by CHAOS_ground_values
# Make sure the csv file of your trajectory is stored in the Data folder.
# Enter the name of your GPS track csv file including the extension .csv (e.g. BirdGPSTrajectory.csv).
# Make sure you have a column that integrates date and time before including it in MagGeo.
# If your csv track file does not have any altitude attribute, MagGeo will use sea level as your altitude (i.e. 0 Km).
# The altitude column, if present, must be in Km.
gpsfilename = "BirdGPSTrajectoryTest.csv"
Lat = "location-lat"        # name of the latitude column in the csv
Long = "location-long"      # name of the longitude column in the csv
DateTime = "timestamp"      # name of the combined date+time column in the csv
altitude = "height"         # name of the altitude column (Km) in the csv
# Here MagGeo reads your CSV file, taking the Lat, Long, Date&Time and altitude attributes,
# and computes some additional attributes needed by the annotation process:
# the date and time attributes are set to the required format and the epoch column is computed.
# Values like maximum and minimum date and time are also calculated.
GPSData = getGPSData(data_dir, gpsfilename, Lat, Long, DateTime, altitude)
GPSData
Validate the correct amount of Swarm measures
The following loop is identifying the time and validating whether the time is less than 04:00 hours or more than 20:00 hours, to bring one extra day of data. The result of this validation is written in an empty python list which will be later validated to get the unique dates. This avoids duplicate downloading of data for the same day and reduces overall computational time.
# Collect, for every GPS fix, the date(s) of Swarm data needed: the fix's own date,
# plus the previous day for fixes before 04:00 and the next day for fixes after 20:00.
datestimeslist = []
for index, row in GPSData.iterrows():
    daterow = row['dates']
    hourrow = row['times']
    hourrow = hourrow.strftime('%H:%M:%S')   # compare times lexicographically as HH:MM:SS strings
    if hourrow < '04:00:00':
        date_bfr = daterow - (timedelta(days=1))
        datestimeslist.append(daterow)
        datestimeslist.append(date_bfr)
    if hourrow > '20:00:00':
        Date_aft = daterow + (timedelta(days=1))
        datestimeslist.append(daterow)
        datestimeslist.append(Date_aft)
    else:
        # NOTE(review): for fixes before 04:00 this else re-appends daterow
        # (already appended above); harmless because uniquelistdates() dedups later.
        datestimeslist.append(daterow)
Getting a list of unique dates to download the Swarm Data
def uniquelistdates(dates):
    """Return the sorted unique values of *dates* as a numpy array.

    The parameter was renamed from `list` to avoid shadowing the builtin.
    """
    x = np.array(dates)
    uniquelist = np.unique(x)
    return uniquelist
# Deduplicate so each Swarm day is downloaded only once.
uniquelist_dates = uniquelistdates(datestimeslist)
uniquelist_dates
Download Swarm residuals data
Once the date and time columns have been defined and the unique dates are identified, the script can start the download process. Usually the data from Swarm is requested using only one satellite; however, MagGeo will use the magnetic measures from the three satellites of the Swarm mission (Alpha, Bravo, Charlie). Be aware that satellite Charlie got its ASM broken early in the mission, although the initial dates still have valid data MagGeo can use.
Set a connection to the VirES client
and using the function Get_Swarm_residuals
we will get the swarm residuals for the dates included in the previous list.
%%time
hours_t_day = 24  # MagGeo needs the entire Swarm data for each identified day.
hours_added = dt.timedelta(hours=hours_t_day)

listdfa = []
listdfb = []
listdfc = []

for d in tqdm(uniquelist_dates, desc="Getting Swarm Data"):
    # Request a full day [00:00, 24:00) of residuals for all three satellites.
    startdate = dt.datetime.combine(d, dt.datetime.min.time())
    enddate = startdate + hours_added
    SwarmResidualsA, SwarmResidualsB, SwarmResidualsC = Get_Swarm_residuals(startdate, enddate)
    listdfa.append(SwarmResidualsA)
    listdfb.append(SwarmResidualsB)
    listdfc.append(SwarmResidualsC)
Concat the previous results and temporally save the requested data locally: Integrate the previous list for all dates, into pandas dataframes. We will temporally saved the previous results, in case you need to re-run MagGeo, with the following csv files you will not need to run the download process.
%%time
# Combine the per-day downloads into one dataframe per satellite and cache them
# to disk, so a re-run can skip the download step.
PdSwarmRes_A = pd.concat(listdfa, join='outer', axis=0)
PdSwarmRes_A.to_csv(os.path.join(temp_results_dir, 'TotalSwarmRes_A.csv'), header=True)
PdSwarmRes_B = pd.concat(listdfb, join='outer', axis=0)
PdSwarmRes_B.to_csv(os.path.join(temp_results_dir, 'TotalSwarmRes_B.csv'), header=True)
PdSwarmRes_C = pd.concat(listdfc, join='outer', axis=0)
PdSwarmRes_C.to_csv(os.path.join(temp_results_dir, 'TotalSwarmRes_C.csv'), header=True)
# Reload the cached Swarm residuals indexed by epoch; restore timestamp dtype
# (it round-trips through csv as a plain string).
TotalSwarmRes_A = pd.read_csv(os.path.join(temp_results_dir, "TotalSwarmRes_A.csv"), low_memory=False, index_col='epoch')
TotalSwarmRes_A['timestamp'] = pd.to_datetime(TotalSwarmRes_A['timestamp'])
TotalSwarmRes_B = pd.read_csv(os.path.join(temp_results_dir, "TotalSwarmRes_B.csv"), low_memory=False, index_col='epoch')
TotalSwarmRes_B['timestamp'] = pd.to_datetime(TotalSwarmRes_B['timestamp'])
TotalSwarmRes_C = pd.read_csv(os.path.join(temp_results_dir, "TotalSwarmRes_C.csv"), low_memory=False, index_col='epoch')
TotalSwarmRes_C['timestamp'] = pd.to_datetime(TotalSwarmRes_C['timestamp'])

TotalSwarmRes_A.head(10)  # If you need to take a look at the Swarm data, you can print TotalSwarmRes_B or TotalSwarmRes_C.
Spatio-Temporal filter and interpolation process (ST-IDW)
Once we have requested the swarm data, now we need to filter
in space and time the available points to compute the magnetic values (NEC frame) for each GPS point based on its particular date and time. The function ST_IDW_Process
takes the GPS track and the downloaded data from swarm to filter in space and time based on the criteria defined in our method. With the swarm data filtered we interpolate (IDW) the NEC components for each GPS data point.
%%time
# Sequential mode, applying a traditional loop using iterrows.
if __name__ == '__main__':
    dn = []  ## List used to collect all the GPS points with the annotated MAG data. See the last bullet point of this process.
    for index, row in tqdm(GPSData.iterrows(), total=GPSData.shape[0], desc="Annotating the GPS Trayectory"):
        GPSLat = row['gpsLat']
        GPSLong = row['gpsLong']
        GPSDateTime = row['gpsDateTime']
        GPSTime = row['epoch']
        GPSAltitude = row['gpsAltitude']
        try:
            result = ST_IDW_Process(GPSLat, GPSLong, GPSAltitude, GPSDateTime, GPSTime, TotalSwarmRes_A, TotalSwarmRes_B, TotalSwarmRes_C)
            dn.append(result)
        except Exception:
            # Deliberate best-effort: a point with no usable Swarm data is annotated
            # with NaNs and the loop continues (was a bare `except:`; narrowed so
            # KeyboardInterrupt/SystemExit still propagate).
            result_badPoint = {'Latitude': GPSLat, 'Longitude': GPSLong, 'Altitude': GPSAltitude, 'DateTime': GPSDateTime, 'N_res': np.nan, 'E_res': np.nan, 'C_res': np.nan, 'TotalPoints': 0, 'Minimum_Distance': np.nan, 'Average_Distance': np.nan}
            dn.append(result_badPoint)
            continue
Temporarily save the ST-IDW result locally. MagGeo still needs to run the calculation of the geomagnetic components, bringing the magnetic values to the altitude provided for your GPS track.
# Persist the interpolated residuals so the annotation loop need not be re-run.
GPS_ResInt = pd.DataFrame(dn)
GPS_ResInt.to_csv(os.path.join(temp_results_dir, "GPS_ResInt.csv"), header=True)
GPS_ResInt
Compute the magnetic components at the trajectory altitute using CHAOS model
The function CHAOS_ground_values
is used to run the calculation of magnetic components. This adjustment requires the magnetic components at the trajectory altitude (or at the ground level) using CHAOS (theta, phi, radial). This process also conducts the rotation and transformation between a geocentric earth-based reference system (CHAOS) and a geodetic earth-based reference system (GPS track). Once the corrected values are calculated, the unnecessary columns are removed. For more information about this process go to the Main Notebook.
%%time
# Evaluate CHAOS at the trajectory altitude; *_internal are the internal-field-only values.
X_obs, Y_obs, Z_obs, X_obs_internal, Y_obs_internal, Z_obs_internal = CHAOS_ground_values(utilities_dir, GPS_ResInt)
GPS_ResInt['N'] = pd.Series(X_obs)
GPS_ResInt['E'] = pd.Series(Y_obs)
GPS_ResInt['C'] = pd.Series(Z_obs)
GPS_ResInt['N_Obs'] = pd.Series(X_obs_internal)
GPS_ResInt['E_Obs'] = pd.Series(Y_obs_internal)
GPS_ResInt['C_Obs'] = pd.Series(Z_obs_internal)

# The raw residual columns are no longer needed once N/E/C are computed.
GPS_ResInt.drop(columns=['N_res', 'E_res', 'C_res'], inplace=True)
GPS_ResInt
The final result
With the NEC components for each GPS track point, it is possible to compute the additional magnetic components. For more information about the magnetic components and their relevance go to the main paper or notebook.
<strong>📘 The annotated dataframe will include the following attributes:</strong> If you need more information about how the geomagnetic components are described go to the main MagGeo Notebook (Add Link).
<ul>
<li><strong>Latitude</strong> from the GPS Track.</li>
<li><strong>Longitude</strong> from the GPS Track.</li>
<li><strong>Timestamp</strong> from the GPS Track.</li>
<li><strong>Magnetic Field Intensity</strong> mapped as Fgps in nanoTeslas (nT).</li>
<li><strong>N (Northwards) component</strong> mapped as N in nanoTeslas (nT).</li>
<li><strong>E (Eastwards) component</strong> mapped as E. in nanoteslas (nT).</li>
<li><strong>C (Downwards or Center)</strong> component mapped as C in nanoTeslas (nT).</li>
<li><strong>Horizontal component</strong> mapped as H in nanoTeslas (nT).</li>
<li><strong>Magnetic Inclination or dip angle</strong> mapped as I in degrees.</li>
<li><strong>Magnetic Declination</strong> mapped as D in degrees.</li>
<li><strong>Kp Index</strong> mapped as kp</li>
<li><strong>Total Points</strong> as the amount of Swarm points included in the ST-IDW process from the three satellites.</li>
<li><strong>Minimum Distance</strong> mapped as MinDist, representing the minimum distance from a Swarm points and each GPS point location.</li>
<li><strong>Average Distance</strong> mapped as AvDist, representing the average distance between the Swarm points and the GPS point location.</li>
</ul>
%%time
# Having interpolated and weighted the magnetic values, we can compute the other magnetic components.
# H: horizontal intensity; D: declination; I: inclination; F: total intensity.
GPS_ResInt['H'] = np.sqrt((GPS_ResInt['N']**2) + (GPS_ResInt['E']**2))
# arctan2 is safer than arctan: it handles all quadrants and H == 0.
DgpsRad = np.arctan2(GPS_ResInt['E'], GPS_ResInt['N'])
GPS_ResInt['D'] = np.degrees(DgpsRad)
IgpsRad = np.arctan2(GPS_ResInt['C'], GPS_ResInt['H'])
GPS_ResInt['I'] = np.degrees(IgpsRad)
GPS_ResInt['F'] = np.sqrt((GPS_ResInt['N']**2) + (GPS_ResInt['E']**2) + (GPS_ResInt['C']**2))
GPS_ResInt
In the previous dataframe (GPS_ResInt), MagGeo has computed the geomagnetic components for each location and time of your CSV trajectory. Now we will finish up by combining the original attributes from your CSV with the annotated results from MagGeo.
%%time
# Re-read the original track and attach the annotated columns side by side.
originalGPSTrack = pd.read_csv(os.path.join(data_dir, gpsfilename))
MagGeoResult = pd.concat([originalGPSTrack, GPS_ResInt], axis=1)
# Drop duplicated columns. Latitude, Longitude, and DateTime will not be part of the final result.
MagGeoResult.drop(columns=['Latitude', 'Longitude', 'DateTime'], inplace=True)
MagGeoResult
Export the final results to a CSV file
%%time
# Exporting the annotated trajectory as a CSV file into the results folder.
outputfile = "GeoMagResult_" + gpsfilename
export_csv = MagGeoResult.to_csv(os.path.join(results_dir, outputfile), index=None, header=True)
Validate the results ( Optional)
To validate the results we plot the Fgps
column.
## Creating a copy of the results and setting the DateTime column as dataframe index.
ValidateDF = GPS_ResInt.copy()
ValidateDF.set_index("DateTime", inplace=True)
## Plotting the F column as a sanity check on the annotated intensities.
hist = ValidateDF.hist(column='F')
plt.title('F distribution')
plt.xlabel('F in nT')
plt.ylabel('# of measurements')
Map the GPS Track using the annotated Magnetic Values (Optional)
Now we are going to plot the annotated GPS track stored in the ValidateDF dataframe to see the different magnetic components on a map, to have a better perspective of the impact of the Earth's magnetic field.
="scatter", x="Latitude", y="Longitude",
ValidateDF.plot(kind="Magnetic Intensity in nT",
label="F", cmap=plt.get_cmap("gist_rainbow"),
c=True, alpha=0.4, figsize=(10,7),
colorbar=False #This is only needed to get the x-axis label working due to a current bug in pandas plot.
sharex
)
"Longitude", fontsize=12)
plt.ylabel("Latitude", fontsize=12)
plt.xlabel(=12)
plt.legend(fontsize plt.show()
import geopandas
import geoplot
import hvplot.pandas

# Build point geometries from the annotated coordinates (x = Longitude, y = Latitude).
gdf = geopandas.GeoDataFrame(ValidateDF, geometry=geopandas.points_from_xy(ValidateDF.Longitude, ValidateDF.Latitude))
gdf.head()
# Interactive map of the trajectory coloured by total intensity F.
gdf.hvplot(title='Annotated trajectory using MagGeo - F GeoMag Intensity',
           geo=True,
           c='F',
           tiles='CartoLight',
           frame_width=700,
           frame_height=500)
# Interactive map of the trajectory coloured by inclination I.
gdf.hvplot(title='Annotated trajectory using MagGeo - I Inclination',
           geo=True,
           tiles='CartoLight',
           c='I',
           cmap='Viridis',
           frame_width=700,
           frame_height=500)
# Static world map cropped to the trajectory's bounding box, coloured by F.
world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))

ax = world.plot(color='white', edgecolor='gray', figsize=(18, 8))

minx, miny, maxx, maxy = gdf.total_bounds

ax.set_xlim(minx, maxx)
ax.set_ylim(miny, maxy)

gdf.plot(ax=ax, column='F', legend=True,
         legend_kwds={'label': "Magnetic Intensity in nT",
                      'orientation': "horizontal"})
# The geometry puts longitude on x and latitude on y (fixed: labels were swapped).
plt.ylabel("Latitude", fontsize=12)
plt.xlabel("Longitude", fontsize=12)

plt.show()
# Side-by-side static maps: total intensity F (left) and inclination I (right),
# both cropped to the trajectory's bounding box.
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(18, 8))

ax1 = world.plot(ax=ax1, color='white', edgecolor='black')
xlim = ([gdf.total_bounds[0], gdf.total_bounds[2]])
ylim = ([gdf.total_bounds[1], gdf.total_bounds[3]])

ax1.set_xlim(xlim)
ax1.set_ylim(ylim)
gdf.plot(ax=ax1, column='F', legend=True,
         legend_kwds={'label': "Magnetic Intensity in nT",
                      'orientation': "horizontal"})
# The geometry puts longitude on x and latitude on y (fixed: labels were swapped).
plt.ylabel("Latitude", fontsize=9)
plt.xlabel("Longitude", fontsize=9)
ax1.set_title('Magnetic Intensity - F')
ax1.set_xlabel('Longitude')
ax1.set_ylabel('Latitude')

ax2 = world.plot(ax=ax2, color='white', edgecolor='black')
xlim = ([gdf.total_bounds[0], gdf.total_bounds[2]])
ylim = ([gdf.total_bounds[1], gdf.total_bounds[3]])

ax2.set_xlim(xlim)
ax2.set_ylim(ylim)
# We can now plot our ``GeoDataFrame``.
gdf.plot(ax=ax2, column='I', legend=True, cmap='Spectral',
         legend_kwds={'label': " Inclination in Degrees",
                      'orientation': "horizontal"})
ax2.set_title('Inclination - I')
ax2.set_xlabel('Longitude')
ax2.set_ylabel('Latitude')