Correlating my Foobot measurements and official PM 2.5 measurements
Since I've recently bought a Foobot device I was curious how the measured values from my apartment correlate with official values from the www.wien.at website.
The first figure shows the max and average values from www.wien.at; there is only one measurement per day. The red line shows my actual measurements from the Foobot device, one measurement per hour. The green shape shows the difference between the means and the actual measurements. We can see that the measurements in the apartment are lower than the www.wien.at measurements.
However, there is a correlation between the two measurements, as shown in the second figure.
Foobot measurements (Python code)
First I downloaded the measurements from the Foobot server, which is possible since they provide an API for this purpose.
import sys

import requests

# Download Foobot measurements between startdate and enddate (one sample per
# hour) and write them to "<startdate>_<enddate>.txt", one datapoint per line.
# Usage: python foobot_download.py 2016-02-07 2016-02-15
startdate = sys.argv[1]
enddate = sys.argv[2]

# Log in to obtain an auth token (returned in the x-auth-token header).
login_url = 'https://api.foobot.io/v2/user/YOURUSERNAME/login/'
r = requests.post(login_url, json={'password': 'YOURPASSWORD'})
mytoken = r.headers['x-auth-token']
headers = {'x-auth-token': mytoken}

# List the owner's devices. NOTE: the original code mistakenly re-issued a
# GET on the *login* URL here; the device list lives under
# /v2/owner/<username>/device/ in the Foobot API.
devices_url = 'https://api.foobot.io/v2/owner/YOURUSERNAME/device/'
r = requests.get(devices_url, headers=headers)
devices = r.json()
print(devices)

# Fetch the datapoints of the first device, one sample every 3600 seconds.
sampling = 3600
data_url = ('https://api.foobot.io/v2/device/' + devices[0]['uuid'] +
            '/datapoint/' + startdate + 'T00:00:00/' +
            enddate + 'T24:00:00/' + str(sampling) + '/')
print(data_url)
r = requests.get(data_url, headers=headers)
output = r.json()
print(output)

# Save the datapoints: one whitespace-separated line per datapoint.
with open(startdate + '_' + enddate + '.txt', "w") as myfile:
    for datapoint in output['datapoints']:
        for value in datapoint:
            myfile.write(str(value) + " ")
        myfile.write('\n')
Wien.at measurements (Python code)
Then I've downloaded the webpages with measurements from the www.wien.at website.
wget --recursive --level=1 https://www.wien.gv.at/ma22-lgb/lufttb.htm
And grepped the relevant PM 2.5 values from the downloaded pages.
import datetime
import sys

# Parse the PM 2.5 "WIEN - MAXIMUM" / "WIEN - MITTEL" values from the
# downloaded wien.at pages and write one "<max> <mean>" line per hour to
# "<startdate>_<enddate>_wienat.txt", so the series aligns with the hourly
# Foobot measurements.
# Usage: python wienat_parser.py 2016-02-07 2016-02-15
startdate = sys.argv[1]
enddate = sys.argv[2]
dirname = "www.wien.gv.at/ma22-lgb/tb/"

d1 = datetime.date(*[int(p) for p in startdate.split("-")])
print(d1)
d2 = datetime.date(*[int(p) for p in enddate.split("-")])
print(d2)
# Number of days between the two dates.
delta = d2 - d1


def _grep_field(filename, needle):
    """Return '|'-field 6 of every line in *filename* containing *needle*,
    joined, stripped, and with blanks removed.

    Pure-Python replacement for the original shell pipeline
    (grep <needle> file | cut -f6 -d"|" > tmp.txt), which leaked temp
    files and depended on external tools.
    """
    fields = []
    with open(filename) as fh:
        for line in fh:
            if needle in line:
                parts = line.split("|")
                if len(parts) > 5:
                    fields.append(parts[5])
    return "\n".join(fields).strip().replace(" ", "")


# For each day in the range, parse that day's page and emit the hourly rows.
with open(startdate + '_' + enddate + '_wienat.txt', "w") as myfile:
    for j in range(0, delta.days + 1):
        print(d1)
        fn = dirname + "tb" + str(d1).replace("-", "") + ".htm"
        maxval = _grep_field(fn, "WIEN - MAXIMUM")
        meanval = _grep_field(fn, "WIEN - MITTEL")
        print(maxval)
        print(meanval)
        # The last day only gets 23 hourly rows (the Foobot range ends at
        # T24:00:00), every other day gets 24 — keeps both files aligned.
        hours = 23 if j == delta.days else 24
        for i in range(hours):
            myfile.write(maxval + " " + meanval + "\n")
        # Advance to the next day.
        d1 += datetime.timedelta(days=1)
Analysing the data and plotting (Python code)
For data analysis I've checked the differences and correlations in the data.import sys import datetime import matplotlib.pyplot as plt import os import numpy as np from scipy.stats import pearsonr #startdate="2016-02-07" #enddate="2016-02-15" #python plot_results.py 2016-02-15_2016-03-23.txt 2016-02-15_2016-03-23_wienat.txt "1020 Vienna" #generate two plots with measurements and wien.at values, and correlation and differences foobotfile=sys.argv[1] wienatfile=sys.argv[2] #for filename and graphs location=sys.argv[3] #grep start and enddate from filename startdate=foobotfile.replace(".txt","").split("_")[0] enddate=foobotfile.replace(".txt","").split("_")[1] d1l=startdate.split("-") d2l=enddate.split("-") d1 = datetime.date(int(d1l[0]),int(d1l[1]),int(d1l[2])) print d1 d2 = datetime.date(int(d2l[0]),int(d2l[1]),int(d2l[2])) print d2 #difference between date delta=d2-d1 #generate label file labels=[] for j in range(0,delta.days+1): print d1 if j==delta.days: for i in range(0,23): #add short date day.month at first hour of day in labels if i==0: newd=d1 + datetime.timedelta(days=j) labels.append(str(newd.day)+"."+str(newd.month)) else: labels.append("") else: for i in range(0,24): #add short date day.month at first hour of day in labels if i==0: newd=d1 + datetime.timedelta(days=j) labels.append(str(newd.day)+"."+str(newd.month)) else: labels.append("") print labels print len(labels) #read wien.at file into data string with open(wienatfile) as f: data = f.read() #split data string at newline data = data.split('\n') #get first row of each line split by empty string wmax=[] for row in data: if len(row.split(' '))>0: wmax.append(row.split(' ')[0]) #get second row of each line split by empty string wmean=[] for row in data: if len(row.split(' '))>1: wmean.append(row.split(' ')[1]) #store loaded date into numpy array as float wmaxn=np.array(wmax).astype(np.float) wmeann=np.array(wmean).astype(np.float) #read foobot file into data string with open(foobotfile) as f: data = 
f.read() #split data string at newline data = data.split('\n') #get second row of each line split by empty string foobot=[] for row in data: if len(row.split(' '))>1: foobot.append(row.split(' ')[1]) #store loaded date into numpy array as float foobotn=np.array(foobot).astype(np.float) #create array for x axis xax=[] for i in range(0,len(wmaxn)): xax.append(i) #create figure fig = plt.figure() fig.suptitle("Analysis for "+location, fontsize=14, fontweight='bold') #plot wmaxn, wmeann, foobotn plt.subplot(2,1,1) plt.plot(xax,wmaxn,label="Max value from wien.at") plt.plot(wmeann,label="Average from wien.at") plt.plot(foobotn,label="Measurements from "+location) #fill space between meann and foobotn plt.fill_between(xax, wmeann, foobotn, where=wmaxn>=foobotn, interpolate=True, color='green') plt.ylabel('PM 2.5') plt.title('PM 2.5 Measurements') plt.legend(loc=2,prop={'size':10}) plt.xticks(xax, labels, rotation='vertical') plt.subplots_adjust(bottom=0.15) #compute correlations corr_maxmeasure=pearsonr(wmaxn, foobotn) corr_meanmeasure=pearsonr(wmeann, foobotn) print corr_maxmeasure print corr_meanmeasure #compute differences absolute and in percent maxdiffhr=np.sum(wmaxn-foobotn)/len(wmaxn) meandiffhr=np.sum(wmeann-foobotn)/len(wmaxn) maxdiffhrpc=100*(np.sum(wmaxn-foobotn)/np.sum(wmaxn)) meandiffhrpc=100*(np.sum(wmeann-foobotn)/np.sum(wmeann)) #add subplot 2 ax = fig.add_subplot(212) ax.set_title('Correlation') ax.set_xlabel('Average from wien.at') ax.set_ylabel("Measurements from "+location) txtstr="Maxcorr: "+str(round(corr_maxmeasure[0],3))+"\nMeancorr: "+str(round(corr_meanmeasure[0],3))+"\nMaxdiff/hour: "+str(round(maxdiffhr,3))+","+str(round(maxdiffhrpc,3))+"%\nMeandiff/hour: "+str(round(meandiffhr,3))+","+str(round(meandiffhrpc,3))+"%" ax.text(3, 18, txtstr, style='italic',bbox={'facecolor':'red', 'alpha':0.5, 'pad':10}) ax.plot(wmeann, foobotn) plt.show() #save figure fig.set_size_inches(10,10) fig.savefig(location.replace(" ","")+".pdf") 
fig.savefig(location.replace(" ","")+".png")