require(sf)
require(mapview)
require(tidyverse)
require(tidyjson)
require(jsonlite)
require(httr)
require(rmarkdown)
require(plotly)
WySEaSON - SensorThings API Example
Exploring WySEaSON Data: A Guide to Fetching, Analyzing, and Visualizing Environmental Measurements from the SensorThings API
We present methods to facilitate data sharing and external use of the data stored in our SensorThings database, which holds information, often in real time, from sensors across Wyoming. This work and data were funded by NSF EPSCoR grant OIA 2149105 and the WyACT Project. This vignette provides some examples of how you might discover and interact with the available data. All examples use R.
Load R Packages
Please install any of the required packages that are missing:
Useful Functions for Later Use
Given that data are returned from the SensorThings API (STA from this point forward) as JSON objects, we have found creating a few helper functions necessary.
# Safe fetch function to handle errors.
#
# Reads `url` with fromJSON() and returns the parsed result. If fromJSON()
# raises an error, a message naming the URL (and the reason) is emitted and
# NULL is returned, so callers can test the result with is.null().
safe_fetch <- function(url) {
  tryCatch(
    fromJSON(url),
    error = function(e) {
      # Report and carry on so one bad link does not abort a whole loop.
      message("⚠️ Failed to fetch: ", url, " (", conditionMessage(e), ")")
      NULL
    }
  )
}
# Useful for obtaining data from iot links.
#
# Downloads every URL in `urls`, parses each response body with fromJSON(),
# and row-binds the parsed results. The `url_index` column identifies which
# element of `urls` each row came from (its position, or its name when `urls`
# is named). URLs whose request raised an error are reported with a message
# and left out of the result.
fetch_and_parse_json <- function(urls) {
  # Wrap the HTTP call so a failing request yields a NULL result instead of
  # aborting the remaining downloads.
  safe_get_text <- safely(function(url) {
    GET(url) %>% content(as = "text", encoding = "UTF-8")
  })

  # Requests are issued one after another, in the order given.
  bodies <- urls %>%
    map(safe_get_text) %>%
    map("result")

  failed <- map_lgl(bodies, is.null)
  if (any(failed)) {
    message("⚠️ Failed to fetch: ", paste(urls[failed], collapse = ", "))
  }

  # Label by position before dropping failures so `url_index` still refers to
  # the position in `urls` rather than the position among the successes.
  if (is.null(names(bodies))) {
    names(bodies) <- as.character(seq_along(bodies))
  }

  bodies %>%
    compact() %>%
    map_df(fromJSON, .id = "url_index")
}
Accessing WySEaSON Projects
We have grouped data into different hierarchies within STA. We do this so as to have some control over access when necessary. As of publication of this document, all projects are currently public and do not require additional authentication. However, for brevity, we will only focus on data within WySEaSON. There is additional metadata within the properties, so we are choosing to expand all properties into additional columns using the `flatten` argument. There are quite a few ‘wide’ columns, so use the arrow button in the top-right of the table to scroll through them.
# Fetch project data and filter for wyseason.
# `flatten = TRUE` expands nested properties into their own columns; `$value`
# is the element of the response that holds the project records.
wyseason_projects <- fromJSON(
  "https://wyseason.uwyo.edu/sta/v1.1/Projects",
  flatten = TRUE
)$value %>%
  filter(str_detect(name, "wyseason"))

paged_table(wyseason_projects)
Find What Data are Available
Given we know which project we are interested in exploring, let’s see what types of ‘Things’ or sensor frameworks are available. For brevity and simplicity in this example, we will drop the additional ‘properties’ column. You will notice we now have several hyperlinks to dive deeper into available data (please see the WySEaSON STA Documentation for more details).
# List the Things that belong to the selected project, dropping the self link
# and the nested `properties` column to keep the table compact.
# NOTE(review): assumes `wyseason_projects` has exactly one row, so a single
# navigation link is passed to fromJSON() - confirm.
things <- fromJSON(wyseason_projects$`Things@iot.navigationLink`)$value %>%
  select(-c(`@iot.selfLink`, properties))

paged_table(things)
As you can see, there are quite a few different types of sensors or sensor groups. For the remainder of this example we are going to focus on Lake Buoy data. So let’s reconfigure our STA request to just locate buoy datastreams.
# Keep only the Things whose description mentions "buoy". `@iot.id` is renamed
# to `thing_id` so it can serve as a join key later on.
buoys <- fromJSON(wyseason_projects$`Things@iot.navigationLink`)$value %>%
  filter(str_detect(description, "buoy")) %>%
  rename(thing_id = `@iot.id`) %>%
  select(-c(`@iot.selfLink`, properties))

paged_table(buoys)
At the time of publishing, we appear to have 5 different lake buoys that have collected data. It would be fun to put these on a map to see where in the world these buoys exist!
Make a Map of Buoy Locations
To find the lat/longs of the data, we can use the `buoys` object to query STA, convert the returned result into an `sf` object, and create a map. This occurs in a couple of steps:
# First query STA; this returns something akin to GeoJSON.
tmploc <- fetch_and_parse_json(buoys$`Locations@iot.navigationLink`)$value

# One coordinate vector per location; element 1 is used as the longitude and
# element 2 as the latitude.
coords <- tmploc$location$coordinates

# Create sf point object: an attribute table plus a point geometry column.
buoy_locations <- st_sf(
  data.frame(
    id = tmploc$`@iot.id`,
    name = tmploc$name,
    longitude = vapply(coords, `[`, numeric(1), 1),
    latitude = vapply(coords, `[`, numeric(1), 2)
  ),
  geometry = st_sfc(lapply(coords, st_point), crs = 4326)
)

# Plot in an interactive map
mapview(buoy_locations, zcol = "name", legend = TRUE)
Get Buoy Datastreams
Maps are useful, but what about the data associated with each buoy? Well, a `Thing` can have many datastreams, and datastreams are defined as different sensor types at a location.
# Get a listing of datastreams, i.e. which variables are being measured at
# each buoy.
datastrm <- fetch_and_parse_json(buoys$`Datastreams@iot.navigationLink`)$value %>%
  # Spread the nested datastream properties into their own columns.
  unnest_wider(properties) %>%
  rename(
    datastream_name = name,
    datastream_description = description,
    datastream_id = `@iot.id`
  ) %>%
  # Follow each datastream's Thing link to find out which Thing owns it.
  mutate(
    json_data = map(`Thing@iot.navigationLink`, function(link) {
      GET(link) %>%
        content(as = "text", encoding = "UTF-8") %>%
        fromJSON()
    })
  ) %>%
  mutate(thing_id = map_int(json_data, function(thing) thing$`@iot.id`)) %>%
  # Attach the buoy columns from thing_id through description.
  left_join(
    buoys %>% select(thing_id:description),
    by = "thing_id"
  ) %>%
  select(
    thing_id, name, description,
    datastream_id, datastream_name, unitOfMeasurement, parameterName,
    datastream_description, baseParameter, measureType, depth,
    `ObservedProperty@iot.navigationLink`,
    `Observations@iot.navigationLink`
  )

paged_table(datastrm)
Extract Water Temperature Data
It turns out these buoys are collecting quite a few different datastreams. Let’s focus just on water temperature. After reading through the metadata and other properties, we can see that there is a variable `wTemp` in the `baseParameter` attribute that should work for our needs.
# After investigating what is available, let's grab water temperature:
# keep only the datastreams whose baseParameter is "wTemp".
wtmp <- datastrm %>%
  filter(baseParameter == "wTemp")

paged_table(wtmp)
Obtain the Observations for each Datastream
Looks like we currently have 17 different water temperature sensors, at different water depths, across the 5 buoys. Let’s grab some observations from STA because that is what we are really interested in. We will need to loop through each of the datastreams to iteratively extract observations:
# Collect the observations of every water-temperature datastream. Each
# datastream's rows are stored in a list slot and bound once at the end,
# rather than growing a data frame on every iteration.
obs_by_stream <- vector("list", nrow(wtmp))

for (i in seq_len(nrow(wtmp))) {
  # Fetch new batch of observations
  locobs <- safe_fetch(wtmp$`Observations@iot.navigationLink`[i])$value

  # A failed fetch (NULL) or an empty response has nothing to reshape.
  if (NROW(locobs) == 0) {
    next
  }

  obs_by_stream[[i]] <- locobs %>%
    mutate(
      timestamp = as.POSIXct(
        phenomenonTime,
        format = "%Y-%m-%dT%H:%M:%S",
        tz = "UTC"
      ),
      name = wtmp$name[i],
      description = wtmp$description[i],
      datastream_description = wtmp$datastream_description[i],
      variable = wtmp$parameterName[i],
      unit = wtmp$unitOfMeasurement[i, "symbol"],
      depth_m = wtmp$depth[i]
    ) %>%
    select(
      name, description, datastream_description, variable,
      depth_m, timestamp, result, unit
    )
}

# save output (empty slots are ignored by bind_rows)
outObs <- bind_rows(obs_by_stream)
The above code block takes a few seconds to run, but we end up with 159,587 rows of data. It is important to note that STA only returns the first 10,000 records for each datastream. This is by design and there are other methods we can use to extract a full set of observations (see below), but for now, this will suffice for our example.
Filter for Jackson Lake 01
We are only worried about looking at Jackson Lake 01 buoy data because it has more varied depths, so we can view a ‘cooler’ depth/temperature profile. Let’s filter for that buoy only, and then aggregate to daily temperatures instead of the raw information and then plot the results:
# Now that we have the first 10000 observations for these lake buoys, let's
# perform some visualizations, focusing on Jackson Lake: daily means for each
# depth.
jack <- outObs %>%
  filter(name == "LB00001", result != "null") %>%
  mutate(
    sampdate = as.Date(timestamp),
    result = as.numeric(result)
  ) %>%
  # `variable` holds "<variable>_<depth>"; split it into its two parts.
  separate_wider_delim(variable, delim = "_", names = c("variable", "depth")) %>%
  group_by(name, variable, depth, unit, sampdate) %>%
  summarise(
    avgtemp = mean(result, na.rm = TRUE),
    sdtemp = sd(result, na.rm = TRUE),
    count = n(),
    .groups = "drop"
  ) %>%
  mutate(
    # 1.96 standard errors either side of the daily mean.
    ci95 = (sdtemp / sqrt(count)) * 1.96,
    doy = yday(sampdate),
    sampyear = year(sampdate),
    # Missing depths are labelled "1m"; ordering the levels from shallow to
    # deep makes legends and facets sort sensibly.
    depth = ordered(
      coalesce(depth, "1m"),
      levels = c(".75m", "1m", "4m", "7m", "10m", "13m", "15m")
    )
  )

paged_table(jack)
And a plot:
# Daily mean temperature over time, one line per depth. The static ggplot is
# converted to an interactive plotly widget, and the title/subtitle are then
# set through plotly's layout().
ggplotly(
  jack %>%
    ggplot(aes(
      x = sampdate, y = avgtemp,
      group = depth, color = depth, fill = depth
    )) +
    geom_line() +
    ylab(jack$unit[1]) +
    theme_classic() +
    theme(
      plot.title = element_text(hjust = 0.5),
      plot.subtitle = element_text(hjust = 0.5)
    )
) %>%
  layout(title = list(text = paste0(
    'Jackson Lake Buoy 01',
    '<br>',
    '<sup>',
    'Mean Daily Water Temperature by Depth', '</sup>'
  )))
Get all Observations from Jackson Lake 01
Wonderful! We have visualized and we can see that the data are from 2023 only. We want to compare years and we know that this particular buoy was also deployed in 2024. Thus, we must get creative to extract the full timeseries of data and iteratively query STA until we obtain all water temperature data:
# Water-temperature datastreams of the Jackson Lake 01 buoy only.
jack01 <- wtmp %>%
  filter(name == "LB00001")

depth_levels <- c(".75m", "1m", "4m", "7m", "10m", "13m", "15m")
obs_by_stream <- vector("list", nrow(jack01))

for (i in seq_len(nrow(jack01))) {
  # NOTE(review): parameterName is assumed to look like "<variable>_<depth>";
  # when there is no "_" the depth is left missing and defaults to "1m" below.
  name_parts <- str_split(
    jack01$parameterName[i],
    pattern = "_",
    simplify = TRUE
  )
  stream_variable <- name_parts[, 1]
  stream_depth <- if (ncol(name_parts) >= 2) name_parts[, 2] else NA_character_

  # Pages are kept per datastream, so an empty or failed first request can
  # never pick up rows left over from the previous datastream.
  pages <- list()
  skip <- 0

  repeat {
    # Fetch new batch of observations, starting after the rows already held.
    locobs <- safe_fetch(
      paste0(jack01$`Observations@iot.navigationLink`[i], "?$skip=", skip)
    )$value

    # Stop when the request failed (NULL) or nothing more was returned.
    if (NROW(locobs) == 0) {
      break
    }

    pages[[length(pages) + 1]] <- locobs %>%
      mutate(
        timestamp = as.POSIXct(
          phenomenonTime,
          format = "%Y-%m-%dT%H:%M:%S",
          tz = "UTC"
        ),
        name = jack01$name[i],
        variable = stream_variable,
        depth = ordered(coalesce(stream_depth, "1m"), levels = depth_levels),
        description = jack01$description[i],
        unit = jack01$unitOfMeasurement[i, "symbol"],
        result = as.numeric(result)
      ) %>%
      select(name, description, variable, timestamp, depth, result, unit)

    # Update skip for next iteration
    skip <- skip + nrow(locobs)
  }

  obs_by_stream[[i]] <- bind_rows(pages)
}

# Combine all datastreams into the global data frame.
outObs <- bind_rows(obs_by_stream)
This returns 226,314 observations, after about 10 seconds, for 2023 and 2024. Let’s compare water temperatures between years. Specifically, let’s just compare at the 1m water depth for simplicity:
# Daily summary per buoy, variable, depth and unit: mean, standard deviation,
# number of observations, and the half-width of a 95% interval.
jacksum <- outObs %>%
  mutate(sampdate = as.Date(timestamp)) %>%
  group_by(name, variable, sampdate, depth, unit) %>%
  summarise(
    avgtemp = mean(result, na.rm = TRUE),
    sdtemp = sd(result, na.rm = TRUE),
    count = n(),
    .groups = "drop"
  ) %>%
  mutate(
    # 1.96 standard errors either side of the daily mean.
    ci95 = (sdtemp / sqrt(count)) * 1.96,
    doy = yday(sampdate),
    # A factor so each year gets its own discrete colour in the plot.
    sampyear = as.factor(year(sampdate)),
    # Go through character first so this works whether `depth` arrives as
    # text or as a factor; missing depths are labelled "1m".
    depth = ordered(
      coalesce(as.character(depth), "1m"),
      levels = c(".75m", "1m", "4m", "7m", "10m", "13m", "15m")
    )
  )
# Make interactive plot: daily mean temperature at 1m by day of year, one
# line per year, with a ribbon spanning mean +/- ci95.
ggplotly(
  jacksum %>%
    filter(depth == "1m") %>%
    ggplot(aes(
      x = doy, y = avgtemp,
      group = sampyear, color = sampyear, fill = sampyear
    )) +
    geom_line() +
    geom_ribbon(
      aes(ymin = avgtemp - ci95, ymax = avgtemp + ci95),
      alpha = 0.4
    ) +
    ylab(jacksum$unit[1]) +
    theme_classic() +
    theme(
      plot.title = element_text(hjust = 0.5),
      plot.subtitle = element_text(hjust = 0.5)
    )
) %>%
  layout(title = list(text = paste0(
    'Jackson Lake Buoy 01',
    '<br>',
    '<sup>',
    'Mean Daily Water Temperature at 1m', '</sup>'
  )))
Hopefully you find this example useful in discovering and using WyACT data!