Basic usage of azmetr

library(azmetr)
library(lubridate)
#> 
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#> 
#>     date, intersect, setdiff, union

Retrieving the most recent day

To retrieve the most recent day of data for all stations, simply call az_daily() or az_hourly() without any arguments. az_daily() retrieves daily summary data and az_hourly() retrieves hourly data.

# With no arguments, az_daily() returns the latest daily summaries for all
# stations.
daily <- az_daily()
#> Querying data from 2024-09-19
#> Returning data from 2024-09-19
# With no arguments, az_hourly() returns the latest hourly data for all
# stations.
hourly <- az_hourly()
#> Querying most recent hour of data ...
#> Returning data from 2024-09-20 21:00

# Preview the first rows of the daily data (75 columns; most are listed below
# the printed table rather than shown).
head(daily)
#> # A tibble: 6 × 75
#>   meta_bat_volt_max meta_bat_volt_mean meta_bat_volt_min meta_needs_review
#>               <dbl>              <dbl>             <dbl>             <dbl>
#> 1              14.2               13.2              12.8                 0
#> 2              14.1               13                12.5                 0
#> 3              14.5               13.1              12.6                 0
#> 4              14.1               13.0              12.5                 0
#> 5              14.2               13.1              12.6                 0
#> 6              14.0               13.1              12.6                 0
#> # ℹ 71 more variables: meta_station_id <chr>, meta_station_name <chr>,
#> #   meta_version <dbl>, chill_hours_0C <dbl>, chill_hours_20C <dbl>,
#> #   chill_hours_32F <dbl>, chill_hours_45F <dbl>, chill_hours_68F <dbl>,
#> #   chill_hours_7C <dbl>, date_doy <dbl>, date_year <dbl>, datetime <date>,
#> #   dwpt_mean <dbl>, dwpt_meanF <dbl>, eto_azmet <dbl>, eto_azmet_in <dbl>,
#> #   eto_pen_mon <dbl>, eto_pen_mon_in <dbl>, heat_units_10C <dbl>,
#> #   heat_units_13C <dbl>, heat_units_3413C <dbl>, heat_units_45F <dbl>, …
# Preview the first rows of the hourly data (42 columns).
head(hourly)
#> # A tibble: 6 × 42
#>   meta_bat_volt meta_needs_review meta_station_id meta_station_name meta_version
#>           <dbl>             <dbl> <chr>           <chr>                    <dbl>
#> 1          13.1                 0 az01            Tucson                       1
#> 2          12.7                 0 az02            Yuma Valley                  1
#> 3          12.8                 0 az04            Safford                      1
#> 4          12.8                 0 az05            Coolidge                     1
#> 5          12.8                 0 az06            Maricopa                     1
#> 6          12.8                 0 az07            Aguila                       1
#> # ℹ 37 more variables: date_datetime <dttm>, date_doy <dbl>, date_hour <chr>,
#> #   date_year <dbl>, dwpt <dbl>, dwptF <dbl>, eto_azmet <dbl>,
#> #   eto_azmet_in <dbl>, heatstress_cottonC <dbl>, heatstress_cottonF <dbl>,
#> #   precip_total <dbl>, precip_total_in <dbl>, relative_humidity <dbl>,
#> #   sol_rad_total <dbl>, sol_rad_total_ly <dbl>, temp_airC <dbl>,
#> #   temp_airF <dbl>, temp_soil_10cmC <dbl>, temp_soil_10cmF <dbl>,
#> #   temp_soil_50cmC <dbl>, temp_soil_50cmF <dbl>, vp_actual <dbl>, …

Specifying date ranges

By supplying start_date to az_daily() or start_date_time to az_hourly() you can retrieve data going back further in time.

# Daily data: start one week before the most recent date already retrieved.
last_date <- max(daily$datetime)
last_date
#> [1] "2024-09-19"
last_week <- last_date - lubridate::weeks(1)
wk <- az_daily(start_date = last_week)
#> Querying data from 2024-09-12 through 2024-09-19
#> Returning data from 2024-09-12 through 2024-09-19

range(wk$datetime)
#> [1] "2024-09-12" "2024-09-19"
# Hourly data: start 48 hours before the most recent hour already retrieved.
last_datetime <- max(hourly$date_datetime)
last_datetime
#> [1] "2024-09-20 21:00:00 MST"
# Qualified with lubridate:: to match the weeks() call above.
last_48h <- last_datetime - lubridate::hours(48)
hr <- az_hourly(start_date_time = last_48h)
#> Querying data from 2024-09-18 21:00 through 2024-09-20 21:00
#> Returning data from 2024-09-18 21:00 through 2024-09-20 21:00

range(hr$date_datetime)
#> [1] "2024-09-18 21:00:00 MST" "2024-09-20 21:00:00 MST"

To specify an end date, use end_date or end_date_time. You must also supply a start date if you supply an end date.

# Request an explicit date range by giving both a start and an end date.
daily_range <- az_daily(start_date = "2022-01-01", end_date = "2022-01-05")
#> Querying data from 2022-01-01 through 2022-01-05
#> Returning data from 2022-01-01 through 2022-01-05
# Confirm the returned dates span the requested range.
range(daily_range$datetime)
#> [1] "2022-01-01" "2022-01-05"

Note that the dates and datetimes can be supplied as character values in year, month, day order or they can be supplied as Date or POSIXct vectors. If the supplied date is more precise than the data, it will be rounded down. For az_daily() datetimes will be rounded down to the nearest day and for az_hourly() datetimes will be rounded down to the nearest hour.

# Character datetimes with a time-of-day component; the messages below show the
# daily query using only the date part.
char_daily <- az_daily(start_date = "2023-01-10 12:43:22", end_date = "2023-01-11 15:00:01")
#> Querying data from 2023-01-10 through 2023-01-11
#> Returning data from 2023-01-10 through 2023-01-11
range(char_daily$datetime)
#> [1] "2023-01-10" "2023-01-11"

# az_hourly() takes start_date_time / end_date_time (not start_date /
# end_date); the full names are spelled out rather than relying on R's partial
# argument matching.
char_hourly <- az_hourly(
  start_date_time = "2023-01-10 12:43:22",
  end_date_time = "2023-01-11 15:00:01"
)
#> Querying data from 2023-01-10 12:43 through 2023-01-11 15:00
#> Warning in az_hourly(start_date_time = "2023-01-10 12:43:22", end_date_time =
#> "2023-01-11 15:00:01"): You requested data through 2023-01-11 15:00:00 but only
#> data through 2023-01-11 14:00:00 were available
#> Returning data from 2023-01-10 13:00 through 2023-01-11 14:00
range(char_hourly$date_datetime)
#> [1] "2023-01-10 13:00:00 MST" "2023-01-11 14:00:00 MST"

Filtering by station

Information on the stations available is contained in the station_info dataset including station name, station ID, and location.

# Print the station metadata dataset: name, ID, latitude, longitude, and
# elevation for each of the 31 stations (first 10 rows shown).
station_info
#> # A tibble: 31 × 5
#>    meta_station_name meta_station_id latitude longitude elev_m
#>    <chr>             <chr>              <dbl>     <dbl>  <dbl>
#>  1 Tucson            az01                32.3     -111.    717
#>  2 Yuma Valley       az02                32.7     -115.     36
#>  3 Safford           az04                32.8     -110.    903
#>  4 Coolidge          az05                33.0     -112.    423
#>  5 Maricopa          az06                33.1     -112.    362
#>  6 Aguila            az07                33.9     -113.    657
#>  7 Parker            az08                34.0     -114.     98
#>  8 Bonita            az09                32.5     -110.   1349
#>  9 Phoenix Greenway  az12                33.6     -112.    403
#> 10 Yuma N.Gila       az14                32.8     -115.     43
#> # ℹ 21 more rows

If you only need data for a subset of stations, you can supply station_id. However, note that this will query the API once per station due to limitations of how the API works. It may be faster to just get data for all stations and subset it after since that only queries the web API once and results in an identical dataset.

# Option 1: request only stations 1, 2, and 8 (the API is queried once per
# station).
system.time(
  sub_wk <- az_daily(
    station_id = c(1, 2, 8),
    start_date = "2022-01-01",
    end_date = "2022-01-15"
  )
)
#> Querying data from 2022-01-01 through 2022-01-15
#> Returning data from 2022-01-01 through 2022-01-15
#>    user  system elapsed 
#>   0.104   0.002   0.835
# Option 2: request all stations in a single query, then subset locally.
system.time(
  sub_wk2 <- subset(
    az_daily(start_date = "2022-01-01", end_date = "2022-01-15"),
    meta_station_id %in% c("az01", "az02", "az08")
  )
)
#> Querying data from 2022-01-01 through 2022-01-15
#> Returning data from 2022-01-01 through 2022-01-15
#>    user  system elapsed 
#>   0.376   0.000   1.190
# Missing measurements make `==` yield NA, so without na.rm = TRUE the check
# returns NA rather than TRUE/FALSE. Only non-missing cells are compared.
all(sub_wk2 == sub_wk, na.rm = TRUE)
#> [1] TRUE