# Import libraries (all that will be needed for this project)

import earthpy
import hvplot.pandas
import os
import pathlib

import pandas as pd
import numpy as np
import holoviews as hv
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LinearRegression

print('Done!')

Done!

# Set the project directory

clim_dir = os.path.join(
    # Home Directory
    pathlib.Path.home(),
    'Documents',
    'Graduate_School',
    'EDA_Certificate',
    'Final-Project_Bozeman-Climate',
    'Data'
)

# Make the directory if needed
os.makedirs(clim_dir, exist_ok = True)

# Check that the filepath looks right.
clim_dir

'C:\\Users\\raini\\Documents\\Graduate_School\\EDA_Certificate\\Final-Project_Bozeman-Climate\\Data'

# Access Observed Temperature data from the Bozeman Weather station

bzn_tobs_url = ('https://www.ncei.noaa.gov/access/services/da'
'ta/v1?dataset=daily-summaries&dataTypes=TOBS&stations=USC00241044&'
'startDate=1892-04-08&endDate=2025-02-28&units=standard')

# We'll grab data starting on 4/8/1892 (the beginning of the record) and 
# ending on 02/28/2025 (the last winter on the record)

bzn_tobs_url

'https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&dataTypes=TOBS&stations=USC00241044&startDate=1892-04-08&endDate=2025-02-28&units=standard'

# Download the climate data

# Set the filepath in our data directory
temp_path = os.path.join(clim_dir, 'bozeman_tobs_1892-2025.csv')
temp_path

# Use an if/else statement in case the data have already been saved

if os.path.exists(temp_path):
    init_bzn_t_df = pd.read_csv(temp_path)
    print('The file was already saved.')
else:
    # I'm storing the Tobs data in bzn_t_df
    init_bzn_t_df = pd.read_csv(
        bzn_tobs_url,
        index_col='DATE',
        parse_dates=True,
        na_values=['NaN']
        )
    
    # Save the climate data to file
    init_bzn_t_df.to_csv(temp_path)
    print("The data were downloaded.")

# # Check that the download worked
# bzn_t_df.head()

The file was already saved.

# Check to make sure things look ok
init_bzn_t_df.tail()

# Check how many years are in the df

# ensure Date column is in datetime
init_bzn_t_df.DATE = pd.to_datetime(init_bzn_t_df.DATE) 

# Check the number of years
num_years = init_bzn_t_df.DATE.dt.year.nunique() 
num_years # 133 years

133

# Check for NaNs
init_bzn_t_df['Year'] = init_bzn_t_df['DATE'].dt.year
init_bzn_t_df['Month'] = init_bzn_t_df['DATE'].dt.month

na_counts = init_bzn_t_df.TOBS.isna().groupby(init_bzn_t_df['Year']).sum()
na_counts.name = 'NaNs'
na_counts.to_frame()#.rename(columns={'TOBS': "NaNs"}, inplace=True)
bad_years = na_counts[na_counts > 28].to_frame()
bad_years.reset_index()

# Count how many years were removed
num_rem = len(bad_years) # 40 removed
num_rem

40

# Create a cleaned version of the TOBS DF that's removed any year with >28 NaN values

clean_bzn_t_df = (
    init_bzn_t_df
    .groupby('Year')
    .filter(lambda g: g['TOBS'].isna().sum() <= 28)
)

# Check how many years were kept
num_kept = clean_bzn_t_df.Year.nunique()
num_kept # 93 kept

# Check to make sure things add up and the filter removed the correct number of years
if num_kept + num_rem == num_years:
    bzn_t_df = clean_bzn_t_df
    print("Things add up.")
else:
    print("Something is wrong")

Things add up.

# Double check things look right
bzn_t_df.head()

# Now the dataset starts in 1931, which seems right.

# Check the bad years - 1931 is not in the list.
bad_years.tail()

# Set month ranges

summer = [6, 7, 8]
winter = [12, 1, 2]

# Select data that fall into summer months

bzn_jja_df = (
    bzn_t_df.groupby(bzn_t_df['DATE'].dt.month)
    .filter(lambda g: g.name in summer)
)

# Cite ChatGPT, explain what's going on here

bzn_jja_df

# Get annual summer mean temperatures

jja_mean_df = bzn_jja_df.drop(columns=['DATE', 'STATION', 'Month']).groupby('Year').mean()
# We just need the year and TOBS variables, so we'll drop everything else.

# Take a look
jja_mean_df

# Select data that fall into winter months

bzn_djf_df = (
    bzn_t_df.groupby(bzn_t_df['DATE'].dt.month)
    .filter(lambda g: g.name in winter)
)

bzn_djf_df

# This will need to be reshaped so that we get December of 1931 and Jan/Feb of 1932 (ChatGPT helped me figure this out)
bzn_djf_df['winter_year'] = bzn_djf_df['Year']

# Shift December into the following year
bzn_djf_df.loc[bzn_djf_df['Month'] == 12, 'winter_year'] += 1

# Now compute mean TOBS per winter season
djf_mean_df = bzn_djf_df.drop(columns=['DATE', 'STATION', 'Month', 'Year']).groupby('winter_year').mean()

# Take a look
djf_mean_df

# Join the data frames

means_df = jja_mean_df
means_df['DJF TOBS'] = djf_mean_df['TOBS']
means_df.rename(columns={'TOBS': 'JJA Temp F', 'DJF TOBS': 'DJF Temp F'}, inplace=True)

means_df

# Plot the mean annual temperatures

seas_mean = means_df.hvplot(
    title='Winter and Summer Mean Temperatures in Bozeman, MT',
    xlabel='Year',
    ylabel='Temperature (°F)'
)

# Save plot
hv.save(seas_mean, 'seas_temp_means.html')

# Show plot
seas_mean

# Non-interactive plot
seas_t_mean_noint = means_df.plot(
    title='Winter and Summer Mean Temperatures in Bozeman, MT',
    xlabel='Year',
    ylabel='Temperature (°F)'
)

# Save the plot
plt.savefig('seas_temp_means.png')

# Show the plot
seas_t_mean_noint

<Axes: title={'center': 'Winter and Summer Mean Temperatures in Bozeman, MT'}, xlabel='Year', ylabel='Temperature (°F)'>

# Linear regression for JJA

jja_lr_df = jja_mean_df.reset_index()

x = jja_lr_df[['Year']].values
y_f = jja_lr_df['JJA Temp F'].values

## Create and fit the linear regression model
model = LinearRegression()
model.fit(x, y_f)

## Get the slope and intercept
slope = model.coef_[0]
intercept = model.intercept_

## Print the results
print(f"Slope: {slope}") # 0.048
print(f"Intercept: {intercept}") # -22.036

Slope: 0.04825926384785258
Intercept: -22.035500457263836

djf_mean_df

# Linear regression for DJF

# Reset the index
djf_lr_df = djf_mean_df.reset_index()

# Get X and Y values
x = djf_lr_df[['winter_year']].values
y_f = djf_lr_df['TOBS'].values

# Create and fit the linear regression model
model = LinearRegression()
model.fit(x, y_f)

# Get the slope and intercept
slope = model.coef_[0]
intercept = model.intercept_

# Print the results
print(f"Slope: {slope}") # 0.065
print(f"Intercept: {intercept}") # -101.816

Slope: 0.0654175898394332
Intercept: -101.81561120043123

# Plot annual average temperature with a trend line
jja_linreg = sns.regplot(
    x= jja_lr_df['Year'], 
    y= jja_lr_df[['JJA Temp F']],
    data= jja_lr_df,
    # type = 'line'
    ci= None,
)

# Set title and axis labels
jja_linreg.set_title('OLS Linear Regression of Summer\n' \
    'Mean Temperatures in Bozeman, MT')
jja_linreg.set_xlabel('Year')
jja_linreg.set_ylabel('Temperature (°F)')

# Save and show
plt.savefig('jja_temp_linreg.png')
plt.show()

# Winter plot
djf_linreg = sns.regplot(
    x= djf_lr_df['winter_year'], 
    y= djf_lr_df[['TOBS']],
    data= djf_lr_df,
    # type = 'line'
    ci= None
)

# Set labels
djf_linreg.set_title('OLS Linear Regression of Winter\n' \
    'Mean Temperatures in Bozeman, MT')
djf_linreg.set_xlabel('Year')
djf_linreg.set_ylabel('Temperature (°F)')

# Save and show plot
plt.savefig('djf_temp_linreg.png')
plt.show()

	DATE	STATION	TOBS
47627	2025-02-24	USC00241044	47.0
47628	2025-02-25	USC00241044	39.0
47629	2025-02-26	USC00241044	44.0
47630	2025-02-27	USC00241044	47.0
47631	2025-02-28	USC00241044	52.0

	NaNs
Year
1928	152
1929	90
1930	31
1932	61
1951	30

	DATE	STATION	TOBS	Year	Month
13441	1931-06-01	USC00241044	62.0	1931	6
13442	1931-06-02	USC00241044	56.0	1931	6
13443	1931-06-03	USC00241044	52.0	1931	6
13444	1931-06-04	USC00241044	51.0	1931	6
13445	1931-06-05	USC00241044	66.0	1931	6
...	...	...	...	...	...
47446	2024-08-27	USC00241044	83.0	2024	8
47447	2024-08-28	USC00241044	68.0	2024	8
47448	2024-08-29	USC00241044	70.0	2024	8
47449	2024-08-30	USC00241044	82.0	2024	8
47450	2024-08-31	USC00241044	84.0	2024	8

	TOBS
Year
1931	73.750000
1933	74.108696
1934	73.836957
1935	72.423913
1936	74.326087
...	...
2020	73.597826
2021	78.391304
2022	78.108696
2023	74.478261
2024	77.152174

	TOBS
winter_year
1931	26.983051
1932	22.451613
1933	21.220339
1934	33.277778
1935	27.455556
...	...
2021	29.855556
2022	30.022222
2023	26.877778
2024	32.450549
2025	29.622222

Final Project: A deeper investigation of climate change in Bozeman, MT¶

Step 1: Set up a project directory.¶

Step 2: Download Temperature Data¶

Talk about weather station, variable (Tobs) and how the url api works¶

Step 3: Clean the temperature data¶

Step 4: Divide the data into summer and winter groups and calculate annual means¶

Step 5: Plot the mean temperatures¶

Step 6: Compute linear regression (OLS) on each timeseries¶

Step 7: Plot linear regressions¶