# Import libraries (all that will be needed for this project)

import earthpy
import hvplot.pandas
import os
import pathlib

import pandas as pd
import numpy as np
import holoviews as hv
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LinearRegression

print('Done!')

c:\Users\raini\miniconda3\envs\earth-analytics-python\Lib\site-packages\earthpy\__init__.py:7: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
  from pkg_resources import resource_string

Done!

# Set the project directory

clim_dir = os.path.join(
    # Home Directory
    pathlib.Path.home(),
    'Documents',
    'Graduate_School',
    'EDA_Certificate',
    'Final-Project_Bozeman-Climate',
    'Data'
)

# Make the directory if needed
os.makedirs(clim_dir, exist_ok = True)

# Check that the filepath looks right.
clim_dir

'C:\\Users\\raini\\Documents\\Graduate_School\\EDA_Certificate\\Final-Project_Bozeman-Climate\\Data'

# Access Observed Temperature data from the Bozeman Weather station

bzn_precip_url = ('https://www.ncei.noaa.gov/access/services/da'
'ta/v1?dataset=daily-summaries&dataTypes=PRCP&stations=USC00241044&'
'startDate=1892-04-08&endDate=2025-02-28&units=standard')

# We'll grab data starting on 4/8/1892 (the beginning of the record) and 
# ending on 02/28/2025 (the last winter on the record)

bzn_precip_url

'https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&dataTypes=PRCP&stations=USC00241044&startDate=1892-04-08&endDate=2025-02-28&units=standard'

# Download the climate data

# Set the filepath in our data directory
prcp_path = os.path.join(clim_dir, 'bozeman_precip_1892-2025.csv')
prcp_path

# Use an if/else statement in case the data have already been saved

if os.path.exists(prcp_path):
    init_bzn_p_df = pd.read_csv(prcp_path)
    print('The file was already saved.')
else:
    # I'm storing the Tobs data in bzn_t_df
    init_bzn_p_df = pd.read_csv(
        bzn_precip_url,
        index_col='DATE',
        parse_dates=True,
        na_values=['NaN']
        )
    
    # Save the climate data to file
    init_bzn_p_df.to_csv(prcp_path)
    print("The data were downloaded.")

# # Check that the download worked
# bzn_t_df.head()

The file was already saved.

# Check to make sure things look ok
init_bzn_p_df.tail()

# Check how many years are in the df

# ensure Date column is in datetime
init_bzn_p_df.DATE = pd.to_datetime(init_bzn_p_df.DATE) 

# Check the number of years
num_years = init_bzn_p_df.DATE.dt.year.nunique() 
num_years # 133 years

133

# Check for NaNs
init_bzn_p_df['Year'] = init_bzn_p_df['DATE'].dt.year
init_bzn_p_df['Month'] = init_bzn_p_df['DATE'].dt.month

na_counts = init_bzn_p_df.PRCP.isna().groupby(init_bzn_p_df['Year']).sum()
na_counts.name = 'NaNs'
na_counts.to_frame()#.rename(columns={'PRCP': "NaNs"}, inplace=True)
bad_years = na_counts[na_counts > 28].to_frame()
bad_years.reset_index()

# Count how many years were removed
num_rem = len(bad_years) # 40 removed
num_rem

2

# Create a cleaned version of the TOBS DF that's removed any year with >28 NaN values

clean_bzn_p_df = (
    init_bzn_p_df
    .groupby('Year')
    .filter(lambda g: g['PRCP'].isna().sum() <= 28)
)

# Check how many years were kept
num_kept = clean_bzn_p_df.Year.nunique()
num_kept # 93 kept

# Check to make sure things add up and the filter removed the correct number of years
if num_kept + num_rem == num_years:
    bzn_p_df = clean_bzn_p_df
    print("Things add up.")
else:
    print("Something is wrong")

Things add up.

# Double check things look right
bzn_p_df.head()

# Only two missing years here!

# Check the bad years - 1931 is not in the list.
bad_years.tail()

# Set month ranges

summer = [6, 7, 8]
winter = [12, 1, 2]

# Select data that fall into summer months

bzn_jja_p_df = (
    bzn_p_df.groupby(bzn_p_df['DATE'].dt.month)
    .filter(lambda g: g.name in summer)
)

# Cite ChatGPT, explain what's going on here

bzn_jja_p_df

# Get annual summer total temperatures

jja_mean_p_df = bzn_jja_p_df.drop(columns=['DATE', 'STATION', 'Month']).groupby('Year').sum()


# Take a look
jja_mean_p_df

# Select data that fall into winter months

bzn_djf_p_df = (
    bzn_p_df.groupby(bzn_p_df['DATE'].dt.month)
    .filter(lambda g: g.name in winter)
)

bzn_djf_p_df

# This will need to be reshaped so that we get December of 1931 and Jan/Feb of 1932 (ChatGPT helped me figure this out)
bzn_djf_p_df['winter_year'] = bzn_djf_p_df['Year']

# Shift December into the following year
bzn_djf_p_df.loc[bzn_djf_p_df['Month'] == 12, 'winter_year'] += 1

# Now compute total precip per winter season
djf_mean_p_df = bzn_djf_p_df.drop(columns=['DATE', 'STATION', 'Month', 'Year']).groupby('winter_year').sum()

# Take a look
djf_mean_p_df

# Join the data frames

means_p_df = jja_mean_p_df
means_p_df['DJF PRCP'] = djf_mean_p_df['PRCP']
means_p_df.rename(columns={'PRCP': 'JJA Total Precip', 'DJF PRCP': 'DJF Total Precip'}, inplace=True)
means_p_df.dropna(inplace=True)

means_p_df

# Plot the mean annual temperatures

seas_p_mean = means_p_df.hvplot(
    title='Winter and Summer Total Precipitation in Bozeman, MT',
    xlabel='Year',
    ylabel='Precipitation (inches)'
)

# Save plot
save(seas_p_mean, 'seas_precip_means.html')


# Show plot
seas_p_mean

# Non-interactive plot
seas_p_mean_noint = means_p_df.plot(
    title='Winter and Summer Total Precipitation in Bozeman, MT',
    xlabel='Year',
    ylabel='Precipitation (inches)'
)

# Save the plot
plt.savefig('seas_precip_means.png')

# Show the plot
seas_p_mean_noint

<Axes: title={'center': 'Winter and Summer Total Precipitation in Bozeman, MT'}, xlabel='Year', ylabel='Precipitation (inches)'>

jja_mean_p_df

# Linear regression for JJA

jja_lr_p_df = jja_mean_p_df.reset_index()

x = jja_lr_p_df[['Year']].values
y_f = jja_lr_p_df['JJA Total Precip'].values

## Create and fit the linear regression model
model = LinearRegression()
model.fit(x, y_f)

## Get the slope and intercept
slope = model.coef_[0]
intercept = model.intercept_

## Print the results
print(f"Slope: {slope}") # 0.009
print(f"Intercept: {intercept}") # -12.442

Slope: 0.009115740813774195
Intercept: -12.442322649775683

# Linear regression for DJF

# Reset the index
djf_lr_p_df = djf_mean_p_df.reset_index()

# Get X and Y values
x = djf_lr_p_df[['winter_year']].values
y_f = djf_lr_p_df['PRCP'].values

# Create and fit the linear regression model
model = LinearRegression()
model.fit(x, y_f)

# Get the slope and intercept
slope = model.coef_[0]
intercept = model.intercept_

# Print the results
print(f"Slope: {slope}") # 0.005
print(f"Intercept: {intercept}") # -6.439

Slope: 0.00457137538829407
Intercept: -6.439333101602284

# Plot annual total precipitation with a trend line
jja_p_linreg = sns.regplot(
    x= jja_lr_p_df['Year'], 
    y= jja_lr_p_df[['JJA Total Precip']],
    data= jja_lr_p_df,
    # type = 'line'
    ci= None,
)

# Set title and axis labels
jja_p_linreg.set_title('OLS Linear Regression of Total\n' \
    'Summer Precipitation in Bozeman, MT')
jja_p_linreg.set_xlabel('Year')
jja_p_linreg.set_ylabel('Precipitation (in)')

# Save and show
plt.savefig('jja_precip_linreg.png')
plt.show()

# Winter plot
djf_p_linreg = sns.regplot(
    x= djf_lr_p_df['winter_year'], 
    y= djf_lr_p_df[['PRCP']],
    data= djf_lr_p_df,
    # type = 'line'
    ci= None
)

# Set labels
djf_p_linreg.set_title('OLS Linear Regression of Total\n' \
    'Winter Precipitation in Bozeman, MT')
djf_p_linreg.set_xlabel('Year')
djf_p_linreg.set_ylabel('Temperature (°F)')

# Save and show plot
plt.savefig('djf_precip_linreg.png')
plt.show()

	DATE	STATION	PRCP	Year	Month
54	1892-06-01	USC00241044	0.14	1892	6
55	1892-06-02	USC00241044	0.00	1892	6
56	1892-06-03	USC00241044	0.02	1892	6
57	1892-06-04	USC00241044	0.00	1892	6
58	1892-06-05	USC00241044	0.00	1892	6
...	...	...	...	...	...
47446	2024-08-27	USC00241044	0.00	2024	8
47447	2024-08-28	USC00241044	0.08	2024	8
47448	2024-08-29	USC00241044	0.00	2024	8
47449	2024-08-30	USC00241044	0.00	2024	8
47450	2024-08-31	USC00241044	0.00	2024	8

	PRCP
Year
1892	5.82
1894	7.42
1895	4.81
1896	4.58
1898	4.58
...	...
2020	5.08
2021	5.47
2022	5.18
2023	6.97
2024	5.90

	PRCP
winter_year
1893	1.72
1895	2.20
1896	2.70
1897	0.40
1898	1.09
...	...
2021	3.28
2022	3.27
2023	3.99
2024	2.98
2025	3.52

	JJA Total Precip	DJF Total Precip
Year
1895	4.81	2.20
1896	4.58	2.70
1898	4.58	1.09
1901	2.22	1.35
1902	4.60	2.53
...	...	...
2020	5.08	2.60
2021	5.47	3.28
2022	5.18	3.27
2023	6.97	3.99
2024	5.90	2.98

	JJA Total Precip	DJF Total Precip
Year
1895	4.81	2.20
1896	4.58	2.70
1898	4.58	1.09
1901	2.22	1.35
1902	4.60	2.53
...	...	...
2020	5.08	2.60
2021	5.47	3.28
2022	5.18	3.27
2023	6.97	3.99
2024	5.90	2.98

Final Project: A deeper investigation of climate change in Bozeman, MT¶

Step 1: Set up a project directory.¶

Step 2: Download Temperature Data¶

Talk about weather station, variable (PRCP) and how the url api works¶

Step 3: Clean the temperature data¶

Step 4: Divide the data into summer and winter groups and calculate annual means¶

Step 5: Plot the total precipiation ammounts¶

Step 6: Compute linear regression (OLS) on each timeseries¶

Step 7: Plot linear regressions¶

	DATE	STATION	PRCP
47627	2025-02-24	USC00241044	0.00
47628	2025-02-25	USC00241044	0.08
47629	2025-02-26	USC00241044	0.00
47630	2025-02-27	USC00241044	0.00
47631	2025-02-28	USC00241044	0.00