@Article{Nyaupane_Biogeosci_20241119,
  author   = {Kamal Nyaupane and Umakant Mishra and Feng Tao and Kyongmin Yeo and William J. Riley and Forrest M. Hoffman and Sagar Gautam},
  title    = {Observational Benchmarks Inform Representation of Soil Organic Carbon Dynamics in Land Surface Models},
  journal  = Biogeosci,
  volume   = 21,
  number   = 22,
  pages    = {5173--5183},
  doi      = {10.5194/bg-21-5173-2024},
  day      = 19,
  month    = nov,
  year     = 2024,
  abstract = {Representing soil organic carbon (SOC) dynamics in Earth system models (ESMs) is a key source of uncertainty in predicting carbon climate feedbacks. Machine learning models can help identify dominant environmental controllers and their functional relationships with SOC stocks. The resulting knowledge can be implemented in ESMs to reduce uncertainty and better predict SOC dynamics over space and time. In this study, we used a large number of SOC field observations ($n = 54{,}000$), geospatial datasets of environmental factors ($n = 46$), and two machine learning approaches (Random Forest (RF) and Generalized Additive Modeling (GAM)) to: (1) identify dominant environmental controllers of global and biome-specific SOC stocks, (2) derive functional relationships between environmental controllers and SOC stocks, and (3) compare the identified environmental controllers and predictive relationships with those in Coupled Model Intercomparison Project phase six (CMIP6) models. Our results showed that diurnal temperature, drought index, cation exchange capacity, and precipitation were important observed environmental controllers of SOC stocks. RF model predictions of global-scale SOC stocks were relatively accurate ($R^2 = 0.61$, RMSE $= 0.46$~kg\,m$^{-2}$). In contrast, precipitation, temperature, and net primary productivity explained $>$96\% of ESM-modeled SOC stock variability. We also found very different functional relationships between environmental factors and SOC stocks in observations and ESMs.
SOC predictions in ESMs may be improved significantly by including additional environmental controls (e.g., cation exchange capacity) and representing the functional relationships of environmental controllers consistent with observations.}
}