"""
Semantic code duplication: relative change since 2021 baseline.

For each project, the 2021 percentage is the baseline (index = 1.0, plotted
as 0% change). Every later year is expressed as the percent change relative
to that baseline, stock-chart style: raw 10% -> 15% is shown as +50%, not
+5 points.

The "average" line is the geometric mean of the per-project index values for
each year -- the correct center for ratio/index data, treating a project that
doubled and one that halved as symmetric.

Run as a script to write the chart to ``chart.png`` in the current working
directory. Importing the module only builds the data series; it does not plot.
"""

import numpy as np

# Year every project is normalised against.
BASELINE_YEAR = 2021

# X axis of the chart, in plotting order.
YEARS = [2021, 2022, 2023, 2024, 2025, 2026]

# Raw duplication percentage per project and year.
PROJECTS = {
    "gitea": {2026: 22.38, 2025: 24.17, 2024: 24.65, 2023: 24.19, 2022: 24.39, 2021: 36.54},
    "caffeine": {2026: 36.11, 2025: 36.00, 2024: 35.64, 2023: 35.61, 2022: 36.18, 2021: 35.74},
    "openmct": {2026: 26.01, 2025: 26.07, 2024: 26.20, 2023: 25.52, 2022: 23.87, 2021: 17.54},
    "tusky": {2026: 20.99, 2025: 26.54, 2024: 23.53, 2023: 24.91, 2022: 22.44, 2021: 21.89},
    "paperless-ngx": {2026: 7.49, 2025: 10.32, 2024: 10.36, 2023: 8.56, 2022: 10.84, 2021: 10.36},
    "helix": {2026: 17.44, 2025: 16.77, 2024: 18.46, 2023: 16.18, 2022: 17.36, 2021: 13.33},
    "immich": {2026: 24.10, 2025: 29.26, 2024: 28.79, 2023: 28.79, 2022: 15.83, 2021: 15.83},
    "caddy": {2026: 22.53, 2025: 22.75, 2024: 21.66, 2023: 19.68, 2022: 19.18, 2021: 16.54},
    "subtitleedit": {2026: 47.16, 2025: 52.26, 2024: 52.19, 2023: 52.09, 2022: 51.72, 2021: 51.82},
    "ghostfolio": {2026: 21.74, 2025: 19.04, 2024: 18.01, 2023: 16.70, 2022: 13.91, 2021: 14.78},
    "zulip": {2026: 19.74, 2025: 19.23, 2024: 17.04, 2023: 15.37, 2022: 15.39, 2021: 14.39},
    "mihon": {2026: 21.33, 2025: 20.73, 2024: 21.74, 2023: 21.87, 2022: 27.32, 2021: 30.31},
    "opencast": {2026: 24.59, 2025: 24.57, 2024: 24.28, 2023: 23.88, 2022: 26.95, 2021: 27.17},
    "outline": {2026: 25.59, 2025: 23.94, 2024: 22.51, 2023: 22.87, 2022: 24.29, 2021: 42.48},
}


def index_values(project, baseline_year=BASELINE_YEAR):
    """Return the project's value for each year in YEARS divided by its baseline.

    Args:
        project: Mapping of year -> raw percentage; must contain every year in
            YEARS as well as ``baseline_year``.
        baseline_year: Year whose value maps to an index of 1.0.

    Returns:
        A list of ratios, one per entry of YEARS, in the same order.

    Raises:
        KeyError: If a required year is missing from ``project``.
        ZeroDivisionError: If the baseline value is a Python number equal to 0.
    """
    baseline = project[baseline_year]
    return [project[y] / baseline for y in YEARS]


def relative_change(project, baseline_year=BASELINE_YEAR):
    """Percent change of each year relative to the project's own baseline value.

    Args:
        project: Mapping of year -> raw percentage (see ``index_values``).
        baseline_year: Year treated as 0% change; defaults to BASELINE_YEAR.

    Returns:
        A list of percent changes, one per entry of YEARS (+50.0 means the
        value is 1.5x the baseline).
    """
    return [(ratio - 1.0) * 100.0 for ratio in index_values(project, baseline_year)]


# Per-project relative-change series.
series = {name: relative_change(p) for name, p in PROJECTS.items()}

# Index values (baseline = 1.0): one row per project, one column per year.
# Built from the same helper as `series` so both share one definition of
# "ratio to baseline".
index_matrix = np.array([index_values(p) for p in PROJECTS.values()])

# Geometric-mean line, computed on index values (baseline = 1.0) per year,
# then converted back to percent change. exp(mean(log(.))) over projects.
geo_index = np.exp(np.mean(np.log(index_matrix), axis=0))
geo_change = (geo_index - 1.0) * 100.0

# Arithmetic mean for comparison (plain average of index values per year).
arith_index = np.mean(index_matrix, axis=0)
arith_change = (arith_index - 1.0) * 100.0


def main():
    """Draw every project plus both average lines and save to ``chart.png``."""
    # Imported here rather than at module level so the data and the numeric
    # helpers above can be imported without matplotlib being installed.
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots(figsize=(9, 5))

    # Each project: same semi-transparent line. Overlaps darken naturally.
    for vals in series.values():
        ax.plot(YEARS, vals, color="#1f4e79", alpha=0.3, linewidth=2, zorder=2)

    # Average lines: stronger, primary. Geometric is the headline;
    # arithmetic is shown for comparison.
    ax.plot(
        YEARS,
        geo_change,
        color="#c0392b",
        linewidth=3,
        zorder=5,
        marker="o",
        markersize=6,
        label="Average (geometric mean)",
    )
    ax.plot(
        YEARS,
        arith_change,
        color="#e08e0b",
        linewidth=2,
        zorder=4,
        marker="s",
        markersize=4,
        linestyle="--",
        label="Average (arithmetic mean)",
    )

    # Baseline reference at 0% change.
    ax.axhline(0, color="#888888", linewidth=1, linestyle="--", zorder=1)

    ax.set_title(
        f"Semantic code duplication: change since {BASELINE_YEAR} baseline",
        fontsize=13,
        pad=13,
    )
    ax.set_xlabel("Year")
    ax.set_ylabel(f"Change relative to {BASELINE_YEAR} (%)")
    ax.set_xticks(YEARS)
    # Explicit sign on every tick so rises and falls read at a glance.
    ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda v, _: f"{v:+.0f}%"))
    ax.grid(True, axis="y", alpha=0.25)
    ax.legend(loc="best")

    fig.tight_layout()
    fig.savefig("chart.png", dpi=150)


if __name__ == "__main__":
    main()