mirror of
https://github.com/codecrafters-io/build-your-own-x
synced 2026-07-02 16:59:25 +00:00
Add language statistics reporting feature
- Add generate_stats.py script to analyze README.md and extract language distribution - Generate STATS-main.md with visualized language statistics - Add GitHub Actions workflow to auto-update stats when README.md changes
This commit is contained in:
parent
76d1a261bd
commit
ea0ce0872b
3 changed files with 243 additions and 0 deletions
151
.github/scripts/generate_stats.py
vendored
Normal file
151
.github/scripts/generate_stats.py
vendored
Normal file
|
|
@ -0,0 +1,151 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Generate language statistics from README.md and create a visualization in STATS-main.md
|
||||||
|
"""
|
||||||
|
|
||||||
|
import re
|
||||||
|
from collections import Counter
|
||||||
|
from typing import Dict, List, Tuple
|
||||||
|
|
||||||
|
def extract_languages_from_readme(filename: str = 'README.md') -> List[str]:
    """Collect every language named in the README's project entries.

    A project entry is a line that starts with ``* [**Language**:``.  The bold
    label may name several languages separated by ``/`` or ``,``; each one is
    returned as its own item, in file order, with duplicates kept.
    """
    with open(filename, 'r', encoding='utf-8') as readme:
        text = readme.read()

    # Capture the bold label of each entry: * [**Language**: _Title_](url)
    labels = re.findall(r'^\* \[\*\*([^*]+)\*\*:', text, re.MULTILINE)

    found = []
    for label in labels:
        # A label such as "C++ / Rust" or "Python, Go" lists several languages.
        pieces = (piece.strip() for piece in re.split(r'\s*/\s*|\s*,\s*', label))
        found.extend(piece for piece in pieces if piece)
    return found
|
||||||
|
|
||||||
|
def normalize_language(lang: str) -> str:
    """Map a language label to its canonical name.

    A label with a known alias is replaced by the canonical language; any
    other label is returned unchanged.
    """
    # Alias -> canonical name.
    aliases = {'Node.js': 'JavaScript'}
    if lang in aliases:
        return aliases[lang]
    return lang
|
||||||
|
|
||||||
|
def count_languages(languages: List[str]) -> Dict[str, int]:
    """Tally how often each language appears, after normalizing its name.

    Returns a plain dict keyed by normalized language name, in order of first
    appearance.
    """
    tally = Counter(map(normalize_language, languages))
    return dict(tally)
|
||||||
|
|
||||||
|
def create_horizontal_bar(count: int, max_count: int, bar_width: int = 50) -> str:
    """Render a fixed-width text bar showing ``count`` relative to ``max_count``.

    Args:
        count: Value to display.
        max_count: Value that corresponds to a completely filled bar.
        bar_width: Total number of characters in the bar.

    Returns:
        A string of exactly ``bar_width`` characters (empty when
        ``bar_width`` is not positive): filled cells followed by empty cells.
        The filled part is rounded down and clamped to the bar, so a
        ``count`` above ``max_count`` yields a full bar rather than an
        over-long one, and a non-positive ``max_count`` yields an empty bar
        instead of raising ``ZeroDivisionError``.
    """
    width = max(bar_width, 0)
    if max_count <= 0:
        # Nothing to scale against; show an empty bar.
        return '░' * width

    filled = int((count / max_count) * width)
    # Clamp so the two segments always add up to exactly ``width`` cells.
    filled = min(max(filled, 0), width)
    return '█' * filled + '░' * (width - filled)
|
||||||
|
|
||||||
|
def generate_stats_markdown(language_counts: Dict[str, int], num_projects: int) -> str:
    """Build the markdown report that is written to STATS-main.md.

    Args:
        language_counts: Mapping of language name to number of mentions.
        num_projects: Number of project entries; the denominator for every
            percentage and for the 1% cut-off.

    Returns:
        The complete markdown document, ending with a newline.

    Languages below 1% of ``num_projects`` are merged into a single
    "Other*" table row and itemized in a footnote.  An empty
    ``language_counts`` or a ``num_projects`` of 0 produces a report with
    zero percentages instead of raising.
    """
    def share(count: int) -> float:
        # Percentage of projects; 0.0 when there are no projects so an empty
        # README does not trigger ZeroDivisionError.
        return (count / num_projects) * 100 if num_projects else 0.0

    # Sort by count (descending), then by name so ties have a stable order.
    sorted_langs = sorted(language_counts.items(), key=lambda x: (-x[1], x[0]))

    total_language_mentions = sum(language_counts.values())
    # default=0 keeps an empty mapping from raising ValueError; no bar is
    # drawn in that case, so the value is never used as a divisor.
    max_count = max(language_counts.values(), default=0)

    # Separate languages >= 1% of projects from those below it.
    threshold = num_projects * 0.01
    main_langs = []
    other_langs = []
    for lang, count in sorted_langs:
        bucket = main_langs if count >= threshold else other_langs
        bucket.append((lang, count))

    other_count = sum(count for _, count in other_langs)

    lines = [
        "# Build Your Own X - Language Statistics\n",
        f"**Total Projects:** {num_projects}\n",
        f"**Total Language Mentions:** {total_language_mentions} *(some projects support multiple languages)*\n",
        f"**Unique Languages:** {len(language_counts)}\n",
        f"**Last Updated:** {get_current_date()}\n",
        "---\n",
        "## Language Distribution\n",
        "| Language | Count | Percentage | Distribution |",
        "|----------|-------|------------|--------------|"
    ]

    for lang, count in main_langs:
        bar = create_horizontal_bar(count, max_count, 30)
        lines.append(f"| {lang} | {count} | {share(count):.1f}% | {bar} |")

    # Add the "Other" row if any language fell below the threshold.
    if other_langs:
        # The combined total can exceed the largest single language; cap it
        # so the bar never grows past its 30-cell width.
        bar = create_horizontal_bar(min(other_count, max_count), max_count, 30)
        lines.append(f"| Other* | {other_count} | {share(other_count):.1f}% | {bar} |")

    lines.append("\n---\n")
    lines.append("## Top 10 Languages\n")

    for i, (lang, count) in enumerate(sorted_langs[:10], 1):
        lines.append(f"{i}. **{lang}**: {count} projects ({share(count):.1f}%)")

    # Footnote itemizing the languages folded into "Other".
    if other_langs:
        # Leading newline puts a blank line between the list and the heading.
        lines.append("\n## Footnotes\n")
        lines.append("**\\* Other languages** (each < 1% of total projects): ")
        # other_langs already follows the count-desc/name order of sorted_langs.
        lines.append(", ".join(f"{lang} ({count})" for lang, count in other_langs))

    return '\n'.join(lines) + '\n'
|
||||||
|
|
||||||
|
def get_current_date() -> str:
    """Return the current date formatted as ``YYYY-MM-DD``."""
    from datetime import datetime

    now = datetime.now()
    return now.strftime('%Y-%m-%d')
|
||||||
|
|
||||||
|
def count_projects(filename: str = 'README.md') -> int:
    """Return how many project entries the README contains.

    An entry is any line that begins with ``* [**Language**:``; an entry that
    names several languages still counts once.
    """
    with open(filename, 'r', encoding='utf-8') as readme:
        text = readme.read()

    entry_re = re.compile(r'^\* \[\*\*([^*]+)\*\*:', re.MULTILINE)
    return sum(1 for _ in entry_re.finditer(text))
|
||||||
|
|
||||||
|
def main():
    """Analyze README.md, write STATS-main.md, and print a short summary."""
    print("Analyzing README.md...")
    project_total = count_projects()
    print(f"Found {project_total} project entries")

    mentions = extract_languages_from_readme()
    print(f"Extracted {len(mentions)} language mentions (some projects list multiple languages)")

    counts = count_languages(mentions)
    print(f"Detected {len(counts)} unique languages")

    print("\nGenerating STATS-main.md...")
    report = generate_stats_markdown(counts, project_total)
    with open('STATS-main.md', 'w', encoding='utf-8') as out:
        out.write(report)
    print("✓ STATS-main.md generated successfully!")

    # Console summary: the five most frequent languages, ties in dict order.
    print("\nTop 5 languages:")
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    for name, total in ranked[:5]:
        print(f" - {name}: {total}")


if __name__ == '__main__':
    main()
|
||||||
42
.github/workflows/update-stats.yml
vendored
Normal file
42
.github/workflows/update-stats.yml
vendored
Normal file
|
|
@ -0,0 +1,42 @@
|
||||||
|
# Regenerates STATS-main.md whenever README.md changes on main/master (or on
# manual dispatch) and commits the result back to the repository.
name: Update Language Statistics

on:
  push:
    branches:
      - main
      - master
    paths:
      - 'README.md'
  workflow_dispatch:

# The job pushes a commit, so GITHUB_TOKEN needs write access to repository
# contents; without this the push fails where the default token is read-only.
permissions:
  contents: write

jobs:
  update-stats:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'

      - name: Generate language statistics
        run: python3 .github/scripts/generate_stats.py

      - name: Check for changes
        id: git-check
        run: |
          # `git status --porcelain` also reports an untracked STATS-main.md,
          # which `git diff` would silently ignore.
          if [ -n "$(git status --porcelain -- STATS-main.md)" ]; then
            echo "changed=true" >> "$GITHUB_OUTPUT"
          fi

      - name: Commit and push if changed
        if: steps.git-check.outputs.changed == 'true'
        run: |
          git config --local user.email "github-actions[bot]@users.noreply.github.com"
          git config --local user.name "github-actions[bot]"
          git add STATS-main.md
          git commit -m "Auto-update language statistics [skip ci]"
          git push
|
||||||
50
STATS-main.md
Normal file
50
STATS-main.md
Normal file
|
|
@ -0,0 +1,50 @@
|
||||||
|
# Build Your Own X - Language Statistics
|
||||||
|
|
||||||
|
**Total Projects:** 350
|
||||||
|
|
||||||
|
**Total Language Mentions:** 356 *(some projects support multiple languages)*
|
||||||
|
|
||||||
|
**Unique Languages:** 35
|
||||||
|
|
||||||
|
**Last Updated:** 2026-01-16
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Language Distribution
|
||||||
|
|
||||||
|
| Language | Count | Percentage | Distribution |
|
||||||
|
|----------|-------|------------|--------------|
|
||||||
|
| JavaScript | 69 | 19.7% | ██████████████████████████████ |
|
||||||
|
| Python | 68 | 19.4% | █████████████████████████████░ |
|
||||||
|
| C | 49 | 14.0% | █████████████████████░░░░░░░░░ |
|
||||||
|
| C++ | 33 | 9.4% | ██████████████░░░░░░░░░░░░░░░░ |
|
||||||
|
| Go | 23 | 6.6% | ██████████░░░░░░░░░░░░░░░░░░░░ |
|
||||||
|
| Rust | 17 | 4.9% | ███████░░░░░░░░░░░░░░░░░░░░░░░ |
|
||||||
|
| C# | 16 | 4.6% | ██████░░░░░░░░░░░░░░░░░░░░░░░░ |
|
||||||
|
| Ruby | 13 | 3.7% | █████░░░░░░░░░░░░░░░░░░░░░░░░░ |
|
||||||
|
| Java | 9 | 2.6% | ███░░░░░░░░░░░░░░░░░░░░░░░░░░░ |
|
||||||
|
| Nim | 9 | 2.6% | ███░░░░░░░░░░░░░░░░░░░░░░░░░░░ |
|
||||||
|
| Haskell | 6 | 1.7% | ██░░░░░░░░░░░░░░░░░░░░░░░░░░░░ |
|
||||||
|
| PHP | 5 | 1.4% | ██░░░░░░░░░░░░░░░░░░░░░░░░░░░░ |
|
||||||
|
| TypeScript | 5 | 1.4% | ██░░░░░░░░░░░░░░░░░░░░░░░░░░░░ |
|
||||||
|
| (any) | 4 | 1.1% | █░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ |
|
||||||
|
| Other* | 30 | 8.6% | █████████████░░░░░░░░░░░░░░░░░ |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Top 10 Languages
|
||||||
|
|
||||||
|
1. **JavaScript**: 69 projects (19.7%)
|
||||||
|
2. **Python**: 68 projects (19.4%)
|
||||||
|
3. **C**: 49 projects (14.0%)
|
||||||
|
4. **C++**: 33 projects (9.4%)
|
||||||
|
5. **Go**: 23 projects (6.6%)
|
||||||
|
6. **Rust**: 17 projects (4.9%)
|
||||||
|
7. **C#**: 16 projects (4.6%)
|
||||||
|
8. **Ruby**: 13 projects (3.7%)
|
||||||
|
9. **Java**: 9 projects (2.6%)
|
||||||
|
10. **Nim**: 9 projects (2.6%)
|
||||||
|
## Footnotes
|
||||||
|
|
||||||
|
**\* Other languages** (each < 1% of total projects):
|
||||||
|
Assembly (3), Clojure (2), Crystal (2), F# (2), Kotlin (2), Lua (2), OCaml (2), Scala (2), ATS (1), Alloy (1), CSS (1), Common Lisp (1), Elixir (1), Pascal (1), Perl (1), Pseudocode (1), R (1), Racket (1), Shell (1), Swift (1), Zig (1)
|
||||||
Loading…
Reference in a new issue