Merge ea0ce0872b into 294aef8fde

Add language statistics reporting feature
- Add generate_stats.py script to analyze README.md and extract language distribution - Generate STATS-main.md with visualized language statistics - Add GitHub Actions workflow to auto-update stats when README.md changes
2026-07-02 16:59:25 +00:00 · 2026-03-06 03:03:11 -04:00 · 2026-01-16 04:25:51 +00:00
3 changed files with 243 additions and 0 deletions
--- a/.github/scripts/generate_stats.py
+++ b/.github/scripts/generate_stats.py
@ -0,0 +1,151 @@
+#!/usr/bin/env python3
+"""
+Generate language statistics from README.md and create a visualization in STATS-main.md
+"""
+
+import re
+from collections import Counter
+from typing import Dict, List, Tuple
+
+def extract_languages_from_readme(filename: str = 'README.md') -> List[str]:
+    """Extract all programming languages mentioned in project entries."""
+    languages = []
+    
+    with open(filename, 'r', encoding='utf-8') as f:
+        content = f.read()
+    
+    # Pattern to match: * [**Language**: _Title_](url)
+    # Handles multiple languages separated by / or ,
+    pattern = r'^\* \[\*\*([^*]+)\*\*:'
+
+    matches = re.findall(pattern, content, re.MULTILINE)
+    
+    for match in matches:
+        # Split by / or , and clean up whitespace
+        langs = re.split(r'\s*/\s*|\s*,\s*', match)
+        for lang in langs:
+            lang = lang.strip()
+            if lang:
+                languages.append(lang)
+    
+    return languages
+
+def normalize_language(lang: str) -> str:
+    """Normalize language names for consistency."""
+    # Handle common variations
+    normalizations = {
+        'Node.js': 'JavaScript'
+    }
+    return normalizations.get(lang, lang)
+
+def count_languages(languages: List[str]) -> Dict[str, int]:
+    """Count occurrences of each language."""
+    normalized = [normalize_language(lang) for lang in languages]
+    return dict(Counter(normalized))
+
+def create_horizontal_bar(count: int, max_count: int, bar_width: int = 50) -> str:
+    """Create a horizontal bar for visualization."""
+    filled = int((count / max_count) * bar_width)
+    bar = '█' * filled + '░' * (bar_width - filled)
+    return bar
+
+def generate_stats_markdown(language_counts: Dict[str, int], num_projects: int) -> str:
+    """Generate the markdown content for STATS-main.md."""
+    # Sort by count (descending) then by name
+    sorted_langs = sorted(language_counts.items(), key=lambda x: (-x[1], x[0]))
+    
+    total_language_mentions = sum(language_counts.values())
+    max_count = max(language_counts.values())
+    
+    # Separate languages >= 1% and < 1%
+    threshold = num_projects * 0.01  # 1% threshold
+    main_langs = []
+    other_langs = []
+    
+    for lang, count in sorted_langs:
+        if count >= threshold:
+            main_langs.append((lang, count))
+        else:
+            other_langs.append((lang, count))
+    
+    # Calculate "Other" total
+    other_count = sum(count for _, count in other_langs)
+    
+    # Build markdown content
+    lines = [
+        "# Build Your Own X - Language Statistics\n",
+        f"**Total Projects:** {num_projects}\n",
+        f"**Total Language Mentions:** {total_language_mentions} *(some projects support multiple languages)*\n",
+        f"**Unique Languages:** {len(language_counts)}\n",
+        f"**Last Updated:** {get_current_date()}\n",
+        "---\n",
+        "## Language Distribution\n",
+        "| Language | Count | Percentage | Distribution |",
+        "|----------|-------|------------|--------------|"
+    ]
+    
+    for lang, count in main_langs:
+        percentage = (count / num_projects) * 100
+        bar = create_horizontal_bar(count, max_count, 30)
+        lines.append(f"| {lang} | {count} | {percentage:.1f}% | {bar} |")
+    
+    # Add "Other" category if there are languages < 1%
+    if other_langs:
+        percentage = (other_count / num_projects) * 100
+        bar = create_horizontal_bar(other_count, max_count, 30)
+        lines.append(f"| Other* | {other_count} | {percentage:.1f}% | {bar} |")
+    
+    lines.append("\n---\n")
+    lines.append("## Top 10 Languages\n")
+    
+    for i, (lang, count) in enumerate(sorted_langs[:10], 1):
+        percentage = (count / num_projects) * 100
+        lines.append(f"{i}. **{lang}**: {count} projects ({percentage:.1f}%)")
+    
+    # Add footnote for "Other" languages
+    if other_langs:
+        lines.append("## Footnotes\n")
+        lines.append(f"**\\* Other languages** (each < 1% of total projects): ")
+        other_names = [f"{lang} ({count})" for lang, count in sorted(other_langs, key=lambda x: (-x[1], x[0]))]
+        lines.append(", ".join(other_names))
+    
+    return '\n'.join(lines) + '\n'
+
+def get_current_date() -> str:
+    """Get current date in YYYY-MM-DD format."""
+    from datetime import datetime
+    return datetime.now().strftime('%Y-%m-%d')
+
+def count_projects(filename: str = 'README.md') -> int:
+    """Count the actual number of project entries."""
+    with open(filename, 'r', encoding='utf-8') as f:
+        content = f.read()
+    pattern = r'^\* \[\*\*([^*]+)\*\*:'
+    matches = re.findall(pattern, content, re.MULTILINE)
+    return len(matches)
+
+def main():
+    print("Analyzing README.md...")
+    num_projects = count_projects()
+    print(f"Found {num_projects} project entries")
+    
+    languages = extract_languages_from_readme()
+    print(f"Extracted {len(languages)} language mentions (some projects list multiple languages)")
+    
+    language_counts = count_languages(languages)
+    print(f"Detected {len(language_counts)} unique languages")
+    
+    print("\nGenerating STATS-main.md...")
+    stats_content = generate_stats_markdown(language_counts, num_projects)
+    
+    with open('STATS-main.md', 'w', encoding='utf-8') as f:
+        f.write(stats_content)
+    
+    print("✓ STATS-main.md generated successfully!")
+    print(f"\nTop 5 languages:")
+    sorted_langs = sorted(language_counts.items(), key=lambda x: -x[1])
+    for lang, count in sorted_langs[:5]:
+        print(f"  - {lang}: {count}")
+
+if __name__ == '__main__':
+    main()
--- a/.github/workflows/update-stats.yml
+++ b/.github/workflows/update-stats.yml
@ -0,0 +1,42 @@
+name: Update Language Statistics
+
+on:
+  push:
+    branches:
+      - main
+      - master
+    paths:
+      - 'README.md'
+  workflow_dispatch:
+
+jobs:
+  update-stats:
+    runs-on: ubuntu-latest
+    
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.x'
+      
+      - name: Generate language statistics
+        run: python3 .github/scripts/generate_stats.py
+      
+      - name: Check for changes
+        id: git-check
+        run: |
+          git diff --exit-code STATS-main.md || echo "changed=true" >> $GITHUB_OUTPUT
+      
+      - name: Commit and push if changed
+        if: steps.git-check.outputs.changed == 'true'
+        run: |
+          git config --local user.email "github-actions[bot]@users.noreply.github.com"
+          git config --local user.name "github-actions[bot]"
+          git add STATS-main.md
+          git commit -m "Auto-update language statistics [skip ci]"
+          git push
--- a/STATS-main.md
+++ b/STATS-main.md
@ -0,0 +1,50 @@
+# Build Your Own X - Language Statistics
+
+**Total Projects:** 350
+
+**Total Language Mentions:** 356 *(some projects support multiple languages)*
+
+**Unique Languages:** 35
+
+**Last Updated:** 2026-01-16
+
+---
+
+## Language Distribution
+
+| Language | Count | Percentage | Distribution |
+|----------|-------|------------|--------------|
+| JavaScript | 69 | 19.7% | ██████████████████████████████ |
+| Python | 68 | 19.4% | █████████████████████████████░ |
+| C | 49 | 14.0% | █████████████████████░░░░░░░░░ |
+| C++ | 33 | 9.4% | ██████████████░░░░░░░░░░░░░░░░ |
+| Go | 23 | 6.6% | ██████████░░░░░░░░░░░░░░░░░░░░ |
+| Rust | 17 | 4.9% | ███████░░░░░░░░░░░░░░░░░░░░░░░ |
+| C# | 16 | 4.6% | ██████░░░░░░░░░░░░░░░░░░░░░░░░ |
+| Ruby | 13 | 3.7% | █████░░░░░░░░░░░░░░░░░░░░░░░░░ |
+| Java | 9 | 2.6% | ███░░░░░░░░░░░░░░░░░░░░░░░░░░░ |
+| Nim | 9 | 2.6% | ███░░░░░░░░░░░░░░░░░░░░░░░░░░░ |
+| Haskell | 6 | 1.7% | ██░░░░░░░░░░░░░░░░░░░░░░░░░░░░ |
+| PHP | 5 | 1.4% | ██░░░░░░░░░░░░░░░░░░░░░░░░░░░░ |
+| TypeScript | 5 | 1.4% | ██░░░░░░░░░░░░░░░░░░░░░░░░░░░░ |
+| (any) | 4 | 1.1% | █░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ |
+| Other* | 30 | 8.6% | █████████████░░░░░░░░░░░░░░░░░ |
+
+---
+
+## Top 10 Languages
+
+1. **JavaScript**: 69 projects (19.7%)
+2. **Python**: 68 projects (19.4%)
+3. **C**: 49 projects (14.0%)
+4. **C++**: 33 projects (9.4%)
+5. **Go**: 23 projects (6.6%)
+6. **Rust**: 17 projects (4.9%)
+7. **C#**: 16 projects (4.6%)
+8. **Ruby**: 13 projects (3.7%)
+9. **Java**: 9 projects (2.6%)
+10. **Nim**: 9 projects (2.6%)
+## Footnotes
+
+**\* Other languages** (each < 1% of total projects): 
+Assembly (3), Clojure (2), Crystal (2), F# (2), Kotlin (2), Lua (2), OCaml (2), Scala (2), ATS (1), Alloy (1), CSS (1), Common Lisp (1), Elixir (1), Pascal (1), Perl (1), Pseudocode (1), R (1), Racket (1), Shell (1), Swift (1), Zig (1)