Merge ea0ce0872b into 264b4547b3

Merge pull request #1812 from roiamiel1/add-build-deep-learning-from-scratch
add Build Your Own PyTorch
2026-07-02 16:59:25 +00:00 · 2026-06-26 10:11:51 +08:00 · 2026-06-25 20:11:09 +05:30 · 2026-06-23 22:14:39 +03:00 · 2026-06-21 21:00:09 +03:00 · 2026-01-16 04:25:51 +00:00
4 changed files with 244 additions and 0 deletions
--- a/.github/scripts/generate_stats.py
+++ b/.github/scripts/generate_stats.py
@ -0,0 +1,151 @@
 #!/usr/bin/env python3
 """
 Generate language statistics from README.md and create a visualization in STATS-main.md
 """
 import re
 from collections import Counter
 from typing import Dict, List, Tuple
 def extract_languages_from_readme(filename: str = 'README.md') -> List[str]:
    """Collect every language named in the README's project entries.

    A project entry is a line of the form ``* [**Language**: _Title_](url)``.
    The bold label may name several languages separated by ``/`` or ``,``;
    each one is returned as its own item, in file order, duplicates included.
    """
    with open(filename, 'r', encoding='utf-8') as readme:
        text = readme.read()
    # Capture the bold label that opens each bullet line.
    labels = re.findall(r'^\* \[\*\*([^*]+)\*\*:', text, re.MULTILINE)
    # A label such as "JavaScript / Java" or "C, C++" names several languages.
    separator = re.compile(r'\s*/\s*|\s*,\s*')
    return [
        name.strip()
        for label in labels
        for name in separator.split(label)
        if name.strip()
    ]
 def normalize_language(lang: str) -> str:
    """Map a language label to its canonical name.

    Labels without a known alias are returned unchanged.
    """
    # Alias -> canonical name; Node.js entries are counted as JavaScript.
    aliases = {'Node.js': 'JavaScript'}
    if lang in aliases:
        return aliases[lang]
    return lang
 def count_languages(languages: List[str]) -> Dict[str, int]:
    """Tally how often each language appears after normalizing its name."""
    tally = Counter()
    for raw_name in languages:
        tally[normalize_language(raw_name)] += 1
    return dict(tally)
 def create_horizontal_bar(count: int, max_count: int, bar_width: int = 50) -> str:
    """Render ``count`` as a fixed-width text bar scaled against ``max_count``.

    Args:
        count: Value to draw.
        max_count: Value that corresponds to a completely filled bar.
        bar_width: Total number of characters in the returned bar.

    Returns:
        A string of exactly ``bar_width`` characters (empty when ``bar_width``
        is not positive): filled cells followed by empty cells. The filled
        part is clamped to the bar, so a ``count`` above ``max_count`` yields
        a full bar and a negative ``count`` an empty one.
    """
    if bar_width <= 0:
        return ''
    if max_count <= 0:
        # Nothing to scale against; draw an empty bar instead of dividing by zero.
        return '░' * bar_width
    filled = int((count / max_count) * bar_width)
    # Keep the bar at its advertised width even for out-of-range counts.
    filled = max(0, min(bar_width, filled))
    return '█' * filled + '░' * (bar_width - filled)
 def generate_stats_markdown(language_counts: Dict[str, int], num_projects: int) -> str:
    """Generate the markdown content for STATS-main.md.

    Args:
        language_counts: Mapping of language name to number of mentions.
        num_projects: Number of project entries. It is the denominator of
            every percentage and of the 1% cut-off below which languages are
            folded into a single "Other" row.

    Returns:
        The complete markdown document, terminated by a newline. An empty
        ``language_counts`` produces a report with an empty table rather
        than raising.
    """
    def percent_of_projects(count: int) -> float:
        # A README without entries has zero projects; report 0% instead of
        # dividing by zero.
        return (count / num_projects) * 100 if num_projects else 0.0

    # Sort by count (descending) then by name
    sorted_langs = sorted(language_counts.items(), key=lambda x: (-x[1], x[0]))
    total_language_mentions = sum(language_counts.values())
    # Separate languages >= 1% and < 1% of the project count
    threshold = num_projects * 0.01
    main_langs = [(lang, count) for lang, count in sorted_langs if count >= threshold]
    other_langs = [(lang, count) for lang, count in sorted_langs if count < threshold]
    # Calculate "Other" total
    other_count = sum(count for _, count in other_langs)
    # Bars are scaled to the largest row drawn. The aggregated "Other" row can
    # exceed every single language, so it takes part in the maximum; the floor
    # of 1 keeps the scale usable when there is nothing to count.
    bar_scale = max(max(language_counts.values(), default=0), other_count, 1)
    # Build markdown content
    lines = [
        "# Build Your Own X - Language Statistics\n",
        f"**Total Projects:** {num_projects}\n",
        f"**Total Language Mentions:** {total_language_mentions} *(some projects support multiple languages)*\n",
        f"**Unique Languages:** {len(language_counts)}\n",
        f"**Last Updated:** {get_current_date()}\n",
        "---\n",
        "## Language Distribution\n",
        "| Language | Count | Percentage | Distribution |",
        "|----------|-------|------------|--------------|"
    ]
    for lang, count in main_langs:
        bar = create_horizontal_bar(count, bar_scale, 30)
        lines.append(f"| {lang} | {count} | {percent_of_projects(count):.1f}% | {bar} |")
    # Add "Other" category if there are languages < 1%
    if other_langs:
        bar = create_horizontal_bar(other_count, bar_scale, 30)
        lines.append(f"| Other* | {other_count} | {percent_of_projects(other_count):.1f}% | {bar} |")
    lines.append("\n---\n")
    lines.append("## Top 10 Languages\n")
    for i, (lang, count) in enumerate(sorted_langs[:10], 1):
        lines.append(f"{i}. **{lang}**: {count} projects ({percent_of_projects(count):.1f}%)")
    # Add footnote for "Other" languages
    if other_langs:
        # Separate the footnotes from the list above like every other section.
        lines.append("\n---\n")
        lines.append("## Footnotes\n")
        lines.append("**\\* Other languages** (each < 1% of total projects): ")
        # other_langs already follows the (count desc, name) order of sorted_langs.
        lines.append(", ".join(f"{lang} ({count})" for lang, count in other_langs))
    return '\n'.join(lines) + '\n'
 def get_current_date() -> str:
    """Return today's date (local clock) formatted as YYYY-MM-DD."""
    from datetime import datetime
    today = datetime.now()
    return format(today, '%Y-%m-%d')
 def count_projects(filename: str = 'README.md') -> int:
    """Return how many project entries the README contains.

    An entry is any line that starts with ``* [**Language**:``.
    """
    with open(filename, 'r', encoding='utf-8') as readme:
        text = readme.read()
    entry_pattern = re.compile(r'^\* \[\*\*([^*]+)\*\*:', re.MULTILINE)
    return sum(1 for _ in entry_pattern.finditer(text))
 def main():
    """Analyze README.md, write STATS-main.md, and print a short summary."""
    print("Analyzing README.md...")
    project_total = count_projects()
    print(f"Found {project_total} project entries")
    mentions = extract_languages_from_readme()
    print(f"Extracted {len(mentions)} language mentions (some projects list multiple languages)")
    counts = count_languages(mentions)
    print(f"Detected {len(counts)} unique languages")
    print("\nGenerating STATS-main.md...")
    report = generate_stats_markdown(counts, project_total)
    with open('STATS-main.md', 'w', encoding='utf-8') as out:
        out.write(report)
    print("✓ STATS-main.md generated successfully!")
    print("\nTop 5 languages:")
    # Stable sort: languages with equal counts keep their first-seen order.
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    for name, total in ranked[:5]:
        print(f"  - {name}: {total}")
 # Run the generator only when this file is executed as a script, not on import.
 if __name__ == '__main__':
    main()
--- a/.github/workflows/update-stats.yml
+++ b/.github/workflows/update-stats.yml
@ -0,0 +1,42 @@
 name: Update Language Statistics
 # Regenerates STATS-main.md when README.md changes on main/master,
 # or when the workflow is started manually.
 on:
  push:
    branches:
      - main
      - master
    paths:
      - 'README.md'
  workflow_dispatch:
 # The job pushes a commit, so the GITHUB_TOKEN needs write access to the
 # repository contents (the default token may be read-only).
 permissions:
  contents: write
 jobs:
  update-stats:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'
      - name: Generate language statistics
        run: python3 .github/scripts/generate_stats.py
      - name: Check for changes
        id: git-check
        run: |
          # `git status --porcelain` also reports a not-yet-tracked STATS-main.md,
          # which `git diff --exit-code` would silently treat as unchanged.
          if [ -n "$(git status --porcelain -- STATS-main.md)" ]; then
            echo "changed=true" >> "$GITHUB_OUTPUT"
          fi
      - name: Commit and push if changed
        if: steps.git-check.outputs.changed == 'true'
        run: |
          git config --local user.email "github-actions[bot]@users.noreply.github.com"
          git config --local user.name "github-actions[bot]"
          git add STATS-main.md
          git commit -m "Auto-update language statistics [skip ci]"
          git push
--- a/README.md
+++ b/README.md
@ -264,6 +264,7 @@ It's a great way to learn.
 * [**JavaScript / Java**: _Neural Networks - The Nature of Code_](https://www.youtube.com/playlist?list=PLRqwX-V7Uu6aCibgK1PTWWu9by6XFdCfh) [video]
 * [**JavaScript**: _Neural networks from scratch for JavaScript linguists (Part1 — The Perceptron)_](https://hackernoon.com/neural-networks-from-scratch-for-javascript-linguists-part1-the-perceptron-632a4d1fbad2)
 * [**Python**: _A Neural Network in 11 lines of Python_](https://iamtrask.github.io/2015/07/12/basic-python-network/)
 * [**Python**: _Build Deep Learning From Scratch (reimplement PyTorch internals across 34 stages)_](https://github.com/roiamiel1/Build-Deep-Learning-From-Scratch)
 * [**Python**: _Implement a Neural Network from Scratch_](https://victorzhou.com/blog/intro-to-neural-networks/)
 * [**Python**: _Optical Character Recognition (OCR)_](http://aosabook.org/en/500L/optical-character-recognition-ocr.html)
 * [**Python**: _Traffic signs classification with a convolutional network_](https://navoshta.com/traffic-signs-classification/)
--- a/STATS-main.md
+++ b/STATS-main.md
@ -0,0 +1,50 @@
 # Build Your Own X - Language Statistics
 **Total Projects:** 350
 **Total Language Mentions:** 356 *(some projects support multiple languages)*
 **Unique Languages:** 35
 **Last Updated:** 2026-01-16
 ---
 ## Language Distribution
 | Language | Count | Percentage | Distribution |
 |----------|-------|------------|--------------|
 | JavaScript | 69 | 19.7% | ██████████████████████████████ |
 | Python | 68 | 19.4% | █████████████████████████████░ |
 | C | 49 | 14.0% | █████████████████████░░░░░░░░░ |
 | C++ | 33 | 9.4% | ██████████████░░░░░░░░░░░░░░░░ |
 | Go | 23 | 6.6% | ██████████░░░░░░░░░░░░░░░░░░░░ |
 | Rust | 17 | 4.9% | ███████░░░░░░░░░░░░░░░░░░░░░░░ |
 | C# | 16 | 4.6% | ██████░░░░░░░░░░░░░░░░░░░░░░░░ |
 | Ruby | 13 | 3.7% | █████░░░░░░░░░░░░░░░░░░░░░░░░░ |
 | Java | 9 | 2.6% | ███░░░░░░░░░░░░░░░░░░░░░░░░░░░ |
 | Nim | 9 | 2.6% | ███░░░░░░░░░░░░░░░░░░░░░░░░░░░ |
 | Haskell | 6 | 1.7% | ██░░░░░░░░░░░░░░░░░░░░░░░░░░░░ |
 | PHP | 5 | 1.4% | ██░░░░░░░░░░░░░░░░░░░░░░░░░░░░ |
 | TypeScript | 5 | 1.4% | ██░░░░░░░░░░░░░░░░░░░░░░░░░░░░ |
 | (any) | 4 | 1.1% | █░░░░░░░░░░░░░░░░░░░░░░░░░░░░░ |
 | Other* | 30 | 8.6% | █████████████░░░░░░░░░░░░░░░░░ |
 ---
 ## Top 10 Languages
 1. **JavaScript**: 69 projects (19.7%)
 2. **Python**: 68 projects (19.4%)
 3. **C**: 49 projects (14.0%)
 4. **C++**: 33 projects (9.4%)
 5. **Go**: 23 projects (6.6%)
 6. **Rust**: 17 projects (4.9%)
 7. **C#**: 16 projects (4.6%)
 8. **Ruby**: 13 projects (3.7%)
 9. **Java**: 9 projects (2.6%)
 10. **Nim**: 9 projects (2.6%)
 ## Footnotes
 **\* Other languages** (each < 1% of total projects): 
 Assembly (3), Clojure (2), Crystal (2), F# (2), Kotlin (2), Lua (2), OCaml (2), Scala (2), ATS (1), Alloy (1), CSS (1), Common Lisp (1), Elixir (1), Pascal (1), Perl (1), Pseudocode (1), R (1), Racket (1), Shell (1), Swift (1), Zig (1)
Author	SHA1	Message	Date
Cameron Crouch	bf76da1988	Merge `ea0ce0872b` into `264b4547b3`	2026-06-26 10:11:51 +08:00
Paul Kuruvilla	264b4547b3	Merge pull request #1812 from roiamiel1/add-build-deep-learning-from-scratch add Build Your Own PyTorch	2026-06-25 20:11:09 +05:30
RoiAmiel	750f70669b	Update link for 'Build Deep Learning From Scratch'	2026-06-23 22:14:39 +03:00
Roi Amiel	1b3cb7479d	add build add-build-deep-learning-from-scratch	2026-06-21 21:00:09 +03:00
Cameron Crouch	ea0ce0872b	Add language statistics reporting feature - Add generate_stats.py script to analyze README.md and extract language distribution - Generate STATS-main.md with visualized language statistics - Add GitHub Actions workflow to auto-update stats when README.md changes	2026-01-16 04:25:51 +00:00