Skip to main content

Top 50 in Github


Write a program to fetch the latest top 50 open-source projects from the GitHub website. Save the retrieved data to a JSON file.

Python implementation
import requests
from bs4 import BeautifulSoup
import json

def get_top_projects():
url = ''
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')
project_list = []

projects ='article.Box-row')
for project in projects[:50]:
project_data = {}

# Get project name
name_elem = project.select_one('.h3')
project_data['name'] = name_elem.text.replace(' ','').replace('\n','').strip()

# Get project description
desc_elem = project.select_one('p')
if desc_elem:
project_data['description'] = desc_elem.text.strip()
project_data['description'] = None

# Get project URL
link_elem = project.select_one('.h3 a')
project_data['url'] = '' + link_elem['href']


return project_list

def save_to_json(data, filename):
with open(filename, 'w') as f:
json.dump(data, f, indent=4)

if __name__ == '__main__':
projects = get_top_projects()
save_to_json(projects, 'top_projects.json')


Install dependencies and run
pip install bs4 requests
"name": "pytorch-labs/gpt-fast",
"description": "Simple and efficient pytorch-native transformer text generation in <1000 LOC of python.",
"url": ""
"name": "dangeng/visual_anagrams",
"description": "Code for the paper \"Visual Anagrams: Generating Multi-View Optical Illusions with Diffusion Models\"",
"url": ""