Python Automation: 8 Scripts That Save Hours of Manual Work
Eight practical Python automation scripts with complete code. File organizer, bulk renamer, PDF merger, email sender, Excel reports, and more.
The best reason to learn Python isn't machine learning or web development. It's automation. The boring stuff you do every day -- organizing files, renaming hundreds of photos, merging PDFs, generating reports -- Python can do all of it in seconds.
Each script below is complete and ready to use. Copy them, modify them, combine them. That's the whole point.
1. File Organizer
Your Downloads folder is a disaster. This script sorts files into subfolders by type:
import os
import shutil
from pathlib import Path
# Lookup table: destination folder name -> the extensions it collects.
FILE_TYPES = {
    'Images': ['.jpg', '.jpeg', '.png', '.gif', '.svg', '.webp', '.bmp', '.ico'],
    'Documents': ['.pdf', '.doc', '.docx', '.txt', '.rtf', '.odt', '.xls', '.xlsx', '.csv'],
    'Videos': ['.mp4', '.avi', '.mkv', '.mov', '.wmv', '.flv', '.webm'],
    'Audio': ['.mp3', '.wav', '.flac', '.aac', '.ogg', '.wma'],
    'Archives': ['.zip', '.rar', '.7z', '.tar', '.gz', '.bz2'],
    'Code': ['.py', '.js', '.ts', '.html', '.css', '.json', '.xml', '.yaml', '.yml'],
    'Installers': ['.exe', '.msi', '.dmg', '.deb', '.rpm'],
}


def get_category(extension):
    """Return the folder name for *extension* (case-insensitive), or 'Other'."""
    ext = extension.lower()
    return next(
        (category for category, extensions in FILE_TYPES.items() if ext in extensions),
        'Other',
    )


def organize_folder(folder_path):
    """Move every file directly inside *folder_path* into a per-category subfolder.

    Category folders are created on demand. Name clashes are resolved by
    appending "_1", "_2", ... before the extension. Subdirectories (including
    previously created category folders) are left untouched.
    """
    folder = Path(folder_path)
    if not folder.exists():
        print(f"Folder not found: {folder_path}")
        return
    moved = 0
    for entry in folder.iterdir():
        if not entry.is_file():
            continue  # skip directories, including the category folders themselves
        category = get_category(entry.suffix)
        target_dir = folder / category
        target_dir.mkdir(exist_ok=True)
        target = target_dir / entry.name
        # Never overwrite an existing file: probe for a free numbered name.
        n = 1
        while target.exists():
            target = target_dir / f"{entry.stem}_{n}{entry.suffix}"
            n += 1
        shutil.move(str(entry), str(target))
        moved += 1
        print(f" {entry.name} -> {category}/")
    print(f"\nOrganized {moved} files.")
# Usage
# NOTE: expanduser is required here -- Path("~/Downloads") would be treated
# as a literal folder named "~" in the current directory.
organize_folder(os.path.expanduser("~/Downloads"))
Run it once a week. Or better yet, schedule it (see script #8).
2. Bulk File Renamer
Rename hundreds of files with a consistent pattern. Useful for photos, screenshots, or any batch of files:
import os
from pathlib import Path
from datetime import datetime
def bulk_rename(folder_path, prefix="file", start_number=1, use_date=True,
                extension_filter=None, dry_run=True):
    """Rename files in *folder_path* to a sequential "<prefix>_..._NNNN" pattern.

    Args:
        folder_path: directory to process; a leading "~" is expanded.
        prefix: leading text for every new name.
        start_number: first value of the 4-digit sequence counter.
        use_date: include each file's modification date (YYYYMMDD) in the name.
        extension_filter: optional list of suffixes (e.g. ['.jpg']) restricting
            which files are renamed; matching is case-insensitive.
        dry_run: when True (the default), only print the planned renames.
    """
    # BUG FIX: expand "~" so paths like "~/Photos/vacation" (as in the usage
    # example) actually resolve.
    folder = Path(folder_path).expanduser()
    files = sorted(folder.iterdir())
    # BUG FIX: apply the is_file() check unconditionally -- previously, when an
    # extension_filter was given, a directory named e.g. "sub.jpg" matched the
    # filter and got renamed along with the files.
    files = [f for f in files if f.is_file()]
    if extension_filter:
        # Normalize the filter so ['.JPG'] and ['.jpg'] behave the same.
        wanted = {ext.lower() for ext in extension_filter}
        files = [f for f in files if f.suffix.lower() in wanted]
    if not files:
        print("No files found.")
        return
    print(f"{'DRY RUN - ' if dry_run else ''}Renaming {len(files)} files:\n")
    for i, file in enumerate(files, start=start_number):
        suffix = file.suffix
        if use_date:
            # Use file modification time
            mod_time = datetime.fromtimestamp(file.stat().st_mtime)
            date_str = mod_time.strftime("%Y%m%d")
            new_name = f"{prefix}_{date_str}_{i:04d}{suffix}"
        else:
            new_name = f"{prefix}_{i:04d}{suffix}"
        new_path = file.parent / new_name
        print(f" {file.name} -> {new_name}")
        if not dry_run:
            file.rename(new_path)
    if dry_run:
        print("\nThis was a dry run. Add dry_run=False to actually rename.")
# Usage -- always dry run first!
# Produces names like hawaii_20260314_0001.jpg (use_date defaults to True).
# NOTE(review): "~" in the path is not expanded by Path() itself -- confirm
# bulk_rename expands it before relying on this example.
bulk_rename(
    "~/Photos/vacation",
    prefix="hawaii",
    extension_filter=['.jpg', '.jpeg', '.png'],
    dry_run=True
)
The dry_run=True default is intentional. Always preview before renaming. There's no undo for file renames.
3. PDF Merger
Merge multiple PDFs into one. Useful for combining reports, receipts, or scanned documents:
from pypdf import PdfReader, PdfWriter
from pathlib import Path
def merge_pdfs(pdf_paths, output_path="merged.pdf"):
    """Concatenate the given PDF files, in order, into *output_path*."""
    writer = PdfWriter()
    for path in pdf_paths:
        reader = PdfReader(path)
        # Copy every page of this document into the combined writer.
        for page in reader.pages:
            writer.add_page(page)
        print(f" Added: {Path(path).name} ({len(reader.pages)} pages)")
    with open(output_path, 'wb') as fh:
        writer.write(fh)
    print(f"\nMerged into {output_path} ({len(writer.pages)} pages total)")
def merge_all_in_folder(folder_path, output_path="merged.pdf"):
    """Merge every *.pdf in *folder_path* (sorted by filename) into one file.

    Prints a message and returns without writing anything if no PDFs exist.
    """
    # BUG FIX: expand "~" -- the usage example passes "~/Documents/receipts",
    # which Path() treats literally, so glob() would never find any PDFs.
    folder = Path(folder_path).expanduser()
    pdfs = sorted(folder.glob("*.pdf"))
    if not pdfs:
        print("No PDF files found.")
        return
    print(f"Found {len(pdfs)} PDFs:\n")
    merge_pdfs([str(p) for p in pdfs], output_path)
# Usage
# Specific files (merged in the order given)
merge_pdfs([
    "report_q1.pdf",
    "report_q2.pdf",
    "report_q3.pdf",
    "report_q4.pdf"
], output_path="annual_report.pdf")
# All PDFs in a folder (merged in filename order)
merge_all_in_folder("~/Documents/receipts", "all_receipts.pdf")
Install the dependency with pip install pypdf.
4. Email Sender
Send emails programmatically. Perfect for notifications, reports, or alerts:
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase
from email import encoders
from pathlib import Path
def send_email(to, subject, body, attachments=None, html=False):
    """Send an email via SMTP (STARTTLS) with optional file attachments.

    Args:
        to: a single address string or a list of addresses.
        subject: subject line.
        body: message body -- plain text, or HTML when html=True.
        attachments: optional iterable of file paths to attach.
        html: send the body as text/html instead of text/plain.

    Server settings and credentials come from environment variables:
    SENDER_EMAIL and SENDER_PASSWORD (required), SMTP_SERVER and SMTP_PORT
    (optional; default to Gmail on port 587).

    Raises:
        KeyError: if SENDER_EMAIL or SENDER_PASSWORD is unset.
        smtplib.SMTPException / OSError: on connection or authentication failure.
    """
    # Use environment variables for credentials
    import os
    smtp_server = os.environ.get("SMTP_SERVER", "smtp.gmail.com")
    smtp_port = int(os.environ.get("SMTP_PORT", "587"))
    sender_email = os.environ["SENDER_EMAIL"]
    sender_password = os.environ["SENDER_PASSWORD"]
    msg = MIMEMultipart()
    msg["From"] = sender_email
    msg["To"] = to if isinstance(to, str) else ", ".join(to)
    msg["Subject"] = subject
    # Body
    if html:
        msg.attach(MIMEText(body, "html"))
    else:
        msg.attach(MIMEText(body, "plain"))
    # Attachments: generic binary parts, base64-encoded.
    if attachments:
        for filepath in attachments:
            path = Path(filepath)
            with open(path, "rb") as f:
                part = MIMEBase("application", "octet-stream")
                part.set_payload(f.read())
            encoders.encode_base64(part)
            # BUG FIX: pass the filename as a header *parameter* instead of
            # interpolating it into an f-string. The email library then quotes
            # and RFC 2231-encodes names containing spaces, semicolons, or
            # non-ASCII characters; the old form produced a malformed header.
            part.add_header("Content-Disposition", "attachment",
                            filename=path.name)
            msg.attach(part)
    # Send: upgrade to TLS before authenticating.
    with smtplib.SMTP(smtp_server, smtp_port) as server:
        server.starttls()
        server.login(sender_email, sender_password)
        recipients = [to] if isinstance(to, str) else to
        server.sendmail(sender_email, recipients, msg.as_string())
    print(f"Email sent to {msg['To']}")
# Usage
# Requires SENDER_EMAIL and SENDER_PASSWORD in the environment (see above).
send_email(
    to="colleague@company.com",
    subject="Monthly Report - March 2026",
    body="Hi,\n\nPlease find the monthly report attached.\n\nBest regards",
    attachments=["report.pdf", "data.xlsx"]
)
# HTML email to multiple recipients
send_email(
    to=["team@company.com", "manager@company.com"],
    subject="Build Status: Passed",
    body="<h2>Build #142 Passed</h2><p>All 847 tests passed in 3m 22s.</p>",
    html=True
)
For Gmail, use an App Password (not your regular password). Enable 2FA first, then generate an App Password from your Google Account settings.
5. Excel Report Generator
Generate formatted Excel reports from data. Much better than CSV when you need charts, formatting, or multiple sheets:
import openpyxl
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.chart import BarChart, Reference
from datetime import datetime
def generate_sales_report(data, output_path="sales_report.xlsx"):
    """Write a formatted Excel sales report: title, styled table, totals, chart.

    Args:
        data: iterable of rows [product, units_sold, revenue, cost, profit];
              columns 3-5 are rendered with a currency number format.
        output_path: destination .xlsx file.
    """
    wb = openpyxl.Workbook()
    ws = wb.active
    ws.title = "Sales Report"
    # Styles
    header_font = Font(bold=True, color="FFFFFF", size=12)
    header_fill = PatternFill(start_color="2F5496", end_color="2F5496", fill_type="solid")
    border = Border(
        left=Side(style='thin'),
        right=Side(style='thin'),
        top=Side(style='thin'),
        bottom=Side(style='thin')
    )
    # Title: a banner merged across all five data columns.
    ws.merge_cells('A1:E1')
    ws['A1'] = f"Sales Report - {datetime.now().strftime('%B %Y')}"
    ws['A1'].font = Font(bold=True, size=16)
    ws['A1'].alignment = Alignment(horizontal='center')
    # Headers go on row 3 (row 2 stays empty as a visual spacer).
    headers = ['Product', 'Units Sold', 'Revenue', 'Cost', 'Profit']
    for col, header in enumerate(headers, 1):
        cell = ws.cell(row=3, column=col, value=header)
        cell.font = header_font
        cell.fill = header_fill
        cell.alignment = Alignment(horizontal='center')
        cell.border = border
    # Data rows start at row 4, directly below the header row.
    for row_idx, row_data in enumerate(data, 4):
        for col_idx, value in enumerate(row_data, 1):
            cell = ws.cell(row=row_idx, column=col_idx, value=value)
            cell.border = border
            if col_idx >= 3:  # Format currency columns
                cell.number_format = '$#,##0.00'
    # Last data row index: rows 4..len(data)+3 hold the data.
    last_row = len(data) + 3
    # Totals row: live =SUM() formulas, so the sheet stays consistent if a
    # user edits the numbers in Excel afterwards.
    total_row = last_row + 1
    ws.cell(row=total_row, column=1, value="TOTAL").font = Font(bold=True)
    for col in range(2, 6):
        cell = ws.cell(row=total_row, column=col)
        col_letter = openpyxl.utils.get_column_letter(col)
        cell.value = f"=SUM({col_letter}4:{col_letter}{last_row})"
        cell.font = Font(bold=True)
        cell.border = border
        if col >= 3:
            cell.number_format = '$#,##0.00'
    # Column widths
    ws.column_dimensions['A'].width = 20
    for col in ['B', 'C', 'D', 'E']:
        ws.column_dimensions[col].width = 15
    # Chart: revenue (column C) per product.
    chart = BarChart()
    chart.title = "Revenue by Product"
    chart.y_axis.title = "Revenue ($)"
    chart.x_axis.title = "Product"
    chart.style = 10
    # Categories: product names from column A, data rows only (row 4 onward).
    categories = Reference(ws, min_col=1, min_row=4, max_row=last_row)
    # Values: column C *including* the header row (row 3); titles_from_data
    # below consumes that header cell as the series name.
    values = Reference(ws, min_col=3, min_row=3, max_row=last_row)
    chart.add_data(values, titles_from_data=True)
    chart.set_categories(categories)
    chart.shape = 4
    # Anchor the chart a couple of rows below the totals row.
    ws.add_chart(chart, f"A{total_row + 3}")
    wb.save(output_path)
    print(f"Report saved to {output_path}")
# Usage
# Each row: [product, units_sold, revenue, cost, profit]
sales_data = [
    ["Widget A", 1250, 31250.00, 18750.00, 12500.00],
    ["Widget B", 890, 44500.00, 22250.00, 22250.00],
    ["Widget C", 2100, 21000.00, 14700.00, 6300.00],
    ["Widget D", 450, 67500.00, 33750.00, 33750.00],
    ["Widget E", 1800, 27000.00, 16200.00, 10800.00],
]
generate_sales_report(sales_data)
Install with pip install openpyxl.
6. Web Scraping Monitor
Monitor a web page for changes and get notified. Useful for price tracking, stock monitoring, or tracking updates:
import requests
from bs4 import BeautifulSoup
import hashlib
import json
import time
from pathlib import Path
from datetime import datetime
# Hash cache persisted next to the script so restarts keep state.
CACHE_FILE = Path("monitor_cache.json")


def load_cache():
    """Return the persisted url->hash cache, or an empty dict if none exists."""
    try:
        return json.loads(CACHE_FILE.read_text())
    except FileNotFoundError:
        return {}


def save_cache(cache):
    """Write *cache* to disk as pretty-printed JSON."""
    CACHE_FILE.write_text(json.dumps(cache, indent=2))
def get_page_content(url, selector=None):
    """Fetch *url* and return its visible text, optionally narrowed by a CSS selector."""
    # A browser-like User-Agent avoids the most basic bot blocks.
    response = requests.get(
        url,
        headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"},
        timeout=30,
    )
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    if not selector:
        return soup.get_text(strip=True)
    return "\n".join(el.get_text(strip=True) for el in soup.select(selector))
def get_content_hash(content):
    """Return the SHA-256 hex digest of *content* (UTF-8 encoded)."""
    digest = hashlib.sha256()
    digest.update(content.encode())
    return digest.hexdigest()
def monitor(targets, interval_seconds=3600):
    """Poll each target URL forever and report whenever its content changes.

    Each target is a dict with "name", "url", and an optional CSS "selector".
    A content hash per URL is kept in the JSON cache, so change detection
    survives restarts. Runs until interrupted.
    """
    print(f"Monitoring {len(targets)} targets...")
    print(f"Checking every {interval_seconds} seconds\n")
    cache = load_cache()
    while True:
        for target in targets:
            name = target["name"]
            url = target["url"]
            try:
                content = get_page_content(url, target.get("selector"))
                current_hash = get_content_hash(content)
                stamp = f"{datetime.now():%H:%M:%S}"
                if url not in cache:
                    # First time we see this URL: just record the baseline.
                    print(f"[{stamp}] {name}: First check recorded")
                    cache[url] = current_hash
                elif cache[url] != current_hash:
                    print(f"[{stamp}] {name}: CHANGED!")
                    print(f" Content preview: {content[:200]}...")
                    cache[url] = current_hash
                    # You could call send_email() here to get notified
                else:
                    print(f"[{stamp}] {name}: No changes")
            except Exception as e:
                # One failing target must not stop the whole monitor.
                print(f"[{datetime.now():%H:%M:%S}] {name}: Error - {e}")
        # Persist after each full sweep so a crash loses at most one round.
        save_cache(cache)
        time.sleep(interval_seconds)
# Usage
# Each target: a display name, the URL to poll, and an optional CSS selector
# restricting which part of the page is hashed.
targets = [
    {
        "name": "Product Price",
        "url": "https://example.com/product",
        "selector": ".price"
    },
    {
        "name": "Blog Updates",
        "url": "https://example.com/blog",
        "selector": "article h2"
    }
]
monitor(targets, interval_seconds=1800)  # Check every 30 minutes
Install with pip install requests beautifulsoup4.
7. Screenshot Automation
Take screenshots of web pages automatically. Great for visual monitoring, documentation, or archiving:
from playwright.sync_api import sync_playwright
from pathlib import Path
from datetime import datetime
def take_screenshots(urls, output_dir="screenshots", full_page=True,
                     viewport_width=1280, viewport_height=720):
    """Capture a PNG screenshot of each URL into *output_dir*.

    Filenames are "<timestamp>_<sanitized-url>.png", so repeated runs never
    collide. A failure on one URL is reported and does not stop the batch.

    Args:
        urls: iterable of page URLs to capture.
        output_dir: destination directory (created if missing, parents too).
        full_page: capture the whole scrollable page, not just the viewport.
        viewport_width / viewport_height: browser viewport in pixels.
    """
    output = Path(output_dir)
    # BUG FIX: parents=True -- the usage example passes "screenshots/mobile",
    # and a plain mkdir() raises FileNotFoundError for a nested path.
    output.mkdir(parents=True, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    with sync_playwright() as p:
        browser = p.chromium.launch()
        context = browser.new_context(
            viewport={"width": viewport_width, "height": viewport_height}
        )
        page = context.new_page()
        for url in urls:
            try:
                page.goto(url, wait_until="networkidle", timeout=30000)
                # Create a clean filename from the URL
                clean_name = url.replace("https://", "").replace("http://", "")
                clean_name = clean_name.replace("/", "_").replace(".", "_")
                filename = f"{timestamp}_{clean_name}.png"
                filepath = output / filename
                page.screenshot(path=str(filepath), full_page=full_page)
                # BUG FIX: print the generated filename -- the original line
                # printed a "(unknown)" placeholder instead.
                print(f" Captured: {url} -> {filename}")
            except Exception as e:
                print(f" Failed: {url} - {e}")
        browser.close()
    print(f"\nScreenshots saved to {output_dir}/")
# Usage
urls = [
    "https://news.ycombinator.com",
    "https://github.com/trending",
    "https://example.com"
]
take_screenshots(urls)
# Mobile screenshots (iPhone-sized viewport, visible area only)
take_screenshots(
    urls,
    output_dir="screenshots/mobile",
    viewport_width=390,
    viewport_height=844,
    full_page=False
)
Install with pip install playwright && playwright install chromium.
8. Scheduled Backup Script
Back up important directories to a timestamped archive. Combine with your OS task scheduler for automated backups:
import shutil
import os
from pathlib import Path
from datetime import datetime
import logging
# Setup logging
# Two handlers: backup.log keeps a history for scheduled (unattended) runs,
# StreamHandler echoes the same lines to the console for manual runs.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('backup.log'),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)
def backup(source_dirs, backup_dir, max_backups=10):
    """Archive each source directory into *backup_dir* as a timestamped .tar.gz.

    Args:
        source_dirs: iterable of directory paths ("~" is expanded); missing
            sources are skipped with a warning.
        backup_dir: destination directory ("~" is expanded; created if needed).
        max_backups: how many archives to keep after this run.

    NOTE(review): the retention limit applies across ALL sources combined,
    not per source -- confirm that is the intended policy before raising the
    number of source directories.
    """
    # BUG FIX: expand "~" -- the usage example passes "~/Backups", which
    # Path() treats literally, creating a directory named "~" in the cwd.
    backup_base = Path(backup_dir).expanduser()
    backup_base.mkdir(parents=True, exist_ok=True)
    # One shared timestamp so all archives from this run sort together.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    for source in source_dirs:
        source_path = Path(source).expanduser()
        if not source_path.exists():
            logger.warning(f"Source not found: {source}")
            continue
        dir_name = source_path.name
        archive_name = f"{dir_name}_{timestamp}"
        archive_path = backup_base / archive_name
        try:
            logger.info(f"Backing up: {source_path}")
            # Create compressed archive; make_archive appends ".tar.gz" itself.
            shutil.make_archive(
                str(archive_path),
                'gztar',  # .tar.gz format
                root_dir=str(source_path.parent),
                base_dir=source_path.name
            )
            archive_file = Path(f"{archive_path}.tar.gz")
            size_mb = archive_file.stat().st_size / (1024 * 1024)
            logger.info(f"Created: {archive_file.name} ({size_mb:.1f} MB)")
        except Exception as e:
            # Keep going: one bad source must not abort the other backups.
            logger.error(f"Failed to backup {source}: {e}")
    # Clean old backups
    cleanup_old_backups(backup_base, max_backups)
def cleanup_old_backups(backup_dir, max_backups):
    """Delete all but the *max_backups* newest .tar.gz archives in *backup_dir*."""
    # Newest first, by modification time; everything past the cutoff goes.
    newest_first = sorted(
        backup_dir.glob("*.tar.gz"),
        key=lambda f: f.stat().st_mtime,
        reverse=True,
    )
    for stale in newest_first[max_backups:]:
        stale.unlink()
        logger.info(f"Removed old backup: {stale.name}")
def verify_backup(archive_path):
    """Verify a backup archive is not corrupted.

    Returns True if the gzip-compressed tar can be fully read, else False.
    """
    import tarfile
    try:
        with tarfile.open(archive_path, 'r:gz') as tar:
            # Walking the full member list forces a read of the entire
            # archive, which surfaces truncation or gzip corruption.
            member_count = len(tar.getmembers())
            logger.info(f"Verified: {Path(archive_path).name} ({member_count} files)")
            return True
    except Exception as e:
        logger.error(f"Corrupted backup: {archive_path} - {e}")
        return False
# Usage
# NOTE(review): confirm backup() expands "~" in backup_dir -- Path("~/Backups")
# taken literally would create a directory named "~" in the working directory.
backup(
    source_dirs=[
        "~/Documents/projects",
        "~/Documents/notes",
        "~/.config"
    ],
    backup_dir="~/Backups",
    max_backups=10
)
To schedule it:
# Linux/macOS: add to crontab (runs daily at 2 AM)
# crontab -e
0 2 * * * /usr/bin/python3 /path/to/backup_script.py
# Windows: use Task Scheduler
# Or from a command prompt / PowerShell:
# schtasks /create /tn "DailyBackup" /tr "python C:\scripts\backup.py" /sc daily /st 02:00
Combining Scripts
The real power comes from combining these scripts. Here's a Monday morning automation:
def monday_morning():
    """Run every Monday at 8 AM."""
    # NOTE(review): get_weekly_data() and competitor_urls are not defined in
    # this article -- supply them before running this function.
    # Organize downloads from the past week
    # NOTE(review): "~/Downloads" is passed literally here -- confirm
    # organize_folder expands "~" (the earlier usage wrapped it in expanduser).
    organize_folder("~/Downloads")
    # Back up important files
    backup(["~/Documents/work", "~/Projects"], "~/Backups")
    # Generate and email weekly report
    generate_sales_report(get_weekly_data(), "weekly_report.xlsx")
    send_email(
        to="team@company.com",
        subject=f"Weekly Report - {datetime.now():%B %d, %Y}",
        body="Hi team,\n\nAttached is this week's sales report.\n\nBest",
        attachments=["weekly_report.xlsx"]
    )
    # Take screenshots of competitor sites
    take_screenshots(competitor_urls, "screenshots/weekly")
Common Mistakes
Not using pathlib. String concatenation for file paths is fragile and OS-dependent. pathlib.Path handles Windows backslashes, home directory expansion, and path manipulation correctly.
Hardcoding credentials. Use environment variables or a .env file. Never put passwords, API keys, or email credentials directly in your scripts.
No dry run mode. Any script that modifies files should have a preview/dry-run mode. You don't want to accidentally rename 10,000 files with the wrong pattern.
Ignoring errors silently. Use try/except, but log the errors. A backup script that fails silently is worse than no backup script.
Running without testing on a small sample first. Test with 5 files before running on 5,000.
What's Next
These eight scripts cover the most common automation tasks. The pattern is always the same: identify repetitive work, write a Python script, schedule it. Once you build this habit, you'll start seeing automation opportunities everywhere.
For more Python practice and project ideas, check out CodeUp.