# .pre-commit-config.yaml
# Hooks that pre-commit runs against staged files before each commit.
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.4.0
    hooks:
      - id: trailing-whitespace  # trims trailing whitespace
      - id: end-of-file-fixer  # makes sure files end in a newline and only a newline
      - id: check-yaml  # checks that YAML files parse
      - id: check-added-large-files  # blocks commits that add large files
  - repo: https://github.com/kynan/nbstripout
    rev: 0.8.1
    hooks:
      - id: nbstripout  # strips outputs from Jupyter notebooks
  # Disabled local hook. Uncomment to make `pre-commit run collapse` available.
  # pass_filenames is off because the script takes no arguments and walks
  # notebooks/ on its own.
  # - repo: local
  #   hooks:
  #     - id: collapse
  #       name: Run my Python script
  #       entry: python pre_commit_collapse.py
  #       language: system
  #       pass_filenames: false
# pre_commit_collapse.py
"""Collapse all notebooks in notebooks/.

Added to pre-commit as a local hook.
"""
import nbformat
import os
import logging
logging.basicConfig(filename='notebook_collapse.log', level=logging.ERROR)
def collapse_notebook_cells(notebook_path):
    """Clear outputs and mark every code cell as collapsed in one notebook.

    The notebook at ``notebook_path`` is read as nbformat version 4, modified
    in memory and written back to the same path.

    Args:
        notebook_path: Path of the ``.ipynb`` file to rewrite in place.

    Returns:
        None. Any failure is logged rather than raised, so one bad notebook
        does not stop the caller from processing the remaining ones.
    """
    try:
        nb = nbformat.read(notebook_path, as_version=4)
        for cell in nb.cells:
            if cell.cell_type == 'code':
                cell.outputs = []
                # The outputs are gone, so drop the stale run counter as well.
                cell.execution_count = None
                cell.metadata.collapsed = True
        nbformat.write(nb, notebook_path)
    except Exception as e:  # best-effort: log and carry on
        # logging.exception logs at ERROR level and appends the traceback.
        logging.exception(f"Failed to collapse notebook {notebook_path}: {e}")
def process_notebooks(directory):
    """Collapse every ``.ipynb`` file found under ``directory``, recursively.

    Args:
        directory: Root folder to walk with ``os.walk``.
    """
    for dirpath, _subdirs, filenames in os.walk(directory):
        # Keep only notebook files from this folder.
        notebook_names = (name for name in filenames if name.endswith('.ipynb'))
        for name in notebook_names:
            collapse_notebook_cells(os.path.join(dirpath, name))
if __name__ == '__main__':
    # Script entry point: process the notebooks/ folder, resolved relative to
    # the current working directory.
    process_notebooks('notebooks')
# run
# Install the git hook scripts
pre-commit install
# Run against all files
pre-commit run --all-files
# Run a specific hook by id. The local `collapse` hook must be enabled in
# .pre-commit-config.yaml for this command to find it.
pre-commit run collapse --all-files
# nbstripout
# Upgrade and install nbstripout
pip install --upgrade nbstripout
# Install nbstripout in the current repository
nbstripout --install
# To uninstall nbstripout, use the following command
# nbstripout --uninstall
Bash
My Favorite Commands
ls
# List everything recursively and keep only the entries ending in .txt.
# The pattern is quoted so the shell cannot expand it as a glob, and the dot
# is escaped so it matches a literal ".".
ls -lrRha | grep '\.txt$'
# The same flags spelled as GNU long options
ls -l --reverse --recursive --human-readable --all
mkdir
# -p creates the missing parent directories and does not fail if they already exist
mkdir -p par/child/grandchild/
# -ex / list / for
# Stop on the first error (-e) and print each command before executing it (-x)
set -ex
versions=(0.2 0.1)
# [*] joins the elements into one word, which is what a message string needs
echo "Versions: ${versions[*]}"
pip install -r src/requirements.txt --user
python src/visualization/report.py
# Quoted [@] keeps every element as exactly one word
for version in "${versions[@]}"; do
  echo "Running model_predict.py with version ${version}"
  python src/models/model_predict.py --version="${version}"
done
Getting Started
System & Files
# apt
apt update       # update the package list
apt upgrade      # upgrade all packages
apt install curl # install the curl package
# free
# Display the amount of free and used memory in the system
free -h
# The -h flag makes the output human-readable
# Example output:
#        total   used   free   shared  buff/cache  available
# Mem:   15Gi    3.2Gi  8.0Gi  1.0Gi   3.8Gi       10Gi
# Swap:  2.0Gi   0B     2.0Gi
# sudo / rm
# super user do: run the command with elevated privileges
sudo rm -rf MyNewFolder
# remove a folder and all its content
rm -rf MyNewFolder
# copy a folder
cp -r old_folder/ new_folder/
# remove all png files; the "./" prefix keeps a file name that starts with
# "-" from being parsed as an option
rm ./*.png
Hello World !!!
# read / echo / $
echo "What is your name ? "
# -r keeps backslashes in the typed input literal
read -r name
echo "Hi, $name"
# sed / <<< / |
# Replace every ":" with a newline, reading from three kinds of input
sed 's/:/\n/g' file_url # from a file
path=folder1/folder2/file.txt
sed 's/:/\n/g' <<< "$PATH" # from a here-string
echo "$PATH" | sed 's/:/\n/g' # from a pipe
# nano / touch
# create and open the file
nano hello.sh
# in the file:
#!/bin/bash
S="HELLO"
echo "$S"
# Ctrl + O, then ENTER: save
# Ctrl + X: exit (nano asks whether to save unsaved changes)
# head / tail / sed / wc / grep
head -n 2 A.txt # first 2 lines in A.txt
tail -n 1 A.txt # last line in A.txt
# Put each space-separated field on its own line, then count lines/words/bytes
head -n 2 SMSSpamCollection.tsv | sed 's/ /\n/g' | wc
# example output: 65 67 362
wc -l *.tsv # count lines in all tsv files
# search lines that contain the word "line"
grep line 3A.txt
# search for the complement
grep -v line 3A.txt
Scripts
# substitution / <<< / <
# store the content of a file in a variable
data=$(< A.txt)
echo "$data"
# process substitution: compare the output of two commands
diff <(ls) <(ls -l)
# here-string fed by a command substitution; quoted so that whitespace and
# newlines in the sed output reach wc unchanged
wc <<< "$(sed 's/http/- /g' object.json)"
# chaining / |
# print hello.sh, number its lines, then reverse the line order
cat hello.sh | cat -n | tac
# alias
alias ls='ls -l --color=auto'
ls *.txt   # list all .txt files
alias -p   # list all aliases
unalias ls # remove an alias
# seq / do / done
# Equivalent to: for MONTH in 1 2 3 4 5 6;
for MONTH in $(seq 1 6); do
  # inside $(( )) variables are referenced without "$"
  echo $((2 * MONTH + 1))
done
# if / else
echo -n "Please enter a whole number: "
read -r VAR
# Reject anything that is not an integer before the numeric comparison
if ! [[ "$VAR" =~ ^-?[0-9]+$ ]]; then
  echo "Not a whole number: '$VAR'" >&2
elif [ "$VAR" -gt 100 ]; then
  echo "It's greater than 100"
else
  # 100 itself lands here, so the message says "not greater" rather than "less"
  echo "It's not greater than 100"
fi
# */
# for each entry in the current directory that matches */
for i in */; do
  # When nothing matches, the loop sees the literal "*/"; the -d test skips it
  if [[ -d "$i" ]]; then
    # do something on $i
    echo "$i"
  fi
done
Files
File Modification
# > / >> / cat
ls > ls.txt # write the listing to a file
# echo to file (create and modify)
echo "this is A" > A.txt
cat A.txt A.txt > AA.txt  # save (overwrites AA.txt)
cat A.txt A.txt >> AA.txt # append
REM set / echo  (Windows cmd)
REM Add Git to the PATH for the current session
set "PATH=%PATH%;C:\Users\AppData\Local\Programs\Git\bin"
REM Print each PATH entry on its own line
echo %PATH:;=&echo.%
# Get-Command  (PowerShell)
# Show what the name "python" resolves to
Get-Command python
Cloud
AWS
# aws s3 ls
# List all files in the S3 bucket
aws s3 ls s3://folder/ --recursive --human-readable --summarize
# NOTE(review): abbreviated option names; confirm the installed AWS CLI accepts them
aws s3 ls s3://folder/ --recursive --h --s
# Filter and display only .txt files (dot escaped, anchored at end of line)
aws s3 ls s3://folder/ --recursive --human-readable --summarize | grep '\.txt$'
GCP
Favorite Commands
# check last modified date
# Ask BigQuery when the table was last modified, expressed in New York time.
# A failing bq call is reported on its own instead of looking like a stale table.
if ! res=$(bq query \
  --use_legacy_sql=false \
  --project_id=your_project_id \
  "SELECT
EXTRACT(DATETIME FROM TIMESTAMP_MILLIS(last_modified_time) AT TIME ZONE 'America/New_York') AS last_modified_time,
EXTRACT(DATE FROM TIMESTAMP_MILLIS(last_modified_time) AT TIME ZONE 'America/New_York') AS date
FROM your_dataset.__TABLES__ where table_id = 'MW_PRED'"); then
  echo "bq query failed" >&2
  exit 1
fi
# Today's date in the same time zone as the query result
date_now=$(TZ=America/New_York date +"%Y-%m-%d")
# check if the date is contained in the result
if [[ $res == *"$date_now"* ]]; then
  echo "Date $date_now is contained in the result"
else
  echo "Date $date_now is not contained in the result"
  exit 1
fi
Set-Up
# auth / project
gcloud auth login
gcloud projects list
# Replace {PROJECT_ID} with one of the ids printed by the previous command
gcloud config set project {PROJECT_ID}
# Make Bucket
# -b on: uniform bucket-level access; -l: bucket location
gsutil mb -b on -l us-east1 gs://my-awesome-bucket/
Files & Folders
# ls
gsutil ls gs://bucket/folder/
# rm (-m: parallel, -r: recursive)
gsutil -m rm -r gs://bucket/folder
# cp
# copy all files in the remote bucket ending with .csv to the local data folder
# (URL quoted so the local shell leaves the wildcard for gsutil to expand)
gsutil cp "gs://remote_bucket/*.csv" "$local_data_dir"
# local to remote: the wildcard belongs on the local source, and the bucket
# is the destination
gsutil cp "$local_data_dir"/*.csv gs://remote_bucket/
# rsync
# -m : multi-thread, parallel
# -q : quiet
# -r : recursive
gsutil -m -q rsync -r gs://src_url/ des_url