Linux

config
git config --global user.email "wei@abc.com"
git config --global user.name "wei"
git config --global credential.helper store
git config --global http.sslVerify false

.gitconfig
[credential "https://git.xxx.com"]
    provider = generic
    helper = cache
[user]
    email = wyang@xxx.com
    name = Wei Yang
ssh-keygen
ssh-keygen -t ed25519

clone branch checkout --force
git clone https://LINK OPTIONAL_FOLDER_NAME

# option 1
git branch NEW_BRANCH_NAME
git checkout NEW_BRANCH_NAME

# option 2 (create and checkout)
git checkout -b NEW_BRANCH_NAME

git checkout --force NEW_BRANCH_NAME

status add commit push delete
git status # check the status

git add --all
# git add file1 file2
git commit -m "added all"

git commit file1 file2 -m "added file1 and file2"

git push

fetch merge pull
# WARNING: discards ALL uncommitted changes in the working tree
git restore .
git checkout main
git fetch
git pull origin main
git merge new_features
git push

# delete a remote branch
git push origin --delete new_features
# delete a local branch (use -D instead of -d to force-delete an unmerged branch)
git branch -d new_features
# remove stale remote-tracking references to branches already deleted on the remote
git fetch --prune

Pre-commit

install
# Install pre-commit
pip install pre-commit

# Create config file
touch .pre-commit-config.yaml

.pre-commit-config.yaml
# .pre-commit-config.yaml
# Each list item under `repos` is one mapping; `repo`, `rev` and `hooks` must
# all start in the same column, otherwise the file is not valid YAML.
repos:
-   repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.4.0
    hooks:
    -   id: trailing-whitespace # trims trailing whitespace
    -   id: end-of-file-fixer # makes sure files end in a newline and only a newline
    -   id: check-yaml
    -   id: check-added-large-files
-   repo: https://github.com/kynan/nbstripout
    rev: 0.8.1
    hooks:
    -   id: nbstripout
# Optional local hook that runs pre_commit_collapse.py (disabled):
# -   repo: local
#     hooks:
#     -   id: collapse
#         name: Run my Python script
#         entry: python pre_commit_collapse.py
#         language: system


pre_commit_collapse.py
"""Collapse all notebooks in notebooks/.
Added to pre-commit."""
import nbformat
import os
import logging

logging.basicConfig(filename='notebook_collapse.log', level=logging.ERROR)

def collapse_notebook_cells(notebook_path):
    """Clear the outputs of, and collapse, every code cell of one notebook.

    The notebook at ``notebook_path`` is read as nbformat version 4, modified
    in memory, and written back to the same path. Any exception raised along
    the way is logged at ERROR level and swallowed, so this never raises.
    """
    try:
        notebook = nbformat.read(notebook_path, as_version=4)
        code_cells = [c for c in notebook.cells if c.cell_type == 'code']
        for code_cell in code_cells:
            code_cell.outputs = []
            code_cell.metadata.collapsed = True

        nbformat.write(notebook, notebook_path)

    except Exception as err:  # best effort: log the failure and carry on
        logging.error(f"Failed to collapse notebook {notebook_path}: {err}")


def process_notebooks(directory):
    """Walk ``directory`` recursively and collapse every ``.ipynb`` file found."""
    for current_dir, _subdirs, filenames in os.walk(directory):
        notebook_names = (name for name in filenames if name.endswith('.ipynb'))
        for name in notebook_names:
            collapse_notebook_cells(os.path.join(current_dir, name))

# Script entry point: collapse every notebook under the "notebooks" directory.
# The path is relative, so it is resolved against the current working directory.
if __name__ == '__main__':
    repo_root = 'notebooks'
    process_notebooks(repo_root)

run
# Install the git hook scripts
pre-commit install

# Run against all files
pre-commit run --all-files

# Run specific hook
pre-commit run collapse --all-files

nbstripout
# Upgrade and install nbstripout
pip install --upgrade nbstripout

# Install nbstripout in the current repository
nbstripout --install

# To uninstall nbstripout, use the following command
# nbstripout --uninstall

Bash

My Favorite Commands

ls 
# Recursive long listing, filtered to entries ending in .txt.
# The pattern is quoted so the shell does not glob-expand it into file names,
# and written as a regex because grep does not understand shell globs.
ls -lrRha | grep '\.txt$'

ls -l --reverse --recursive --human-readable --all
mkdir
mkdir -p par/child/grandchild/

TZ= echo
TZ=America/New_York
# Timestamp in New York time. TZ is set on the `date` command itself: a plain
# `TZ=...` assignment only reaches child processes if TZ is already exported.
time_now=$(TZ=America/New_York date +"%Y-%m-%d_%H-%M-%S")
echo "Time: ${time_now}"
ls mv
echo "Current directory: $(pwd)"

mv log_file.log /mnt/artifacts/log_file_${time_now}.log

-ex list for
# stop on error and print each command before executing
set -ex

# Versions passed to model_predict.py, in run order
versions=(0.2 0.1)
# [*] joins the elements into a single string for display
echo "Versions: ${versions[*]}"

pip install -r src/requirements.txt --user

python src/visualization/report.py
# Quote the array expansion so every element stays exactly one word
for version in "${versions[@]}"; do
    echo "Running model_predict.py with version ${version}"
    python src/models/model_predict.py --version="${version}"
done

Getting Started

System & Files

apt
apt update # update package list
apt upgrade # upgrade all packages
apt install curl # install pkg curl

free
# Display the amount of free and used memory in the system
free -h
# The -h flag makes the output human-readable
# Example output:
#               total        used        free      shared  buff/cache   available
# Mem:           15Gi       3.2Gi       8.0Gi       1.0Gi       3.8Gi       10Gi
# Swap:         2.0Gi          0B       2.0Gi

sudo rm
# super user do
sudo rm -rf MyNewFolder
# remove a folder and all its content
rm -rf MyNewFolder

# copy a folder
cp -r old_folder/ new_folder/

# remove all png files
rm *.png

Hello World !!!

read echo $
echo "What is your name ? "
# -r stops read from treating backslashes in the input as escape characters
read -r name
echo "Hi, $name"
sed <<< |
sed 's/:/\n/g' file_url
path=folder1/folder2/file.txt
sed 's/:/\n/g' <<< $PATH
echo $PATH | sed 's/:/\n/g'

nano touch
# create and open the file
nano hello.sh

# in the file:
    #!/bin/bash
    S="HELLO"
    echo $S

# Ctrl + O, then ENTER: save
# Ctrl + X: exit (nano asks whether to save unsaved changes)

head tail sed wc grep
head -2 A.txt # first 2 lines in A.txt
tail -1 A.txt # last line in A.txt
head -2 SMSSpamCollection.tsv | sed 's/ /\n/g' | wc
#      65      67     362

wc -l *.tsv # count lines in all tsv files

# search lines that contain the word "line"
grep line 3A.txt

# search for the complement
grep -v line 3A.txt

Scripts

substitution < <<<
# store the content of file
data=$(< A.txt)
echo "$data"

diff <(ls) <(ls -l)

wc <<< $(sed 's/http/- /g' object.json)

chaining |
cat hello.sh | cat -n | tac

alias
alias ls='ls -l --color=auto'

ls *.txt # list all .txt files

alias -p # list all alias

unalias ls # remove an alias

seq do done
# for MONTH in 1 2 3 4 5 6;
# Print 2*MONTH + 1 for MONTH = 1..6, one value per line
for MONTH in $(seq 1 6); do
    echo $((2 * MONTH + 1))
done

if else
echo -n "Please enter a whole number: "
# Read one line and compare it with 100.
# The input is validated first: an empty or non-numeric value would otherwise
# make the numeric test fail with a syntax error.
read -r VAR
if [[ ! "$VAR" =~ ^-?[0-9]+$ ]]; then
    echo "Not a whole number: '$VAR'" >&2
elif [ "$VAR" -gt 100 ]; then
    echo "It's greater than 100"
else
    # 100 itself lands here, so the message says "or equal"
    echo "It's less than or equal to 100"
fi

*/
# for each entry in the current directory
for i in */; do
    # skip anything that is not a directory
    # (this includes the literal pattern left behind when nothing matches)
    [ -d "$i" ] || continue
    # do something on $i
    echo "$i"
done

Files

File Modification

> >> cat
ls >ls.txt # to file

# echo to file (create and modify)
echo "this is A" > A.txt

cat A.txt A.txt > AA.txt # save
cat A.txt A.txt >> AA.txt # append

< { } &
grep "AA.txt" < ls.txt
# AAA.txt
# AA.txt

{ echo "1" && ls; } > A.txt
cat < A.txt

tee
# combine stderr into stdout
# display on screen and save to file
COMMAND 2>&1 | tee filename.txt

File Packaging

gzip gunzip
# zip three files
cat A.txt AA.txt AAA.txt | gzip > 3A.txt.gz

# unzip to get the concatenated file
gunzip 3A.txt.gz

tar
Out=`date +%Y%m%d%s`doc.tar.gz
tar -czf $Out Documents; ls
# 1.ipynb  1.py  202204011648848502doc.tar.gz  A.txt  Documents/

File Upload and Download

curl
url=https://raw.githubusercontent.com/weiyang2048/Garage/main/Data%20Sets/SMSSpamCollection.tsv
# Download $url and save it under the URL's basename.
# Expansions are quoted so the URL and file name stay single words.
# -k (skip TLS certificate verification) is dropped so the download is verified;
# re-add it only for hosts with self-signed certificates.
curl -o "$(basename "$url")" "$url"

wget
!wget -q --show-progress 'https://github.com/weiyang2048/Garage/blob/main/Data%20Sets/sky.jpg?raw=true' -O sky.jpg
!wget -q --show-progress 'https://github.com/weiyang2048/Garage/blob/main/Data%20Sets/stone.jpg?raw=true' -O stone.jpg

CMD & PS

set echo
# Add Git to the PATH for the current session
set PATH=%PATH%;C:\Users\AppData\Local\Programs\Git\bin
echo %PATH:;=&echo.%

Get-Command
Get-Command python

Cloud

AWS

aws s3 ls
# List all files in the S3 bucket
aws s3 ls s3://folder/ --recursive --human-readable --summarize
aws s3 ls s3://folder/ --recursive --h --s

# Filter and display only .txt files.
# The dot is escaped and the match anchored at end of line; an unescaped "."
# would match any character (e.g. "Xtxt" anywhere in the line).
aws s3 ls s3://folder/ --recursive --human-readable --summarize | grep '\.txt$'

GCP

Favorite Commands

check last modified date
res=$(bq query \
    --use_legacy_sql=false \
    --project_id=your_project_id \
    "SELECT
    EXTRACT(DATETIME FROM TIMESTAMP_MILLIS(last_modified_time) AT TIME ZONE 'America/New_York') AS last_modified_time,
    EXTRACT(DATE FROM TIMESTAMP_MILLIS(last_modified_time) AT TIME ZONE 'America/New_York') AS date
    FROM your_dataset.__TABLES__ where table_id = 'MW_PRED'")
# Today's date in New York time
date_now=$(TZ=America/New_York date +"%Y-%m-%d")
# Succeed only when that date appears somewhere in $res; otherwise exit 1
case "$res" in
    *"$date_now"*)
        echo "Date $date_now is contained in the result"
        ;;
    *)
        echo "Date $date_now is not contained in the result"
        exit 1
        ;;
esac

Set-Up

auth project
gcloud auth login
gcloud projects list
gcloud config set project {PROJECT_ID}

Make Bucket
gsutil mb -b on -l us-east1 gs://my-awesome-bucket/

Files & Folders

ls
gsutil ls gs://bucket/folder/
rm
gsutil -m rm -r gs://bucket/folder

cp
# copy all files in the bucket ending with .csv to the local data folder
# The remote wildcard is quoted so gsutil, not the local shell, expands it
gsutil cp "gs://remote_bucket/*.csv" "$local_data_dir"
# local to remote: the wildcard goes on the local source; the destination is the bucket
gsutil cp "$local_data_dir"/*.csv gs://remote_bucket/
rsync
gsutil -m -q rsync -r gs://src_url/ des_url
# -m : multi-thread, parallel
# -q : quiet
# -r : recursive

Shells

References

Git

Basics