# Pre-train Static Word Embeddings
# Install the tokenlearn package; the two commands below run its
# `tokenlearn.featurize` and `tokenlearn.train` modules.
pip install tokenlearn

# Step 1: run the featurize module with the "baai/bge-base-en-v1.5" model on
# dataset "allenai/c4" (name "en", split "train"); its output directory is
# data/c4_features.
# NOTE: each backslash must be the LAST character on its line. A backslash
# followed by a space escapes the space instead of continuing the line, which
# glues a leading blank onto the next option (" --model-name") so the CLI no
# longer recognises it as a flag.
python3 -m tokenlearn.featurize \
  --model-name "baai/bge-base-en-v1.5" \
  --output-dir "data/c4_features" \
  --dataset-path "allenai/c4" \
  --dataset-name "en" \
  --dataset-split "train"

# Step 2: run the train module with the same model name, reading from the
# directory written in step 1 (--data-path matches --output-dir above).
# Replace <path-to-save-model> with the location where the result should be
# saved; it is kept quoted so the angle brackets are not parsed as shell
# redirections.
python3 -m tokenlearn.train \
  --model-name "baai/bge-base-en-v1.5" \
  --data-path "data/c4_features" \
  --save-path "<path-to-save-model>"