scandi_reddit.cli
Command line interface for creating a Scandinavian Reddit dataset.
View Source
"""Command line interface for creating a Scandinavian Reddit dataset.""" from typing import Optional import click from .build import build_reddit_dataset @click.command() @click.option( "--overwrite/--no-overwrite", "-o", default=False, help="Overwrite existing files.", ) @click.option( "--n-jobs", "-j", default=-2, help="The number of jobs to run in parallel.", ) @click.option( "--starting-year", "-y", default=2005, help="The year to start downloading from. Defaults to 2005.", ) @click.option( "--starting-month", "-m", default=1, help="The month to start downloading from. Defaults to 1.", ) @click.option( "--skip-download/--no-skip-download", default=False, help="Whether to skip downloading the files.", ) @click.option( "--hub-repo-id", default=None, help="The ID of the Hugging Face Hub repository to upload the dataset to.", ) def main( overwrite: bool, n_jobs: int, starting_year: int, starting_month: int, skip_download: bool, hub_repo_id: Optional[str], ) -> None: """Build a Scandinavian Reddit dataset.""" build_reddit_dataset( overwrite=overwrite, n_jobs=n_jobs, starting_year=starting_year, starting_month=starting_month, skip_download=skip_download, hub_repo_id=hub_repo_id, ) if __name__ == "__main__": main()