
Commit a9b5807

d116626 authored, with co-authors rdahis, lucasnascm, gustavoairestiago, and lucascr91
[infra] Python 1.6.2 (#1109)
* feat(infra): create version 1.6.2
* feat(infra): create version 1.6.2
* feat(infra): create version 1.6.2
* [infra] python-v1.6.2 (#1089)
* [infra] fix dataset_config.yaml folder path (#1067)
* feat(infra): merge master
* [infra] conform Metadata to new metadata changes (#1093)
* [dados-bot] br_ms_vacinacao_covid19 (2022-01-23) (#1086)
  Co-authored-by: terminal_name <github_email>
* [dados] br_bd_diretorios_brasil.etnia_indigena (#1087)
* Upload the etnia_indigena directory
* Update table_config.yaml
* Update table_config.yaml
* feat: conform Metadata's schema to the new one
* fix: conform yaml generation to the new schema
* fix: delete test_dataset folder
  Co-authored-by: Lucas Moreira <65978482+lucasnascm@users.noreply.github.com>
  Co-authored-by: Gustavo Aires Tiago <36206956+gustavoairestiago@users.noreply.github.com>
  Co-authored-by: Ricardo Dahis <6617207+rdahis@users.noreply.github.com>
  Co-authored-by: Lucas Moreira <65978482+lucasnascm@users.noreply.github.com>
  Co-authored-by: Gustavo Aires Tiago <36206956+gustavoairestiago@users.noreply.github.com>
* feat(infra): 1.6.2a3 version
* feat(infra): 1.6.2a3 version
* fix(infra): edit partitions and update_locally
* feat(infra): update_columns gains new fields and accepts local files
* [infra] option to make dataset public (#1020)
* feat(infra): option to make dataset public
* feat(infra): fix None data
* fix(infra): roll back
* fix(infra): fix retry in storage upload
* fix(infra): add option for dataset data location
* feat(infra): make staging dataset not public
* feat(infra): make staging dataset not public
* fix(infra): change bd version in actions
* fix(infra): add toml to the CI install
* fix(infra): remove a forgotten print
* fix(infra): fix location
* fix(infra): fix dataset description
* feat(infra): bump version
* feat(infra): temporal coverage as list in update_columns
* feat(infra): add new parameters to cli
* feat(infra): fix cli options
* [infra] change download functions to consume CKAN endpoints #1129 (#1130)
* [infra] add function to wrap bd_dataset_search endpoint
* Update download.py
* [infra] modify list_datasets function to consume CKAN endpoint
* [infra] fix list_dataset function to include limit and remove order_by
* [infra] change function list_dataset_tables to use CKAN endpoint
* [infra] apply PEP8 to list_dataset_tables and respective tests
* add get_dataset_description, get_table_description, get_table_columns
* [infra] fix dataset_config.yaml folder path (#1067)
* feat(infra): merge master
* fix file organization to match master
* remove download.py
* remove test_download
* Delete test_download.py
* remove test files
* remove test_download.py
* remove test_download.py
* remove test_download.py
* remove test_download.py
* add tests metadata
* remove test_download.py
* remove unused imports
* [infra] add _safe_fetch and get_table_size functions
  Co-authored-by: lucascr91 <lucas.ecomg@gmail.com>
* fix(infra): add an empty list when there is no partition
* [infra] Add support for Avro and Parquet (#1145)
* add support for Avro and Parquet uploads
* Add tests for source formats
* [infra] update tests for avro, parquet, and csv upload
  Co-authored-by: Gabriel Gazola Milan <gabriel.gazola@poli.ufrj.br>
  Co-authored-by: Isadora Bugarin <isadorabugarin@gmail.com>
  Co-authored-by: lucascr91 <lucas.ecomg@gmail.com>
* [infra] Feedback messages in upload methods [issue #1059] (#1085)
* Create dataclass config
* Success messages for create and update (table.py) using loguru
* feat: improve log level control
* refactor: move logger config to Base.__init__
* Improve log level control
* Adjust log level control function in base.py
* Fix repeated 'DELETE' messages every time a Table is replaced
* Import 'dataclass' from 'dataclasses' to make config work
* Fix repeated 'UPDATE' messages inside other functions
* Define a new script message format
* Define standard log messages for 'dataset.py' functions
* Define standard log messages for 'storage.py' functions
* Define standard log messages for 'table.py' functions
* Define standard log messages for 'metadata.py' functions
* Add standard configuration for billing_project_id in download.py
* Configure billing_project_id in download.py
* Configure config_path in base.py
  Co-authored-by: Guilherme Salustiano <guissalustiano@gmail.com>
  Co-authored-by: Isadora Bugarin <isadorabugarin@gmail.com>
* update toml

Co-authored-by: Ricardo Dahis <6617207+rdahis@users.noreply.github.com>
Co-authored-by: Lucas Moreira <65978482+lucasnascm@users.noreply.github.com>
Co-authored-by: Gustavo Aires Tiago <36206956+gustavoairestiago@users.noreply.github.com>
Co-authored-by: lucascr91 <lucas.ecomg@gmail.com>
Co-authored-by: Isadora Bugarin <57679195+isadorabugarin@users.noreply.github.com>
Co-authored-by: Gabriel Gazola Milan <gabriel.gazola@poli.ufrj.br>
Co-authored-by: Isadora Bugarin <isadorabugarin@gmail.com>
Co-authored-by: Guilherme Salustiano <guissalustiano@gmail.com>
1 parent 86b12f3 commit a9b5807

22 files changed: +1706 −1187 lines changed


.github/workflows/python-ci.yml

Lines changed: 2 additions & 2 deletions
@@ -61,7 +61,7 @@ jobs:
       run: |
         cd python-package
         pip install -r requirements-dev.txt
-        pip install coveralls
+        pip install coveralls toml
       shell: bash
     - name: Install package
       run: |
@@ -109,7 +109,7 @@ jobs:
       run: |
         cd python-package
         pip install -r requirements-dev.txt
-        pip install coveralls
+        pip install coveralls toml
       shell: cmd
     - name: Install package
       run: |
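The `toml` dependency added above is only needed at CI time. A hedged sketch of the presumed consumer, assuming the workflow reads the package version out of pyproject.toml (the path and keys below are illustrative, not taken from this diff):

```python
# Illustrative only: read the version that `poetry version` writes, the kind
# of check that requires the `toml` package in CI.
import toml

pyproject = toml.load("python-package/pyproject.toml")  # assumed path
print(pyproject["tool"]["poetry"]["version"])  # e.g. "1.6.2"
```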

bases/br_bd_diretorios_brasil/dataset_config.yaml

Lines changed: 2 additions & 1 deletion
@@ -45,4 +45,5 @@ github_url:

 # Não altere esse campo.
 # Data da última modificação dos metadados gerada automaticamente pelo CKAN.
-metadata_modified: '2022-02-09T21:59:32.440801'
+
+metadata_modified: '2022-02-09T21:59:32.440801'

bases/test_dataset/README.md

Lines changed: 0 additions & 7 deletions
This file was deleted.

python-package/README.md

Lines changed: 7 additions & 0 deletions
@@ -37,3 +37,10 @@ Publique nova versão
 poetry version [patch|minor|major]
 poetry publish --build
 ```
+
+Versão Alpha e Beta
+
+```
+version = "1.6.2-alpha.3"
+version = "1.6.2-beta.3"
+```

python-package/basedosdados/__init__.py

Lines changed: 2 additions & 1 deletion
@@ -21,4 +21,5 @@
     get_dataset_description,
     get_table_columns,
     get_table_size,
-)
+    search
+)
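A hedged usage sketch of the newly exported `search`, which the commit message describes as a wrapper around the CKAN `bd_dataset_search` endpoint; the keyword arguments below are assumptions, not a documented signature:

```python
import basedosdados as bd

# `query` and `order_by` are assumed parameters, inferred from the
# bd_dataset_search wrapper mentioned in the commit message.
results = bd.search(query="vacinacao covid19", order_by="score")
print(results.head())
```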

python-package/basedosdados/cli/cli.py

Lines changed: 83 additions & 24 deletions
@@ -77,10 +77,25 @@ def mode_text(mode, verb, obj_id):
     default="raise",
     help="[raise|update|replace|pass] if dataset alread exists",
 )
+@click.option(
+    "--dataset_is_public",
+    default=True,
+    help="Control if prod dataset is public or not. By default staging datasets like `dataset_id_staging` are not public.",
+)
+@click.option(
+    "--location",
+    default=None,
+    help="Location of dataset data. List of possible region names locations: https://cloud.google.com/bigquery/docs/locations",
+)
 @click.pass_context
-def create_dataset(ctx, dataset_id, mode, if_exists):
+def create_dataset(ctx, dataset_id, mode, if_exists, dataset_is_public, location):

-    Dataset(dataset_id=dataset_id, **ctx.obj).create(mode=mode, if_exists=if_exists)
+    Dataset(dataset_id=dataset_id, **ctx.obj).create(
+        mode=mode,
+        if_exists=if_exists,
+        dataset_is_public=dataset_is_public,
+        location=location,
+    )

     click.echo(
         click.style(
@@ -96,9 +111,9 @@ def create_dataset(ctx, dataset_id, mode, if_exists):
     "--mode", "-m", default="all", help="What datasets to create [prod|staging|all]"
 )
 @click.pass_context
-def update_dataset(ctx, dataset_id, mode):
+def update_dataset(ctx, dataset_id, mode, location):

-    Dataset(dataset_id=dataset_id, **ctx.obj).update(mode=mode)
+    Dataset(dataset_id=dataset_id, **ctx.obj).update(mode=mode, location=location)

     click.echo(
         click.style(
@@ -110,10 +125,17 @@ def update_dataset(ctx, dataset_id, mode):

 @cli_dataset.command(name="publicize", help="Make a dataset public")
 @click.argument("dataset_id")
+@click.option(
+    "--dataset_is_public",
+    default=True,
+    help="Control if prod dataset is public or not. By default staging datasets like `dataset_id_staging` are not public.",
+)
 @click.pass_context
-def publicize_dataset(ctx, dataset_id):
+def publicize_dataset(ctx, dataset_id, dataset_is_public):

-    Dataset(dataset_id=dataset_id, **ctx.obj).publicize()
+    Dataset(dataset_id=dataset_id, **ctx.obj).publicize(
+        dataset_is_public=dataset_is_public
+    )

     click.echo(
         click.style(
@@ -168,7 +190,12 @@ def cli_table():
     help="[raise|replace|pass] actions if table config files already exist",
 )
 @click.option(
-    "--columns_config_url",
+    "--source_format",
+    default="csv",
+    help="Data source format. Only 'csv' is supported. Defaults to 'csv'.",
+)
+@click.option(
+    "--columns_config_url_or_path",
     default=None,
     help="google sheets URL. Must be in the format https://docs.google.com/spreadsheets/d/<table_key>/edit#gid=<table_gid>. The sheet must contain the column name: 'coluna' and column description: 'descricao'.",
 )
@@ -180,14 +207,16 @@ def init_table(
     data_sample_path,
     if_folder_exists,
     if_table_config_exists,
-    columns_config_url,
+    source_format,
+    columns_config_url_or_path,
 ):

     t = Table(table_id=table_id, dataset_id=dataset_id, **ctx.obj).init(
         data_sample_path=data_sample_path,
         if_folder_exists=if_folder_exists,
         if_table_config_exists=if_table_config_exists,
-        columns_config_url=columns_config_url,
+        source_format=source_format,
+        columns_config_url_or_path=columns_config_url_or_path,
     )

     click.echo(
@@ -232,9 +261,24 @@ def init_table(
     help="[raise|replace|pass] actions if table config files already exist",
 )
 @click.option(
-    "--columns_config_url",
+    "--source_format",
+    default="csv",
+    help="Data source format. Only 'csv' is supported. Defaults to 'csv'.",
+)
+@click.option(
+    "--columns_config_url_or_path",
+    default=None,
+    help="Path to the local architeture file or a public google sheets URL. Path only suports csv, xls, xlsx, xlsm, xlsb, odf, ods, odt formats. Google sheets URL must be in the format https://docs.google.com/spreadsheets/d/<table_key>/edit#gid=<table_gid>.",
+)
+@click.option(
+    "--dataset_is_public",
+    default=True,
+    help="Control if prod dataset is public or not. By default staging datasets like `dataset_id_staging` are not public.",
+)
+@click.option(
+    "--location",
     default=None,
-    help="google sheets URL. Must be in the format https://docs.google.com/spreadsheets/d/<table_key>/edit#gid=<table_gid>",
+    help="Location of dataset data. List of possible region names locations: https://cloud.google.com/bigquery/docs/locations",
 )
 @click.pass_context
 def create_table(
@@ -247,7 +291,10 @@ def create_table(
     force_dataset,
     if_storage_data_exists,
     if_table_config_exists,
-    columns_config_url,
+    source_format,
+    columns_config_url_or_path,
+    dataset_is_public,
+    location,
 ):

     Table(table_id=table_id, dataset_id=dataset_id, **ctx.obj).create(
@@ -257,7 +304,10 @@ def create_table(
         force_dataset=force_dataset,
         if_storage_data_exists=if_storage_data_exists,
         if_table_config_exists=if_table_config_exists,
-        columns_config_url=columns_config_url,
+        source_format=source_format,
+        columns_config_url_or_path=columns_config_url_or_path,
+        dataset_is_public=dataset_is_public,
+        location=location,
     )

     click.echo(
@@ -297,23 +347,32 @@ def update_table(ctx, dataset_id, table_id, mode):
 @click.argument("dataset_id")
 @click.argument("table_id")
 @click.option(
-    "--columns_config_url",
+    "--columns_config_url_or_path",
     default=None,
-    help="""\nGoogle sheets URL. Must be in the format https://docs.google.com/spreadsheets/d/<table_key>/edit#gid=<table_gid>.
-    \nThe sheet must contain the columns:\n
-    - nome: column name\n
-    - descricao: column description\n
-    - tipo: column bigquery type\n
-    - unidade_medida: column mesurement unit\n
-    - dicionario: column related dictionary\n
-    - nome_diretorio: column related directory in the format <dataset_id>.<table_id>:<column_name>
+    help="""\nFills columns in table_config.yaml automatically using a public google sheets URL or a local file. Also regenerate
+    \npublish.sql and autofill type using bigquery_type.\n
+
+    \nThe sheet must contain the columns:\n
+    - name: column name\n
+    - description: column description\n
+    - bigquery_type: column bigquery type\n
+    - measurement_unit: column mesurement unit\n
+    - covered_by_dictionary: column related dictionary\n
+    - directory_column: column related directory in the format <dataset_id>.<table_id>:<column_name>\n
+    - temporal_coverage: column temporal coverage\n
+    - has_sensitive_data: the column has sensitive data\n
+    - observations: column observations\n
+    \nArgs:\n
+    \ncolumns_config_url_or_path (str): Path to the local architeture file or a public google sheets URL.\n
+    Path only suports csv, xls, xlsx, xlsm, xlsb, odf, ods, odt formats.\n
+    Google sheets URL must be in the format https://docs.google.com/spreadsheets/d/<table_key>/edit#gid=<table_gid>.\n
     """,
 )
 @click.pass_context
-def update_columns(ctx, dataset_id, table_id, columns_config_url):
+def update_columns(ctx, dataset_id, table_id, columns_config_url_or_path):

     Table(table_id=table_id, dataset_id=dataset_id, **ctx.obj).update_columns(
-        columns_config_url=columns_config_url,
+        columns_config_url_or_path=columns_config_url_or_path,
     )

     click.echo(
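A minimal sketch of the same new knobs driven through the Python API instead of the CLI; the keyword names come straight from the diff above, while the dataset id and region are placeholders:

```python
from basedosdados import Dataset

Dataset(dataset_id="my_dataset").create(
    mode="all",
    if_exists="raise",
    dataset_is_public=True,         # prod dataset becomes publicly readable
    location="southamerica-east1",  # any BigQuery region name is accepted
)
```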

python-package/basedosdados/configs/templates/table/table_description.txt

Lines changed: 2 additions & 2 deletions
@@ -48,8 +48,8 @@ Email: {{ data_cleaned_by.email }}
 {% call input(partitions) -%}
 Partições (Filtre a tabela por essas colunas para economizar dinheiro e tempo)
 ---------
-{% if (partitions.split(',') is not none) -%}
-{% for partition in partitions.split(',') -%}
+{% if (partitions is not none) -%}
+{% for partition in partitions -%}
 - {{ partition }}
 {% endfor -%}
 {%- endif %}
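The template change above means `partitions` now arrives as a list and is iterated directly, instead of being split from a comma-separated string. An illustrative rendering of just that snippet:

```python
from jinja2 import Template

# Same loop as the template above, reduced to the changed lines.
snippet = Template(
    "{% if partitions is not none %}"
    "{% for partition in partitions %}- {{ partition }}\n{% endfor %}"
    "{% endif %}"
)
print(snippet.render(partitions=["ano", "sigla_uf"]))
# - ano
# - sigla_uf
```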

python-package/basedosdados/constants.py

Lines changed: 9 additions & 1 deletion
@@ -1,6 +1,14 @@
-__all__ = ["constants"]
+__all__ = ["config", "constants"]

 from enum import Enum
+from dataclasses import dataclass
+
+
+@dataclass
+class config:
+    verbose: bool = True
+    billing_project_id: str = None
+    project_config_path: str = None


 class constants(Enum):
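A hedged usage sketch of the new `config` dataclass: it acts as a module-level settings object, so assigning to its class attributes changes package-wide defaults (the project id below is a placeholder):

```python
from basedosdados.constants import config

config.billing_project_id = "my-billing-project"  # placeholder project id
config.verbose = False                            # quiet the new log messages
```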

python-package/basedosdados/download/download.py

Lines changed: 19 additions & 6 deletions
@@ -18,6 +18,7 @@
     BaseDosDadosInvalidProjectIDException,
     BaseDosDadosNoBillingProjectIDException,
 )
+from basedosdados.constants import config, constants
 from pandas_gbq.gbq import GenericGBQException

@@ -49,6 +50,10 @@ def read_sql(
         Query result
     """

+    # standard billing_project_id configuration
+    if billing_project_id is None:
+        billing_project_id == config.billing_project_id
+
     try:
         # Set a two hours timeout
         bigquery_storage_v1.client.BigQueryReadClient.read_rows = partialmethod(
@@ -86,8 +91,8 @@ def read_sql(
 def read_table(
     dataset_id,
     table_id,
-    query_project_id="basedosdados",
     billing_project_id=None,
+    query_project_id="basedosdados",
     limit=None,
     from_file=False,
     reauth=False,
@@ -101,10 +106,10 @@ def read_table(
         table_id (str): Optional.
             Table id available in basedosdados.dataset_id.
             It should always come with dataset_id.
-        query_project_id (str): Optional.
-            Which project the table lives. You can change this you want to query different projects.
         billing_project_id (str): Optional.
             Project that will be billed. Find your Project ID here https://console.cloud.google.com/projectselector2/home/dashboard
+        query_project_id (str): Optional.
+            Which project the table lives. You can change this you want to query different projects.
         limit (int): Optional.
             Number of rows to read from table.
         from_file (boolean): Optional.
@@ -122,6 +127,10 @@ def read_table(
         Query result
     """

+    # standard billing_project_id configuration
+    if billing_project_id is None:
+        billing_project_id == config.billing_project_id
+
     if (dataset_id is not None) and (table_id is not None):
         query = f"""
         SELECT *
@@ -147,8 +156,8 @@ def download(
     query=None,
     dataset_id=None,
     table_id=None,
-    query_project_id="basedosdados",
     billing_project_id=None,
+    query_project_id="basedosdados",
     limit=None,
     from_file=False,
     reauth=False,
@@ -180,10 +189,10 @@ def download(
         table_id (str): Optional.
             Table id available in basedosdados.dataset_id.
             It should always come with dataset_id.
-        query_project_id (str): Optional.
-            Which project the table lives. You can change this you want to query different projects.
         billing_project_id (str): Optional.
             Project that will be billed. Find your Project ID here https://console.cloud.google.com/projectselector2/home/dashboard
+        query_project_id (str): Optional.
+            Which project the table lives. You can change this you want to query different projects.
         limit (int): Optional
             Number of rows.
         from_file (boolean): Optional.
@@ -201,6 +210,10 @@ def download(
             "Either table_id, dataset_id or query should be filled."
         )

+    # standard billing_project_id configuration
+    if billing_project_id is None:
+        billing_project_id == config.billing_project_id
+
     client = google_client(query_project_id, billing_project_id, from_file, reauth)

     # makes sure that savepath is a filepath and not a folder
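Note that the three guards added above use `==`, a comparison whose result is discarded, so as committed they never copy the configured default into a `None` argument. A minimal sketch of the fallback the commit message ("standard billing_project_id configuration") presumably intends, with an assignment instead:

```python
from basedosdados.constants import config

def resolve_billing_project_id(billing_project_id=None):
    # Fall back to the package-wide default when no explicit id is given.
    if billing_project_id is None:
        billing_project_id = config.billing_project_id  # `=`, not `==`
    return billing_project_id
```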
