from labelbox import Client as labelboxClient
from labelbox.schema.dataset import Dataset as labelboxDataset
- from labelbase.metadata import sync_metadata_fields
- from labelbase.uploader import batch_create_data_rows
+ import labelpandas
+ import labelbase
import pandas as pd
- from labelpandas import connector
+

class Client():
    """
@@ -27,64 +27,110 @@ def __init__(
        # return table

    def create_data_rows_from_table(
-           self, table:pd.core.frame.DataFrame, lb_dataset:labelboxDataset, row_data_col:str, global_key_col=None, external_id_col=None,
-           metadata_index:dict={}, local_files:bool=False, skip_duplicates:bool=False, verbose:bool=False, divider="___"):
+           self, table:pd.core.frame.DataFrame, dataset_id:str="", project_id:str="", priority:int=5,
+           upload_method:str="", skip_duplicates:bool=False, verbose:bool=False, divider="///"):
        """ Creates Labelbox data rows given a Pandas table and a Labelbox Dataset
        Args:
-           table : Required (pandas.core.frame.DataFrame) - Pandas DataFrame
-           lb_dataset : Required (labelbox.schema.dataset.Dataset) - Labelbox dataset to add data rows to
-           row_data_col : Required (str) - Column containing asset URL or file path
-           local_files : Required (bool) - Determines how to handle row_data_col values
-               If True, treats row_data_col values as file paths and uploads the local files to Labelbox
-               If False, treats row_data_col values as urls (assuming delegated access is set up)
-           global_key_col : Optional (str) - Column name containing the data row global key - defaults to row_data_col
-           external_id_col : Optional (str) - Column name containing the data row external ID - defaults to global_key_col
-           metadata_index : Required (dict) - Dictionary where {key=column_name : value=metadata_type}
-               metadata_type must be either "enum", "string", "datetime" or "number"
-           skip_duplicates : Optional (bool) - Determines how to handle if a global key to-be-uploaded is already in use
-               If True, will skip duplicate global_keys and not upload them
-               If False, will generate a unique global_key with a suffix "_1", "_2" and so on
-           verbose : Required (bool) - If True, prints details about code execution; if False, prints minimal information
-           divider : Optional (str) - String delimiter for all name keys generated for parent/child schemas
-       Returns:
-           A dictionary with "upload_results" and "conversion_errors" keys
-               - "upload_results" key pertains to the results of the data row upload itself
-               - "conversion_errors" key pertains to any errors related to data row conversion
-       """
-
-       # Ensure all your metadata_index keys are metadata fields in Labelbox and that your Pandas DataFrame has all the right columns
-       table = sync_metadata_fields(
-           client=self.lb_client, table=table, get_columns_function=connector.get_columns_function, add_column_function=connector.add_column_function,
-           get_unique_values_function=connector.get_unique_values_function, metadata_index=metadata_index, verbose=verbose
+           table : Required (pandas.core.frame.DataFrame) - Pandas DataFrame
+           dataset_id : Required (str) - Labelbox dataset ID to add data rows to - only necessary if no "dataset_id" column exists
+           project_id : Required (str) - Labelbox project ID to add data rows to - only necessary if no "project_id" column exists
+           priority : Optional (int) - Between 1 and 5, what priority to give to data row batches sent to projects
+           upload_method : Optional (str) - Either "mal" or "import" - required to upload annotations (otherwise leave as "")
+           skip_duplicates : Optional (bool) - Determines how to handle if a global key to-be-uploaded is already in use
+               If True, will skip duplicate global_keys and not upload them
+               If False, will generate a unique global_key with a suffix {divider} + "1", "2" and so on
+           verbose : Required (bool) - If True, prints details about code execution; if False, prints minimal information
+           divider : Optional (str) - String delimiter for schema name keys and suffix added to duplicate global keys
+       Returns:
+           A dictionary with "data_row_upload_results", "batch_to_project_results" and "annotation_upload_results" keys
+       """
+       # Create a metadata_index, attachment_index, and annotation_index
+       # row_data_col : column with name "row_data"
+       # global_key_col : column with name "global_key" - defaults to row_data_col
+       # external_id_col : column with name "external_id" - defaults to global_key_col
+       # project_id_col : column with name "project_id" - defaults to "" (requires project_id input argument if no "project_id" column exists)
+       # dataset_id_col : column with name "dataset_id" - defaults to "" (requires dataset_id input argument if no "dataset_id" column exists)
+       # metadata_index : Dictionary where {key=column_name : value=metadata_type}
+       # attachment_index : Dictionary where {key=column_name : value=attachment_type}
+       # annotation_index : Dictionary where {key=column_name : value=top_level_feature_name}
+       row_data_col, global_key_col, external_id_col, project_id_col, dataset_id_col, metadata_index, attachment_index, annotation_index = labelbase.connector.validate_columns(
+           table=table,
+           get_columns_function=labelpandas.connector.get_columns_function,
+           get_unique_values_function=labelpandas.connector.get_unique_values_function,
+           divider=divider,
+           verbose=verbose,
+           extra_client=None
        )

-       # If df returns False, the sync failed - terminate the upload
-       if type(table) == bool:
-           return {"upload_results" : [], "conversion_errors" : []}
+       # Iterating over your pandas DataFrame is faster once converted to a list of dictionaries where {key=column_name : value=row_value}
+       table_dict = table.to_dict('records')

-       # Create a dictionary where {key=global_key : value=labelbox_upload_dictionary} - this is unique to Pandas
-       global_key_to_upload_dict, conversion_errors = connector.create_upload_dict(
-           table=table, lb_client=self.lb_client,
-           row_data_col=row_data_col, global_key_col=global_key_col, external_id_col=external_id_col,
-           metadata_index=metadata_index, local_files=local_files, divider=divider, verbose=verbose
-       )
+       if (dataset_id_col == "") and (dataset_id == ""):
+           raise ValueError(f"To create data rows, please provide either a `dataset_id` column or a Labelbox dataset ID to argument `dataset_id`")

-       # If there are conversion errors, let the user know; if there are no successful conversions, terminate the upload
-       if conversion_errors:
-           print(f'There were {len(conversion_errors)} errors in creating your upload list - see result["conversion_errors"] for more information')
-           if global_key_to_upload_dict:
-               print(f'Data row upload will continue')
-           else:
-               print(f'Data row upload will not continue')
-               return {"upload_results" : [], "conversion_errors" : errors}
+       if (upload_method != "") and (project_id_col == "") and (project_id == "") and (annotation_index != {}):
+           raise ValueError(f"To upload annotations, please provide either a `project_id` column or a Labelbox project ID to argument `project_id`")
+
+       # Create a dictionary where {key=dataset_id : value={key=global_key : value=data_row_upload_dict}} - this is unique to Pandas
+       dataset_to_global_key_to_upload_dict = labelpandas.data_rows.create_data_row_upload_dict(
+           client=self.lb_client, table=table, table_dict=table_dict,
+           row_data_col=row_data_col, global_key_col=global_key_col, external_id_col=external_id_col, dataset_id_col=dataset_id_col,
+           dataset_id=dataset_id, metadata_index=metadata_index, attachment_index=attachment_index,
+           divider=divider, verbose=verbose, extra_client=None
+       )

        # Upload your data rows to Labelbox
-       upload_results = batch_create_data_rows(
-           client=self.lb_client, dataset=lb_dataset, global_key_to_upload_dict=global_key_to_upload_dict,
+       data_row_upload_results = labelbase.uploader.batch_create_data_rows(
+           client=self.lb_client, dataset_to_global_key_to_upload_dict=dataset_to_global_key_to_upload_dict,
            skip_duplicates=skip_duplicates, divider=divider, verbose=verbose
        )

-       return {"upload_results" : upload_results, "conversion_errors" : conversion_errors}
+       # If project IDs are provided, we can batch data rows to projects
+       if project_id or project_id_col:
+
+           # Create a dictionary where {key=global_key : value=data_row_id}
+           global_key_to_data_row_id = labelbase.uploader.create_global_key_to_data_row_dict(
+               client=self.lb_client, global_keys=labelpandas.connector.get_unique_values_function(table, global_key_col)
+           )
+
+           # Create a dictionary where {key=project_id : value=list_of_data_row_ids}, if applicable
+           project_id_to_batch_dict = labelpandas.batches.create_batches_dict(
+               client=self.lb_client, table=table, table_dict=table_dict,
+               global_key_col=global_key_col, project_id_col=project_id_col,
+               global_key_to_data_row_id=global_key_to_data_row_id
+           )
+
+           # Batch data rows to projects, if applicable
+           batch_to_project_results = labelbase.uploader.batch_rows_to_project(
+               client=self.lb_client, project_id_to_batch_dict=project_id_to_batch_dict, priority=priority
+           )
+
+           if (upload_method in ["mal", "import"]) and (annotation_index != {}):
+
+               # Create a dictionary where {key=project_id : value=annotation_upload_list}, if applicable
+               project_id_to_upload_dict = labelpandas.connector.create_annotation_upload_dict(
+                   client=self.lb_client, table=table, table_dict=table_dict,
+                   row_data_col=row_data_col, global_key_col=global_key_col, project_id_col=project_id_col,
+                   project_id=project_id, annotation_index=annotation_index, global_key_to_data_row_id=global_key_to_data_row_id,
+                   divider=divider, verbose=verbose
+               )
+
+               # Upload your annotations to Labelbox, if applicable
+               annotation_upload_results = labelbase.uploader.batch_upload_annotations(
+                   client=self.lb_client, project_id_to_upload_dict=project_id_to_upload_dict, how=upload_method, verbose=verbose
+               )
+
+           else: # If no proper upload_method is provided or no annotation_index is generated, we don't upload annotations
+               annotation_upload_results = []
+
+       else: # If project IDs are not provided, we don't batch data rows to projects or upload annotations
+           batch_to_project_results = []
+           annotation_upload_results = []
+
+       return {
+           "data_row_upload_results" : data_row_upload_results,
+           "batch_to_project_results" : batch_to_project_results,
+           "annotation_upload_results" : annotation_upload_results
+       }

    # def upsert_table_metadata():
    #     return table
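
For context, a minimal usage sketch of the reworked method. This is an illustration, not code from the commit: the API key, dataset ID, asset URLs, and the `lb_api_key` constructor argument are placeholder assumptions.

import labelpandas
import pandas as pd

# Hypothetical table following the column conventions the method validates:
# "row_data" holds asset URLs, "global_key" uniquely identifies each data row,
# and "dataset_id" routes each row to a Labelbox dataset, so no dataset_id
# argument is needed in the call below.
table = pd.DataFrame({
    "row_data": ["https://example.com/image-1.jpg", "https://example.com/image-2.jpg"],
    "global_key": ["image-1", "image-2"],
    "dataset_id": ["insert_dataset_id", "insert_dataset_id"]
})

client = labelpandas.Client(lb_api_key="insert_labelbox_api_key")  # assumed constructor signature

results = client.create_data_rows_from_table(
    table=table,
    skip_duplicates=True,  # skip rows whose global_key is already in use
    verbose=True
)
print(results["data_row_upload_results"])

Because no project_id column or argument is given and upload_method is left as "", the call only creates data rows; "batch_to_project_results" and "annotation_upload_results" come back as empty lists.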