30 | 30 | from labelbase.models import create_model_run_with_name
31 | 31 | from labelbase.annotate import create_ndjsons
32 | 32 | from concurrent.futures import ThreadPoolExecutor, as_completed
   | 33 | +from uuid import uuid4
33 | 34 |
34 | 35 | def create_upload_dict(client:labelboxClient, table: pandas.core.frame.DataFrame, table_dict:dict,
35 | 36 |                        row_data_col:str, global_key_col:str, external_id_col:str,
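Note on the new import: `uuid4` is only used further down in this diff, to generate a fallback global key when the supplied one is too long. A minimal standalone sketch of that fallback value (not part of the commit):

    from uuid import uuid4

    # str(uuid4()) is a random 36-character string, well under the
    # 200-character global key limit checked later in this diff.
    fallback_global_key = str(uuid4())
    print(len(fallback_global_key))  # 36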
@@ -186,6 +187,13 @@ def create_upload(row_dict:dict, row_data_col:str, global_key_col:str, external_
186 | 187 |         "predictions" : [] -- List of predictions for a given data row, if applicable
187 | 188 |     }
188 | 189 |     """
    | 190 | +    #Remove nan values from dictionary
    | 191 | +    nan_keys = []
    | 192 | +    for key in row_dict.keys():
    | 193 | +        if pandas.isna(row_dict[key]):
    | 194 | +            nan_keys.append(key)
    | 195 | +    for key in nan_keys:
    | 196 | +        del row_dict[key]
189 | 197 |     # Determine dataset ID
190 | 198 |     if dataset_id:
191 | 199 |         datasetId = dataset_id
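The NaN-stripping added above drops every column that pandas parsed as missing, so the `in row_dict.keys()` checks later in this diff effectively skip empty spreadsheet cells. A standalone sketch of the same behavior (the example dictionary is illustrative):

    import pandas

    row_dict = {"row_data": "https://example.com/image.png",
                "external_id": float("nan")}  # illustrative row

    # Collect keys whose values pandas treats as missing (NaN/None/NaT),
    # then delete them; collecting first avoids mutating while iterating.
    nan_keys = [key for key in row_dict if pandas.isna(row_dict[key])]
    for key in nan_keys:
        del row_dict[key]

    print(row_dict)  # {'row_data': 'https://example.com/image.png'}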
@@ -214,24 +222,31 @@ def create_upload(row_dict:dict, row_data_col:str, global_key_col:str, external_
214 | 222 |         modelRunId = ""
215 | 223 |     # Create a base data row dictionary
216 | 224 |     data_row = {}
217 |     | -    if create_action or batch_action:
    | 225 | +    if create_action or batch_action:
218 | 226 |         data_row["row_data"] = row_dict[row_data_col]
219 |     | -        data_row["global_key"] = row_dict[global_key_col]
220 |     | -        data_row["external_id"] = row_dict[external_id_col]
    | 227 | +        if len(row_dict[global_key_col]) <= 200:
    | 228 | +            data_row["global_key"] = row_dict[global_key_col]
    | 229 | +        else:
    | 230 | +            if verbose:
    | 231 | +                print("Global key too long (>200 characters). Replacing with randomly generated global key.")
    | 232 | +            data_row["global_key"] = str(uuid4())
    | 233 | +        if external_id_col in row_dict.keys():
    | 234 | +            data_row["external_id"] = row_dict[external_id_col]
221 | 235 |         # Create a list of metadata for a data row
222 | 236 |         metadata_fields = [{"schema_id" : metadata_name_key_to_schema['lb_integration_source'], "value" : "LabelPandas"}]
223 | 237 |         if metadata_index:
224 | 238 |             for metadata_field_name in metadata_index.keys():
225 | 239 |                 metadata_type = metadata_index[metadata_field_name]
226 | 240 |                 column_name = f"metadata{divider}{metadata_type}{divider}{metadata_field_name}"
227 |     | -                input_metadata = process_metadata_value(
228 |     | -                    metadata_value=row_dict[column_name], metadata_type=metadata_type,
229 |     | -                    parent_name=metadata_field_name, metadata_name_key_to_schema=metadata_name_key_to_schema, divider=divider
230 |     | -                )
231 |     | -                if input_metadata:
232 |     | -                    metadata_fields.append({"schema_id" : metadata_name_key_to_schema[metadata_field_name], "value" : input_metadata})
233 |     | -                else:
234 |     | -                    continue
    | 241 | +                if column_name in row_dict.keys():
    | 242 | +                    input_metadata = process_metadata_value(
    | 243 | +                        metadata_value=row_dict[column_name], metadata_type=metadata_type,
    | 244 | +                        parent_name=metadata_field_name, metadata_name_key_to_schema=metadata_name_key_to_schema, divider=divider
    | 245 | +                    )
    | 246 | +                    if input_metadata:
    | 247 | +                        metadata_fields.append({"schema_id" : metadata_name_key_to_schema[metadata_field_name], "value" : input_metadata})
    | 248 | +                    else:
    | 249 | +                        continue
235 | 250 |         data_row["metadata_fields"] = metadata_fields
236 | 251 |         # Create a list of attachments for a data row
237 | 252 |         if attachment_index:
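Taken together, the second hunk makes data row construction tolerant of sparse spreadsheets: global keys longer than 200 characters are replaced with a random UUID, and the `external_id` and metadata columns are only read when they survived the NaN-stripping. A rough standalone sketch of the global-key rule (`safe_global_key` is a hypothetical helper, not part of the commit):

    from uuid import uuid4

    def safe_global_key(candidate, verbose=False):
        # The diff treats 200 characters as the longest acceptable global
        # key; anything longer is swapped for a random UUID string.
        if len(candidate) <= 200:
            return candidate
        if verbose:
            print("Global key too long (>200 characters). Replacing with randomly generated global key.")
        return str(uuid4())

    print(safe_global_key("my-image-001"))           # kept as-is
    print(safe_global_key("x" * 500, verbose=True))  # replaced with a uuid4 string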
|