@@ -13,9 +13,10 @@ def create_upload_dict(df:pandas.core.frame.DataFrame, lb_client:Client, base_cl
13
13
row_data_col : Required (str) - Column containing asset URL or file path
14
14
global_key_col : Optional (str) - Column name containing the data row global key - defaults to row data
15
15
external_id_col : Optional (str) - Column name containing the data row external ID - defaults to global key
16
- metadata_index : Required (dict) - Dictionary where {key=column_name : value=metadata_type} - metadata_type = "enum", "string", "datetime" or "number"
16
+ metadata_index : Optional (dict) - Dictionary where {key=column_name : value=metadata_type} - metadata_type = "enum", "string", "datetime" or "number"
17
+ local_files : Optional (bool) - If True, will create urls for local files; if False, uploads `row_data_col` as urls
17
18
divider : Optional (str) - String delimiter for all name keys generated
18
- verbose : Required (bool) - If True, prints information about code execution
19
+ verbose : Optional (bool) - If True, prints information about code execution
19
20
Returns:
20
21
A dictionary where {key=global_key : value=dictionary with "row_data", "global_key", "external_id" and "metadata_fields" keys}
21
22
"""
@@ -32,7 +33,7 @@ def create_upload_dict(df:pandas.core.frame.DataFrame, lb_client:Client, base_cl
32
33
futures .append (
33
34
exc .submit (
34
35
create_data_rows , lb_client , base_client , row ,
35
- metadata_name_key_to_schema , metadata_schema_to_name_key ,
36
+ metadata_name_key_to_schema , metadata_schema_to_name_key , local_files ,
36
37
row_data_col , global_key_col , external_id_col , metadata_index , divider
37
38
)
38
39
)
@@ -43,24 +44,26 @@ def create_upload_dict(df:pandas.core.frame.DataFrame, lb_client:Client, base_cl
43
44
print (f'Generated upload list - { len (global_key_to_upload_dict )} data rows to upload' )
44
45
return global_key_to_upload_dict
45
46
46
- def create_data_rows (lb_client :Client , base_client :baseClient , row :pandas .core .series .Series ,
47
- metadata_name_key_to_schema :dict , metadata_schema_to_name_key :dict ,
48
- row_data_col : str , global_key_col :str = "" , external_id_col :str = "" , metadata_index :dict = {}, divider :str = "///" ):
47
+ def create_data_rows (lb_client :Client , base_client :baseClient , row :pandas .core .series .Series ,
48
+ metadata_name_key_to_schema :dict , metadata_schema_to_name_key :dict , row_data_col : str ,
49
+ global_key_col :str = "" , external_id_col :str = "" , metadata_index :dict = {}, local_files = False , divider :str = "///" ):
49
50
""" Function to-be-multithreaded to create data row dictionaries from a Pandas DataFrame
50
51
Args:
51
52
lb_client : Required (labelbox.client.Client) - Labelbox Client object
52
53
base_client : Required (labelbase.client.Client) - Labelbase Client object
53
- row_data_col : Required (str) - Column containing asset URL or file path
54
+ row : Required (pandas.core.series.Series) - Pandas Series object, corresponds to one row yielded by df.iterrows()
55
+ metadata_name_key_to_schema : Required (dict) - Dictionary where {key=metadata_field_name_key : value=metadata_schema_id}
56
+ metadata_schema_to_name_key : Required (dict) - Inverse of metadata_name_key_to_schema
57
+ row_data_col : Required (str) - Column containing asset URL or file path
54
58
global_key_col : Optional (str) - Column name containing the data row global key - defaults to row data
55
59
external_id_col : Optional (str) - Column name containing the data row external ID - defaults to global key
56
- metadata_index : Required (dict) - Dictionary where {key=column_name : value=metadata_type} - metadata_type = "enum", "string", "datetime" or "number"
57
- metadata_name_key_to_schema : Required (dict) - Dictionary where {key=metadata_field_name_key : value=metadata_schema_id}
58
- metadata_schema_to_name_key : Required (dict) - Inverse of metadata_name_key_to_schema
60
+ metadata_index : Optional (dict) - Dictionary where {key=column_name : value=metadata_type} - metadata_type = "enum", "string", "datetime" or "number"
61
+ local_files : Optional (bool) - If True, will create urls for local files; if False, uploads `row_data_col` as urls
59
62
divider : Optional (str) - String delimiter for all name keys generated
60
63
Returns:
61
64
Two items - the global_key, and a dictionary with "row_data", "global_key", "external_id" and "metadata_fields" keys
62
65
"""
63
- row_data = str (row [row_data_col ])
66
+ row_data = str (row [row_data_col ]) if not local_files else base_client . connector . upload_local_file ( lb_client , str ( row [ row_data_col ]))
64
67
metadata_fields = [{"schema_id" : metadata_name_key_to_schema ['lb_integration_source' ], "value" : "Pandas" }]
65
68
if metadata_index :
66
69
for metadata_field_name in metadata_index .keys ():
0 commit comments