Skip to content
This repository was archived by the owner on Jul 29, 2024. It is now read-only.

Commit 12cd0fd

Browse files
Update connector.py
1 parent 848d53a commit 12cd0fd

File tree

1 file changed

+14
-11
lines changed

1 file changed

+14
-11
lines changed

labelpandas/connector.py

+14-11
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,10 @@ def create_upload_dict(df:pandas.core.frame.DataFrame, lb_client:Client, base_cl
1313
row_data_col : Required (str) - Column containing asset URL or file path
1414
global_key_col : Optional (str) - Column name containing the data row global key - defaults to row data
1515
external_id_col : Optional (str) - Column name containing the data row external ID - defaults to global key
16-
metadata_index : Required (dict) - Dictionary where {key=column_name : value=metadata_type} - metadata_type = "enum", "string", "datetime" or "number"
16+
metadata_index : Optional (dict) - Dictionary where {key=column_name : value=metadata_type} - metadata_type = "enum", "string", "datetime" or "number"
17+
local_files : Optional (bool) - If True, will create urls for local files; if False, uploads `row_data_col` as urls
1718
divider : Optional (str) - String delimiter for all name keys generated
18-
verbose : Required (bool) - If True, prints information about code execution
19+
verbose : Optional (bool) - If True, prints information about code execution
1920
Returns:
2021
Dictionary where {key=global_key : value=dictionary with "row_data", "global_key", "external_id" and "metadata_fields" keys}
2122
"""
@@ -32,7 +33,7 @@ def create_upload_dict(df:pandas.core.frame.DataFrame, lb_client:Client, base_cl
3233
futures.append(
3334
exc.submit(
3435
create_data_rows, lb_client, base_client, row,
35-
metadata_name_key_to_schema, metadata_schema_to_name_key,
36+
metadata_name_key_to_schema, metadata_schema_to_name_key, local_files,
3637
row_data_col, global_key_col, external_id_col, metadata_index, divider
3738
)
3839
)
@@ -43,24 +44,26 @@ def create_upload_dict(df:pandas.core.frame.DataFrame, lb_client:Client, base_cl
4344
print(f'Generated upload list - {len(global_key_to_upload_dict)} data rows to upload')
4445
return global_key_to_upload_dict
4546

46-
def create_data_rows(lb_client:Client, base_client:baseClient, row:pandas.core.series.Series,
47-
metadata_name_key_to_schema:dict, metadata_schema_to_name_key:dict,
48-
row_data_col:str, global_key_col:str="", external_id_col:str="", metadata_index:dict={}, divider:str="///"):
47+
def create_data_rows(lb_client:Client, base_client:baseClient, row:pandas.core.series.Series,
48+
metadata_name_key_to_schema:dict, metadata_schema_to_name_key:dict, row_data_col:str,
49+
global_key_col:str="", external_id_col:str="", metadata_index:dict={}, local_files=False, divider:str="///"):
4950
""" Function to-be-multithreaded to create data row dictionaries from a Pandas DataFrame
5051
Args:
5152
lb_client : Required (labelbox.client.Client) - Labelbox Client object
5253
base_client : Required (labelbase.client.Client) - Labelbase Client object
53-
row_data_col : Required (str) - Column containing asset URL or file path
54+
row : Required (pandas.core.series.Series) - Pandas Series object, corresponds to one row yielded by df.iterrows()
55+
metadata_name_key_to_schema : Required (dict) - Dictionary where {key=metadata_field_name_key : value=metadata_schema_id}
56+
metadata_schema_to_name_key : Required (dict) - Inverse of metadata_name_key_to_schema
57+
row_data_col : Required (str) - Column containing asset URL or file path
5458
global_key_col : Optional (str) - Column name containing the data row global key - defaults to row data
5559
external_id_col : Optional (str) - Column name containing the data row external ID - defaults to global key
56-
metadata_index : Required (dict) - Dictionary where {key=column_name : value=metadata_type} - metadata_type = "enum", "string", "datetime" or "number"
57-
metadata_name_key_to_schema : Required (dict) - Dictionary where {key=metadata_field_name_key : value=metadata_schema_id}
58-
metadata_schema_to_name_key : Required (dict) - Inverse of metadata_name_key_to_schema
60+
metadata_index : Optional (dict) - Dictionary where {key=column_name : value=metadata_type} - metadata_type = "enum", "string", "datetime" or "number"
61+
local_files : Optional (bool) - If True, will create urls for local files; if False, uploads `row_data_col` as urls
5962
divider : Optional (str) - String delimiter for all name keys generated
6063
Returns:
6164
Two items - the global_key, and a dictionary with "row_data", "global_key", "external_id" and "metadata_fields" keys
6265
"""
63-
row_data = str(row[row_data_col])
66+
row_data = str(row[row_data_col]) if not local_files else base_client.connector.upload_local_file(lb_client, str(row[row_data_col]))
6467
metadata_fields = [{"schema_id" : metadata_name_key_to_schema['lb_integration_source'], "value" : "Pandas"}]
6568
if metadata_index:
6669
for metadata_field_name in metadata_index.keys():

0 commit comments

Comments
 (0)