File tree 1 file changed +5
-9
lines changed
1 file changed +5
-9
lines changed Original file line number Diff line number Diff line change 25
25
26
26
27
27
def __collect_asts (json_file ):
28
- asts = []
29
28
with open (json_file , 'r' , encoding = 'utf-8' ) as f :
30
- for line in f :
31
- ast = json .loads (line .strip ())
32
- asts .append (ast )
33
-
34
- return asts
29
+ for line in tqdm .tqdm (f ):
30
+ yield line
35
31
36
32
37
33
def __terminals (ast , node_index , args ):
@@ -170,8 +166,8 @@ def main():
170
166
np .random .seed (args .seed )
171
167
172
168
data_dir = Path (args .data_dir )
173
- trains = __collect_asts (data_dir / 'python100k_train.json' )
174
- evals = __collect_asts (data_dir / 'python50k_eval.json' )
169
+ trains = list ( __collect_asts (data_dir / 'python100k_train.json' ) )
170
+ evals = list ( __collect_asts (data_dir / 'python50k_eval.json' ) )
175
171
176
172
train , valid = sklearn_model_selection .train_test_split (
177
173
trains ,
@@ -186,7 +182,7 @@ def main():
186
182
(train , valid , test ),
187
183
):
188
184
output_file = output_dir / f'{ split_name } _output_file.txt'
189
- __collect_all_and_save (split , args , output_file )
185
+ __collect_all_and_save (( json . loads ( line ) for line in split ) , args , output_file )
190
186
191
187
192
188
if __name__ == '__main__' :
You can’t perform that action at this time.
0 commit comments