Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions RecommenderSystems/dlrm/tools/criteo1t_parquet.scala
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ val integer_names = Seq("label") ++ dense_names
val col_names = integer_names ++ categorical_names

val mod_idx = 40000000L
val src_dir = "/path/to/unziped/criteo1t"
val dst_dir = "/path/to/output"
val tmp_dir = "/path/to/tmp_spark"
val src_dir = "/workspace/dataset/criteo1t/raw"
val dst_dir = "/workspace/dataset/criteo1t/dlrm_parquet"
val tmp_dir = "/workspace/tmp_spark"

val day_23 = s"${src_dir}/day_23"
val test_csv = s"${tmp_dir}/test.csv"
Expand Down
6 changes: 4 additions & 2 deletions RecommenderSystems/dlrm/tools/split_day_23.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# Split day_23 into test.csv and val.csv
src_dir="/path/to/unziped/criteo1t"
tmp_dir="/path/to/tmp_spark"
src_dir="/workspace/dataset/criteo1t/raw"
tmp_dir="/workspace/tmp_spark"

mkdir -p /workspace/tmp_spark

# total 178274637 lines: test 89137319, val 89137318
head -n 89137319 $src_dir/day_23 > $tmp_dir/test.csv
Expand Down