diff --git a/framework/core/Constants.pm b/framework/core/Constants.pm index dd905040e..1d7a586f4 100644 --- a/framework/core/Constants.pm +++ b/framework/core/Constants.pm @@ -294,6 +294,8 @@ our $TAG_POST_FIX = "POST_FIX_REVISION"; our $TAG_POST_FIX_COMP = "POST_FIX_COMPILABLE"; our $TAG_FIXED = "FIXED_VERSION"; our $TAG_BUGGY = "BUGGY_VERSION"; +our $TAG_BUGGY_MIN = "BUGGY_MIN_VERSION"; +our $TAG_BUGGY_ORIG = "BUGGY_ORIG_VERSION"; our $TAG_PRE_FIX = "PRE_FIX_REVISION"; # Filename for directory layout csv @@ -379,6 +381,8 @@ $TAG_POST_FIX $TAG_POST_FIX_COMP $TAG_FIXED $TAG_BUGGY +$TAG_BUGGY_MIN +$TAG_BUGGY_ORIG $TAG_PRE_FIX $LAYOUT_FILE diff --git a/framework/core/Project.pm b/framework/core/Project.pm index e83135de3..aad287738 100644 --- a/framework/core/Project.pm +++ b/framework/core/Project.pm @@ -363,7 +363,8 @@ sub sanity_check { $project->checkout_vid(vid [, work_dir, is_bugmine]) Checks out the provided version id (C) to F, and tags the buggy AND -the fixed program version of this bug. Format of C: C<\d+[bf]>. +the fixed program version of this bug. +(Utils::check_vid defines the version-id format.) The temporary working directory (C) is optional, the default is C from the instance of this class. The is_bugmine flag (C) is optional and indicates whether the framework is used for bug mining, the default is false. @@ -397,7 +398,9 @@ sub checkout_vid { if (_can_reuse_work_dir($pid, $vid, $old_pid, $old_vid)) { my $version_type = Utils::check_vid($vid)->{type}; my $tag_name = Utils::tag_prefix($pid, $bid) . - ($version_type eq "b" ? $TAG_BUGGY : $TAG_FIXED); + ($version_type eq "b" ? $TAG_BUGGY : + $version_type eq "b.min" ? $TAG_BUGGY_MIN : + $version_type eq "b.orig" ? $TAG_BUGGY_ORIG : $TAG_FIXED); my $cmd = "cd $work_dir" . " && git checkout $tag_name 2>&1" . 
" && git clean -xdf 2>&1"; @@ -457,7 +460,7 @@ sub checkout_vid { } # Note: will skip both of these for bug mining, for two reasons: - # (1) it isnt necessary and (2) we don't have dependencies yet. + # (1) it isn't necessary and (2) we don't have dependencies yet. # Fix test suite if necessary $self->fix_tests("${bid}f"); # Write version-specific properties @@ -472,7 +475,7 @@ sub checkout_vid { Utils::exec_cmd($cmd, "Initialize fixed program version") or confess("Couldn't tag fixed program version!"); - # Apply patch to obtain buggy version + # Apply patch to obtain the buggy version my $patch_dir = "$PROJECTS_DIR/$pid/patches"; my $src_patch = "$patch_dir/${bid}.src.patch"; $self->apply_patch($work_dir, $src_patch) or return 0; @@ -489,19 +492,60 @@ sub checkout_vid { Utils::exec_cmd($cmd, "Initialize buggy program version") or confess("Couldn't tag buggy program version!"); + # TODO: For now we create two tags for buggy and minimal buggy for + # backward-compatibility. Since these are identical, we should only tag the + # minimal buggy version and treat 'b' as an alias for 'b.min'. + # + # Write program and version id of buggy program version to config file + Utils::write_config_file("$work_dir/$CONFIG", {$CONFIG_PID => $pid, $CONFIG_VID => "${bid}b.min"}); + + # Commit and tag the buggy program version + $tag_name = Utils::tag_prefix($pid, $bid) . $TAG_BUGGY_MIN; + $cmd = "cd $work_dir" . + " && git add -A 2>&1" . + " && git commit -a -m \"$tag_name\" 2>&1" . + " && git tag $tag_name 2>&1"; + Utils::exec_cmd($cmd, "Initialize buggy program version (minimal)") + or confess("Couldn't tag buggy program version!"); + + # Temporary patch file + my $tmp_patch = "$work_dir/.defects4j.diff"; + my $rev_f = $self->lookup("${bid}f"); + my $rev_b = $self->lookup("${bid}b"); + + # Checkout the fixed version and apply original source-code diff + $cmd = "cd $work_dir && git checkout " . Utils::tag_prefix($pid, $bid) . 
"$TAG_FIXED 2>&1"; + `$cmd`; $?==0 or confess("Couldn't checkout $TAG_FIXED"); + + # Apply original source-code patch to obtain the original buggy version + $self->export_diff($rev_f, $rev_b, "$tmp_patch", $self->src_dir($vid)); + $self->apply_patch($work_dir, $tmp_patch) or return 0; + + # Remove temporary patch + system("rm $tmp_patch"); + + # Write program and version id of buggy program version to config file + Utils::write_config_file("$work_dir/$CONFIG", {$CONFIG_PID => $pid, $CONFIG_VID => "${bid}b.orig"}); + + # Commit and tag the buggy program version + $tag_name = Utils::tag_prefix($pid, $bid) . $TAG_BUGGY_ORIG; + $cmd = "cd $work_dir" . + " && git add -A 2>&1" . + " && git commit -a -m \"$tag_name\" 2>&1" . + " && git tag $tag_name 2>&1"; + Utils::exec_cmd($cmd, "Initialize buggy program version (original)") + or confess("Couldn't tag buggy program version!"); + # Checkout post-fix revision and apply unmodified diff to obtain the pre-fix revision - my $tmp_file = "$work_dir/.defects4j.diff"; $cmd = "cd $work_dir && git checkout " . Utils::tag_prefix($pid, $bid) . "$TAG_POST_FIX 2>&1"; `$cmd`; $?==0 or confess("Couldn't checkout $TAG_POST_FIX"); - my $rev1 = $self->lookup("${bid}f"); - my $rev2 = $self->lookup("${bid}b"); # TODO: svn doesn't support diffing of binary files # -> checkout and tag the pre-fix revision instead - $self->{_vcs}->export_diff($rev1, $rev2, $tmp_file); - $self->{_vcs}->apply_patch($work_dir, $tmp_file); + $self->{_vcs}->export_diff($rev_f, $rev_b, $tmp_patch); + $self->{_vcs}->apply_patch($work_dir, $tmp_patch); - # Remove temporary diff file - system("rm $tmp_file"); + # Remove temporary patch + system("rm $tmp_patch"); # Commit and tag the pre-fix revision $tag_name = Utils::tag_prefix($pid, $bid) . $TAG_PRE_FIX; @@ -513,7 +557,10 @@ sub checkout_vid { or confess("Couldn't tag pre-fix revision!"); # Checkout the requested program version - $tag_name = Utils::tag_prefix($pid, $bid) . ($version_type eq "b" ? 
$TAG_BUGGY : $TAG_FIXED); + $tag_name = Utils::tag_prefix($pid, $bid) . + ($version_type eq "b" ? $TAG_BUGGY : + $version_type eq "b.min" ? $TAG_BUGGY_MIN : + $version_type eq "b.orig" ? $TAG_BUGGY_ORIG : $TAG_FIXED); $cmd = "cd $work_dir && git checkout $tag_name 2>&1"; Utils::exec_cmd($cmd, "Check out program version: $pid-$vid") or confess("Couldn't check out program version!"); diff --git a/framework/core/Utils.pm b/framework/core/Utils.pm index 631246527..d83407ed6 100644 --- a/framework/core/Utils.pm +++ b/framework/core/Utils.pm @@ -446,8 +446,9 @@ success, write: sub check_vid { @_ == 1 or die $ARG_ERROR; - my ($vid) = @_; - $vid =~ /^(\d+)([bf])$/ or confess("Wrong version_id: $vid -- expected \\d+[bf]!"); + my $vid = shift; + $vid =~ /^(\d+)(f|b|b\.min|b\.orig)$/ + or confess("Wrong version_id: '$vid' -- expected: \\d+(f|b|b.min|b.orig)\n"); return {valid => 1, bid => $1, type => $2}; } diff --git a/framework/core/Vcs.pm b/framework/core/Vcs.pm index 05d31aba5..a3e47520a 100644 --- a/framework/core/Vcs.pm +++ b/framework/core/Vcs.pm @@ -173,10 +173,11 @@ using L. sub lookup { @_ == 2 or die $ARG_ERROR; my ($self, $vid) = @_; - Utils::check_vid($vid); - $vid =~ /^(\d+)([bf])$/ or die "Unexpected version id: $vid"; - defined $self->{_cache}->{$1}->{$2} or die "Version id does not exist: $vid!"; - return $self->{_cache}->{$1}->{$2}; + my $result = Utils::check_vid($vid); + my $bid = $result->{bid}; + my $type = $result->{type}; + defined $self->{_cache}->{$bid}->{$type} or confess("Version id does not exist: '$vid'\n"); + return $self->{_cache}->{$bid}->{$type}; } =pod @@ -230,7 +231,6 @@ sub get_bug_ids { Given a valid version id (C), this subroutine returns true if C exists in the L file and false otherwise. -Format of C checked using L. This subroutine dies if C is invalid. =cut @@ -248,7 +248,6 @@ sub contains_version_id { Performs a lookup of C in the L file followed by a checkout of the corresponding revision with C to F. -Format of C checked using L. 
B. @@ -435,14 +434,14 @@ sub rev_date { sub _build_db_cache { @_ == 1 or die $ARG_ERROR; my ($db) = @_; - open (IN, "<$db") or die "Cannot open $BUGS_CSV_ACTIVE $db: $!"; + open (IN, "<$db") or die "Cannot open $BUGS_CSV_ACTIVE '$db': $!"; my $cache = {}; my $header = ; while () { chomp; /(\d+),([^,]+),([^,]+),([^,]+),([^,]+)/ or die "Corrupted $BUGS_CSV_ACTIVE!"; - $cache->{$1} = {b => $2, f => $3, line => $_}; + $cache->{$1} = {'b.min' => $2, 'b.orig' => $2, b => $2, f => $3, line => $_}; } close IN; diff --git a/framework/test/test.include b/framework/test/test.include index 5dcc15e32..dfde42ace 100755 --- a/framework/test/test.include +++ b/framework/test/test.include @@ -112,6 +112,10 @@ get_bug_ids() { tail -n +2 $1 | cut -f1 -d',' | tr '\n' ' ' } +# Determine and print all project ids found in the Project directory. +get_project_ids() { + echo $(cd $BASE_DIR/framework/core/Project && ls *.pm | sed -e 's/\.pm$//') +} ################################################################################ # Determine and print all test suites that exist in the provided suite_dir diff --git a/framework/test/test_version_ids.sh b/framework/test/test_version_ids.sh new file mode 100755 index 000000000..bafe56dd5 --- /dev/null +++ b/framework/test/test_version_ids.sh @@ -0,0 +1,81 @@ +#!/usr/bin/env bash +################################################################################ +# +# This script tests whether Defects4J correctly parses and interprets the four +# possible versions (f, b, b.min, b.orig) for two bugs of each project. 
+#
+################################################################################
+# Import helper subroutines and variables, and init Defects4J
+source test.include
+init
+
+################################################################################
+# test_vids <pid> <bid>: check out the f, b, b.min, and b.orig versions of one bug
+# and verify config files, tag diffs, and triggering tests (failures call die).
+test_vids() {
+    local pid=$1 bid=$2
+    local work_dir=$TMP_DIR/$pid-$bid
+    # Check out every version type and verify the resulting config file
+    for type in f b b.min b.orig; do
+        local vid=${bid}${type}
+        # Check out the requested version
+        defects4j checkout -p $pid -v $vid -w $work_dir || die "checkout program version $pid-$vid"
+
+        # Verify that defects4j's config file exists
+        [ -e $work_dir/.defects4j.config ] || die "read config file"
+        # Verify that defects4j's config file provides the correct data
+        grep -q "pid=$pid" $work_dir/.defects4j.config || die "verify pid in config file"
+        grep -q "vid=$vid" $work_dir/.defects4j.config || die "verify vid in config file"
+    done
+
+    # Diff between b.min and b
+    # -> only .defects4j.config must change
+    defects4j checkout -p $pid -v ${bid}b.min -w $work_dir || die "checkout program version $pid-${bid}b.min"
+    # Run git in a subshell so that the script's own working directory stays unchanged
+    (cd $work_dir && git diff D4J_${pid}_${bid}_BUGGY_VERSION | diffstat -t > $work_dir/diff.stats)
+    local num=$(wc -l < $work_dir/diff.stats)
+    [ $num -eq 2 ] || die "verify changed files between ${pid}-${bid}b.min and ${pid}-${bid}b"
+    grep -q "^1,1,0,.defects4j.config" $work_dir/diff.stats || die "verify diff between ${pid}-${bid}b.min and ${pid}-${bid}b"
+
+    # Diff between b.orig and f
+    # -> same changes as diff between pre-fix and post-fix revisions (source path only)
+    local tag_1=D4J_${pid}_${bid}_PRE_FIX_REVISION
+    local tag_2=D4J_${pid}_${bid}_POST_FIX_REVISION
+    local rev_f=$(grep "^$bid," $BASE_DIR/framework/projects/$pid/commit-db | cut -f3 -d',')
+    local src_dir=$(grep "^${rev_f}," $BASE_DIR/framework/projects/$pid/dir-layout.csv | cut -f2 -d',')
+
+    defects4j checkout -p $pid -v ${bid}b.orig -w $work_dir || die "checkout program version $pid-${bid}b.orig"
+    (cd $work_dir && git diff D4J_${pid}_${bid}_FIXED_VERSION | filterdiff -x '*/.defects4j.config' | diffstat -t > $work_dir/diff.1.stats)
+    # Special case for older versions of JodaTime (probe the tag inside the checked-out repository)
+    if [ "$pid" == "Time" ] && (cd $work_dir && git cat-file -e $tag_1:JodaTime 2>/dev/null); then
+        (cd $work_dir && git diff ${tag_2} ${tag_1} -- "JodaTime/$src_dir" | diffstat -t > $work_dir/diff.2.stats)
+    else
+        (cd $work_dir && git diff ${tag_2} ${tag_1} -- $src_dir | diffstat -t > $work_dir/diff.2.stats)
+    fi
+    perl -E "print '-' x 75, \"\\n\""
+    cat $work_dir/diff.*.stats
+    perl -E "print '-' x 75, \"\\n\""
+    cmp -s $work_dir/diff.1.stats $work_dir/diff.2.stats || die "verify changed files between ${pid}-${bid}b.orig and ${pid}-${bid}f"
+
+    # Run the tests on the original buggy version and verify triggering tests
+    # Mockito requires an explicit call to compile before calling test
+    defects4j compile -w $work_dir || die "compile program version $pid-${bid}b.orig"
+    defects4j test -r -w $work_dir
+    local triggers=$(num_triggers "$work_dir/failing_tests")
+    local expected=$(num_triggers "$BASE_DIR/framework/projects/$pid/trigger_tests/$bid")
+    [ $triggers -eq $expected ] || die "verify number of triggering tests: $pid-$vid (expected: $expected, actual: $triggers)"
+    for t in $(get_triggers "$BASE_DIR/framework/projects/$pid/trigger_tests/$bid"); do
+        grep -q "$t" "$work_dir/failing_tests" || die "expected triggering test $t did not fail"
+    done
+}
+################################################################################
+
+PROJECTS_DIR=$BASE_DIR/framework/projects
+
+# Test first and last bug in each project
+for pid in $(get_project_ids); do
+    ids=$(get_bug_ids $PROJECTS_DIR/$pid/commit-db)
+    bug_1=$(echo $ids | cut -f1 -d" ")
+    bug_n=$(echo $ids | rev | cut -f1 -d" " | rev)
+    test_vids $pid $bug_1 && test_vids $pid $bug_n
+done