diff --git a/ArchiveDropView.m b/ArchiveDropView.m index 0127a05..f57a982 100644 --- a/ArchiveDropView.m +++ b/ArchiveDropView.m @@ -61,21 +61,25 @@ - (NSImage *)image - (void)logError:(NSString*) message { + printf("\x1b[1;91m%s\x1b[0m\n", message.UTF8String); logMessage(logOutput, [NSColor redColor], message); } - (void)logWarning:(NSString*) message { + printf("\x1b[33m%s\x1b[0m\n", message.UTF8String); logMessage(logOutput, [NSColor orangeColor], message); } - (void)logInfo:(NSString*) message { + printf("\x1b[34m%s\x1b[0m\n", message.UTF8String); logMessage(logOutput, [NSColor blueColor], message); } - (void)logResult:(NSString*) message { + printf("\x1b[1;32m%s\x1b[0m\n", message.UTF8String); logMessage(logOutput, [NSColor darkGrayColor], message); } @@ -85,15 +89,6 @@ - (BOOL)performDragOperation:(id )sender [logOutput insertText:@"" replacementRange:logOutput.selectedRange]; NSPasteboard *pboard = [sender draggingPasteboard]; - /////////////////////////////////// - // This probably shouldn't be here - - //get the user defined index name - NSString * indexFileName = [[userDefaults values] valueForKey:@"WAEIndexName"]; - if (indexFileName == nil || [indexFileName length] == 0) { - indexFileName = @"index.html"; - } - //get the user selected output type //HACK alert. I need to figure out a better way to do this. 
I thought the User //types from the select box would get an object, but it only returns a string :-/ @@ -113,53 +108,18 @@ - (BOOL)performDragOperation:(id )sender if (URLPrepend == nil || [URLPrepend length] == 0) { URLPrepend = @""; } - /////////////////////////////////// if ( [[pboard types] containsObject:NSFilenamesPboardType] ) { NSArray *files = [pboard propertyListForType:NSFilenamesPboardType]; NSUInteger numberOfFiles = [files count]; //NSLog(@"%i\n", numberOfFiles); - NSUInteger i; - for (i=0; i - + + + + + @@ -460,6 +464,10 @@ - + + + + + diff --git a/Extractor.h b/Extractor.h index a1d729b..765e3bc 100644 --- a/Extractor.h +++ b/Extractor.h @@ -12,6 +12,7 @@ #import #import +#import "ArchiveDropView.h" @interface Extractor : NSObject { @@ -29,8 +30,16 @@ NSXMLDocumentContentKind contentKind; /** URL to add to the begining of the hrefs / srcs */ NSString * URLPrepend; + /** the directory in which to output contents. if length 0, use archiveName */ + NSString * outputPath; + IBOutlet NSUserDefaultsController *userDefaults; } +/** + * all in one extraction operation from filename + */ +- (void) extractAuto:(NSString*) fileName dropViewRef: (ArchiveDropView*) dropViewRef; + /** * load web archive file */ @@ -75,4 +84,6 @@ added by Robert Covington to handle archives with subframeArchives - (void) setURLPrepend:(NSString *) url; - (NSString *) URLPrepend; +- (void) setOutputPath:(NSString *) path; + @end diff --git a/Extractor.m b/Extractor.m index 3a8d69e..53bc538 100644 --- a/Extractor.m +++ b/Extractor.m @@ -12,6 +12,7 @@ #import "Extractor.h" + static NSString* composeEntryPointPath(NSString* packagePath, NSString* indexName) { return [packagePath stringByAppendingPathComponent:indexName]; @@ -23,12 +24,107 @@ - (id) init { self = [super init]; if(self != nil) { + /////////////////////////////////// + // initialize properties with userDefaults settings + + //get the user defined index name + entryFileName = [[userDefaults values] 
valueForKey:@"WAEIndexName"]; + if (entryFileName == nil || [entryFileName length] == 0) { + entryFileName = @"index.html"; + } + //default to XHTML if there is nothing else contentKind = NSXMLDocumentXHTMLKind; + + //get the user selected output type + //HACK alert. I need to figure out a better way to do this. I thought the User + //types from the select box would get an object, but it only returns a string :-/ + NSString * outputType = [[userDefaults values] valueForKey:@"WAEOutputType"]; + NSXMLDocumentContentKind type = NSXMLDocumentXHTMLKind; + if ( [outputType isEqualToString:@"HTML"] ) { + type = NSXMLDocumentHTMLKind; + } else if ( [outputType isEqualToString:@"XML"] ) { + type = NSXMLDocumentXMLKind; + } else if ( [outputType isEqualToString:@"XHTML"] ) { + type = NSXMLDocumentXHTMLKind; + } else if ( [outputType isEqualToString:@"Text"] ) { + type = NSXMLDocumentTextKind; + } + + // get url prepend + NSString * URLPrepend = [[userDefaults values] valueForKey:@"WAEURLOffset"]; + if (URLPrepend == nil || [URLPrepend length] == 0) { + URLPrepend = @""; + } + + // set default output path + outputPath = @""; + /////////////////////////////////// + } return self; } +-(void) extractAuto:(NSString *)fileName + dropViewRef:(ArchiveDropView *)dropViewRef +{ + // If not running with gui, save relative to CWD + // Also make an ArchiveDropView for logging + NSString * dirPath = [fileName stringByDeletingLastPathComponent]; + if (dropViewRef == nil) { + dirPath = @"./"; + dropViewRef = [[ArchiveDropView alloc] init]; + } + [dropViewRef logInfo:[NSString stringWithFormat: NSLocalizedStringFromTable(@"processing", @"InfoPlist", @"processing file: 1 name"), fileName] ]; + + + if ([fileName hasSuffix:@"webarchive"]) + { + NSFileManager * fm = [NSFileManager defaultManager]; + NSString * archiveName = [[fileName lastPathComponent] stringByDeletingPathExtension]; + + // return if not readable + if (![fm isReadableFileAtPath:fileName]) { + [dropViewRef 
logError:NSLocalizedStringFromTable(@"cannot read", @"InfoPlist", @"")]; + return; + } + + if ([fm isWritableFileAtPath:dirPath]) + { + // set output path to archiveName if empty + if ([outputPath isEqual: @""]) { + outputPath = [dirPath stringByAppendingPathComponent: archiveName]; + } + + NSUInteger i = 0; + while([fm fileExistsAtPath:outputPath]) + { + [dropViewRef logWarning:[NSString stringWithFormat: NSLocalizedStringFromTable(@"folder exists", @"InfoPlist", @"folder already exists: 1 name"), outputPath]]; + NSString * dirName = [archiveName stringByAppendingString:@"-%tu"]; + outputPath = [dirPath stringByAppendingPathComponent: [NSString stringWithFormat: dirName, i++]]; + } + + [self loadWebArchive: fileName]; + [self setURLPrepend: URLPrepend]; + NSString * mainResourcePath = [self extractResources: outputPath]; + + if (mainResourcePath != nil) { + [dropViewRef logResult:[NSString stringWithFormat: NSLocalizedStringFromTable(@"extract success", @"InfoPlist", @"extract success 1=folder name 2=main file"), outputPath, mainResourcePath]]; + } else { + [dropViewRef logError:NSLocalizedStringFromTable(@"unknown", @"InfoPlist", @"")]; + } + + } else { + [dropViewRef logError:NSLocalizedStringFromTable(@"cannot write", @"InfoPlist", @"")]; + } + } + else + { + [dropViewRef logError:NSLocalizedStringFromTable(@"not archive", @"InfoPlist", @"")]; + } +} + + -(void) loadWebArchive:(NSString*) pathToWebArchive { if (m_resources) @@ -351,4 +447,9 @@ - (NSXMLDocumentContentKind) contentKind return contentKind; } +- (void) setOutputPath: (NSString*) path +{ + outputPath = path; +} + @end diff --git a/README.md b/README.md index 53b1730..d664530 100644 --- a/README.md +++ b/README.md @@ -1,66 +1,64 @@ +# WebArchiveExtractor -## Building Help +Mac OS X utility to un-archive .webarchive files (like when saving from Safari) -``` -cd WebArchiveExtractorHelp -``` +This project was forked from [Vitaly Davidenko's repo on 
sourceforge](https://sourceforge.net/projects/webarchivext/). -``` -hiutil -I corespotlight -Caf WebArchiveExtractorHelp.cshelpindex -vv . -``` +## Usage -``` -hiutil -I lsm -Caf WebArchiveExtractorHelp.helpindex -vv . -``` +You can use the utility graphically by launching WebArchiveExtractor.app directly. [See interface here.](https://robrohan.github.io/WebArchiveExtractor/) -Verify: - -``` -hiutil -I corespotlight -Tvf WebArchiveExtractorHelp.cshelpindex -``` +You can also run the same executable from the command line: +```sh +./WebArchiveExtractor.app/Contents/MacOS/WebArchiveExtractor ``` -mv WebArchiveExtractorHelp WebArchiveExtractorHelp.help -``` - ---- +Running with no arguments will just launch the GUI. -NOTE: this file is from the original sourceforge code. There is no Automator code in this forked version +> An ancestor of this project supported Automator Actions at one point. This project does not have this functionality. Use the CLI for programmatic access. -Release notes +**CLI Usage** -Version 0.1 - initial release -This release contains two independent parts +--- -Part 1. Application 'Web Archive Extractor' - -files: -WebArchiveExtractor.zip contains Application +Extract contents of `website.webarchive` to a directory named `website` relative to CWD: +```sh +WebArchiveExtractor website.webarchive +``` +```sh +WebArchiveExtractor -i website.webarchive +``` -To install 'Web Archive Extractor' - - unpack WebArchiveExtractor.zip - - copy WebArchiveExtractor into /Application folder +--- +Define explicit output directory: +```sh +WebArchiveExtractor website.webarchive -o out +``` -Part 2. 
Automator Action +## Build +You *should* be able to automatically build and sign a release for local execution by running this command in the root of the project, even if you are not an Apple developer (assuming you've got the Xcode CLI tools): -files: -Automator-WebArchiveExtractorAction.action.zip contains Automator Plugin -Automator-ExtractWebarchive.zip contains sample workflow +> Update: You need to do two things first:

+> - Step 1. Download Xcode from the App Store (**Note: You don't have to run it; if you do, it will use up a lot of disk space**)
+> - Step 2. Run `xcodebuild -runFirstLaunch` in the Terminal -To install Automator Action - - unpack zip - - copy WebArchiveExtractorAction.action into /Users//Library/Automator folder +```sh +xcodebuild -project WebArchiveExtractor.xcodeproj +``` +If the command fails, you'll need to open the project in Xcode to investigate. -Version 0.2 -Version 0.2 improves stability and addresses a number of other minor issues. --crash on releasing of autorelease pool fixed (in NSCoreDragReceiveProc) --main resource name changed to webarchive-index.html --bundle identifiers changed +The resulting `WebArchiveExtractor.app` should be in `build/Release`. To install, you can just drag it to your Applications directory. -files: -WebArchiveExtractor.0.2.zip contains Application -Automator-WebArchiveExtractorAction.0.2.action.zip contains Automator Plugin +> Keep in mind that the executable is inside the `.app` bundle. To reference the command in your shell, you can do something like either of the following: -. 
+Add to PATH: +```sh +# Add this to your shell's rc file: +export PATH="$PATH:/Applications/WebArchiveExtractor.app/Contents/MacOS/" +``` +Symlink to a location already in PATH: +```sh +ln -s /Applications/WebArchiveExtractor.app/Contents/MacOS/WebArchiveExtractor ~/.local/bin/WebArchiveExtractor +``` \ No newline at end of file diff --git a/WebArchiveExtractor.xcodeproj/project.pbxproj b/WebArchiveExtractor.xcodeproj/project.pbxproj index 384f7bf..e40d4e7 100644 --- a/WebArchiveExtractor.xcodeproj/project.pbxproj +++ b/WebArchiveExtractor.xcodeproj/project.pbxproj @@ -341,6 +341,7 @@ INFOPLIST_FILE = Info.plist; INSTALL_PATH = "$(HOME)/Applications"; MACOSX_DEPLOYMENT_TARGET = 10.13; + OTHER_CODE_SIGN_FLAGS = "--deep"; PRODUCT_BUNDLE_IDENTIFIER = com.robrohan.WebArchiveExtractor; PRODUCT_NAME = WebArchiveExtractor; PROVISIONING_PROFILE = ""; @@ -371,6 +372,7 @@ INFOPLIST_FILE = Info.plist; INSTALL_PATH = "$(HOME)/Applications"; MACOSX_DEPLOYMENT_TARGET = 10.13; + OTHER_CODE_SIGN_FLAGS = "--deep"; PRODUCT_BUNDLE_IDENTIFIER = com.robrohan.WebArchiveExtractor; PRODUCT_NAME = WebArchiveExtractor; PROVISIONING_PROFILE_SPECIFIER = ""; diff --git a/main.m b/main.m index 0de11f7..1a21ad2 100644 --- a/main.m +++ b/main.m @@ -7,8 +7,57 @@ // #import +#import "Extractor.h" + +/** Check for input filename in CLI args, if any. + * Input arg qualifies as either: + * - arg at index 1 that does not start with "-" + * - arg directly after an arg that matches exactly "-i" + * Return index of filename, or 0 on failure + */ +char * findInputArg(int argc, char *argv[]) { + if (argc < 2) { return nil; } + if (argv[1][0]!='-') { return argv[1]; } + for (int i=1; i