Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 11 additions & 51 deletions ArchiveDropView.m
Original file line number Diff line number Diff line change
Expand Up @@ -61,21 +61,25 @@ - (NSImage *)image

- (void)logError:(NSString*) message
{
printf("\x1b[1;91m%s\x1b[0m\n", message.UTF8String);
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should remove these printfs. I believe these are here to show output when running from the command line version - they might be better to just be in the logMessage function and not do the colours as we're assuming the colour scheme of the terminal.

logMessage(logOutput, [NSColor redColor], message);
}

/**
 * Reports a warning-level message.
 *
 * The text is written to stdout wrapped in the ANSI SGR 33 (yellow) escape
 * sequence, then handed to logMessage() together with logOutput and
 * [NSColor orangeColor].
 *
 * @param message text to report.
 */
- (void)logWarning:(NSString*) message
{
    // Terminal copy of the message (coloured via ANSI escapes).
    const char *utf8Text = [message UTF8String];
    printf("\x1b[33m%s\x1b[0m\n", utf8Text);

    // Same text for the log view; logMessage is defined elsewhere in this file.
    NSColor *warningColor = [NSColor orangeColor];
    logMessage(logOutput, warningColor, message);
}

/**
 * Reports an informational message.
 *
 * The text is written to stdout wrapped in the ANSI SGR 34 (blue) escape
 * sequence, then handed to logMessage() together with logOutput and
 * [NSColor blueColor].
 *
 * @param message text to report.
 */
- (void)logInfo:(NSString*) message
{
    // Terminal copy of the message (coloured via ANSI escapes).
    const char *utf8Text = [message UTF8String];
    printf("\x1b[34m%s\x1b[0m\n", utf8Text);

    // Same text for the log view; logMessage is defined elsewhere in this file.
    NSColor *infoColor = [NSColor blueColor];
    logMessage(logOutput, infoColor, message);
}

/**
 * Reports a result message (used by callers after a successful extraction).
 *
 * The text is written to stdout wrapped in the ANSI SGR 1;32 (bold green)
 * escape sequence, then handed to logMessage() together with logOutput and
 * [NSColor darkGrayColor].
 *
 * @param message text to report.
 */
- (void)logResult:(NSString*) message
{
    // Terminal copy of the message (coloured via ANSI escapes).
    const char *utf8Text = [message UTF8String];
    printf("\x1b[1;32m%s\x1b[0m\n", utf8Text);

    // Same text for the log view; logMessage is defined elsewhere in this file.
    NSColor *resultColor = [NSColor darkGrayColor];
    logMessage(logOutput, resultColor, message);
}

Expand All @@ -85,15 +89,6 @@ - (BOOL)performDragOperation:(id <NSDraggingInfo>)sender
[logOutput insertText:@"" replacementRange:logOutput.selectedRange];
NSPasteboard *pboard = [sender draggingPasteboard];

///////////////////////////////////
// This probably shouldn't be here

//get the user defined index name
NSString * indexFileName = [[userDefaults values] valueForKey:@"WAEIndexName"];
if (indexFileName == nil || [indexFileName length] == 0) {
indexFileName = @"index.html";
}

//get the user selected output type
//HACK alert. I need to figure out a better way to do this. I thought the User
//types from the select box would get an object, but it only returns a string :-/
Expand All @@ -113,53 +108,18 @@ - (BOOL)performDragOperation:(id <NSDraggingInfo>)sender
if (URLPrepend == nil || [URLPrepend length] == 0) {
URLPrepend = @"";
}
///////////////////////////////////

if ( [[pboard types] containsObject:NSFilenamesPboardType] ) {
NSArray *files = [pboard propertyListForType:NSFilenamesPboardType];
NSUInteger numberOfFiles = [files count];
//NSLog(@"%i\n", numberOfFiles);
NSUInteger i;
for (i=0; i<numberOfFiles; i++)
{
NSString* fileName = [files objectAtIndex:i];

[self logInfo:[NSString stringWithFormat: NSLocalizedStringFromTable(@"processing", @"InfoPlist", @"processing file: 1 name"), fileName] ];

if ([fileName hasSuffix:@"webarchive"])
{
NSFileManager * fm = [NSFileManager defaultManager];
NSString * dirPath = [fileName stringByDeletingLastPathComponent];

if ([fm isWritableFileAtPath:dirPath])
{
NSString * archiveName = [[fileName lastPathComponent] stringByDeletingPathExtension];
NSString * outputPath = [dirPath stringByAppendingPathComponent: archiveName];

NSUInteger i = 0;
while([fm fileExistsAtPath:outputPath])
{
[self logWarning:[NSString stringWithFormat: NSLocalizedStringFromTable(@"folder exists", @"InfoPlist", @"folder already exists: 1 name"), outputPath] ];
NSString * dirName = [archiveName stringByAppendingString:@"-%tu"];
outputPath = [dirPath stringByAppendingPathComponent: [NSString stringWithFormat: dirName, i++]];
}

Extractor * extr = [[Extractor alloc] init];
[extr loadWebArchive: fileName];
[extr setEntryFileName: indexFileName];
[extr setContentKind: type];
[extr setURLPrepend: URLPrepend];
NSString * mainResourcePath = [extr extractResources: outputPath];

[self logResult:[NSString stringWithFormat: NSLocalizedStringFromTable(@"extract success", @"InfoPlist", @"extract success 1=folder name 2=main file"), outputPath, mainResourcePath]];

}
}
else
{
[self logError: NSLocalizedStringFromTable(@"not archive", @"InfoPlist", @"")];
}
}
NSUInteger i;
for (i=0; i<numberOfFiles; i++)
{
NSString* fileName = [files objectAtIndex:i];
Extractor * extr = [[Extractor alloc] init];
[extr extractAuto:fileName dropViewRef:self];
}
}
return YES;
}
Expand Down
Binary file modified English.lproj/InfoPlist.strings
Binary file not shown.
12 changes: 10 additions & 2 deletions English.lproj/MainMenu.nib/designable.nib

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions Extractor.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

#import <Cocoa/Cocoa.h>
#import <WebKit/WebKit.h>
#import "ArchiveDropView.h"

@interface Extractor : NSObject
{
Expand All @@ -29,8 +30,16 @@
NSXMLDocumentContentKind contentKind;
/** URL to add to the beginning of the hrefs / srcs */
NSString * URLPrepend;
/** the directory in which to output contents. if length 0, use archiveName */
NSString * outputPath;
IBOutlet NSUserDefaultsController *userDefaults;
}

/**
* all in one extraction operation from filename
*/
- (void) extractAuto:(NSString*) fileName dropViewRef: (ArchiveDropView*) dropViewRef;

/**
* load web archive file
*/
Expand Down Expand Up @@ -75,4 +84,6 @@ added by Robert Covington to handle archives with subframeArchives
- (void) setURLPrepend:(NSString *) url;
- (NSString *) URLPrepend;

- (void) setOutputPath:(NSString *) path;

@end
101 changes: 101 additions & 0 deletions Extractor.m
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

#import "Extractor.h"


static NSString* composeEntryPointPath(NSString* packagePath, NSString* indexName)
{
return [packagePath stringByAppendingPathComponent:indexName];
Expand All @@ -23,12 +24,107 @@ - (id) init
{
    // Initializes the extractor's settings from the user's preferences:
    //   entryFileName <- "WAEIndexName"  (falls back to "index.html")
    //   contentKind   <- "WAEOutputType" (falls back to XHTML)
    //   URLPrepend    <- "WAEURLOffset"  (falls back to "")
    //   outputPath    <- ""              (meaning: derive it from the archive name)
    self = [super init];
    if (self != nil) {
        // The userDefaults outlet cannot have been connected yet while -init
        // is running, and instances created with alloc/init in code never get
        // it connected at all. Messaging the nil outlet would silently yield
        // nil for every preference, so fall back to the shared controller.
        NSUserDefaultsController *defaultsController = userDefaults;
        if (defaultsController == nil) {
            defaultsController = [NSUserDefaultsController sharedUserDefaultsController];
        }
        id preferences = [defaultsController values];

        // User-defined name of the entry (index) file.
        // The value is copied so the ivar owns it regardless of the
        // memory-management model this file is compiled with.
        NSString *indexName = [preferences valueForKey:@"WAEIndexName"];
        if ([indexName length] > 0) {
            entryFileName = [indexName copy];
        } else {
            entryFileName = @"index.html";
        }

        // User-selected output type. The preference is stored as a plain
        // string, so it is mapped onto the NSXMLDocument kind by name.
        // Anything unrecognised (or missing) keeps the XHTML default.
        // The result is stored in the ivar; previously it was computed into a
        // local that was then discarded.
        NSString *outputType = [preferences valueForKey:@"WAEOutputType"];
        contentKind = NSXMLDocumentXHTMLKind;
        if ([outputType isEqualToString:@"HTML"]) {
            contentKind = NSXMLDocumentHTMLKind;
        } else if ([outputType isEqualToString:@"XML"]) {
            contentKind = NSXMLDocumentXMLKind;
        } else if ([outputType isEqualToString:@"XHTML"]) {
            contentKind = NSXMLDocumentXHTMLKind;
        } else if ([outputType isEqualToString:@"Text"]) {
            contentKind = NSXMLDocumentTextKind;
        }

        // URL prefix for rewritten hrefs / srcs. A differently named local is
        // used on purpose: a local called URLPrepend would shadow the ivar
        // and leave the ivar unset.
        NSString *prependPreference = [preferences valueForKey:@"WAEURLOffset"];
        if ([prependPreference length] > 0) {
            URLPrepend = [prependPreference copy];
        } else {
            URLPrepend = @"";
        }

        // Empty output path: extractAuto:dropViewRef: derives it from the
        // archive name unless setOutputPath: is called first.
        outputPath = @"";
    }
    return self;
}

/**
 * All-in-one extraction of a single .webarchive file.
 *
 * Progress, warnings and errors are reported through dropViewRef's
 * logInfo: / logWarning: / logError: / logResult: methods.
 *
 * @param fileName    path of the web archive to extract.
 * @param dropViewRef view used for logging. Pass nil when running without a
 *                    GUI: a fresh ArchiveDropView is then created for logging
 *                    and output is written relative to the current directory.
 */
-(void) extractAuto:(NSString *)fileName
        dropViewRef:(ArchiveDropView *)dropViewRef
{
    // If not running with gui, save relative to CWD.
    // Also make an ArchiveDropView for logging.
    NSString *dirPath = [fileName stringByDeletingLastPathComponent];
    if (dropViewRef == nil) {
        dirPath = @"./";
        dropViewRef = [[ArchiveDropView alloc] init];
    }

    [dropViewRef logInfo:[NSString stringWithFormat: NSLocalizedStringFromTable(@"processing", @"InfoPlist", @"processing file: 1 name"), fileName]];

    if (![fileName hasSuffix:@"webarchive"]) {
        [dropViewRef logError:NSLocalizedStringFromTable(@"not archive", @"InfoPlist", @"")];
        return;
    }

    NSFileManager *fm = [NSFileManager defaultManager];
    NSString *archiveName = [[fileName lastPathComponent] stringByDeletingPathExtension];

    // Bail out if the archive itself cannot be read.
    if (![fm isReadableFileAtPath:fileName]) {
        [dropViewRef logError:NSLocalizedStringFromTable(@"cannot read", @"InfoPlist", @"")];
        return;
    }

    if (![fm isWritableFileAtPath:dirPath]) {
        [dropViewRef logError:NSLocalizedStringFromTable(@"cannot write", @"InfoPlist", @"")];
        return;
    }

    // No explicit output path: use a folder named after the archive.
    // A length test (rather than isEqual:@"") also covers a nil outputPath.
    if ([outputPath length] == 0) {
        outputPath = [dirPath stringByAppendingPathComponent: archiveName];
    }

    // Never overwrite an existing folder: try "<archiveName>-0", "-1", ...
    // next to the archive until an unused name is found.
    NSUInteger suffix = 0;
    while ([fm fileExistsAtPath:outputPath])
    {
        [dropViewRef logWarning:[NSString stringWithFormat: NSLocalizedStringFromTable(@"folder exists", @"InfoPlist", @"folder already exists: 1 name"), outputPath]];
        // archiveName is passed as an argument, never spliced into the format
        // string: a file name containing '%' would otherwise be interpreted
        // as format directives.
        NSString *candidateName = [NSString stringWithFormat:@"%@-%lu", archiveName, (unsigned long)suffix++];
        outputPath = [dirPath stringByAppendingPathComponent: candidateName];
    }

    [self loadWebArchive: fileName];
    [self setURLPrepend: URLPrepend];
    NSString *mainResourcePath = [self extractResources: outputPath];

    if (mainResourcePath != nil) {
        [dropViewRef logResult:[NSString stringWithFormat: NSLocalizedStringFromTable(@"extract success", @"InfoPlist", @"extract success 1=folder name 2=main file"), outputPath, mainResourcePath]];
    } else {
        [dropViewRef logError:NSLocalizedStringFromTable(@"unknown", @"InfoPlist", @"")];
    }
}


-(void) loadWebArchive:(NSString*) pathToWebArchive
{
if (m_resources)
Expand Down Expand Up @@ -351,4 +447,9 @@ - (NSXMLDocumentContentKind) contentKind
return contentKind;
}

/**
 * Sets the directory that extraction output is written to.
 *
 * An empty string means "no explicit path": extractAuto:dropViewRef: then
 * derives the folder from the archive name.
 *
 * @param path output directory; nil is treated the same as @"".
 */
- (void) setOutputPath: (NSString*) path
{
    // extractAuto:dropViewRef: detects "no explicit path" by comparing with
    // @"". A nil value fails that comparison and would be used as if it were
    // a real path, so nil is normalized to the empty string here.
    outputPath = (path != nil) ? path : @"";
}

@end
90 changes: 44 additions & 46 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,66 +1,64 @@
# WebArchiveExtractor

## Building Help
Mac OS X utility to un-archive .webarchive files (like when saving from Safari)

```
cd WebArchiveExtractorHelp
```
This project was forked from [Vitaly Davidenko's repo on sourceforge](https://sourceforge.net/projects/webarchivext/).

```
hiutil -I corespotlight -Caf WebArchiveExtractorHelp.cshelpindex -vv .
```
## Usage

```
hiutil -I lsm -Caf WebArchiveExtractorHelp.helpindex -vv .
```
You can use the utility graphically by launching WebArchiveExtractor.app directly. [See interface here.](https://robrohan.github.io/WebArchiveExtractor/)

Verify:

```
hiutil -I corespotlight -Tvf WebArchiveExtractorHelp.cshelpindex
```
You can also run the same executable from the command line:

```sh
./WebArchiveExtractor.app/Contents/MacOS/WebArchiveExtractor
```
mv WebArchiveExtractorHelp WebArchiveExtractorHelp.help
```

---
Running with no arguments will just launch the GUI.

NOTE: this file is from the original sourceforge code. There is no Automator code in this forked version
> An ancestor of this project supported Automator Actions at one point. This project does not have this functionality. Use the CLI for programmatic access.

Release notes
**CLI Usage**

Version 0.1 - initial release
This release contains two independent parts
---

Part 1. Application 'Web Archive Extractor'

files:
WebArchiveExtractor.zip contains Application
Extract contents of `website.webarchive` to a directory named `website` relative to CWD:
```sh
WebArchiveExtractor website.webarchive
```
```sh
WebArchiveExtractor -i website.webarchive
```

To install 'Web Archive Extractor'
- unpack WebArchiveExtractor.zip
- copy WebArchiveExtractor into /Application folder
---

Define explicit output directory:
```sh
WebArchiveExtractor website.webarchive -o out
```

Part 2. Automator Action
## Build
You *should* be able to automatically build and sign a release for local execution by running this command in the root of the project, even if you are not an Apple developer (assuming you've got the Xcode CLI tools):

files:
Automator-WebArchiveExtractorAction.action.zip contains Automator Plugin
Automator-ExtractWebarchive.zip contains sample workflow
> Update: You need to do two things first:<br><br>
> - Step 1. Download Xcode from the App Store (**Note: You don't have to run it, if you do it'll use up a bunch of disk space**)<br>
> - Step 2. Run `xcodebuild -runFirstLaunch` in the Terminal

To install Automator Action
- unpack zip
- copy WebArchiveExtractorAction.action into /Users/<your username>/Library/Automator folder
```sh
xcodebuild -project WebArchiveExtractor.xcodeproj
```
If the command fails, you'll need to open the project in Xcode to investigate.


Version 0.2
Version 0.2 improves stability and addresses a number of other minor issues.
-crash on releasing of autorelease pool fixed (in NSCoreDragReceiveProc)
-main resource name changed to webarchive-index.html
-bundle identifiers changed
The resulting `WebArchiveExtractor.app` should be in `build/Release`. To install, you can just drag it to your Applications directory.

files:
WebArchiveExtractor.0.2.zip contains Application
Automator-WebArchiveExtractorAction.0.2.action.zip contains Automator Plugin
> Keep in mind that the executable is inside the `.app` bundle. To reference the command in your shell, you can do something like either of the following:

.
Add to PATH:
```sh
# Add this to your shell's rc file:
export PATH="$PATH:/Applications/WebArchiveExtractor.app/Contents/MacOS/"
```
Symlink to a location already in PATH:
```sh
ln -s /Applications/WebArchiveExtractor.app/Contents/MacOS/WebArchiveExtractor ~/.local/bin/WebArchiveExtractor
```
Loading