diff --git a/README.md b/README.md index d50bc04..8b33884 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ Command-line tool for accessing Scalyr services. The following commands are curr - [**numeric-query**](#fetching-numeric-data): Retrieve numeric / graph data - [**facet-query**](#fetching-facet-counts): Retrieve common values for a field - [**timeseries-query**](#fetching-numeric-data-using-a-timeseries): Retrieve numeric / graph data from a timeseries +- [**download-log**](#downloading-entire-log-files): Download the entire contents of a log file - [**get-file**](#retrieving-configuration-files): Fetch a configuration file - [**put-file**](#creating-or-updating-configuration-files): Create or update a configuration file - [**delete-file**](#creating-or-updating-configuration-files): Delete a configuration file @@ -163,6 +164,56 @@ these limits. If the clocks on the servers sending log messages to Scalyr are significantly out of sync then some messages may not appear in the live tail. For example, if you send us a new log message with a timestamp old enough that it's not in the 1,000 most recent messages when it arrives at the Scalyr servers, then it will not be displayed by the live tail tool. +## Downloading entire log files + +The "download-log" command allows you to download the entire contents, or a portion, of a log file from a given server host. + +The 'download-log' command is similar to the '[query](#querying-logs)' command, except it queries a specific log file from +a specific server host, and it automatically handles continuation tokens so that the entire contents of the log are downloaded +for the specified time range. + +**NOTE** this command is mostly intended to simplify retrieval of agent logs and profiling/debug information generated +by the agent. 
For bulk exports of log data, you are better off using the bulkExport tool: https://www.scalyr.com/bulkExports + +Here are some usage examples: + + # Download the last 24 hours of agent.log from prod-100 + # Note: the default logfile to download is agent.log + # Also note the default query time range is the previous 24 hours + scalyr download-log --serverHost prod-100 + + # Download the last 4 hours of agent.callgrind from prod-100 + # Note: if a non-absolute log file is specified, download-log assumes + # the log file location is in /var/log/scalyr-agent-2/ + scalyr download-log --serverHost prod-100 --logfile agent.callgrind --start 4h + + # Download the last 4 hours of /var/log/nginx/access.log from prod-100 + scalyr download-log --serverHost prod-100 --logfile /var/log/nginx/access.log --start 4h + +Complete argument list: + + scalyr download-log [options...] + + --serverHost + the serverHost containing the log file that you wish + to download - cannot be empty + + --logfile + the logfile on the serverHost that you want to download - defaults to `agent.log`. + If the logfile does not start with a `/` then the logfile is assumed to be + relative to /var/log/scalyr-agent-2 + + --start + beginning of the time range to query. Uses the same syntax as the '[query](#querying-logs)' command. + + --end + end of the time range to query. Uses the same syntax as the '[query](#querying-logs)' command. + + --delay + If multiple queries are needed to retrieve the full contents, the number of seconds to delay + between continuation queries. Defaults to 0. + + ## Fetching numeric data The "numeric-query" command allows you to retrieve numeric data, e.g. for graphing. 
def _escapeFilterValue(value):
    """Escape `value` so it can sit inside a double-quoted filter string literal."""
    # NOTE(review): assumes the Scalyr filter language uses backslash escapes
    # inside quoted strings - confirm against the query language reference.
    return value.replace('\\', '\\\\').replace('"', '\\"')


# Implement the "scalyr download-log" command
def commandDownloadLog(parser):
    """Print the contents of one log file from one server host to stdout.

    Registers the command's options on `parser`, parses the command line, and
    then issues paged '/api/query' requests restricted to the requested
    $logfile / $serverHost. Each page's rows are printed as soon as they
    arrive. Paging follows the continuation token returned in the response and
    stops when no token is returned or a page comes back empty.

    Exits with status 1 (after writing a message to stderr) when --serverHost
    or --logfile is empty, or when --delay is negative.
    """

    # Parse the command-line arguments.
    parser.add_argument('--serverHost', default='',
                        help='the serverHost containing the log file that you wish to download - cannot be empty')
    parser.add_argument('--logfile', default='agent.log',
                        help='the logfile on the serverHost that you want to download - defaults to agent.log.  If the logfile does '
                             'not start with a `/` then the logfile is assumed to be relative to /var/log/scalyr-agent-2')
    parser.add_argument('--start', default='',
                        help='beginning of the time range to query')
    parser.add_argument('--end', default='',
                        help='end of the time range to query')
    parser.add_argument('--delay', type=int, default=0,
                        help='The number of seconds to delay between continuations')

    args = parser.parse_args()

    server_host = args.serverHost
    if server_host == '':
        print_stderr('serverHost cannot be empty.  Please specify a serverHost with the --serverHost argument')
        sys.exit(1)

    log_file = args.logfile
    if log_file == '':
        print_stderr('logfile cannot be empty.  Please specify a logfile with the --logfile argument')
        sys.exit(1)

    # time.sleep() rejects negative values; report that as a usage error
    # up front rather than as a traceback in the middle of a download.
    delay = args.delay
    if delay < 0:
        print_stderr('delay cannot be negative.  Please specify a delay of 0 or more seconds with the --delay argument')
        sys.exit(1)

    # Relative names are resolved against the agent's log directory.
    if not log_file.startswith('/'):
        log_file = '/var/log/scalyr-agent-2/' + log_file

    # Get the API token.
    apiToken = getApiToken(args, 'scalyr_readlog_token', 'Read Logs')

    mode = 'head'
    pageSize = 5000
    priority = 'low'
    output = 'multiline'

    # Escape both values so a quote or backslash in a host name or path
    # cannot terminate the string literal early and corrupt the filter.
    query = '$logfile = "%s" $serverHost = "%s"' % (
        _escapeFilterValue(log_file), _escapeFilterValue(server_host))

    continuation_token = None

    while True:
        params = {
            "token": apiToken,
            "queryType": "log",
            "filter": query,
            "startTime": args.start,
            "endTime": args.end,
            "maxCount": pageSize,
            "pageMode": mode,
            "columns": 'message',
            "priority": priority,
        }

        if continuation_token is not None:
            params['continuationToken'] = continuation_token

        # Send the query to the server.
        response, _ = sendRequest(args, '/api/query', params)

        # Print the log records in readable multiline text format.
        matches = response.get('matches') or []
        for match in matches:
            printReadableRow(output, match)

        # Stop when the server has nothing further to hand out. A missing or
        # empty token is treated the same way: re-sending the query without a
        # token would start again from the first page.
        continuation_token = response.get('continuationToken')
        if not continuation_token or not matches:
            break

        # Only pause when another request is actually going to be made.
        time.sleep(delay)