From 6a7668685994640e9b3a29b2118afce10b41d5b0 Mon Sep 17 00:00:00 2001 From: Andrey Makarov Date: Wed, 29 Apr 2020 01:32:28 +0300 Subject: [PATCH 01/20] nimgrep: speed up by threads and Channels --- tools/nimgrep.nim | 429 ++++++++++++++++++++++++++++++------------ tools/nimgrep.nim.cfg | 7 +- 2 files changed, 307 insertions(+), 129 deletions(-) diff --git a/tools/nimgrep.nim b/tools/nimgrep.nim index 138f1680c1f75..f0e5bb66cbd37 100644 --- a/tools/nimgrep.nim +++ b/tools/nimgrep.nim @@ -8,7 +8,7 @@ # import - os, strutils, parseopt, pegs, re, terminal + os, strutils, parseopt, pegs, re, terminal, osproc, tables const Version = "1.5" @@ -33,6 +33,7 @@ Options: --word, -w the match should have word boundaries (buggy for pegs!) --ignoreCase, -i be case insensitive --ignoreStyle, -y be style insensitive + --nWorkers:N, -n:N speed up search by N additional workers (threads) --ext:EX1|EX2|... only search the files with the given extension(s), empty one ("--ext") means files with missing extension --noExt:EX1|... exclude files having given extension(s), use empty one to @@ -68,11 +69,28 @@ type TConfirmEnum = enum ceAbort, ceYes, ceAll, ceNo, ceNone Pattern = Regex | Peg + SearchInfo = tuple[buf: string, filename: string] + MatchInfo = tuple[first: int, last: int; + lineBeg: int, lineEnd: int, match: string] + outputKind = enum + OpenError, Rejected, GroupFirstMatch, GroupNextMatch, GroupEnd, FileContents + Output = object + case kind: outputKind + of OpenError: msg: string + of Rejected: discard + of GroupFirstMatch, GroupNextMatch: + pre: string + match: MatchInfo + of GroupEnd: + groupEnding: string + firstLine: int # = last lineNo of last match + of FileContents: + buffer: string using pattern: Pattern var - filenames: seq[string] = @[] + paths: seq[string] = @[] pattern = "" replacement = "" extensions: seq[string] = @[] @@ -88,6 +106,10 @@ var linesContext = 0 colorTheme = "simple" newLine = false + gVar = (matches: 0, errors: 0, reallyReplace: false) + nWorkers = 0 # run in single thread by default + requests: Channel[(int, string)] + results: Channel[tuple[fileNo: int, result: seq[Output]]] proc ask(msg: string): string = stdout.write(msg) @@ -201,11 +223,6 @@ proc printBlockLineN(s: string) = of "ack": stdout.styledWrite(styleUnderscore, fgYellow, s) of "gnu": stdout.styledWrite(styleUnderscore, fgGreen, s) -type - SearchInfo = tuple[buf: string, filename: string] - MatchInfo = tuple[first: int, last: int; - lineBeg: int, lineEnd: int, match: string] - proc writeColored(s: string) = whenColors: case colorTheme @@ -252,150 +269,146 @@ proc printMatch(fileName: string, mi: MatchInfo) = if i < lines.len - 1: stdout.write("\n") -proc printLinesBefore(si: SearchInfo, curMi: MatchInfo, nLines: int, - replMode=false) = +proc getLinesBefore(si: SearchInfo, curMi: MatchInfo): string = + let first = beforePattern(si.buf, curMi.first-1, linesBefore+1) + result = substr(si.buf, first, curMi.first-1) + +proc printLinesBefore(filename: string, beforeMatch: string, lineBeg: int, replMode=false) = # start block: print 'linesBefore' lines before current match `curMi` - let first = beforePattern(si.buf, curMi.first-1, nLines) - let lines = splitLines(substr(si.buf, first, curMi.first-1)) - let startLine = curMi.lineBeg - lines.len + 1 - blockHeader(si.filename, curMi.lineBeg, replMode=replMode) + let lines = splitLines(beforeMatch) + let startLine = lineBeg - lines.len + 1 + blockHeader(filename, lineBeg, replMode=replMode) for i, l in lines: - lineHeader(si.filename, startLine + i, isMatch = (i == lines.len - 1)) + lineHeader(filename, startLine + i, isMatch = (i == lines.len - 1)) stdout.write(l) if i < lines.len - 1: stdout.write("\n") -proc printLinesAfter(si: SearchInfo, mi: MatchInfo, nLines: int) = +proc getLinesAfter(si: SearchInfo, mi: MatchInfo): string = + let last = afterPattern(si.buf, mi.last+1, 1+linesAfter) + result = substr(si.buf, mi.last+1, last) + +proc printLinesAfter(filename: string, afterMatch: string, matchLineEnd: int) = # finish block: print 'linesAfter' lines after match `mi` - let s = si.buf - let last = afterPattern(s, mi.last+1, nLines) - let lines = splitLines(substr(s, mi.last+1, last)) + let lines = splitLines(afterMatch) if lines.len == 0: # EOF stdout.write("\n") else: stdout.write(lines[0]) # complete the line after match itself stdout.write("\n") - let skipLine = # workaround posix line ending at the end of file - if last == s.len-1 and s.len >= 2 and s[^1] == '\l' and s[^2] != '\c': 1 - else: 0 + #let skipLine = # workaround posix line ending at the end of file + # if last == s.len-1 and s.len >= 2 and s[^1] == '\l' and s[^2] != '\c': 1 + # else: 0 + let skipLine = 0 for i in 1 ..< lines.len - skipLine: - lineHeader(si.filename, mi.lineEnd + i, isMatch = false) + lineHeader(filename, matchLineEnd + i, isMatch = false) stdout.write(lines[i]) stdout.write("\n") if linesAfter + linesBefore >= 2 and not newLine: stdout.write("\n") -proc printBetweenMatches(si: SearchInfo, prevMi: MatchInfo, curMi: MatchInfo) = +proc getLinesBetween(si: SearchInfo, prevMi: MatchInfo, curMi: MatchInfo): string = + si.buf.substr(prevMi.last+1, curMi.first-1) + +proc printBetweenMatches(filename: string, betweenMatches: string, lastLineBeg: int) = # continue block: print between `prevMi` and `curMi` - let lines = si.buf.substr(prevMi.last+1, curMi.first-1).splitLines() + let lines = betweenMatches.splitLines() stdout.write(lines[0]) # finish the line of previous Match if lines.len > 1: stdout.write("\n") for i in 1 ..< lines.len: - lineHeader(si.filename, prevMi.lineEnd + i, + lineHeader(filename, lastLineBeg - lines.len + i + 1, isMatch = (i == lines.len - 1)) stdout.write(lines[i]) if i < lines.len - 1: stdout.write("\n") -proc printContextBetween(si: SearchInfo, prevMi, curMi: MatchInfo) = - # print context after previous match prevMi and before current match curMi - let nLinesBetween = curMi.lineBeg - prevMi.lineEnd - if nLinesBetween <= linesAfter + linesBefore + 1: # print as 1 block - printBetweenMatches(si, prevMi, curMi) - else: # finalize previous block and then print next block - printLinesAfter(si, prevMi, 1+linesAfter) - printLinesBefore(si, curMi, linesBefore+1) - proc printReplacement(si: SearchInfo, mi: MatchInfo, repl: string, showRepl: bool, curPos: int, newBuf: string, curLine: int) = - printLinesBefore(si, mi, linesBefore+1) - printMatch(si.fileName, mi) - printLinesAfter(si, mi, 1+linesAfter) + let filename = si.fileName + printLinesBefore(fileName, getLinesBefore(si, mi), mi.lineBeg) + printMatch(fileName, mi) + printLinesAfter(fileName, getLinesAfter(si, mi), mi.lineEnd) stdout.flushFile() if showRepl: - let newSi: SearchInfo = (buf: newBuf, filename: si.filename) + let newSi: SearchInfo = (buf: newBuf, filename: filename) let miForNewBuf: MatchInfo = (first: newBuf.len, last: newBuf.len, lineBeg: curLine, lineEnd: curLine, match: "") - printLinesBefore(newSi, miForNewBuf, linesBefore+1, replMode=true) + printLinesBefore(fileName, getLinesBefore(newSi, miForNewBuf), miForNewBuf.lineBeg, replMode=true) let replLines = countLineBreaks(repl, 0, repl.len-1) let miFixLines: MatchInfo = (first: mi.first, last: mi.last, lineBeg: curLine, lineEnd: curLine + replLines, match: repl) - printMatch(si.fileName, miFixLines) - printLinesAfter(si, miFixLines, 1+linesAfter) + printMatch(fileName, miFixLines) + printLinesAfter(fileName, getLinesAfter(si, miFixLines), miFixLines.lineEnd) stdout.flushFile() -proc doReplace(si: SearchInfo, mi: MatchInfo, i: int, r: string; - newBuf: var string, curLine: var int, reallyReplace: var bool) = +proc replace1match(si: SearchInfo, mi: MatchInfo, i: int, r: string; + newBuf: var string, curLine: var int): bool = newBuf.add(si.buf.substr(i, mi.first-1)) inc(curLine, countLineBreaks(si.buf, i, mi.first-1)) if optConfirm in options: printReplacement(si, mi, r, showRepl=true, i, newBuf, curLine) case confirm() of ceAbort: quit(0) - of ceYes: reallyReplace = true + of ceYes: gVar.reallyReplace = true of ceAll: - reallyReplace = true + gVar.reallyReplace = true options.excl(optConfirm) of ceNo: - reallyReplace = false + gVar.reallyReplace = false of ceNone: - reallyReplace = false + gVar.reallyReplace = false options.excl(optConfirm) else: - printReplacement(si, mi, r, showRepl=reallyReplace, i, newBuf, curLine) - if reallyReplace: + printReplacement(si, mi, r, showRepl=gVar.reallyReplace, i, newBuf, curLine) + if gVar.reallyReplace: + result = true newBuf.add(r) inc(curLine, countLineBreaks(r, 0, r.len-1)) else: newBuf.add(mi.match) inc(curLine, countLineBreaks(mi.match, 0, mi.match.len-1)) -proc processFile(pattern; filename: string; counter: var int, errors: var int) = - var filenameShown = false - template beforeHighlight = - if not filenameShown and optVerbose notin options and not oneline: - printBlockFile(filename) - stdout.write("\n") - stdout.flushFile() - filenameShown = true - - var buffer: string - if optFilenames in options: - buffer = filename - else: - try: - buffer = system.readFile(filename) - except IOError: - printError "Error: cannot open file: " & filename - inc(errors) - return - if optVerbose in options: - printFile(filename) - stdout.write("\n") - stdout.flushFile() - var result: string - - if optReplace in options: - result = newStringOfCap(buffer.len) - - var lineRepl = 1 +template updateCounters(output: Output) = + case output.kind + of GroupFirstMatch, GroupNextMatch: inc gVar.matches + of OpenError: inc gVar.errors + of Rejected, GroupEnd, FileContents: discard + +proc printOutput(filename: string, output: Output) = + case output.kind + of OpenError: + printError("can not open file " & filename) + of Rejected: discard + of FileContents: discard # impossible + of GroupFirstMatch: + printLinesBefore(filename, output.pre, output.match.lineBeg) + printMatch(filename, output.match) + #flush: TODO + of GroupNextMatch: + printBetweenMatches(filename, output.pre, output.match.lineBeg) + printMatch(filename, output.match) + of GroupEnd: + printLinesAfter(filename, output.groupEnding, output.firstLine) + +iterator searchFile(pattern; filename: string; buffer: string): Output = + #echo "thread id:", getThreadId(), " pat: ", cast[int](unsafeAddr pattern) let si: SearchInfo = (buf: buffer, filename: filename) var prevMi, curMi: MatchInfo curMi.lineEnd = 1 var i = 0 var matches: array[0..re.MaxSubpatterns-1, string] for j in 0..high(matches): matches[j] = "" - var reallyReplace = true while i < buffer.len: let t = findBounds(buffer, pattern, matches, i) if t.first < 0 or t.last < t.first: - if optReplace notin options and prevMi.lineBeg != 0: # finalize last match - printLinesAfter(si, prevMi, 1+linesAfter) - stdout.flushFile() + if prevMi.lineBeg != 0: # finalize last match + yield Output(kind: GroupEnd, + groupEnding: getLinesAfter(si, prevMi), + firstLine: prevMi.lineEnd) break let lineBeg = curMi.lineEnd + countLineBreaks(buffer, i, t.first-1) @@ -404,32 +417,47 @@ proc processFile(pattern; filename: string; counter: var int, errors: var int) = lineBeg: lineBeg, lineEnd: lineBeg + countLineBreaks(buffer, t.first, t.last), match: buffer.substr(t.first, t.last)) - beforeHighlight() - inc counter - if optReplace notin options: - if prevMi.lineBeg == 0: # no previous match, so no previous block to finalize - printLinesBefore(si, curMi, linesBefore+1) - else: - printContextBetween(si, prevMi, curMi) - printMatch(si.fileName, curMi) - if t.last == buffer.len - 1: - stdout.write("\n") - stdout.flushFile() + if prevMi.lineBeg == 0: # no previous match, so no previous block to finalize + yield Output(kind: GroupFirstMatch, + pre: getLinesBefore(si, curMi), + match: curMi) else: - let r = replace(curMi.match, pattern, replacement % matches) - doReplace(si, curMi, i, r, result, lineRepl, reallyReplace) + let nLinesBetween = curMi.lineBeg - prevMi.lineEnd + if nLinesBetween <= linesAfter + linesBefore + 1: # print as 1 block + yield Output(kind: GroupNextMatch, + pre: getLinesBetween(si, prevMi, curMi), + match: curMi) + else: # finalize previous block and then print next block + yield Output(kind: GroupEnd, + groupEnding: getLinesAfter(si, prevMi), + firstLine: prevMi.lineEnd) + yield Output(kind: GroupFirstMatch, + pre: getLinesBefore(si, curMi), + match: curMi) + #if t.last == buffer.len - 1: # TODO + # stdout.write("\n") + #stdout.flushFile() i = t.last+1 prevMi = curMi - if optReplace in options: - result.add(substr(buffer, i)) # finalize new buffer after last match - var f: File - if open(f, filename, fmWrite): - f.write(result) - f.close() - else: - quit "cannot open file for overwriting: " & filename +iterator processFile(pattern; filename: string, yieldContents=false): Output = + var buffer: string + + if optFilenames in options: + buffer = filename + else: + try: + buffer = system.readFile(filename) + except IOError: + yield Output(kind: OpenError) + var found = false + for output in searchFile(pattern, filename, buffer): + yield output + found = true + if yieldContents and found: + yield Output(kind: FileContents, buffer: buffer) + proc hasRightFileName(path: string): bool = let filename = path.lastPathPart @@ -494,28 +522,180 @@ proc styleInsensitive(s: string): string = addx() else: addx() -proc walker(pattern; dir: string; counter: var int, errors: var int) = +proc walker(dir: string; files: var seq[string]) = if dirExists(dir): for kind, path in walkDir(dir): case kind of pcFile: if path.hasRightFileName: - processFile(pattern, path, counter, errors) + files.add(path) of pcLinkToFile: if optFollow in options and path.hasRightFileName: - processFile(pattern, path, counter, errors) + files.add(path) of pcDir: if optRecursive in options and path.hasRightDirectory: - walker(pattern, path, counter, errors) + walker(path, files) of pcLinkToDir: if optFollow in options and optRecursive in options and path.hasRightDirectory: - walker(pattern, path, counter, errors) + walker(path, files) elif fileExists(dir): - processFile(pattern, dir, counter, errors) + files.add(dir) else: printError "Error: no such file or directory: " & dir - inc(errors) + inc(gVar.errors) + +iterator walkDirBasic(dir: string): string = + var dirs = @[dir] # stack of directories + while dirs.len > 0: + let d = dirs.pop() + for kind, path in walkDir(d): + case kind + of pcFile: + if path.hasRightFileName: + yield path + of pcLinkToFile: + if optFollow in options and path.hasRightFileName: + yield path + of pcDir: + if optRecursive in options and path.hasRightDirectory: + dirs.add path + of pcLinkToDir: + if optFollow in options and optRecursive in options and + path.hasRightDirectory: + dirs.add path + +iterator walkRec(paths: seq[string]): string = + for path in paths: + if existsDir(path): + for p in walkDirBasic(path): + yield p + elif existsFile(path): + yield path + else: + printError "Error: no such file or directory: " & path + inc(gVar.errors) + +template printResult(filename: string, body: untyped) = + var filenameShown = false + template showFilename = + if not filenameShown and not oneline: + printBlockFile(filename) + stdout.write("\n") + stdout.flushFile() + filenameShown = true + if optVerbose in options: + showFilename + for output in body: + updateCounters(output) + if output.kind notin {Rejected, OpenError}: + showFilename + printOutput(filename, output) + +proc worker(pattern: Pattern) {.thread.} = + while true: + let (fileNo, filename) = requests.recv() + var rslt = newSeq[Output](); + for output in processFile(pattern, filename, yieldContents=(optReplace in options)): + rslt.add(output) + results.send((fileNo, move(rslt))) + +proc replaceMatches(filename: string, buffer: string, outpSeq: seq[Output]) = + var newBuf = newStringOfCap(buffer.len) + + var changed = false + var lineRepl = 1 + let si: SearchInfo = (buf: buffer, filename: filename) + var i = 0 + for output in outpSeq: + if output.kind in {GroupFirstMatch, GroupNextMatch}: + #let r = replace(curMi.match, pattern, replacement % matches) #TODO + let curMi = output.match + let r = replace(curMi.match, pattern, replacement) + if replace1match(si, curMi, i, r, newBuf, lineRepl): + changed = true + i = curMi.last + 1 + if changed: + newBuf.add(substr(buffer, i)) # finalize new buffer after last match + var f: File + if open(f, filename, fmWrite): + f.write(newBuf) + f.close() + else: + printError "cannot open file for overwriting: " & filename + inc(gVar.errors) + +proc runMultiThread(pattern) = + stdout.flushFile() + var + workers = newSeq[Thread[Pattern]](nWorkers) + open(requests) + open(results) + for n in 0 ..< nWorkers: + createThread(workers[n], worker, pattern) + var + inWork = 0 + nextFile = 0 + firstFile = 0 + storage = newTable[int, seq[Output]]() # file number -> accumulated result + files = newTable[int, string]() + template proc1result(fileNo, newOutpSeq) = + storage[fileNo] = newOutpSeq + var outpSeq: seq[Output] + while storage.haskey(firstFile): + outpSeq = storage[firstFile] + let filename = files[firstFile] + if optReplace notin options: + printResult(filename, outpSeq) + else: + var buffer = "" + + var matches = newSeq[Output]() + for output in outpSeq: + updateCounters(output) + case output.kind + of Rejected, OpenError: discard + of GroupFirstMatch, GroupNextMatch, GroupEnd: matches.add(output) + of FileContents: buffer = output.buffer + if matches.len > 0: + replaceMatches(filename, buffer, matches) + firstFile += 1 + for filename in walkRec(paths): + requests.send((nextFile,filename)) + files[nextFile] = filename + nextFile += 1 + inWork += 1 + let (available, msg) = results.tryRecv() + if available: + proc1result(msg.fileNo, msg.result) + inWork -= 1 + while inWork > 0: + let (fileNo, newOutpSeq) = results.recv() + proc1result(fileNo, newOutpSeq) + inWork -= 1 + +proc run1Thread(pattern) = + for filename in walkRec(paths): + if optReplace notin options: + printResult(filename, processFile(pattern, filename)) + else: + var matches = newSeq[Output]() + var buffer = "" + + for output in processFile(pattern, filename, yieldContents=true): + updateCounters(output) + case output.kind + of Rejected, OpenError: discard + of GroupFirstMatch, GroupNextMatch, GroupEnd: matches.add(output) + of FileContents: buffer = output.buffer + if matches.len > 0: + replaceMatches(filename, buffer, matches) + +proc run(pattern) = + if nWorkers == 0: + run1Thread(pattern) + else: + runMultiThread(pattern) proc reportError(msg: string) = printError "Error: " & msg @@ -543,13 +723,13 @@ for kind, key, val in getopt(): case kind of cmdArgument: if options.contains(optStdin): - filenames.add(key) + paths.add(key) elif pattern.len == 0: pattern = key elif options.contains(optReplace) and replacement.len == 0: replacement = key else: - filenames.add(key) + paths.add(key) of cmdLongOption, cmdShortOption: case normalize(key) of "find", "f": incl(options, optFind) @@ -571,6 +751,11 @@ for kind, key, val in getopt(): of "word", "w": incl(options, optWord) of "ignorecase", "i": incl(options, optIgnoreCase) of "ignorestyle", "y": incl(options, optIgnoreStyle) + of "nworkers", "n": + if val == "": + nWorkers = countProcessors() + else: + nWorkers = parseInt(val) of "ext": extensions.add val.split('|') of "noext": skipExtensions.add val.split('|') of "excludedir", "exclude-dir": excludeDir.add rex(val) @@ -632,10 +817,8 @@ if optStdin in options: if pattern.len == 0: reportError("empty pattern was given") else: - var counter = 0 - var errors = 0 - if filenames.len == 0: - filenames.add(os.getCurrentDir()) + if paths.len == 0: + paths.add(os.getCurrentDir()) if optRegex notin options: if optWord in options: pattern = r"(^ / !\letter)(" & pattern & r") !\letter" @@ -644,8 +827,7 @@ else: elif optIgnoreCase in options: pattern = "\\i " & pattern let pegp = peg(pattern) - for f in items(filenames): - walker(pegp, f, counter, errors) + run(pegp) else: var reflags = {reStudy} if optIgnoreStyle in options: @@ -657,10 +839,9 @@ else: reflags.incl reIgnoreCase let rep = if optRex in options: rex(pattern, reflags) else: re(pattern, reflags) - for f in items(filenames): - walker(rep, f, counter, errors) - if errors != 0: - printError $errors & " errors" - stdout.write($counter & " matches\n") - if errors != 0: + run(rep) + if gVar.errors != 0: + printError $gVar.errors & " errors" + stdout.write($gVar.matches & " matches\n") + if gVar.errors != 0: quit(1) diff --git a/tools/nimgrep.nim.cfg b/tools/nimgrep.nim.cfg index 6d0ea5aadb8ba..e08ed849ad0e8 100644 --- a/tools/nimgrep.nim.cfg +++ b/tools/nimgrep.nim.cfg @@ -1,5 +1,2 @@ -# The GC is stable enough now: - -#--gc:none - - +# using markandsweep because of bug https://github.com/nim-lang/Nim/issues/14138 +--threads:on --gc:markandsweep From be9495080afd447d8a8a069d9f89f4d09507cbd5 Mon Sep 17 00:00:00 2001 From: Andrey Makarov Date: Wed, 29 Apr 2020 17:35:38 +0300 Subject: [PATCH 02/20] nimgrep: add --bin, --text, --count options --- tools/nimgrep.nim | 73 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 59 insertions(+), 14 deletions(-) diff --git a/tools/nimgrep.nim b/tools/nimgrep.nim index f0e5bb66cbd37..0348f0a90ecb8 100644 --- a/tools/nimgrep.nim +++ b/tools/nimgrep.nim @@ -41,6 +41,9 @@ Options: --includeFile:PAT include only files whose names match the given regex PAT --excludeFile:PAT skip files whose names match the given regex pattern PAT --excludeDir:PAT skip directories whose names match the given regex PAT + --bin:yes|no|only process binary files? (detected by first 1024 bytes) + --text, -t process only text, the same as --bin:no + --count just count number of matches --nocolor output will be given without any colours --color[:always] force color even if output is redirected --colorTheme:THEME select color THEME from 'simple' (default), @@ -68,16 +71,20 @@ type TOptions = set[TOption] TConfirmEnum = enum ceAbort, ceYes, ceAll, ceNo, ceNone + Bin = enum + biYes, biOnly, biNo Pattern = Regex | Peg SearchInfo = tuple[buf: string, filename: string] MatchInfo = tuple[first: int, last: int; lineBeg: int, lineEnd: int, match: string] outputKind = enum - OpenError, Rejected, GroupFirstMatch, GroupNextMatch, GroupEnd, FileContents + OpenError, Rejected, JustCount, + GroupFirstMatch, GroupNextMatch, GroupEnd, FileContents Output = object case kind: outputKind of OpenError: msg: string of Rejected: discard + of JustCount: matches: int of GroupFirstMatch, GroupNextMatch: pre: string match: MatchInfo @@ -99,6 +106,8 @@ var excludeFile: seq[Regex] includeFile: seq[Regex] excludeDir: seq[Regex] + checkBin = biYes + justCount = false useWriteStyled = true oneline = true linesBefore = 0 @@ -107,6 +116,7 @@ var colorTheme = "simple" newLine = false gVar = (matches: 0, errors: 0, reallyReplace: false) + # gVar - variables that can change during search/replace nWorkers = 0 # run in single thread by default requests: Channel[(int, string)] results: Channel[tuple[fileNo: int, result: seq[Output]]] @@ -374,8 +384,9 @@ proc replace1match(si: SearchInfo, mi: MatchInfo, i: int, r: string; template updateCounters(output: Output) = case output.kind - of GroupFirstMatch, GroupNextMatch: inc gVar.matches - of OpenError: inc gVar.errors + of GroupFirstMatch, GroupNextMatch: inc(gVar.matches) + of JustCount: inc(gVar.matches, output.matches) + of OpenError: inc(gVar.errors) of Rejected, GroupEnd, FileContents: discard proc printOutput(filename: string, output: Output) = @@ -383,6 +394,8 @@ proc printOutput(filename: string, output: Output) = of OpenError: printError("can not open file " & filename) of Rejected: discard + of JustCount: + echo " (" & $output.matches & " matches)" of FileContents: discard # impossible of GroupFirstMatch: printLinesBefore(filename, output.pre, output.match.lineBeg) @@ -395,7 +408,6 @@ proc printOutput(filename: string, output: Output) = printLinesAfter(filename, output.groupEnding, output.firstLine) iterator searchFile(pattern; filename: string; buffer: string): Output = - #echo "thread id:", getThreadId(), " pat: ", cast[int](unsafeAddr pattern) let si: SearchInfo = (buf: buffer, filename: filename) var prevMi, curMi: MatchInfo curMi.lineEnd = 1 @@ -441,6 +453,11 @@ iterator searchFile(pattern; filename: string; buffer: string): Output = i = t.last+1 prevMi = curMi +func detectBin(buffer: string): bool = + for i in 0 ..< min(1024, buffer.len): + if buffer[i] == '\0': + return true + iterator processFile(pattern; filename: string, yieldContents=false): Output = var buffer: string @@ -451,13 +468,31 @@ iterator processFile(pattern; filename: string, yieldContents=false): Output = buffer = system.readFile(filename) except IOError: yield Output(kind: OpenError) - var found = false - for output in searchFile(pattern, filename, buffer): - yield output - found = true - if yieldContents and found: - yield Output(kind: FileContents, buffer: buffer) + var reject = false + if checkBin in {biNo, biOnly}: + let isBin = detectBin(buffer) + if isBin and checkBin == biNo: + reject = true + if (not isBin) and checkBin == biOnly: + reject = true + + if reject: + yield Output(kind: Rejected) + else: + var found = false + var cnt = 0 + for output in searchFile(pattern, filename, buffer): + found = true + if not justCount: + yield output + else: + if output.kind in {GroupFirstMatch, GroupNextMatch}: + inc(cnt) + if justCount and cnt > 0: + yield Output(kind: JustCount, matches: cnt) + if yieldContents and found and not justCount: + yield Output(kind: FileContents, buffer: buffer) proc hasRightFileName(path: string): bool = let filename = path.lastPathPart @@ -579,7 +614,7 @@ iterator walkRec(paths: seq[string]): string = template printResult(filename: string, body: untyped) = var filenameShown = false template showFilename = - if not filenameShown and not oneline: + if not filenameShown: printBlockFile(filename) stdout.write("\n") stdout.flushFile() @@ -588,8 +623,10 @@ template printResult(filename: string, body: untyped) = showFilename for output in body: updateCounters(output) - if output.kind notin {Rejected, OpenError}: + if output.kind notin {Rejected, OpenError, JustCount} and not oneline: showFilename + if output.kind == JustCount and oneline: + printFile(filename & ":") printOutput(filename, output) proc worker(pattern: Pattern) {.thread.} = @@ -654,7 +691,7 @@ proc runMultiThread(pattern) = for output in outpSeq: updateCounters(output) case output.kind - of Rejected, OpenError: discard + of Rejected, OpenError, JustCount: discard of GroupFirstMatch, GroupNextMatch, GroupEnd: matches.add(output) of FileContents: buffer = output.buffer if matches.len > 0: @@ -685,7 +722,7 @@ proc run1Thread(pattern) = for output in processFile(pattern, filename, yieldContents=true): updateCounters(output) case output.kind - of Rejected, OpenError: discard + of Rejected, OpenError, JustCount: discard of GroupFirstMatch, GroupNextMatch, GroupEnd: matches.add(output) of FileContents: buffer = output.buffer if matches.len > 0: @@ -761,6 +798,14 @@ for kind, key, val in getopt(): of "excludedir", "exclude-dir": excludeDir.add rex(val) of "includefile", "include-file": includeFile.add rex(val) of "excludefile", "exclude-file": excludeFile.add rex(val) + of "bin": + case val + of "no": checkBin = biNo + of "yes": checkBin = biYes + of "only": checkBin = biOnly + else: reportError("unknown value for --bin") + of "text", "t": checkBin = biNo + of "count": justCount = true of "nocolor": useWriteStyled = false of "color": case val From 9601a17b66d884bb6b563f1075a04082a88587db Mon Sep 17 00:00:00 2001 From: Andrey Makarov Date: Wed, 29 Apr 2020 22:28:00 +0300 Subject: [PATCH 03/20] nimgrep: add --sortTime option --- tools/nimgrep.nim | 40 ++++++++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/tools/nimgrep.nim b/tools/nimgrep.nim index 0348f0a90ecb8..b5b8c867d33c7 100644 --- a/tools/nimgrep.nim +++ b/tools/nimgrep.nim @@ -8,7 +8,7 @@ # import - os, strutils, parseopt, pegs, re, terminal, osproc, tables + os, strutils, parseopt, pegs, re, terminal, osproc, tables, algorithm, times const Version = "1.5" @@ -54,6 +54,8 @@ Options: -b:N print N lines of leading context before every match --context:N, -c:N print N lines of leading context before every match and N lines of trailing context after it + --sortTime[:desc|asc], + -s[:desc|asc] order files by modification time descending or ascending --group, -g group matches by file --newLine, -l display every matching line starting from a new line --verbose be verbose: list every processed file @@ -108,6 +110,8 @@ var excludeDir: seq[Regex] checkBin = biYes justCount = false + sortTime = false + sortTimeOrder = SortOrder.Descending useWriteStyled = true oneline = true linesBefore = 0 @@ -581,17 +585,20 @@ proc walker(dir: string; files: var seq[string]) = inc(gVar.errors) iterator walkDirBasic(dir: string): string = - var dirs = @[dir] # stack of directories - while dirs.len > 0: - let d = dirs.pop() + var dirStack = @[dir] # stack of directories + var timeFiles = newSeq[(times.Time, string)]() + while dirStack.len > 0: + let d = dirStack.pop() + var files = newSeq[string]() + var dirs = newSeq[string]() for kind, path in walkDir(d): case kind of pcFile: if path.hasRightFileName: - yield path + files.add(path) of pcLinkToFile: if optFollow in options and path.hasRightFileName: - yield path + files.add(path) of pcDir: if optRecursive in options and path.hasRightDirectory: dirs.add path @@ -599,6 +606,20 @@ iterator walkDirBasic(dir: string): string = if optFollow in options and optRecursive in options and path.hasRightDirectory: dirs.add path + if sortTime: + for file in files: + timeFiles.add((getLastModificationTime(file), file)) + else: # alphanumeric sort + files.sort() + for file in files: + yield file + dirs.sort(order = SortOrder.Descending) + for dir in dirs: + dirStack.add(dir) + if sortTime: + timeFiles.sort(sortTimeOrder) + for (_, file) in timeFiles: + yield file iterator walkRec(paths: seq[string]): string = for path in paths: @@ -806,6 +827,13 @@ for kind, key, val in getopt(): else: reportError("unknown value for --bin") of "text", "t": checkBin = biNo of "count": justCount = true + of "sorttime", "s": + sortTime = true + case normalize(val) + of "": discard + of "asc", "ascending": sortTimeOrder = SortOrder.Ascending + of "desc", "descending": sortTimeOrder = SortOrder.Descending + else: reportError("invalid value '" & val & "' for --sortTime") of "nocolor": useWriteStyled = false of "color": case val From 0355da108f5570e7c51e4a39bacee92eff4c8630 Mon Sep 17 00:00:00 2001 From: Andrey Makarov Date: Tue, 9 Jun 2020 17:03:51 +0300 Subject: [PATCH 04/20] allow Peg in all matches including --includeFile, --excludeFile, --excludeDir --- tools/nimgrep.nim | 416 +++++++++++++++++++++++++--------------------- 1 file changed, 222 insertions(+), 194 deletions(-) diff --git a/tools/nimgrep.nim b/tools/nimgrep.nim index b5b8c867d33c7..3b55b72cd23d2 100644 --- a/tools/nimgrep.nim +++ b/tools/nimgrep.nim @@ -41,8 +41,8 @@ Options: --includeFile:PAT include only files whose names match the given regex PAT --excludeFile:PAT skip files whose names match the given regex pattern PAT --excludeDir:PAT skip directories whose names match the given regex PAT - --bin:yes|no|only process binary files? (detected by first 1024 bytes) - --text, -t process only text, the same as --bin:no + --bin:yes|no|only process binary files? (detected by \0 in first 1K bytes) + --text, -t process only text files, the same as --bin:no --count just count number of matches --nocolor output will be given without any colours --color[:always] force color even if output is redirected @@ -95,6 +95,22 @@ type firstLine: int # = last lineNo of last match of FileContents: buffer: string + Trequest = (int, string) + Tresult = tuple[finished: bool, fileNo: int, + filename: string, fileResult: seq[Output]] + WalkOpt = tuple + extensions: seq[string] + skipExtensions: seq[string] + excludeFile: seq[string] + includeFile: seq[string] + excludeDir : seq[string] + WalkOptPat = tuple + excludeFileP: seq[Peg] + includeFileP: seq[Peg] + excludeDirP : seq[Peg] + excludeFileR: seq[Regex] + includeFileR: seq[Regex] + excludeDirR : seq[Regex] using pattern: Pattern @@ -102,12 +118,8 @@ var paths: seq[string] = @[] pattern = "" replacement = "" - extensions: seq[string] = @[] options: TOptions = {optRegex} - skipExtensions: seq[string] = @[] - excludeFile: seq[Regex] - includeFile: seq[Regex] - excludeDir: seq[Regex] + opt {.threadvar.}: WalkOpt checkBin = biYes justCount = false sortTime = false @@ -117,13 +129,13 @@ var linesBefore = 0 linesAfter = 0 linesContext = 0 - colorTheme = "simple" newLine = false gVar = (matches: 0, errors: 0, reallyReplace: false) # gVar - variables that can change during search/replace nWorkers = 0 # run in single thread by default - requests: Channel[(int, string)] - results: Channel[tuple[fileNo: int, result: seq[Output]]] + searchRequestsChan: Channel[Trequest] + resultsChan: Channel[Tresult] + colorTheme: string = "simple" proc ask(msg: string): string = stdout.write(msg) @@ -254,7 +266,7 @@ proc writeArrow(s: string) = proc blockHeader(filename: string, line: int|string, replMode=false) = if replMode: writeArrow(" ->\n") - elif newLine: + elif newLine and optFilenames notin options: if oneline: printBlockFile(filename) printBlockLineN(":" & $line & ":") @@ -266,7 +278,7 @@ proc lineHeader(filename: string, line: int|string, isMatch: bool) = let lineSym = if isMatch: $line & ":" else: $line & " " - if not newLine: + if not newLine and optFilenames notin options: if oneline: printFile(filename) printLineN(":" & lineSym, isMatch) @@ -387,29 +399,29 @@ proc replace1match(si: SearchInfo, mi: MatchInfo, i: int, r: string; inc(curLine, countLineBreaks(mi.match, 0, mi.match.len-1)) template updateCounters(output: Output) = - case output.kind - of GroupFirstMatch, GroupNextMatch: inc(gVar.matches) - of JustCount: inc(gVar.matches, output.matches) - of OpenError: inc(gVar.errors) - of Rejected, GroupEnd, FileContents: discard + case output.kind + of GroupFirstMatch, GroupNextMatch: inc(gVar.matches) + of JustCount: inc(gVar.matches, output.matches) + of OpenError: inc(gVar.errors) + of Rejected, GroupEnd, FileContents: discard proc printOutput(filename: string, output: Output) = - case output.kind - of OpenError: - printError("can not open file " & filename) - of Rejected: discard - of JustCount: - echo " (" & $output.matches & " matches)" - of FileContents: discard # impossible - of GroupFirstMatch: - printLinesBefore(filename, output.pre, output.match.lineBeg) - printMatch(filename, output.match) - #flush: TODO - of GroupNextMatch: - printBetweenMatches(filename, output.pre, output.match.lineBeg) - printMatch(filename, output.match) - of GroupEnd: - printLinesAfter(filename, output.groupEnding, output.firstLine) + case output.kind + of OpenError: + printError("can not open path " & filename & " " & output.msg) + of Rejected: discard + of JustCount: + echo " (" & $output.matches & " matches)" + of FileContents: discard # impossible + of GroupFirstMatch: + printLinesBefore(filename, output.pre, output.match.lineBeg) + printMatch(filename, output.match) + #flush: TODO + of GroupNextMatch: + printBetweenMatches(filename, output.pre, output.match.lineBeg) + printMatch(filename, output.match) + of GroupEnd: + printLinesAfter(filename, output.groupEnding, output.firstLine) iterator searchFile(pattern; filename: string; buffer: string): Output = let si: SearchInfo = (buf: buffer, filename: filename) @@ -418,7 +430,7 @@ iterator searchFile(pattern; filename: string; buffer: string): Output = var i = 0 var matches: array[0..re.MaxSubpatterns-1, string] for j in 0..high(matches): matches[j] = "" - while i < buffer.len: + while true: let t = findBounds(buffer, pattern, matches, i) if t.first < 0 or t.last < t.first: if prevMi.lineBeg != 0: # finalize last match @@ -433,7 +445,7 @@ iterator searchFile(pattern; filename: string; buffer: string): Output = lineBeg: lineBeg, lineEnd: lineBeg + countLineBreaks(buffer, t.first, t.last), match: buffer.substr(t.first, t.last)) - if prevMi.lineBeg == 0: # no previous match, so no previous block to finalize + if prevMi.lineBeg == 0: # no prev. match, so no prev. block to finalize yield Output(kind: GroupFirstMatch, pre: getLinesBefore(si, curMi), match: curMi) @@ -450,9 +462,6 @@ iterator searchFile(pattern; filename: string; buffer: string): Output = yield Output(kind: GroupFirstMatch, pre: getLinesBefore(si, curMi), match: curMi) - #if t.last == buffer.len - 1: # TODO - # stdout.write("\n") - #stdout.flushFile() i = t.last+1 prevMi = curMi @@ -470,8 +479,8 @@ iterator processFile(pattern; filename: string, yieldContents=false): Output = else: try: buffer = system.readFile(filename) - except IOError: - yield Output(kind: OpenError) + except IOError as e: + yield Output(kind: OpenError, msg: e.msg) var reject = false if checkBin in {biNo, biOnly}: @@ -498,33 +507,38 @@ iterator processFile(pattern; filename: string, yieldContents=false): Output = if yieldContents and found and not justCount: yield Output(kind: FileContents, buffer: buffer) -proc hasRightFileName(path: string): bool = +proc hasRightFileName(path: string, wopt: WalkOptPat): bool = let filename = path.lastPathPart let ex = filename.splitFile.ext.substr(1) # skip leading '.' - if extensions.len != 0: + if opt.extensions.len != 0: var matched = false - for x in items(extensions): + for x in opt.extensions: if os.cmpPaths(x, ex) == 0: matched = true break if not matched: return false - for x in items(skipExtensions): + for x in opt.skipExtensions: if os.cmpPaths(x, ex) == 0: return false - if includeFile.len != 0: - var matched = false - for x in items(includeFile): - if filename.match(x): - matched = true - break - if not matched: return false - for x in items(excludeFile): - if filename.match(x): return false + template checkFileName(patInclude: untyped, patExclude: untyped) = + if patInclude.len != 0: + var matched = false + for pat in patInclude: + if filename.match(pat): + matched = true + break + if not matched: return false + for pat in patExclude: + if filename.match(pat): return false + checkFileName(wopt.includeFileR, wopt.excludeFileR) + checkFileName(wopt.includeFileP, wopt.excludeFileP) result = true -proc hasRightDirectory(path: string): bool = +proc hasRightDirectory(path: string, wopt: WalkOptPat): bool = let dirname = path.lastPathPart - for x in items(excludeDir): - if dirname.match(x): return false + for pat in wopt.excludeDirR: + if dirname.match(pat): return false + for pat in wopt.excludeDirP: + if dirname.match(pat): return false result = true proc styleInsensitive(s: string): string = @@ -561,30 +575,7 @@ proc styleInsensitive(s: string): string = addx() else: addx() -proc walker(dir: string; files: var seq[string]) = - if dirExists(dir): - for kind, path in walkDir(dir): - case kind - of pcFile: - if path.hasRightFileName: - files.add(path) - of pcLinkToFile: - if optFollow in options and path.hasRightFileName: - files.add(path) - of pcDir: - if optRecursive in options and path.hasRightDirectory: - walker(path, files) - of pcLinkToDir: - if optFollow in options and optRecursive in options and - path.hasRightDirectory: - walker(path, files) - elif fileExists(dir): - files.add(dir) - else: - printError "Error: no such file or directory: " & dir - inc(gVar.errors) - -iterator walkDirBasic(dir: string): string = +iterator walkDirBasic(dir: string, wopt: WalkOptPat): string = var dirStack = @[dir] # stack of directories var timeFiles = newSeq[(times.Time, string)]() while dirStack.len > 0: @@ -594,22 +585,22 @@ iterator walkDirBasic(dir: string): string = for kind, path in walkDir(d): case kind of pcFile: - if path.hasRightFileName: + if path.hasRightFileName(wopt): files.add(path) of pcLinkToFile: - if optFollow in options and path.hasRightFileName: + if optFollow in options and path.hasRightFileName(wopt): files.add(path) of pcDir: - if optRecursive in options and path.hasRightDirectory: + if optRecursive in options and path.hasRightDirectory(wopt): dirs.add path of pcLinkToDir: if optFollow in options and optRecursive in options and - path.hasRightDirectory: + path.hasRightDirectory(wopt): dirs.add path - if sortTime: + if sortTime: # sort by time - collect files before yielding for file in files: timeFiles.add((getLastModificationTime(file), file)) - else: # alphanumeric sort + else: # alphanumeric sort, yield immediately after sorting files.sort() for file in files: yield file @@ -621,16 +612,59 @@ iterator walkDirBasic(dir: string): string = for (_, file) in timeFiles: yield file -iterator walkRec(paths: seq[string]): string = +template withPattern2(initPattern: string, + finalPattern: untyped, # either Peg or Regex + body1: untyped, # the Peg block + body2: untyped) = # the Regex block + var pattern = initPattern + if optRegex notin options: + if optWord in options: + pattern = r"(^ / !\letter)(" & pattern & r") !\letter" + if optIgnoreStyle in options: + pattern = "\\y " & pattern + elif optIgnoreCase in options: + pattern = "\\i " & pattern + var finalPattern = peg(pattern) + body1 + else: + var reflags = {reStudy} + if optIgnoreStyle in options: + pattern = styleInsensitive(pattern) + if optWord in options: + # see https://github.com/nim-lang/Nim/issues/13528#issuecomment-592786443 + pattern = r"(^|\W)(:?" & pattern & r")($|\W)" + if {optIgnoreCase, optIgnoreStyle} * options != {}: + reflags.incl reIgnoreCase + var finalPattern = if optRex in options: rex(pattern, reflags) + else: re(pattern, reflags) + body2 + +template withPattern(initPattern: string, finalPattern: untyped, body) = + # use the same body for Peg and Regex + withPattern2(initPattern, finalPattern, body, body) + +iterator walkRec(paths: seq[string]): (string, string) = + var wopt: WalkOptPat + for pat in opt.excludeFile: + withPattern2(pat, finalPattern, + wopt.excludeFileP.add finalPattern, + wopt.excludeFileR.add finalPattern) + for pat in opt.includeFile: + withPattern2(pat, finalPattern, + wopt.includeFileP.add finalPattern, + wopt.includeFileR.add finalPattern) + for pat in opt.excludeDir: + withPattern2(pat, finalPattern, + wopt.excludeDirP.add finalPattern, + wopt.excludeDirR.add finalPattern) for path in paths: - if existsDir(path): - for p in walkDirBasic(path): - yield p - elif existsFile(path): - yield path + if dirExists(path): + for p in walkDirBasic(path, wopt): + yield ("", p) + elif fileExists(path): + yield ("", path) else: - printError "Error: no such file or directory: " & path - inc(gVar.errors) + yield ("Error: no such file or directory: ", path) template printResult(filename: string, body: untyped) = var filenameShown = false @@ -650,14 +684,6 @@ template printResult(filename: string, body: untyped) = printFile(filename & ":") printOutput(filename, output) -proc worker(pattern: Pattern) {.thread.} = - while true: - let (fileNo, filename) = requests.recv() - var rslt = newSeq[Output](); - for output in processFile(pattern, filename, yieldContents=(optReplace in options)): - rslt.add(output) - results.send((fileNo, move(rslt))) - proc replaceMatches(filename: string, buffer: string, outpSeq: seq[Output]) = var newBuf = newStringOfCap(buffer.len) @@ -683,73 +709,95 @@ proc replaceMatches(filename: string, buffer: string, outpSeq: seq[Output]) = printError "cannot open file for overwriting: " & filename inc(gVar.errors) -proc runMultiThread(pattern) = - stdout.flushFile() - var - workers = newSeq[Thread[Pattern]](nWorkers) - open(requests) - open(results) - for n in 0 ..< nWorkers: - createThread(workers[n], worker, pattern) - var - inWork = 0 - nextFile = 0 - firstFile = 0 - storage = newTable[int, seq[Output]]() # file number -> accumulated result - files = newTable[int, string]() - template proc1result(fileNo, newOutpSeq) = - storage[fileNo] = newOutpSeq - var outpSeq: seq[Output] - while storage.haskey(firstFile): - outpSeq = storage[firstFile] - let filename = files[firstFile] - if optReplace notin options: - printResult(filename, outpSeq) - else: - var buffer = "" - - var matches = newSeq[Output]() - for output in outpSeq: - updateCounters(output) - case output.kind - of Rejected, OpenError, JustCount: discard - of GroupFirstMatch, GroupNextMatch, GroupEnd: matches.add(output) - of FileContents: buffer = output.buffer - if matches.len > 0: - replaceMatches(filename, buffer, matches) - firstFile += 1 - for filename in walkRec(paths): - requests.send((nextFile,filename)) - files[nextFile] = filename - nextFile += 1 - inWork += 1 - let (available, msg) = results.tryRecv() - if available: - proc1result(msg.fileNo, msg.result) - inWork -= 1 - while inWork > 0: - let (fileNo, newOutpSeq) = results.recv() - proc1result(fileNo, newOutpSeq) - inWork -= 1 - -proc run1Thread(pattern) = - for filename in walkRec(paths): - if optReplace notin options: - printResult(filename, processFile(pattern, filename)) +proc run1Thread(initPattern: string) = + withPattern(initPattern, finalPattern): + for (err, filename) in walkRec(paths): + if err != "": + inc(gVar.errors) + printError (err & filename) + continue + if optReplace notin options: + printResult(filename, processFile(finalPattern, filename)) + else: + var matches = newSeq[Output]() + var buffer = "" + + for output in processFile(finalPattern, filename, yieldContents=true): + updateCounters(output) + case output.kind + of Rejected, OpenError, JustCount: discard + of GroupFirstMatch, GroupNextMatch, GroupEnd: matches.add(output) + of FileContents: buffer = output.buffer + if matches.len > 0: + replaceMatches(filename, buffer, matches) + +proc worker(initPattern: string) {.thread.} = + withPattern(initPattern, finalPattern): + while true: + let (fileNo, filename) = searchRequestsChan.recv() + var fileResult = newSeq[Output](); + for output in processFile(finalPattern, filename, yieldContents=(optReplace in options)): + fileResult.add(output) + resultsChan.send((false, fileNo, filename, move(fileResult))) + +proc pathProducer(arg: (seq[string], WalkOpt)) {.thread.} = + let paths = arg[0] + opt = arg[1] # init thread-local copy of opt + var + nextFileN = 0 + for (err, filename) in walkRec(paths): + if err == "": + searchRequestsChan.send((nextFileN,filename)) + else: + resultsChan.send((false, nextFileN, + filename, @[Output(kind: OpenError, msg: err)])) + nextFileN += 1 + resultsChan.send((true, nextFileN, "", @[])) + +proc runMultiThread(pattern: string) = + var + workers = newSeq[Thread[string]](nWorkers) + storage = newTable[int, (string, seq[Output]) ]() + # file number -> accumulated result + firstUnprocessedFile = 0 + open(searchRequestsChan) + open(resultsChan) + for n in 0 ..< nWorkers: + createThread(workers[n], worker, pattern) + var producerThread: Thread[(seq[string], WalkOpt)] + createThread(producerThread, pathProducer, (paths, opt)) + template process1result(fileNo: int, fname: string, fileResult: seq[Output]) = + storage[fileNo] = (fname, fileResult) + var outpSeq: seq[Output] + while storage.haskey(firstUnprocessedFile): + outpSeq = storage[firstUnprocessedFile][1] + let filename = storage[firstUnprocessedFile][0] + if optReplace notin options: + printResult(filename, outpSeq) + else: + var buffer = "" + + var matches = newSeq[Output]() + for output in outpSeq: + updateCounters(output) + case output.kind + of Rejected, OpenError, JustCount: discard + # printError error + of GroupFirstMatch, GroupNextMatch, GroupEnd: matches.add(output) + of FileContents: buffer = output.buffer + if matches.len > 0: + replaceMatches(filename, buffer, matches) + storage.del(firstUnprocessedFile) + firstUnprocessedFile += 1 + var totalFiles = -1 # will be known when pathProducer finishes + while totalFiles == -1 or firstUnprocessedFile < totalFiles: + let msg = resultsChan.recv() + if msg.finished: + totalFiles = msg.fileNo else: - var matches = newSeq[Output]() - var buffer = "" - - for output in processFile(pattern, filename, yieldContents=true): - updateCounters(output) - case output.kind - of Rejected, OpenError, JustCount: discard - of GroupFirstMatch, GroupNextMatch, GroupEnd: matches.add(output) - of FileContents: buffer = output.buffer - if matches.len > 0: - replaceMatches(filename, buffer, matches) - -proc run(pattern) = + process1result(msg.fileNo, msg.filename, msg.fileResult) + +proc run(pattern: string) = if nWorkers == 0: run1Thread(pattern) else: @@ -814,11 +862,11 @@ for kind, key, val in getopt(): nWorkers = countProcessors() else: nWorkers = parseInt(val) - of "ext": extensions.add val.split('|') - of "noext": skipExtensions.add val.split('|') - of "excludedir", "exclude-dir": excludeDir.add rex(val) - of "includefile", "include-file": includeFile.add rex(val) - of "excludefile", "exclude-file": excludeFile.add rex(val) + of "ext": opt.extensions.add val.split('|') + of "noext": opt.skipExtensions.add val.split('|') + of "excludedir", "exclude-dir": opt.excludeDir.add val + of "includefile", "include-file": opt.includeFile.add val + of "excludefile", "exclude-file": opt.excludeFile.add val of "bin": case val of "no": checkBin = biNo @@ -892,27 +940,7 @@ if pattern.len == 0: else: if paths.len == 0: paths.add(os.getCurrentDir()) - if optRegex notin options: - if optWord in options: - pattern = r"(^ / !\letter)(" & pattern & r") !\letter" - if optIgnoreStyle in options: - pattern = "\\y " & pattern - elif optIgnoreCase in options: - pattern = "\\i " & pattern - let pegp = peg(pattern) - run(pegp) - else: - var reflags = {reStudy} - if optIgnoreStyle in options: - pattern = styleInsensitive(pattern) - if optWord in options: - # see https://github.com/nim-lang/Nim/issues/13528#issuecomment-592786443 - pattern = r"(^|\W)(:?" & pattern & r")($|\W)" - if {optIgnoreCase, optIgnoreStyle} * options != {}: - reflags.incl reIgnoreCase - let rep = if optRex in options: rex(pattern, reflags) - else: re(pattern, reflags) - run(rep) + run(pattern) if gVar.errors != 0: printError $gVar.errors & " errors" stdout.write($gVar.matches & " matches\n") From 92e05d6084a9bde3061a22a6fbc486e3a933b078 Mon Sep 17 00:00:00 2001 From: Andrey Makarov Date: Sat, 10 Oct 2020 17:57:04 +0300 Subject: [PATCH 05/20] add --match and --noMatch options --- tools/nimgrep.nim | 426 +++++++++++++++++++++++++--------------------- 1 file changed, 235 insertions(+), 191 deletions(-) diff --git a/tools/nimgrep.nim b/tools/nimgrep.nim index 3b55b72cd23d2..f8ef72ca47b60 100644 --- a/tools/nimgrep.nim +++ b/tools/nimgrep.nim @@ -11,12 +11,12 @@ import os, strutils, parseopt, pegs, re, terminal, osproc, tables, algorithm, times const - Version = "1.5" + Version = "1.6" Usage = "nimgrep - Nim Grep Utility Version " & Version & """ (c) 2012 Andreas Rumpf Usage: - nimgrep [options] [pattern] [replacement] (file/directory)* + nimgrep [options] pattern [replacement] (file/directory)* Options: --find, -f find the pattern (default) --replace, -! replace the pattern @@ -38,9 +38,11 @@ Options: empty one ("--ext") means files with missing extension --noExt:EX1|... exclude files having given extension(s), use empty one to skip files with no extension (like some binary files are) - --includeFile:PAT include only files whose names match the given regex PAT - --excludeFile:PAT skip files whose names match the given regex pattern PAT - --excludeDir:PAT skip directories whose names match the given regex PAT + --includeFile:PAT include only files whose names match the given PATttern + --excludeFile:PAT skip files whose names match the given pattern PAT + --excludeDir:PAT skip directories whose names match the given pattern PAT + --match:PAT, -m:PAT select files containing a (not displayed) match of PAT + --noMatch:PAT select files not containing any match of PAT --bin:yes|no|only process binary files? (detected by \0 in first 1K bytes) --text, -t process only text files, the same as --bin:no --count just count number of matches @@ -54,8 +56,8 @@ Options: -b:N print N lines of leading context before every match --context:N, -c:N print N lines of leading context before every match and N lines of trailing context after it - --sortTime[:desc|asc], - -s[:desc|asc] order files by modification time descending or ascending + --sortTime order files by the last modification time - + -s[:desc|asc] - descending (default) or ascending --group, -g group matches by file --newLine, -l display every matching line starting from a new line --verbose be verbose: list every processed file @@ -98,29 +100,32 @@ type Trequest = (int, string) Tresult = tuple[finished: bool, fileNo: int, filename: string, fileResult: seq[Output]] - WalkOpt = tuple + WalkOpt = tuple # used for walking directories/producing paths extensions: seq[string] skipExtensions: seq[string] excludeFile: seq[string] includeFile: seq[string] excludeDir : seq[string] - WalkOptPat = tuple - excludeFileP: seq[Peg] - includeFileP: seq[Peg] - excludeDirP : seq[Peg] - excludeFileR: seq[Regex] - includeFileR: seq[Regex] - excludeDirR : seq[Regex] - -using pattern: Pattern + WalkOptComp[Pat] = tuple # a compiled version of the previous + excludeFile: seq[Pat] + includeFile: seq[Pat] + excludeDir : seq[Pat] + SearchOpt = tuple # used for searching inside a file + pattern: string + checkMatch: string + checkNoMatch: string + checkBin: Bin + SearchOptComp[Pat] = tuple # a compiled version of the previous + pattern: Pat + checkMatch: Pat + checkNoMatch: Pat var paths: seq[string] = @[] - pattern = "" replacement = "" options: TOptions = {optRegex} - opt {.threadvar.}: WalkOpt - checkBin = biYes + walkOpt {.threadvar.}: WalkOpt + searchOpt {.threadvar.}: SearchOpt justCount = false sortTime = false sortTimeOrder = SortOrder.Descending @@ -137,6 +142,8 @@ var resultsChan: Channel[Tresult] colorTheme: string = "simple" +searchOpt.checkBin = biYes + proc ask(msg: string): string = stdout.write(msg) stdout.flushFile() @@ -423,7 +430,7 @@ proc printOutput(filename: string, output: Output) = of GroupEnd: printLinesAfter(filename, output.groupEnding, output.firstLine) -iterator searchFile(pattern; filename: string; buffer: string): Output = +iterator searchFile(pattern: Pattern; filename: string; buffer: string): Output = let si: SearchInfo = (buf: buffer, filename: filename) var prevMi, curMi: MatchInfo curMi.lineEnd = 1 @@ -471,7 +478,82 @@ func detectBin(buffer: string): bool = if buffer[i] == '\0': return true -iterator processFile(pattern; filename: string, yieldContents=false): Output = +proc compilePeg(initPattern: string): Peg = + var pattern = initPattern + if optWord in options: + pattern = r"(^ / !\letter)(" & pattern & r") !\letter" + if optIgnoreStyle in options: + pattern = "\\y " & pattern + elif optIgnoreCase in options: + pattern = "\\i " & pattern + result = peg(pattern) + +proc styleInsensitive(s: string): string = + template addx = + result.add(s[i]) + inc(i) + result = "" + var i = 0 + var brackets = 0 + while i < s.len: + case s[i] + of 'A'..'Z', 'a'..'z', '0'..'9': + addx() + if brackets == 0: result.add("_?") + of '_': + addx() + result.add('?') + of '[': + addx() + inc(brackets) + of ']': + addx() + if brackets > 0: dec(brackets) + of '?': + addx() + if s[i] == '<': + addx() + while s[i] != '>' and s[i] != '\0': addx() + of '\\': + addx() + if s[i] in strutils.Digits: + while s[i] in strutils.Digits: addx() + else: + addx() + else: addx() + +proc compileRegex(initPattern: string): Regex = + var pattern = initPattern + var reflags = {reStudy} + if optIgnoreStyle in options: + pattern = styleInsensitive(pattern) + if optWord in options: + # see https://github.com/nim-lang/Nim/issues/13528#issuecomment-592786443 + pattern = r"(^|\W)(:?" & pattern & r")($|\W)" + if {optIgnoreCase, optIgnoreStyle} * options != {}: + reflags.incl reIgnoreCase + result = if optRex in options: rex(pattern, reflags) + else: re(pattern, reflags) + +template declareCompiledPatterns(compiledStruct: untyped, + StructType: untyped, + body: untyped) = + if optRegex notin options: + var compiledStruct: StructType[Peg] + proc compile(p: string): Peg = p.compilePeg() + proc compileArray(initPattern: seq[string]): seq[Peg] = + for pat in initPattern: + result.add pat.compilePeg() + body + else: + var compiledStruct: StructType[Regex] + proc compile(p: string): Regex = p.compileRegex() + proc compileArray(initPattern: seq[string]): seq[Regex] = + for pat in initPattern: + result.add pat.compileRegex() + body + +iterator processFile(searchOptC: SearchOptComp[Pattern], filename: string, yieldContents=false): Output = var buffer: string if optFilenames in options: @@ -483,19 +565,27 @@ iterator processFile(pattern; filename: string, yieldContents=false): Output = yield Output(kind: OpenError, msg: e.msg) var reject = false - if checkBin in {biNo, biOnly}: + if searchOpt.checkBin in {biNo, biOnly}: let isBin = detectBin(buffer) - if isBin and checkBin == biNo: + if isBin and searchOpt.checkBin == biNo: reject = true - if (not isBin) and checkBin == biOnly: + if (not isBin) and searchOpt.checkBin == biOnly: reject = true + if not reject: + if searchOpt.checkMatch != "": + reject = not contains(buffer, searchOptC.checkMatch, 0) + + if not reject: + if searchOpt.checkNoMatch != "": + reject = contains(buffer, searchOptC.checkNoMatch, 0) + if reject: yield Output(kind: Rejected) else: var found = false var cnt = 0 - for output in searchFile(pattern, filename, buffer): + for output in searchFile(searchOptC.pattern, filename, buffer): found = true if not justCount: yield output @@ -507,75 +597,36 @@ iterator processFile(pattern; filename: string, yieldContents=false): Output = if yieldContents and found and not justCount: yield Output(kind: FileContents, buffer: buffer) -proc hasRightFileName(path: string, wopt: WalkOptPat): bool = +proc hasRightFileName(path: string, walkOptC: WalkOptComp[Pattern]): bool = let filename = path.lastPathPart let ex = filename.splitFile.ext.substr(1) # skip leading '.' - if opt.extensions.len != 0: + if walkOpt.extensions.len != 0: var matched = false - for x in opt.extensions: + for x in walkOpt.extensions: if os.cmpPaths(x, ex) == 0: matched = true break if not matched: return false - for x in opt.skipExtensions: + for x in walkOpt.skipExtensions: if os.cmpPaths(x, ex) == 0: return false - template checkFileName(patInclude: untyped, patExclude: untyped) = - if patInclude.len != 0: - var matched = false - for pat in patInclude: - if filename.match(pat): - matched = true - break - if not matched: return false - for pat in patExclude: - if filename.match(pat): return false - checkFileName(wopt.includeFileR, wopt.excludeFileR) - checkFileName(wopt.includeFileP, wopt.excludeFileP) + if walkOptC.includeFile.len != 0: + var matched = false + for pat in walkOptC.includeFile: + if filename.match(pat): + matched = true + break + if not matched: return false + for pat in walkOptC.excludeFile: + if filename.match(pat): return false result = true -proc hasRightDirectory(path: string, wopt: WalkOptPat): bool = +proc hasRightDirectory(path: string, walkOptC: WalkOptComp[Pattern]): bool = let dirname = path.lastPathPart - for pat in wopt.excludeDirR: - if dirname.match(pat): return false - for pat in wopt.excludeDirP: + for pat in walkOptC.excludeDir: if dirname.match(pat): return false result = true -proc styleInsensitive(s: string): string = - template addx = - result.add(s[i]) - inc(i) - result = "" - var i = 0 - var brackets = 0 - while i < s.len: - case s[i] - of 'A'..'Z', 'a'..'z', '0'..'9': - addx() - if brackets == 0: result.add("_?") - of '_': - addx() - result.add('?') - of '[': - addx() - inc(brackets) - of ']': - addx() - if brackets > 0: dec(brackets) - of '?': - addx() - if s[i] == '<': - addx() - while s[i] != '>' and s[i] != '\0': addx() - of '\\': - addx() - if s[i] in strutils.Digits: - while s[i] in strutils.Digits: addx() - else: - addx() - else: addx() - -iterator walkDirBasic(dir: string, wopt: WalkOptPat): string = +iterator walkDirBasic(dir: string, walkOptC: WalkOptComp[Pattern]): string = var dirStack = @[dir] # stack of directories var timeFiles = newSeq[(times.Time, string)]() while dirStack.len > 0: @@ -585,17 +636,17 @@ iterator walkDirBasic(dir: string, wopt: WalkOptPat): string = for kind, path in walkDir(d): case kind of pcFile: - if path.hasRightFileName(wopt): + if path.hasRightFileName(walkOptC): files.add(path) of pcLinkToFile: - if optFollow in options and path.hasRightFileName(wopt): + if optFollow in options and path.hasRightFileName(walkOptC): files.add(path) of pcDir: - if optRecursive in options and path.hasRightDirectory(wopt): + if optRecursive in options and path.hasRightDirectory(walkOptC): dirs.add path of pcLinkToDir: if optFollow in options and optRecursive in options and - path.hasRightDirectory(wopt): + path.hasRightDirectory(walkOptC): dirs.add path if sortTime: # sort by time - collect files before yielding for file in files: @@ -612,59 +663,19 @@ iterator walkDirBasic(dir: string, wopt: WalkOptPat): string = for (_, file) in timeFiles: yield file -template withPattern2(initPattern: string, - finalPattern: untyped, # either Peg or Regex - body1: untyped, # the Peg block - body2: untyped) = # the Regex block - var pattern = initPattern - if optRegex notin options: - if optWord in options: - pattern = r"(^ / !\letter)(" & pattern & r") !\letter" - if optIgnoreStyle in options: - pattern = "\\y " & pattern - elif optIgnoreCase in options: - pattern = "\\i " & pattern - var finalPattern = peg(pattern) - body1 - else: - var reflags = {reStudy} - if optIgnoreStyle in options: - pattern = styleInsensitive(pattern) - if optWord in options: - # see https://github.com/nim-lang/Nim/issues/13528#issuecomment-592786443 - pattern = r"(^|\W)(:?" & pattern & r")($|\W)" - if {optIgnoreCase, optIgnoreStyle} * options != {}: - reflags.incl reIgnoreCase - var finalPattern = if optRex in options: rex(pattern, reflags) - else: re(pattern, reflags) - body2 - -template withPattern(initPattern: string, finalPattern: untyped, body) = - # use the same body for Peg and Regex - withPattern2(initPattern, finalPattern, body, body) - iterator walkRec(paths: seq[string]): (string, string) = - var wopt: WalkOptPat - for pat in opt.excludeFile: - withPattern2(pat, finalPattern, - wopt.excludeFileP.add finalPattern, - wopt.excludeFileR.add finalPattern) - for pat in opt.includeFile: - withPattern2(pat, finalPattern, - wopt.includeFileP.add finalPattern, - wopt.includeFileR.add finalPattern) - for pat in opt.excludeDir: - withPattern2(pat, finalPattern, - wopt.excludeDirP.add finalPattern, - wopt.excludeDirR.add finalPattern) - for path in paths: - if dirExists(path): - for p in walkDirBasic(path, wopt): - yield ("", p) - elif fileExists(path): - yield ("", path) - else: - yield ("Error: no such file or directory: ", path) + declareCompiledPatterns(walkOptC, WalkOptComp): + walkOptC.excludeFile.add walkOpt.excludeFile.compileArray() + walkOptC.includeFile.add walkOpt.includeFile.compileArray() + walkOptC.excludeDir.add walkOpt.excludeDir.compileArray() + for path in paths: + if dirExists(path): + for p in walkDirBasic(path, walkOptC): + yield ("", p) + elif fileExists(path): + yield ("", path) + else: + yield ("Error: no such file or directory: ", path) template printResult(filename: string, body: untyped) = var filenameShown = false @@ -695,7 +706,7 @@ proc replaceMatches(filename: string, buffer: string, outpSeq: seq[Output]) = if output.kind in {GroupFirstMatch, GroupNextMatch}: #let r = replace(curMi.match, pattern, replacement % matches) #TODO let curMi = output.match - let r = replace(curMi.match, pattern, replacement) + let r = replace(curMi.match, searchOpt.pattern, replacement) if replace1match(si, curMi, i, r, newBuf, lineRepl): changed = true i = curMi.last + 1 @@ -709,40 +720,70 @@ proc replaceMatches(filename: string, buffer: string, outpSeq: seq[Output]) = printError "cannot open file for overwriting: " & filename inc(gVar.errors) -proc run1Thread(initPattern: string) = - withPattern(initPattern, finalPattern): - for (err, filename) in walkRec(paths): - if err != "": - inc(gVar.errors) - printError (err & filename) - continue - if optReplace notin options: - printResult(filename, processFile(finalPattern, filename)) - else: - var matches = newSeq[Output]() - var buffer = "" - - for output in processFile(finalPattern, filename, yieldContents=true): - updateCounters(output) - case output.kind - of Rejected, OpenError, JustCount: discard - of GroupFirstMatch, GroupNextMatch, GroupEnd: matches.add(output) - of FileContents: buffer = output.buffer - if matches.len > 0: - replaceMatches(filename, buffer, matches) - -proc worker(initPattern: string) {.thread.} = - withPattern(initPattern, finalPattern): +proc run1Thread() = + declareCompiledPatterns(searchOptC, SearchOptComp): + searchOptC.pattern = searchOpt.pattern.compile() + searchOptC.checkMatch = searchOpt.checkMatch.compile() + searchOptC.checkNoMatch = searchOpt.checkNoMatch.compile() + for (err, filename) in walkRec(paths): + if err != "": + inc(gVar.errors) + printError (err & filename) + continue + if optReplace notin options: + printResult(filename, processFile(searchOptC, filename)) + else: + var matches = newSeq[Output]() + var buffer = "" + + for output in processFile(searchOptC, filename, yieldContents=true): + updateCounters(output) + case output.kind + of Rejected, OpenError, JustCount: discard + of GroupFirstMatch, GroupNextMatch, GroupEnd: matches.add(output) + of FileContents: buffer = output.buffer + if matches.len > 0: + replaceMatches(filename, buffer, matches) + +# Multi-threaded version: all printing is being done in the Main thread. +# Totally nWorkers+1 additional threads are created (workers + pathProducer). +# An example of nWorkers=2: +# +# ------------------ initial paths ------------------- +# | Main thread |----------------->| pathProducer | +# ------------------ ------------------- +# ^ | | +# resultsChan | | | searchRequestsChan +# | number of files | -----+----- +# ----+--------------------------- | | +# | | (when walking finished) |a path |a path to file +# | | | | +# | | V V +# | | ------------ ------------ +# | | | worker 1 | | worker 2 | +# | | ------------ ------------ +# | | matches in the file | | +# | -------------------------------- | +# | matches in the file | +# ---------------------------------------------- + +proc worker(initSearchOpt: SearchOpt) {.thread.} = + searchOpt = initSearchOpt # init thread-local var + declareCompiledPatterns(searchOptC, SearchOptComp): + searchOptC.pattern = searchOpt.pattern.compile() + searchOptC.checkMatch = searchOpt.checkMatch.compile() + searchOptC.checkNoMatch = searchOpt.checkNoMatch.compile() while true: let (fileNo, filename) = searchRequestsChan.recv() var fileResult = newSeq[Output](); - for output in processFile(finalPattern, filename, yieldContents=(optReplace in options)): + for output in processFile(searchOptC, filename, + yieldContents=(optReplace in options)): fileResult.add(output) resultsChan.send((false, fileNo, filename, move(fileResult))) proc pathProducer(arg: (seq[string], WalkOpt)) {.thread.} = let paths = arg[0] - opt = arg[1] # init thread-local copy of opt + walkOpt = arg[1] # init thread-local copy of opt var nextFileN = 0 for (err, filename) in walkRec(paths): @@ -754,18 +795,18 @@ proc pathProducer(arg: (seq[string], WalkOpt)) {.thread.} = nextFileN += 1 resultsChan.send((true, nextFileN, "", @[])) -proc runMultiThread(pattern: string) = +proc runMultiThread() = var - workers = newSeq[Thread[string]](nWorkers) + workers = newSeq[Thread[SearchOpt]](nWorkers) storage = newTable[int, (string, seq[Output]) ]() # file number -> accumulated result firstUnprocessedFile = 0 open(searchRequestsChan) open(resultsChan) for n in 0 ..< nWorkers: - createThread(workers[n], worker, pattern) + createThread(workers[n], worker, searchOpt) var producerThread: Thread[(seq[string], WalkOpt)] - createThread(producerThread, pathProducer, (paths, opt)) + createThread(producerThread, pathProducer, (paths, walkOpt)) template process1result(fileNo: int, fname: string, fileResult: seq[Output]) = storage[fileNo] = (fname, fileResult) var outpSeq: seq[Output] @@ -797,11 +838,11 @@ proc runMultiThread(pattern: string) = else: process1result(msg.fileNo, msg.filename, msg.fileResult) -proc run(pattern: string) = +proc run() = if nWorkers == 0: - run1Thread(pattern) + run1Thread() else: - runMultiThread(pattern) + runMultiThread() proc reportError(msg: string) = printError "Error: " & msg @@ -830,8 +871,8 @@ for kind, key, val in getopt(): of cmdArgument: if options.contains(optStdin): paths.add(key) - elif pattern.len == 0: - pattern = key + elif searchOpt.pattern.len == 0: + searchOpt.pattern = key elif options.contains(optReplace) and replacement.len == 0: replacement = key else: @@ -855,41 +896,44 @@ for kind, key, val in getopt(): of "confirm": incl(options, optConfirm) of "stdin": incl(options, optStdin) of "word", "w": incl(options, optWord) - of "ignorecase", "i": incl(options, optIgnoreCase) - of "ignorestyle", "y": incl(options, optIgnoreStyle) + of "ignorecase", "ignore-case", "i": incl(options, optIgnoreCase) + of "ignorestyle", "ignore-style", "y": incl(options, optIgnoreStyle) of "nworkers", "n": if val == "": nWorkers = countProcessors() else: nWorkers = parseInt(val) - of "ext": opt.extensions.add val.split('|') - of "noext": opt.skipExtensions.add val.split('|') - of "excludedir", "exclude-dir": opt.excludeDir.add val - of "includefile", "include-file": opt.includeFile.add val - of "excludefile", "exclude-file": opt.excludeFile.add val + of "ext": walkOpt.extensions.add val.split('|') + of "noext", "no-ext": walkOpt.skipExtensions.add val.split('|') + of "excludedir", "exclude-dir": walkOpt.excludeDir.add val + of "includefile", "include-file": walkOpt.includeFile.add val + of "excludefile", "exclude-file": walkOpt.excludeFile.add val + of "match", "m": searchOpt.checkMatch = val + of "nomatch", "notmatch", "not-match", "no-match": + searchOpt.checkNoMatch = val of "bin": case val - of "no": checkBin = biNo - of "yes": checkBin = biYes - of "only": checkBin = biOnly + of "no": searchOpt.checkBin = biNo + of "yes": searchOpt.checkBin = biYes + of "only": searchOpt.checkBin = biOnly else: reportError("unknown value for --bin") - of "text", "t": checkBin = biNo + of "text", "t": searchOpt.checkBin = biNo of "count": justCount = true - of "sorttime", "s": + of "sorttime", "sort-time", "s": sortTime = true case normalize(val) of "": discard of "asc", "ascending": sortTimeOrder = SortOrder.Ascending of "desc", "descending": sortTimeOrder = SortOrder.Descending else: reportError("invalid value '" & val & "' for --sortTime") - of "nocolor": useWriteStyled = false + of "nocolor", "no-color": useWriteStyled = false of "color": case val of "auto": discard of "never", "false": useWriteStyled = false of "", "always", "true": useWriteStyled = true else: reportError("invalid value '" & val & "' for --color") - of "colortheme": + of "colortheme", "color-theme": colortheme = normalize(val) if colortheme notin ["simple", "bnw", "ack", "gnu"]: reportError("unknown colortheme '" & val & "'") @@ -930,17 +974,17 @@ linesBefore = max(linesBefore, linesContext) linesAfter = max(linesAfter, linesContext) if optStdin in options: - pattern = ask("pattern [ENTER to exit]: ") - if pattern.len == 0: quit(0) + searchOpt.pattern = ask("pattern [ENTER to exit]: ") + if searchOpt.pattern.len == 0: quit(0) if optReplace in options: replacement = ask("replacement [supports $1, $# notations]: ") -if pattern.len == 0: +if searchOpt.pattern.len == 0: reportError("empty pattern was given") else: if paths.len == 0: paths.add(os.getCurrentDir()) - run(pattern) + run() if gVar.errors != 0: printError $gVar.errors & " errors" stdout.write($gVar.matches & " matches\n") From 0f34c6a83eb2cb3d2e424384a56be7ebe767f9d0 Mon Sep 17 00:00:00 2001 From: Andrey Makarov Date: Sat, 10 Oct 2020 19:23:16 +0300 Subject: [PATCH 06/20] add --includeDir option --- tools/nimgrep.nim | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/tools/nimgrep.nim b/tools/nimgrep.nim index f8ef72ca47b60..bc646014f4105 100644 --- a/tools/nimgrep.nim +++ b/tools/nimgrep.nim @@ -38,14 +38,15 @@ Options: empty one ("--ext") means files with missing extension --noExt:EX1|... exclude files having given extension(s), use empty one to skip files with no extension (like some binary files are) - --includeFile:PAT include only files whose names match the given PATttern + --includeFile:PAT search only files whose names match the given PATttern --excludeFile:PAT skip files whose names match the given pattern PAT + --includeDir:PAT search only files with full directory name matching PAT --excludeDir:PAT skip directories whose names match the given pattern PAT --match:PAT, -m:PAT select files containing a (not displayed) match of PAT --noMatch:PAT select files not containing any match of PAT --bin:yes|no|only process binary files? (detected by \0 in first 1K bytes) --text, -t process only text files, the same as --bin:no - --count just count number of matches + --count only print counts of matches for files that matched --nocolor output will be given without any colours --color[:always] force color even if output is redirected --colorTheme:THEME select color THEME from 'simple' (default), @@ -105,10 +106,12 @@ type skipExtensions: seq[string] excludeFile: seq[string] includeFile: seq[string] + includeDir : seq[string] excludeDir : seq[string] WalkOptComp[Pat] = tuple # a compiled version of the previous excludeFile: seq[Pat] includeFile: seq[Pat] + includeDir : seq[Pat] excludeDir : seq[Pat] SearchOpt = tuple # used for searching inside a file pattern: string @@ -618,6 +621,14 @@ proc hasRightFileName(path: string, walkOptC: WalkOptComp[Pattern]): bool = if not matched: return false for pat in walkOptC.excludeFile: if filename.match(pat): return false + let dirname = path.parentDir + if walkOptC.includeDir.len != 0: + var matched = false + for pat in walkOptC.includeDir: + if dirname.match(pat): + matched = true + break + if not matched: return false result = true proc hasRightDirectory(path: string, walkOptC: WalkOptComp[Pattern]): bool = @@ -667,6 +678,7 @@ iterator walkRec(paths: seq[string]): (string, string) = declareCompiledPatterns(walkOptC, WalkOptComp): walkOptC.excludeFile.add walkOpt.excludeFile.compileArray() walkOptC.includeFile.add walkOpt.includeFile.compileArray() + walkOptC.includeDir.add walkOpt.includeDir.compileArray() walkOptC.excludeDir.add walkOpt.excludeDir.compileArray() for path in paths: if dirExists(path): @@ -906,6 +918,7 @@ for kind, key, val in getopt(): of "ext": walkOpt.extensions.add val.split('|') of "noext", "no-ext": walkOpt.skipExtensions.add val.split('|') of "excludedir", "exclude-dir": walkOpt.excludeDir.add val + of "includedir", "include-dir": walkOpt.includeDir.add val of "includefile", "include-file": walkOpt.includeFile.add val of "excludefile", "exclude-file": walkOpt.excludeFile.add val of "match", "m": searchOpt.checkMatch = val From e9fe301b9a671158a9c93996b91b3ffbff808d21 Mon Sep 17 00:00:00 2001 From: Andrey Makarov Date: Thu, 15 Oct 2020 18:39:10 +0300 Subject: [PATCH 07/20] add --limit (-m) and --onlyAscii (-o) options --- tools/nimgrep.nim | 420 +++++++++++++++++++++++++++++++++------------- 1 file changed, 302 insertions(+), 118 deletions(-) diff --git a/tools/nimgrep.nim b/tools/nimgrep.nim index bc646014f4105..5084bb4f29d41 100644 --- a/tools/nimgrep.nim +++ b/tools/nimgrep.nim @@ -61,6 +61,9 @@ Options: -s[:desc|asc] - descending (default) or ascending --group, -g group matches by file --newLine, -l display every matching line starting from a new line + --limit[:N], -m[:N] limit max width of lines from files by N characters (80) + --onlyAscii, -o use only printable ASCII Latin characters 0x20-0x7E + (substitutions: 0 -> @, 1-0x1F -> A-_, 0x7F-0xFF -> !) --verbose be verbose: list every processed file --filenames find the pattern in the filenames, not in the contents of the file @@ -68,11 +71,37 @@ Options: --version, -v shows the version """ +# Search results for a file are modelled by these levels: +# FileResult -> Block -> Output/Chunk -> SubLine +# +# 1. SubLine is an entire line or its part. +# +# 2. Chunk, which is a sequence of SubLine, represents a match and its +# surrounding context. +# Output is a Chunk or one of auxiliary results like an OpenError. +# +# 3. Block, which is a sequence of Chunks, is not present as a separate type. +# It will just be separated from another Block by newline when there is +# more than 3 lines in it. +# Here is an example of a Block where only 1 match is found and +# 1 line before and 1 line after of context are required: +# +# ...a_line_before...................................... << 0: lineHeader(filename, mi.lineBeg + i, isMatch = true) - writeColored(l) - if i < lines.len - 1: - stdout.write("\n") + if curCol.terminal < limitChar: + writeColored(l) + else: + curCol.overflowMatches += 1 + if i < sLines.len - 1: + newLn(curCol) + curCol.terminal += mi.match.len + curCol.file += mi.match.len + +let ellipsis = "..." -proc getLinesBefore(si: SearchInfo, curMi: MatchInfo): string = +proc reserveChars(mi: MatchInfo): int = + if optLimitChars notin options: + result = 0 + else: + let patternChars = afterPattern(mi.match, 0) + 1 + let padding = 3 + result = patternChars + ellipsis.len + padding + +proc printRaw(c: char, curCol: var Column, allowTabs = true) = + # print taking into account tabs and optOnlyAscii + if c == '\t': + if allowTabs: + let spaces = 8 - (curCol.file mod 8) + curCol.file += spaces + curCol.terminal += spaces + if optOnlyAscii: + printSpecial " " + stdout.write " ".repeat(spaces-1) + else: + stdout.write " ".repeat(spaces) + else: + curCol.file += 1 + curCol.terminal += 1 + if optOnlyAscii: + printSpecial " " + else: + stdout.write " " + elif not optOnlyAscii or (0x20 <= int(c) and int(c) <= 0x7e): + stdout.write c + curCol.file += 1 + curCol.terminal += 1 + else: # substitute characters that are not ACSII Latin + let substitute = + if int(c) < 0x20: + char(int(c) + 0x40) # use common "control codes" + else: '!' + printSpecial $substitute + curCol.file += 1 + curCol.terminal += 1 + +proc calcTabLen(s: string, chars: int, fromLeft: bool): int = + if chars < 0: + return 0 + var col = 0 + var first, last: int + if fromLeft: + first = max(0, s.len - chars) + last = s.len - 1 + else: + first = 0 + last = min(s.len - 1, chars - 1) + for c in s[first .. last]: + if c == '\t': + result += 8 - (col mod 8) - 1 + col += 8 - (col mod 8) + +proc printCropped(s: string, curCol: var Column, fromLeft: bool) = + let eL = ellipsis.len + let charsAllowed = limitChar - curCol.terminal + let tabLen = calcTabLen(s, charsAllowed, fromLeft) + if s.len + tabLen <= charsAllowed: + for c in s: + printRaw(c, curCol) + elif charsAllowed <= eL: + if curCol.overflowMatches == 0: + printBold ellipsis + curCol.terminal += eL + else: + if fromLeft: + printBold ellipsis + curCol.terminal += 3 + # don't expand tabs when cropped from left + let first = max(0, s.len - (charsAllowed - eL)) + for c in s[first .. s.len - 1]: + printRaw(c, curCol, allowTabs=false) + else: + let last = min(s.len - 1, charsAllowed - eL - 1) + for c in s[0 .. last]: + printRaw(c, curCol, allowTabs=true) + if curCol.terminal >= limitChar - eL: + break + printBold ellipsis + curCol.terminal += 3 + +proc getSubLinesBefore(si: SearchInfo, curMi: MatchInfo): string = let first = beforePattern(si.buf, curMi.first-1, linesBefore+1) result = substr(si.buf, first, curMi.first-1) -proc printLinesBefore(filename: string, beforeMatch: string, lineBeg: int, replMode=false) = +proc printSubLinesBefore(filename: string, beforeMatch: string, lineBeg: int, + curCol: var Column, reserveChars: int, replMode=false) = # start block: print 'linesBefore' lines before current match `curMi` - let lines = splitLines(beforeMatch) - let startLine = lineBeg - lines.len + 1 + let sLines = splitLines(beforeMatch) + let startLine = lineBeg - sLines.len + 1 blockHeader(filename, lineBeg, replMode=replMode) - for i, l in lines: - lineHeader(filename, startLine + i, isMatch = (i == lines.len - 1)) - stdout.write(l) - if i < lines.len - 1: - stdout.write("\n") - -proc getLinesAfter(si: SearchInfo, mi: MatchInfo): string = + for i, l in sLines: + let isLastLine = i == sLines.len - 1 + lineHeader(filename, startLine + i, isMatch = isLastLine) + if isLastLine: limitChar -= reserveChars + l.printCropped(curCol, fromLeft = isLastLine) + if isLastLine: limitChar += reserveChars + if not isLastLine: + newLn(curCol) + +proc getSubLinesAfter(si: SearchInfo, mi: MatchInfo): string = let last = afterPattern(si.buf, mi.last+1, 1+linesAfter) result = substr(si.buf, mi.last+1, last) -proc printLinesAfter(filename: string, afterMatch: string, matchLineEnd: int) = +proc printSubLinesAfter(filename: string, afterMatch: string, matchLineEnd: int, + curCol: var Column) = # finish block: print 'linesAfter' lines after match `mi` - let lines = splitLines(afterMatch) - if lines.len == 0: # EOF - stdout.write("\n") + let sLines = splitLines(afterMatch) + if sLines.len == 0: # EOF + newLn(curCol) else: - stdout.write(lines[0]) # complete the line after match itself - stdout.write("\n") + sLines[0].printCropped(curCol, fromLeft = false) + # complete the line after the match itself + newLn(curCol) #let skipLine = # workaround posix line ending at the end of file # if last == s.len-1 and s.len >= 2 and s[^1] == '\l' and s[^2] != '\c': 1 # else: 0 let skipLine = 0 - for i in 1 ..< lines.len - skipLine: + for i in 1 ..< sLines.len - skipLine: lineHeader(filename, matchLineEnd + i, isMatch = false) - stdout.write(lines[i]) - stdout.write("\n") - if linesAfter + linesBefore >= 2 and not newLine: stdout.write("\n") + sLines[i].printCropped(curCol, fromLeft = false) + newLn(curCol) -proc getLinesBetween(si: SearchInfo, prevMi: MatchInfo, curMi: MatchInfo): string = +proc getSubLinesBetween(si: SearchInfo, prevMi: MatchInfo, + curMi: MatchInfo): string = si.buf.substr(prevMi.last+1, curMi.first-1) -proc printBetweenMatches(filename: string, betweenMatches: string, lastLineBeg: int) = +proc printBetweenMatches(filename: string, betweenMatches: string, + lastLineBeg: int, + curCol: var Column, reserveChars: int) = # continue block: print between `prevMi` and `curMi` - let lines = betweenMatches.splitLines() - stdout.write(lines[0]) # finish the line of previous Match - if lines.len > 1: - stdout.write("\n") - for i in 1 ..< lines.len: - lineHeader(filename, lastLineBeg - lines.len + i + 1, - isMatch = (i == lines.len - 1)) - stdout.write(lines[i]) - if i < lines.len - 1: - stdout.write("\n") + let sLines = betweenMatches.splitLines() + sLines[0].printCropped(curCol, fromLeft = false) + # finish the line of previous Match + if sLines.len > 1: + newLn(curCol) + for i in 1 ..< sLines.len: + let isLastLine = i == sLines.len - 1 + lineHeader(filename, lastLineBeg - sLines.len + i + 1, + isMatch = isLastLine) + if isLastLine: limitChar -= reserveChars + sLines[i].printCropped(curCol, fromLeft = isLastLine) + if isLastLine: limitChar += reserveChars + if not isLastLine: + newLn(curCol) proc printReplacement(si: SearchInfo, mi: MatchInfo, repl: string, showRepl: bool, curPos: int, newBuf: string, curLine: int) = let filename = si.fileName - printLinesBefore(fileName, getLinesBefore(si, mi), mi.lineBeg) - printMatch(fileName, mi) - printLinesAfter(fileName, getLinesAfter(si, mi), mi.lineEnd) + var curCol: Column + printSubLinesBefore(fileName, getSubLinesBefore(si, mi), mi.lineBeg, + curCol, reserveChars(mi)) + printMatch(fileName, mi, curCol) + printSubLinesAfter(fileName, getSubLinesAfter(si, mi), mi.lineEnd, curCol) stdout.flushFile() if showRepl: let newSi: SearchInfo = (buf: newBuf, filename: filename) let miForNewBuf: MatchInfo = (first: newBuf.len, last: newBuf.len, lineBeg: curLine, lineEnd: curLine, match: "") - printLinesBefore(fileName, getLinesBefore(newSi, miForNewBuf), miForNewBuf.lineBeg, replMode=true) + printSubLinesBefore(fileName, getSubLinesBefore(newSi, miForNewBuf), + miForNewBuf.lineBeg, curCol, reserveChars(miForNewBuf), + replMode=true) let replLines = countLineBreaks(repl, 0, repl.len-1) let miFixLines: MatchInfo = (first: mi.first, last: mi.last, lineBeg: curLine, lineEnd: curLine + replLines, match: repl) - printMatch(fileName, miFixLines) - printLinesAfter(fileName, getLinesAfter(si, miFixLines), miFixLines.lineEnd) + printMatch(fileName, miFixLines, curCol) + printSubLinesAfter(fileName, getSubLinesAfter(si, miFixLines), + miFixLines.lineEnd, curCol) stdout.flushFile() proc replace1match(si: SearchInfo, mi: MatchInfo, i: int, r: string; @@ -410,12 +570,12 @@ proc replace1match(si: SearchInfo, mi: MatchInfo, i: int, r: string; template updateCounters(output: Output) = case output.kind - of GroupFirstMatch, GroupNextMatch: inc(gVar.matches) + of BlockFirstMatch, BlockNextMatch: inc(gVar.matches) of JustCount: inc(gVar.matches, output.matches) of OpenError: inc(gVar.errors) - of Rejected, GroupEnd, FileContents: discard + of Rejected, BlockEnd, FileContents: discard -proc printOutput(filename: string, output: Output) = +proc printOutput(filename: string, output: Output, curCol: var Column) = case output.kind of OpenError: printError("can not open path " & filename & " " & output.msg) @@ -423,15 +583,24 @@ proc printOutput(filename: string, output: Output) = of JustCount: echo " (" & $output.matches & " matches)" of FileContents: discard # impossible - of GroupFirstMatch: - printLinesBefore(filename, output.pre, output.match.lineBeg) - printMatch(filename, output.match) + of BlockFirstMatch: + printSubLinesBefore(filename, output.pre, output.match.lineBeg, + curCol, reserveChars(output.match)) + printMatch(filename, output.match, curCol) #flush: TODO - of GroupNextMatch: - printBetweenMatches(filename, output.pre, output.match.lineBeg) - printMatch(filename, output.match) - of GroupEnd: - printLinesAfter(filename, output.groupEnding, output.firstLine) + of BlockNextMatch: + printBetweenMatches(filename, output.pre, output.match.lineBeg, + curCol, reserveChars(output.match)) + printMatch(filename, output.match, curCol) + of BlockEnd: + printSubLinesAfter(filename, output.blockEnding, output.firstLine, curCol) + if curCol.overflowMatches > 0: + # overflowed matches are shown for the entire Block after last match + lineHeader(filename, output.firstLine, isMatch = true) + printBold("(" & $curCol.overflowMatches & " more matches skipped)") + stdout.write("\n") + curCol.overflowMatches = 0 + if linesAfter + linesBefore >= 2 and not newLine: stdout.write("\n") iterator searchFile(pattern: Pattern; filename: string; buffer: string): Output = let si: SearchInfo = (buf: buffer, filename: filename) @@ -444,8 +613,8 @@ iterator searchFile(pattern: Pattern; filename: string; buffer: string): Output let t = findBounds(buffer, pattern, matches, i) if t.first < 0 or t.last < t.first: if prevMi.lineBeg != 0: # finalize last match - yield Output(kind: GroupEnd, - groupEnding: getLinesAfter(si, prevMi), + yield Output(kind: BlockEnd, + blockEnding: getSubLinesAfter(si, prevMi), firstLine: prevMi.lineEnd) break @@ -456,21 +625,21 @@ iterator searchFile(pattern: Pattern; filename: string; buffer: string): Output lineEnd: lineBeg + countLineBreaks(buffer, t.first, t.last), match: buffer.substr(t.first, t.last)) if prevMi.lineBeg == 0: # no prev. match, so no prev. block to finalize - yield Output(kind: GroupFirstMatch, - pre: getLinesBefore(si, curMi), + yield Output(kind: BlockFirstMatch, + pre: getSubLinesBefore(si, curMi), match: curMi) else: let nLinesBetween = curMi.lineBeg - prevMi.lineEnd if nLinesBetween <= linesAfter + linesBefore + 1: # print as 1 block - yield Output(kind: GroupNextMatch, - pre: getLinesBetween(si, prevMi, curMi), + yield Output(kind: BlockNextMatch, + pre: getSubLinesBetween(si, prevMi, curMi), match: curMi) else: # finalize previous block and then print next block - yield Output(kind: GroupEnd, - groupEnding: getLinesAfter(si, prevMi), + yield Output(kind: BlockEnd, + blockEnding: getSubLinesAfter(si, prevMi), firstLine: prevMi.lineEnd) - yield Output(kind: GroupFirstMatch, - pre: getLinesBefore(si, curMi), + yield Output(kind: BlockFirstMatch, + pre: getSubLinesBefore(si, curMi), match: curMi) i = t.last+1 @@ -541,20 +710,24 @@ proc compileRegex(initPattern: string): Regex = template declareCompiledPatterns(compiledStruct: untyped, StructType: untyped, body: untyped) = + {.hint[XDeclaredButNotUsed]: off.} if optRegex notin options: var compiledStruct: StructType[Peg] - proc compile(p: string): Peg = p.compilePeg() + template compile1Pattern(p: string, pat: Peg) = + if p!="": pat = p.compilePeg() proc compileArray(initPattern: seq[string]): seq[Peg] = for pat in initPattern: result.add pat.compilePeg() body else: var compiledStruct: StructType[Regex] - proc compile(p: string): Regex = p.compileRegex() + template compile1Pattern(p: string, pat: Regex) = + if p!="": pat = p.compileRegex() proc compileArray(initPattern: seq[string]): seq[Regex] = for pat in initPattern: result.add pat.compileRegex() body + {.hint[XDeclaredButNotUsed]: on.} iterator processFile(searchOptC: SearchOptComp[Pattern], filename: string, yieldContents=false): Output = var buffer: string @@ -593,7 +766,7 @@ iterator processFile(searchOptC: SearchOptComp[Pattern], filename: string, yield if not justCount: yield output else: - if output.kind in {GroupFirstMatch, GroupNextMatch}: + if output.kind in {BlockFirstMatch, BlockNextMatch}: inc(cnt) if justCount and cnt > 0: yield Output(kind: JustCount, matches: cnt) @@ -689,7 +862,7 @@ iterator walkRec(paths: seq[string]): (string, string) = else: yield ("Error: no such file or directory: ", path) -template printResult(filename: string, body: untyped) = +template printFileResult(filename: string, body: untyped) = var filenameShown = false template showFilename = if not filenameShown: @@ -699,23 +872,24 @@ template printResult(filename: string, body: untyped) = filenameShown = true if optVerbose in options: showFilename + var curCol: Column for output in body: updateCounters(output) if output.kind notin {Rejected, OpenError, JustCount} and not oneline: showFilename if output.kind == JustCount and oneline: printFile(filename & ":") - printOutput(filename, output) + printOutput(filename, output, curCol) -proc replaceMatches(filename: string, buffer: string, outpSeq: seq[Output]) = +proc replaceMatches(filename: string, buffer: string, fileResult: FileResult) = var newBuf = newStringOfCap(buffer.len) var changed = false var lineRepl = 1 let si: SearchInfo = (buf: buffer, filename: filename) var i = 0 - for output in outpSeq: - if output.kind in {GroupFirstMatch, GroupNextMatch}: + for output in fileResult: + if output.kind in {BlockFirstMatch, BlockNextMatch}: #let r = replace(curMi.match, pattern, replacement % matches) #TODO let curMi = output.match let r = replace(curMi.match, searchOpt.pattern, replacement) @@ -734,32 +908,33 @@ proc replaceMatches(filename: string, buffer: string, outpSeq: seq[Output]) = proc run1Thread() = declareCompiledPatterns(searchOptC, SearchOptComp): - searchOptC.pattern = searchOpt.pattern.compile() - searchOptC.checkMatch = searchOpt.checkMatch.compile() - searchOptC.checkNoMatch = searchOpt.checkNoMatch.compile() - for (err, filename) in walkRec(paths): - if err != "": - inc(gVar.errors) - printError (err & filename) - continue - if optReplace notin options: - printResult(filename, processFile(searchOptC, filename)) - else: - var matches = newSeq[Output]() - var buffer = "" - - for output in processFile(searchOptC, filename, yieldContents=true): - updateCounters(output) - case output.kind - of Rejected, OpenError, JustCount: discard - of GroupFirstMatch, GroupNextMatch, GroupEnd: matches.add(output) - of FileContents: buffer = output.buffer - if matches.len > 0: - replaceMatches(filename, buffer, matches) + compile1Pattern(searchOpt.pattern, searchOptC.pattern) + compile1Pattern(searchOpt.checkMatch, searchOptC.checkMatch) + compile1Pattern(searchOpt.checkNoMatch, searchOptC.checkNoMatch) + for (err, filename) in walkRec(paths): + if err != "": + inc(gVar.errors) + printError (err & filename) + continue + if optReplace notin options: + printFileResult(filename, processFile(searchOptC, filename)) + else: + var matches: FileResult + var buffer = "" + + for output in processFile(searchOptC, filename, yieldContents=true): + updateCounters(output) + case output.kind + of Rejected, OpenError, JustCount: discard + of BlockFirstMatch, BlockNextMatch, BlockEnd: + matches.add(output) + of FileContents: buffer = output.buffer + if matches.len > 0: + replaceMatches(filename, buffer, matches) # Multi-threaded version: all printing is being done in the Main thread. # Totally nWorkers+1 additional threads are created (workers + pathProducer). -# An example of nWorkers=2: +# An example of case nWorkers=2: # # ------------------ initial paths ------------------- # | Main thread |----------------->| pathProducer | @@ -778,16 +953,18 @@ proc run1Thread() = # | -------------------------------- | # | matches in the file | # ---------------------------------------------- +# +# The matches from each file are passed at once as FileResult type. proc worker(initSearchOpt: SearchOpt) {.thread.} = searchOpt = initSearchOpt # init thread-local var declareCompiledPatterns(searchOptC, SearchOptComp): - searchOptC.pattern = searchOpt.pattern.compile() - searchOptC.checkMatch = searchOpt.checkMatch.compile() - searchOptC.checkNoMatch = searchOpt.checkNoMatch.compile() + compile1Pattern(searchOpt.pattern, searchOptC.pattern) + compile1Pattern(searchOpt.checkMatch, searchOptC.checkMatch) + compile1Pattern(searchOpt.checkNoMatch, searchOptC.checkNoMatch) while true: let (fileNo, filename) = searchRequestsChan.recv() - var fileResult = newSeq[Output](); + var fileResult: FileResult for output in processFile(searchOptC, filename, yieldContents=(optReplace in options)): fileResult.add(output) @@ -810,7 +987,7 @@ proc pathProducer(arg: (seq[string], WalkOpt)) {.thread.} = proc runMultiThread() = var workers = newSeq[Thread[SearchOpt]](nWorkers) - storage = newTable[int, (string, seq[Output]) ]() + storage = newTable[int, (string, FileResult) ]() # file number -> accumulated result firstUnprocessedFile = 0 open(searchRequestsChan) @@ -819,24 +996,25 @@ proc runMultiThread() = createThread(workers[n], worker, searchOpt) var producerThread: Thread[(seq[string], WalkOpt)] createThread(producerThread, pathProducer, (paths, walkOpt)) - template process1result(fileNo: int, fname: string, fileResult: seq[Output]) = - storage[fileNo] = (fname, fileResult) - var outpSeq: seq[Output] + template process1result(fileNo: int, fname: string, fResult: FileResult) = + storage[fileNo] = (fname, fResult) + var fileResult: FileResult while storage.haskey(firstUnprocessedFile): - outpSeq = storage[firstUnprocessedFile][1] + fileResult = storage[firstUnprocessedFile][1] let filename = storage[firstUnprocessedFile][0] if optReplace notin options: - printResult(filename, outpSeq) + printFileResult(filename, fileResult) else: var buffer = "" - var matches = newSeq[Output]() - for output in outpSeq: + var matches: FileResult + for output in fileResult: updateCounters(output) case output.kind of Rejected, OpenError, JustCount: discard # printError error - of GroupFirstMatch, GroupNextMatch, GroupEnd: matches.add(output) + of BlockFirstMatch, BlockNextMatch, BlockEnd: + matches.add(output) of FileContents: buffer = output.buffer if matches.len > 0: replaceMatches(filename, buffer, matches) @@ -921,7 +1099,7 @@ for kind, key, val in getopt(): of "includedir", "include-dir": walkOpt.includeDir.add val of "includefile", "include-file": walkOpt.includeFile.add val of "excludefile", "exclude-file": walkOpt.excludeFile.add val - of "match", "m": searchOpt.checkMatch = val + of "match": searchOpt.checkMatch = val of "nomatch", "notmatch", "not-match", "no-match": searchOpt.checkNoMatch = val of "bin": @@ -971,6 +1149,11 @@ for kind, key, val in getopt(): of "newline", "l": newLine = true of "oneline": oneline = true of "group", "g": oneline = false + of "limit", "m": + incl(options, optLimitChars) + if val != "": + limitChar = parseInt(val) + of "onlyascii", "only-ascii", "o": optOnlyAscii = true of "verbose": incl(options, optVerbose) of "filenames": incl(options, optFilenames) of "help", "h": writeHelp() @@ -1000,6 +1183,7 @@ else: run() if gVar.errors != 0: printError $gVar.errors & " errors" - stdout.write($gVar.matches & " matches\n") + printBold($gVar.matches & " matches") + stdout.write("\n") if gVar.errors != 0: quit(1) From d7c090ff12200f67816cf8cef88d19cac214ef99 Mon Sep 17 00:00:00 2001 From: Andrey Makarov Date: Thu, 15 Oct 2020 19:05:58 +0300 Subject: [PATCH 08/20] fix performance regression introduced in nimgrep improvements #12779 --- tools/nimgrep.nim | 62 +++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 32 deletions(-) diff --git a/tools/nimgrep.nim b/tools/nimgrep.nim index 5084bb4f29d41..8c4043ef5c500 100644 --- a/tools/nimgrep.nim +++ b/tools/nimgrep.nim @@ -108,7 +108,6 @@ type Bin = enum biYes, biOnly, biNo Pattern = Regex | Peg - SearchInfo = tuple[buf: string, filename: string] MatchInfo = tuple[first: int, last: int; lineBeg: int, lineEnd: int, match: string] outputKind = enum @@ -448,9 +447,9 @@ proc printCropped(s: string, curCol: var Column, fromLeft: bool) = printBold ellipsis curCol.terminal += 3 -proc getSubLinesBefore(si: SearchInfo, curMi: MatchInfo): string = - let first = beforePattern(si.buf, curMi.first-1, linesBefore+1) - result = substr(si.buf, first, curMi.first-1) +proc getSubLinesBefore(buf: string, curMi: MatchInfo): string = + let first = beforePattern(buf, curMi.first-1, linesBefore+1) + result = substr(buf, first, curMi.first-1) proc printSubLinesBefore(filename: string, beforeMatch: string, lineBeg: int, curCol: var Column, reserveChars: int, replMode=false) = @@ -467,9 +466,9 @@ proc printSubLinesBefore(filename: string, beforeMatch: string, lineBeg: int, if not isLastLine: newLn(curCol) -proc getSubLinesAfter(si: SearchInfo, mi: MatchInfo): string = - let last = afterPattern(si.buf, mi.last+1, 1+linesAfter) - result = substr(si.buf, mi.last+1, last) +proc getSubLinesAfter(buf: string, mi: MatchInfo): string = + let last = afterPattern(buf, mi.last+1, 1+linesAfter) + result = substr(buf, mi.last+1, last) proc printSubLinesAfter(filename: string, afterMatch: string, matchLineEnd: int, curCol: var Column) = @@ -490,9 +489,9 @@ proc printSubLinesAfter(filename: string, afterMatch: string, matchLineEnd: int, sLines[i].printCropped(curCol, fromLeft = false) newLn(curCol) -proc getSubLinesBetween(si: SearchInfo, prevMi: MatchInfo, +proc getSubLinesBetween(buf: string, prevMi: MatchInfo, curMi: MatchInfo): string = - si.buf.substr(prevMi.last+1, curMi.first-1) + buf.substr(prevMi.last+1, curMi.first-1) proc printBetweenMatches(filename: string, betweenMatches: string, lastLineBeg: int, @@ -513,22 +512,21 @@ proc printBetweenMatches(filename: string, betweenMatches: string, if not isLastLine: newLn(curCol) -proc printReplacement(si: SearchInfo, mi: MatchInfo, repl: string, - showRepl: bool, curPos: int, +proc printReplacement(filename: string, buf: string, mi: MatchInfo, + repl: string, showRepl: bool, curPos: int, newBuf: string, curLine: int) = - let filename = si.fileName + let filename = fileName var curCol: Column - printSubLinesBefore(fileName, getSubLinesBefore(si, mi), mi.lineBeg, + printSubLinesBefore(fileName, getSubLinesBefore(buf, mi), mi.lineBeg, curCol, reserveChars(mi)) printMatch(fileName, mi, curCol) - printSubLinesAfter(fileName, getSubLinesAfter(si, mi), mi.lineEnd, curCol) + printSubLinesAfter(fileName, getSubLinesAfter(buf, mi), mi.lineEnd, curCol) stdout.flushFile() if showRepl: - let newSi: SearchInfo = (buf: newBuf, filename: filename) let miForNewBuf: MatchInfo = (first: newBuf.len, last: newBuf.len, lineBeg: curLine, lineEnd: curLine, match: "") - printSubLinesBefore(fileName, getSubLinesBefore(newSi, miForNewBuf), + printSubLinesBefore(fileName, getSubLinesBefore(newBuf, miForNewBuf), miForNewBuf.lineBeg, curCol, reserveChars(miForNewBuf), replMode=true) @@ -537,16 +535,16 @@ proc printReplacement(si: SearchInfo, mi: MatchInfo, repl: string, (first: mi.first, last: mi.last, lineBeg: curLine, lineEnd: curLine + replLines, match: repl) printMatch(fileName, miFixLines, curCol) - printSubLinesAfter(fileName, getSubLinesAfter(si, miFixLines), + printSubLinesAfter(fileName, getSubLinesAfter(buf, miFixLines), miFixLines.lineEnd, curCol) stdout.flushFile() -proc replace1match(si: SearchInfo, mi: MatchInfo, i: int, r: string; - newBuf: var string, curLine: var int): bool = - newBuf.add(si.buf.substr(i, mi.first-1)) - inc(curLine, countLineBreaks(si.buf, i, mi.first-1)) +proc replace1match(filename: string, buf: string, mi: MatchInfo, i: int, + r: string; newBuf: var string, curLine: var int): bool = + newBuf.add(buf.substr(i, mi.first-1)) + inc(curLine, countLineBreaks(buf, i, mi.first-1)) if optConfirm in options: - printReplacement(si, mi, r, showRepl=true, i, newBuf, curLine) + printReplacement(filename, buf, mi, r, showRepl=true, i, newBuf, curLine) case confirm() of ceAbort: quit(0) of ceYes: gVar.reallyReplace = true @@ -559,7 +557,8 @@ proc replace1match(si: SearchInfo, mi: MatchInfo, i: int, r: string; gVar.reallyReplace = false options.excl(optConfirm) else: - printReplacement(si, mi, r, showRepl=gVar.reallyReplace, i, newBuf, curLine) + printReplacement(filename, buf, mi, r, showRepl=gVar.reallyReplace, i, + newBuf, curLine) if gVar.reallyReplace: result = true newBuf.add(r) @@ -602,8 +601,8 @@ proc printOutput(filename: string, output: Output, curCol: var Column) = curCol.overflowMatches = 0 if linesAfter + linesBefore >= 2 and not newLine: stdout.write("\n") -iterator searchFile(pattern: Pattern; filename: string; buffer: string): Output = - let si: SearchInfo = (buf: buffer, filename: filename) +iterator searchFile(pattern: Pattern; filename: string; + buffer: string): Output = var prevMi, curMi: MatchInfo curMi.lineEnd = 1 var i = 0 @@ -614,7 +613,7 @@ iterator searchFile(pattern: Pattern; filename: string; buffer: string): Output if t.first < 0 or t.last < t.first: if prevMi.lineBeg != 0: # finalize last match yield Output(kind: BlockEnd, - blockEnding: getSubLinesAfter(si, prevMi), + blockEnding: getSubLinesAfter(buffer, prevMi), firstLine: prevMi.lineEnd) break @@ -626,20 +625,20 @@ iterator searchFile(pattern: Pattern; filename: string; buffer: string): Output match: buffer.substr(t.first, t.last)) if prevMi.lineBeg == 0: # no prev. match, so no prev. block to finalize yield Output(kind: BlockFirstMatch, - pre: getSubLinesBefore(si, curMi), + pre: getSubLinesBefore(buffer, curMi), match: curMi) else: let nLinesBetween = curMi.lineBeg - prevMi.lineEnd if nLinesBetween <= linesAfter + linesBefore + 1: # print as 1 block yield Output(kind: BlockNextMatch, - pre: getSubLinesBetween(si, prevMi, curMi), + pre: getSubLinesBetween(buffer, prevMi, curMi), match: curMi) else: # finalize previous block and then print next block yield Output(kind: BlockEnd, - blockEnding: getSubLinesAfter(si, prevMi), + blockEnding: getSubLinesAfter(buffer, prevMi), firstLine: prevMi.lineEnd) yield Output(kind: BlockFirstMatch, - pre: getSubLinesBefore(si, curMi), + pre: getSubLinesBefore(buffer, curMi), match: curMi) i = t.last+1 @@ -886,14 +885,13 @@ proc replaceMatches(filename: string, buffer: string, fileResult: FileResult) = var changed = false var lineRepl = 1 - let si: SearchInfo = (buf: buffer, filename: filename) var i = 0 for output in fileResult: if output.kind in {BlockFirstMatch, BlockNextMatch}: #let r = replace(curMi.match, pattern, replacement % matches) #TODO let curMi = output.match let r = replace(curMi.match, searchOpt.pattern, replacement) - if replace1match(si, curMi, i, r, newBuf, lineRepl): + if replace1match(filename, buffer, curMi, i, r, newBuf, lineRepl): changed = true i = curMi.last + 1 if changed: From 1adac9b8342d7237ef244475fb2b0523396f8c69 Mon Sep 17 00:00:00 2001 From: Andrey Makarov Date: Fri, 16 Oct 2020 01:45:53 +0300 Subject: [PATCH 09/20] better error handling --- tools/nimgrep.nim | 173 ++++++++++++++++++++++------------------------ 1 file changed, 84 insertions(+), 89 deletions(-) diff --git a/tools/nimgrep.nim b/tools/nimgrep.nim index 8c4043ef5c500..cec2f0fed9c71 100644 --- a/tools/nimgrep.nim +++ b/tools/nimgrep.nim @@ -116,7 +116,7 @@ type Output = object case kind: outputKind of OpenError: msg: string - of Rejected: discard + of Rejected: reason: string of JustCount: matches: int of BlockFirstMatch, BlockNextMatch: pre: string @@ -574,13 +574,20 @@ template updateCounters(output: Output) = of OpenError: inc(gVar.errors) of Rejected, BlockEnd, FileContents: discard -proc printOutput(filename: string, output: Output, curCol: var Column) = +proc printInfo(filename:string, output: Output) = case output.kind of OpenError: printError("can not open path " & filename & " " & output.msg) - of Rejected: discard + of Rejected: + if optVerbose in options: + echo "(rejected: ", output.reason, ")" of JustCount: echo " (" & $output.matches & " matches)" + else: discard # impossible + +proc printOutput(filename: string, output: Output, curCol: var Column) = + case output.kind + of OpenError, Rejected, JustCount: printInfo(filename, output) of FileContents: discard # impossible of BlockFirstMatch: printSubLinesBefore(filename, output.pre, output.match.lineBeg, @@ -728,7 +735,8 @@ template declareCompiledPatterns(compiledStruct: untyped, body {.hint[XDeclaredButNotUsed]: on.} -iterator processFile(searchOptC: SearchOptComp[Pattern], filename: string, yieldContents=false): Output = +iterator processFile(searchOptC: SearchOptComp[Pattern], filename: string, + yieldContents=false): Output = var buffer: string if optFilenames in options: @@ -737,26 +745,31 @@ iterator processFile(searchOptC: SearchOptComp[Pattern], filename: string, yield try: buffer = system.readFile(filename) except IOError as e: - yield Output(kind: OpenError, msg: e.msg) + yield Output(kind: OpenError, msg: "readFile failed") var reject = false + var reason: string if searchOpt.checkBin in {biNo, biOnly}: let isBin = detectBin(buffer) if isBin and searchOpt.checkBin == biNo: reject = true + reason = "binary file" if (not isBin) and searchOpt.checkBin == biOnly: reject = true + reason = "text file" if not reject: if searchOpt.checkMatch != "": reject = not contains(buffer, searchOptC.checkMatch, 0) + reason = "doesn't contain a requested match" if not reject: if searchOpt.checkNoMatch != "": reject = contains(buffer, searchOptC.checkNoMatch, 0) + reason = "contains a forbidden match" if reject: - yield Output(kind: Rejected) + yield Output(kind: Rejected, reason: reason) else: var found = false var cnt = 0 @@ -861,7 +874,31 @@ iterator walkRec(paths: seq[string]): (string, string) = else: yield ("Error: no such file or directory: ", path) -template printFileResult(filename: string, body: untyped) = +proc replaceMatches(filename: string, buffer: string, fileResult: FileResult) = + var newBuf = newStringOfCap(buffer.len) + + var changed = false + var lineRepl = 1 + var i = 0 + for output in fileResult: + if output.kind in {BlockFirstMatch, BlockNextMatch}: + #let r = replace(curMi.match, pattern, replacement % matches) #TODO + let curMi = output.match + let r = replace(curMi.match, searchOpt.pattern, replacement) + if replace1match(filename, buffer, curMi, i, r, newBuf, lineRepl): + changed = true + i = curMi.last + 1 + if changed: + newBuf.add(substr(buffer, i)) # finalize new buffer after last match + var f: File + if open(f, filename, fmWrite): + f.write(newBuf) + f.close() + else: + printError "cannot open file for overwriting: " & filename + inc(gVar.errors) + +template processFileResult(filename: string, fileResult: untyped) = var filenameShown = false template showFilename = if not filenameShown: @@ -871,38 +908,27 @@ template printFileResult(filename: string, body: untyped) = filenameShown = true if optVerbose in options: showFilename - var curCol: Column - for output in body: - updateCounters(output) - if output.kind notin {Rejected, OpenError, JustCount} and not oneline: - showFilename - if output.kind == JustCount and oneline: - printFile(filename & ":") - printOutput(filename, output, curCol) - -proc replaceMatches(filename: string, buffer: string, fileResult: FileResult) = - var newBuf = newStringOfCap(buffer.len) - - var changed = false - var lineRepl = 1 - var i = 0 - for output in fileResult: - if output.kind in {BlockFirstMatch, BlockNextMatch}: - #let r = replace(curMi.match, pattern, replacement % matches) #TODO - let curMi = output.match - let r = replace(curMi.match, searchOpt.pattern, replacement) - if replace1match(filename, buffer, curMi, i, r, newBuf, lineRepl): - changed = true - i = curMi.last + 1 - if changed: - newBuf.add(substr(buffer, i)) # finalize new buffer after last match - var f: File - if open(f, filename, fmWrite): - f.write(newBuf) - f.close() - else: - printError "cannot open file for overwriting: " & filename - inc(gVar.errors) + if optReplace notin options: + var curCol: Column + for output in fileResult: + updateCounters(output) + if output.kind notin {Rejected, OpenError, JustCount} and not oneline: + showFilename + if output.kind == JustCount and oneline: + printFile(filename & ":") + printOutput(filename, output, curCol) + else: + var buffer = "" + var matches: FileResult + for output in fileResult: + updateCounters(output) + case output.kind + of Rejected, OpenError, JustCount: printInfo(filename, output) + of BlockFirstMatch, BlockNextMatch, BlockEnd: + matches.add(output) + of FileContents: buffer = output.buffer + if matches.len > 0: + replaceMatches(filename, buffer, matches) proc run1Thread() = declareCompiledPatterns(searchOptC, SearchOptComp): @@ -914,21 +940,9 @@ proc run1Thread() = inc(gVar.errors) printError (err & filename) continue - if optReplace notin options: - printFileResult(filename, processFile(searchOptC, filename)) - else: - var matches: FileResult - var buffer = "" - - for output in processFile(searchOptC, filename, yieldContents=true): - updateCounters(output) - case output.kind - of Rejected, OpenError, JustCount: discard - of BlockFirstMatch, BlockNextMatch, BlockEnd: - matches.add(output) - of FileContents: buffer = output.buffer - if matches.len > 0: - replaceMatches(filename, buffer, matches) + processFileResult(filename, + processFile(searchOptC, filename, + yieldContents=optReplace in options)) # Multi-threaded version: all printing is being done in the Main thread. # Totally nWorkers+1 additional threads are created (workers + pathProducer). @@ -980,57 +994,35 @@ proc pathProducer(arg: (seq[string], WalkOpt)) {.thread.} = resultsChan.send((false, nextFileN, filename, @[Output(kind: OpenError, msg: err)])) nextFileN += 1 - resultsChan.send((true, nextFileN, "", @[])) + resultsChan.send((true, nextFileN, "", @[])) # pass total number of files proc runMultiThread() = var workers = newSeq[Thread[SearchOpt]](nWorkers) storage = newTable[int, (string, FileResult) ]() - # file number -> accumulated result - firstUnprocessedFile = 0 + # file number -> tuple[filename, fileResult - accumulated data structure] + firstUnprocessedFile = 0 # for always processing files in the same order open(searchRequestsChan) open(resultsChan) for n in 0 ..< nWorkers: createThread(workers[n], worker, searchOpt) var producerThread: Thread[(seq[string], WalkOpt)] createThread(producerThread, pathProducer, (paths, walkOpt)) - template process1result(fileNo: int, fname: string, fResult: FileResult) = - storage[fileNo] = (fname, fResult) - var fileResult: FileResult - while storage.haskey(firstUnprocessedFile): - fileResult = storage[firstUnprocessedFile][1] - let filename = storage[firstUnprocessedFile][0] - if optReplace notin options: - printFileResult(filename, fileResult) - else: - var buffer = "" - - var matches: FileResult - for output in fileResult: - updateCounters(output) - case output.kind - of Rejected, OpenError, JustCount: discard - # printError error - of BlockFirstMatch, BlockNextMatch, BlockEnd: - matches.add(output) - of FileContents: buffer = output.buffer - if matches.len > 0: - replaceMatches(filename, buffer, matches) - storage.del(firstUnprocessedFile) - firstUnprocessedFile += 1 + template add1fileResult(fileNo: int, fname: string, fResult: FileResult) = + storage[fileNo] = (fname, fResult) + while storage.haskey(firstUnprocessedFile): + let fileResult = storage[firstUnprocessedFile][1] + let filename = storage[firstUnprocessedFile][0] + processFileResult(filename, fileResult) + storage.del(firstUnprocessedFile) + firstUnprocessedFile += 1 var totalFiles = -1 # will be known when pathProducer finishes while totalFiles == -1 or firstUnprocessedFile < totalFiles: let msg = resultsChan.recv() if msg.finished: totalFiles = msg.fileNo else: - process1result(msg.fileNo, msg.filename, msg.fileResult) - -proc run() = - if nWorkers == 0: - run1Thread() - else: - runMultiThread() + add1fileResult(msg.fileNo, msg.filename, msg.fileResult) proc reportError(msg: string) = printError "Error: " & msg @@ -1178,7 +1170,10 @@ if searchOpt.pattern.len == 0: else: if paths.len == 0: paths.add(os.getCurrentDir()) - run() + if nWorkers == 0: + run1Thread() + else: + runMultiThread() if gVar.errors != 0: printError $gVar.errors & " errors" printBold($gVar.matches & " matches") From ecaa4e0063f9ca8807954eb924fff8be40251282 Mon Sep 17 00:00:00 2001 From: Andrey Makarov Date: Fri, 16 Oct 2020 20:46:45 +0300 Subject: [PATCH 10/20] add option --fit --- tools/nimgrep.nim | 90 ++++++++++++++++++++++++++++------------------- 1 file changed, 53 insertions(+), 37 deletions(-) diff --git a/tools/nimgrep.nim b/tools/nimgrep.nim index cec2f0fed9c71..fa6d3467a3dde 100644 --- a/tools/nimgrep.nim +++ b/tools/nimgrep.nim @@ -42,7 +42,8 @@ Options: --excludeFile:PAT skip files whose names match the given pattern PAT --includeDir:PAT search only files with full directory name matching PAT --excludeDir:PAT skip directories whose names match the given pattern PAT - --match:PAT, -m:PAT select files containing a (not displayed) match of PAT + --if,--ef,--id,--ed abbreviations of 4 options above + --match:PAT select files containing a (not displayed) match of PAT --noMatch:PAT select files not containing any match of PAT --bin:yes|no|only process binary files? (detected by \0 in first 1K bytes) --text, -t process only text files, the same as --bin:no @@ -58,10 +59,11 @@ Options: --context:N, -c:N print N lines of leading context before every match and N lines of trailing context after it --sortTime order files by the last modification time - - -s[:desc|asc] - descending (default) or ascending + -s[:asc|desc] - ascending (default: recent files last) or descending --group, -g group matches by file --newLine, -l display every matching line starting from a new line --limit[:N], -m[:N] limit max width of lines from files by N characters (80) + --fit calculate --limit from terminal width for every line --onlyAscii, -o use only printable ASCII Latin characters 0x20-0x7E (substitutions: 0 -> @, 1-0x1F -> A-_, 0x7F-0xFF -> !) --verbose be verbose: list every processed file @@ -101,7 +103,7 @@ type TOption = enum optFind, optReplace, optPeg, optRegex, optRecursive, optConfirm, optStdin, optWord, optIgnoreCase, optIgnoreStyle, optVerbose, optFilenames, - optRex, optFollow, optLimitChars + optRex, optFollow, optLimitChars, optFit TOptions = set[TOption] TConfirmEnum = enum ceAbort, ceYes, ceAll, ceNo, ceNone @@ -160,7 +162,7 @@ var searchOpt {.threadvar.}: SearchOpt justCount = false sortTime = false - sortTimeOrder = SortOrder.Descending + sortTimeOrder = SortOrder.Ascending useWriteStyled = true oneline = true linesBefore = 0 @@ -325,18 +327,6 @@ proc blockHeader(filename: string, line: int|string, replMode=false) = printBlockLineN($line.`$`.align(alignment) & ":") stdout.write("\n") -proc lineHeader(filename: string, line: int|string, isMatch: bool) = - let lineSym = - if isMatch: $line & ":" - else: $line & " " - if not newLine and optFilenames notin options: - if oneline: - printFile(filename) - printLineN(":" & lineSym, isMatch) - else: - printLineN(lineSym.align(alignment+1), isMatch) - stdout.write(" ") - type Column = tuple # current column info for the cropping (--limit) feature terminal: int file: int @@ -347,11 +337,25 @@ proc newLn(curCol: var Column) = curCol.file = 0 curcol.terminal = 0 +proc lineHeader(filename: string, line: int|string, isMatch: bool, curCol: var Column) = + let lineSym = + if isMatch: $line & ":" + else: $line & " " + if not newLine and optFilenames notin options: + if oneline: + printFile(filename) + printLineN(":" & lineSym, isMatch) + curcol.terminal += filename.len + 1 + lineSym.len + else: + printLineN(lineSym.align(alignment+1), isMatch) + curcol.terminal += lineSym.align(alignment+1).len + stdout.write(" "); curCol.terminal += 1 + proc printMatch(fileName: string, mi: MatchInfo, curCol: var Column) = let sLines = mi.match.splitLines() for i, l in sLines: if i > 0: - lineHeader(filename, mi.lineBeg + i, isMatch = true) + lineHeader(filename, mi.lineBeg + i, isMatch = true, curCol) if curCol.terminal < limitChar: writeColored(l) else: @@ -361,15 +365,15 @@ proc printMatch(fileName: string, mi: MatchInfo, curCol: var Column) = curCol.terminal += mi.match.len curCol.file += mi.match.len +const matchPaddingFromRight = 10 let ellipsis = "..." proc reserveChars(mi: MatchInfo): int = - if optLimitChars notin options: - result = 0 - else: + if optLimitChars in options or optFit in options: let patternChars = afterPattern(mi.match, 0) + 1 - let padding = 3 - result = patternChars + ellipsis.len + padding + result = patternChars + ellipsis.len + matchPaddingFromRight + else: + result = 0 proc printRaw(c: char, curCol: var Column, allowTabs = true) = # print taking into account tabs and optOnlyAscii @@ -459,7 +463,7 @@ proc printSubLinesBefore(filename: string, beforeMatch: string, lineBeg: int, blockHeader(filename, lineBeg, replMode=replMode) for i, l in sLines: let isLastLine = i == sLines.len - 1 - lineHeader(filename, startLine + i, isMatch = isLastLine) + lineHeader(filename, startLine + i, isMatch = isLastLine, curCol) if isLastLine: limitChar -= reserveChars l.printCropped(curCol, fromLeft = isLastLine) if isLastLine: limitChar += reserveChars @@ -470,6 +474,13 @@ proc getSubLinesAfter(buf: string, mi: MatchInfo): string = let last = afterPattern(buf, mi.last+1, 1+linesAfter) result = substr(buf, mi.last+1, last) +proc printOverflow(filename: string, line: int, curCol: var Column) = + if curCol.overflowMatches > 0: + lineHeader(filename, line, isMatch = true, curCol) + printBold("(" & $curCol.overflowMatches & " more matches skipped)") + newLn(curCol) + curCol.overflowMatches = 0 + proc printSubLinesAfter(filename: string, afterMatch: string, matchLineEnd: int, curCol: var Column) = # finish block: print 'linesAfter' lines after match `mi` @@ -480,12 +491,13 @@ proc printSubLinesAfter(filename: string, afterMatch: string, matchLineEnd: int, sLines[0].printCropped(curCol, fromLeft = false) # complete the line after the match itself newLn(curCol) + printOverflow(filename, matchLineEnd, curCol) #let skipLine = # workaround posix line ending at the end of file # if last == s.len-1 and s.len >= 2 and s[^1] == '\l' and s[^2] != '\c': 1 - # else: 0 + # else: 0 TODO: let skipLine = 0 for i in 1 ..< sLines.len - skipLine: - lineHeader(filename, matchLineEnd + i, isMatch = false) + lineHeader(filename, matchLineEnd + i, isMatch = false, curCol) sLines[i].printCropped(curCol, fromLeft = false) newLn(curCol) @@ -502,10 +514,11 @@ proc printBetweenMatches(filename: string, betweenMatches: string, # finish the line of previous Match if sLines.len > 1: newLn(curCol) + printOverflow(filename, lastLineBeg - sLines.len + 1, curCol) for i in 1 ..< sLines.len: let isLastLine = i == sLines.len - 1 lineHeader(filename, lastLineBeg - sLines.len + i + 1, - isMatch = isLastLine) + isMatch = isLastLine, curCol) if isLastLine: limitChar -= reserveChars sLines[i].printCropped(curCol, fromLeft = isLastLine) if isLastLine: limitChar += reserveChars @@ -600,12 +613,6 @@ proc printOutput(filename: string, output: Output, curCol: var Column) = printMatch(filename, output.match, curCol) of BlockEnd: printSubLinesAfter(filename, output.blockEnding, output.firstLine, curCol) - if curCol.overflowMatches > 0: - # overflowed matches are shown for the entire Block after last match - lineHeader(filename, output.firstLine, isMatch = true) - printBold("(" & $curCol.overflowMatches & " more matches skipped)") - stdout.write("\n") - curCol.overflowMatches = 0 if linesAfter + linesBefore >= 2 and not newLine: stdout.write("\n") iterator searchFile(pattern: Pattern; filename: string; @@ -846,7 +853,12 @@ iterator walkDirBasic(dir: string, walkOptC: WalkOptComp[Pattern]): string = dirs.add path if sortTime: # sort by time - collect files before yielding for file in files: - timeFiles.add((getLastModificationTime(file), file)) + var time: Time + try: + time = getLastModificationTime(file) # can fail for broken symlink + except: + discard + timeFiles.add((time, file)) else: # alphanumeric sort, yield immediately after sorting files.sort() for file in files: @@ -1085,10 +1097,10 @@ for kind, key, val in getopt(): nWorkers = parseInt(val) of "ext": walkOpt.extensions.add val.split('|') of "noext", "no-ext": walkOpt.skipExtensions.add val.split('|') - of "excludedir", "exclude-dir": walkOpt.excludeDir.add val - of "includedir", "include-dir": walkOpt.includeDir.add val - of "includefile", "include-file": walkOpt.includeFile.add val - of "excludefile", "exclude-file": walkOpt.excludeFile.add val + of "excludedir", "exclude-dir", "ed": walkOpt.excludeDir.add val + of "includedir", "include-dir", "id": walkOpt.includeDir.add val + of "includefile", "include-file", "if": walkOpt.includeFile.add val + of "excludefile", "exclude-file", "ef": walkOpt.excludeFile.add val of "match": searchOpt.checkMatch = val of "nomatch", "notmatch", "not-match", "no-match": searchOpt.checkNoMatch = val @@ -1143,6 +1155,9 @@ for kind, key, val in getopt(): incl(options, optLimitChars) if val != "": limitChar = parseInt(val) + of "fit": + incl(options, optFit) + limitChar = terminalWidth() of "onlyascii", "only-ascii", "o": optOnlyAscii = true of "verbose": incl(options, optVerbose) of "filenames": incl(options, optFilenames) @@ -1155,6 +1170,7 @@ checkOptions({optFind, optReplace}, "find", "replace") checkOptions({optPeg, optRegex}, "peg", "re") checkOptions({optIgnoreCase, optIgnoreStyle}, "ignore_case", "ignore_style") checkOptions({optFilenames, optReplace}, "filenames", "replace") +checkOptions({optFit, optLimitChars}, "fit", "limit") linesBefore = max(linesBefore, linesContext) linesAfter = max(linesAfter, linesContext) From 2fb41ca72dccfcbac9bbc5f04e8cbaa8784207eb Mon Sep 17 00:00:00 2001 From: Andrey Makarov Date: Fri, 16 Oct 2020 22:59:55 +0300 Subject: [PATCH 11/20] fix groups in --replace --- tools/nimgrep.nim | 49 ++++++++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/tools/nimgrep.nim b/tools/nimgrep.nim index fa6d3467a3dde..5dc56fdec3b57 100644 --- a/tools/nimgrep.nim +++ b/tools/nimgrep.nim @@ -59,7 +59,7 @@ Options: --context:N, -c:N print N lines of leading context before every match and N lines of trailing context after it --sortTime order files by the last modification time - - -s[:asc|desc] - ascending (default: recent files last) or descending + -s[:asc|desc] - ascending (default: recent files go last) or descending --group, -g group matches by file --newLine, -l display every matching line starting from a new line --limit[:N], -m[:N] limit max width of lines from files by N characters (80) @@ -153,6 +153,8 @@ type pattern: Pat checkMatch: Pat checkNoMatch: Pat + SinglePattern[PAT] = tuple # compile single pattern for replacef + pattern: PAT var paths: seq[string] = @[] @@ -886,7 +888,8 @@ iterator walkRec(paths: seq[string]): (string, string) = else: yield ("Error: no such file or directory: ", path) -proc replaceMatches(filename: string, buffer: string, fileResult: FileResult) = +proc replaceMatches(pattern: Pattern; filename: string, buffer: string, + fileResult: FileResult) = var newBuf = newStringOfCap(buffer.len) var changed = false @@ -894,9 +897,8 @@ proc replaceMatches(filename: string, buffer: string, fileResult: FileResult) = var i = 0 for output in fileResult: if output.kind in {BlockFirstMatch, BlockNextMatch}: - #let r = replace(curMi.match, pattern, replacement % matches) #TODO let curMi = output.match - let r = replace(curMi.match, searchOpt.pattern, replacement) + let r = replacef(curMi.match, pattern, replacement) if replace1match(filename, buffer, curMi, i, r, newBuf, lineRepl): changed = true i = curMi.last + 1 @@ -910,7 +912,8 @@ proc replaceMatches(filename: string, buffer: string, fileResult: FileResult) = printError "cannot open file for overwriting: " & filename inc(gVar.errors) -template processFileResult(filename: string, fileResult: untyped) = +template processFileResult(pattern: Pattern; filename: string, + fileResult: untyped) = var filenameShown = false template showFilename = if not filenameShown: @@ -940,7 +943,7 @@ template processFileResult(filename: string, fileResult: untyped) = matches.add(output) of FileContents: buffer = output.buffer if matches.len > 0: - replaceMatches(filename, buffer, matches) + replaceMatches(pattern, filename, buffer, matches) proc run1Thread() = declareCompiledPatterns(searchOptC, SearchOptComp): @@ -952,7 +955,7 @@ proc run1Thread() = inc(gVar.errors) printError (err & filename) continue - processFileResult(filename, + processFileResult(searchOptC.pattern, filename, processFile(searchOptC, filename, yieldContents=optReplace in options)) @@ -1020,21 +1023,23 @@ proc runMultiThread() = createThread(workers[n], worker, searchOpt) var producerThread: Thread[(seq[string], WalkOpt)] createThread(producerThread, pathProducer, (paths, walkOpt)) - template add1fileResult(fileNo: int, fname: string, fResult: FileResult) = - storage[fileNo] = (fname, fResult) - while storage.haskey(firstUnprocessedFile): - let fileResult = storage[firstUnprocessedFile][1] - let filename = storage[firstUnprocessedFile][0] - processFileResult(filename, fileResult) - storage.del(firstUnprocessedFile) - firstUnprocessedFile += 1 - var totalFiles = -1 # will be known when pathProducer finishes - while totalFiles == -1 or firstUnprocessedFile < totalFiles: - let msg = resultsChan.recv() - if msg.finished: - totalFiles = msg.fileNo - else: - add1fileResult(msg.fileNo, msg.filename, msg.fileResult) + declareCompiledPatterns(pat, SinglePattern): + compile1Pattern(searchOpt.pattern, pat.pattern) + template add1fileResult(fileNo: int, fname: string, fResult: FileResult) = + storage[fileNo] = (fname, fResult) + while storage.haskey(firstUnprocessedFile): + let fileResult = storage[firstUnprocessedFile][1] + let filename = storage[firstUnprocessedFile][0] + processFileResult(pat.pattern, filename, fileResult) + storage.del(firstUnprocessedFile) + firstUnprocessedFile += 1 + var totalFiles = -1 # will be known when pathProducer finishes + while totalFiles == -1 or firstUnprocessedFile < totalFiles: + let msg = resultsChan.recv() + if msg.finished: + totalFiles = msg.fileNo + else: + add1fileResult(msg.fileNo, msg.filename, msg.fileResult) proc reportError(msg: string) = printError "Error: " & msg From 8478753a25169ba31acf776cbdf72a52b4abd7d2 Mon Sep 17 00:00:00 2001 From: Andrey Makarov Date: Sat, 17 Oct 2020 15:51:03 +0300 Subject: [PATCH 12/20] fix flushing, --replace, improve --count --- tools/nimgrep.nim | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/tools/nimgrep.nim b/tools/nimgrep.nim index 5dc56fdec3b57..2f7e301968f3b 100644 --- a/tools/nimgrep.nim +++ b/tools/nimgrep.nim @@ -103,7 +103,7 @@ type TOption = enum optFind, optReplace, optPeg, optRegex, optRecursive, optConfirm, optStdin, optWord, optIgnoreCase, optIgnoreStyle, optVerbose, optFilenames, - optRex, optFollow, optLimitChars, optFit + optRex, optFollow, optCount, optLimitChars, optFit TOptions = set[TOption] TConfirmEnum = enum ceAbort, ceYes, ceAll, ceNo, ceNone @@ -162,7 +162,6 @@ var options: TOptions = {optRegex} walkOpt {.threadvar.}: WalkOpt searchOpt {.threadvar.}: SearchOpt - justCount = false sortTime = false sortTimeOrder = SortOrder.Ascending useWriteStyled = true @@ -171,7 +170,7 @@ var linesAfter = 0 linesContext = 0 newLine = false - gVar = (matches: 0, errors: 0, reallyReplace: false) + gVar = (matches: 0, errors: 0, reallyReplace: true) # gVar - variables that can change during search/replace nWorkers = 0 # run in single thread by default searchRequestsChan: Channel[Trequest] @@ -474,7 +473,11 @@ proc printSubLinesBefore(filename: string, beforeMatch: string, lineBeg: int, proc getSubLinesAfter(buf: string, mi: MatchInfo): string = let last = afterPattern(buf, mi.last+1, 1+linesAfter) - result = substr(buf, mi.last+1, last) + let skipByte = # workaround posix: suppress extra line at the end of file + if (last == buf.len-1 and buf.len >= 2 and + buf[^1] == '\l' and buf[^2] != '\c'): 1 + else: 0 + result = substr(buf, mi.last+1, last - skipByte) proc printOverflow(filename: string, line: int, curCol: var Column) = if curCol.overflowMatches > 0: @@ -494,11 +497,7 @@ proc printSubLinesAfter(filename: string, afterMatch: string, matchLineEnd: int, # complete the line after the match itself newLn(curCol) printOverflow(filename, matchLineEnd, curCol) - #let skipLine = # workaround posix line ending at the end of file - # if last == s.len-1 and s.len >= 2 and s[^1] == '\l' and s[^2] != '\c': 1 - # else: 0 TODO: - let skipLine = 0 - for i in 1 ..< sLines.len - skipLine: + for i in 1 ..< sLines.len: lineHeader(filename, matchLineEnd + i, isMatch = false, curCol) sLines[i].printCropped(curCol, fromLeft = false) newLn(curCol) @@ -552,6 +551,7 @@ proc printReplacement(filename: string, buf: string, mi: MatchInfo, printMatch(fileName, miFixLines, curCol) printSubLinesAfter(fileName, getSubLinesAfter(buf, miFixLines), miFixLines.lineEnd, curCol) + if linesAfter + linesBefore >= 2 and not newLine: stdout.write("\n") stdout.flushFile() proc replace1match(filename: string, buf: string, mi: MatchInfo, i: int, @@ -608,14 +608,14 @@ proc printOutput(filename: string, output: Output, curCol: var Column) = printSubLinesBefore(filename, output.pre, output.match.lineBeg, curCol, reserveChars(output.match)) printMatch(filename, output.match, curCol) - #flush: TODO of BlockNextMatch: printBetweenMatches(filename, output.pre, output.match.lineBeg, curCol, reserveChars(output.match)) printMatch(filename, output.match, curCol) of BlockEnd: printSubLinesAfter(filename, output.blockEnding, output.firstLine, curCol) - if linesAfter + linesBefore >= 2 and not newLine: stdout.write("\n") + if linesAfter + linesBefore >= 2 and not newLine and + optFilenames notin options: stdout.write("\n") iterator searchFile(pattern: Pattern; filename: string; buffer: string): Output = @@ -784,14 +784,14 @@ iterator processFile(searchOptC: SearchOptComp[Pattern], filename: string, var cnt = 0 for output in searchFile(searchOptC.pattern, filename, buffer): found = true - if not justCount: + if optCount notin options: yield output else: if output.kind in {BlockFirstMatch, BlockNextMatch}: inc(cnt) - if justCount and cnt > 0: + if optCount in options and cnt > 0: yield Output(kind: JustCount, matches: cnt) - if yieldContents and found and not justCount: + if yieldContents and found and optCount notin options: yield Output(kind: FileContents, buffer: buffer) proc hasRightFileName(path: string, walkOptC: WalkOptComp[Pattern]): bool = @@ -925,13 +925,18 @@ template processFileResult(pattern: Pattern; filename: string, showFilename if optReplace notin options: var curCol: Column + var toFlush: bool for output in fileResult: updateCounters(output) + toFlush = true if output.kind notin {Rejected, OpenError, JustCount} and not oneline: showFilename if output.kind == JustCount and oneline: printFile(filename & ":") printOutput(filename, output, curCol) + if nWorkers == 0 and output.kind in {BlockFirstMatch, BlockNextMatch}: + stdout.flushFile() # flush immediately in single thread mode + if toFlush: stdout.flushFile() else: var buffer = "" var matches: FileResult @@ -1116,7 +1121,7 @@ for kind, key, val in getopt(): of "only": searchOpt.checkBin = biOnly else: reportError("unknown value for --bin") of "text", "t": searchOpt.checkBin = biNo - of "count": justCount = true + of "count": incl(options, optCount) of "sorttime", "sort-time", "s": sortTime = true case normalize(val) @@ -1172,6 +1177,7 @@ for kind, key, val in getopt(): of cmdEnd: assert(false) # cannot happen checkOptions({optFind, optReplace}, "find", "replace") +checkOptions({optCount, optReplace}, "count", "replace") checkOptions({optPeg, optRegex}, "peg", "re") checkOptions({optIgnoreCase, optIgnoreStyle}, "ignore_case", "ignore_style") checkOptions({optFilenames, optReplace}, "filenames", "replace") From 49001dc44d4f2f3573880935827551667d7c695e Mon Sep 17 00:00:00 2001 From: Andrey Makarov Date: Sat, 17 Oct 2020 18:10:01 +0300 Subject: [PATCH 13/20] use "." as the default directory, not full path --- tools/nimgrep.nim | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/nimgrep.nim b/tools/nimgrep.nim index 2f7e301968f3b..b00072c6839fa 100644 --- a/tools/nimgrep.nim +++ b/tools/nimgrep.nim @@ -1196,7 +1196,7 @@ if searchOpt.pattern.len == 0: reportError("empty pattern was given") else: if paths.len == 0: - paths.add(os.getCurrentDir()) + paths.add(".") if nWorkers == 0: run1Thread() else: From bffa96b4a93b48a76a897356c4a9f8b8c37b736c Mon Sep 17 00:00:00 2001 From: Andrey Makarov Date: Sat, 17 Oct 2020 18:46:06 +0300 Subject: [PATCH 14/20] fix --fit for Windows --- tools/nimgrep.nim | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/tools/nimgrep.nim b/tools/nimgrep.nim index b00072c6839fa..622eb048e30a2 100644 --- a/tools/nimgrep.nim +++ b/tools/nimgrep.nim @@ -155,6 +155,10 @@ type checkNoMatch: Pat SinglePattern[PAT] = tuple # compile single pattern for replacef pattern: PAT + Column = tuple # current column info for the cropping (--limit) feature + terminal: int + file: int + overflowMatches: int var paths: seq[string] = @[] @@ -328,17 +332,13 @@ proc blockHeader(filename: string, line: int|string, replMode=false) = printBlockLineN($line.`$`.align(alignment) & ":") stdout.write("\n") -type Column = tuple # current column info for the cropping (--limit) feature - terminal: int - file: int - overflowMatches: int - proc newLn(curCol: var Column) = stdout.write("\n") curCol.file = 0 curcol.terminal = 0 -proc lineHeader(filename: string, line: int|string, isMatch: bool, curCol: var Column) = +proc lineHeader(filename: string, line: int|string, isMatch: bool, + curCol: var Column) = let lineSym = if isMatch: $line & ":" else: $line & " " @@ -457,7 +457,8 @@ proc getSubLinesBefore(buf: string, curMi: MatchInfo): string = result = substr(buf, first, curMi.first-1) proc printSubLinesBefore(filename: string, beforeMatch: string, lineBeg: int, - curCol: var Column, reserveChars: int, replMode=false) = + curCol: var Column, reserveChars: int, + replMode=false) = # start block: print 'linesBefore' lines before current match `curMi` let sLines = splitLines(beforeMatch) let startLine = lineBeg - sLines.len + 1 @@ -1168,6 +1169,8 @@ for kind, key, val in getopt(): of "fit": incl(options, optFit) limitChar = terminalWidth() + when defined(windows): # Windows cmd&powershell add an empty line when + limitChar -= 1 # printing '\n' right after the last column of "onlyascii", "only-ascii", "o": optOnlyAscii = true of "verbose": incl(options, optVerbose) of "filenames": incl(options, optFilenames) From 4f466bb0d5e6ab2e07a97ca15088d64fd0be8002 Mon Sep 17 00:00:00 2001 From: Andrey Makarov Date: Sun, 18 Oct 2020 19:56:47 +0300 Subject: [PATCH 15/20] force target to C for macosx --- testament/categories.nim | 2 ++ 1 file changed, 2 insertions(+) diff --git a/testament/categories.nim b/testament/categories.nim index 07d35f6fb04c1..a95a76357b622 100644 --- a/testament/categories.nim +++ b/testament/categories.nim @@ -262,6 +262,8 @@ proc debuggerTests(r: var TResults, cat: Category, options: string) = if fileExists("tools/nimgrep.nim"): var t = makeTest("tools/nimgrep", options & " --debugger:on", cat) t.spec.action = actionCompile + # force target to C because of MacOS 10.15 clang++ bug (see #15612) + t.spec.targets = { targetC } testSpec r, t # ------------------------- JS tests ------------------------------------------ From b30b8cad9a8145b7d09ff547450c8652dfb74b1f Mon Sep 17 00:00:00 2001 From: Andrey Makarov Date: Wed, 21 Oct 2020 23:16:59 +0300 Subject: [PATCH 16/20] validate non-negative int input for options #15318 --- tools/nimgrep.nim | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/tools/nimgrep.nim b/tools/nimgrep.nim index 622eb048e30a2..e8127ccc2ec4f 100644 --- a/tools/nimgrep.nim +++ b/tools/nimgrep.nim @@ -1065,6 +1065,15 @@ proc checkOptions(subset: TOptions, a, b: string) = if subset <= options: quit("cannot specify both '$#' and '$#'" % [a, b]) +proc parseNonNegative(str: string, key: string): int = + try: + result = parseInt(str) + except ValueError: + reportError("Option " & key & " requires an integer but '" & + str & "' was given") + if result < 0: + reportError("A positive integer is expected for option " & key) + when defined(posix): useWriteStyled = terminal.isatty(stdout) # that should be before option processing to allow override of useWriteStyled @@ -1105,7 +1114,7 @@ for kind, key, val in getopt(): if val == "": nWorkers = countProcessors() else: - nWorkers = parseInt(val) + nWorkers = parseNonNegative(val, key) of "ext": walkOpt.extensions.add val.split('|') of "noext", "no-ext": walkOpt.skipExtensions.add val.split('|') of "excludedir", "exclude-dir", "ed": walkOpt.excludeDir.add val @@ -1142,30 +1151,20 @@ for kind, key, val in getopt(): if colortheme notin ["simple", "bnw", "ack", "gnu"]: reportError("unknown colortheme '" & val & "'") of "beforecontext", "before-context", "b": - try: - linesBefore = parseInt(val) - except ValueError: - reportError("option " & key & " requires an integer but '" & - val & "' was given") + linesBefore = parseNonNegative(val, key) of "aftercontext", "after-context", "a": - try: - linesAfter = parseInt(val) - except ValueError: - reportError("option " & key & " requires an integer but '" & - val & "' was given") + linesAfter = parseNonNegative(val, key) of "context", "c": - try: - linesContext = parseInt(val) - except ValueError: - reportError("option --context requires an integer but '" & - val & "' was given") + linesContext = parseNonNegative(val, key) of "newline", "l": newLine = true of "oneline": oneline = true of "group", "g": oneline = false of "limit", "m": incl(options, optLimitChars) - if val != "": - limitChar = parseInt(val) + if val == "": + limitChar = 80 + else: + limitChar = parseNonNegative(val, key) of "fit": incl(options, optFit) limitChar = terminalWidth() From 3ea83b7bf267db8cda253200c77e4e9e7bd5f883 Mon Sep 17 00:00:00 2001 From: Andrey Makarov Date: Thu, 22 Oct 2020 00:56:33 +0300 Subject: [PATCH 17/20] switch nimgrep to using --gc:orc --- tools/nimgrep.nim | 32 ++++++++++++++++---------------- tools/nimgrep.nim.cfg | 2 +- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/tools/nimgrep.nim b/tools/nimgrep.nim index e8127ccc2ec4f..b6ed741ed642c 100644 --- a/tools/nimgrep.nim +++ b/tools/nimgrep.nim @@ -621,7 +621,7 @@ proc printOutput(filename: string, output: Output, curCol: var Column) = iterator searchFile(pattern: Pattern; filename: string; buffer: string): Output = var prevMi, curMi: MatchInfo - curMi.lineEnd = 1 + prevMi.lineEnd = 1 var i = 0 var matches: array[0..re.MaxSubpatterns-1, string] for j in 0..high(matches): matches[j] = "" @@ -634,32 +634,32 @@ iterator searchFile(pattern: Pattern; filename: string; firstLine: prevMi.lineEnd) break - let lineBeg = curMi.lineEnd + countLineBreaks(buffer, i, t.first-1) + let lineBeg = prevMi.lineEnd + countLineBreaks(buffer, i, t.first-1) curMi = (first: t.first, last: t.last, lineBeg: lineBeg, lineEnd: lineBeg + countLineBreaks(buffer, t.first, t.last), match: buffer.substr(t.first, t.last)) if prevMi.lineBeg == 0: # no prev. match, so no prev. block to finalize - yield Output(kind: BlockFirstMatch, - pre: getSubLinesBefore(buffer, curMi), - match: curMi) + let pre = getSubLinesBefore(buffer, curMi) + prevMi = curMi + yield Output(kind: BlockFirstMatch, pre: pre, match: move(curMi)) else: let nLinesBetween = curMi.lineBeg - prevMi.lineEnd if nLinesBetween <= linesAfter + linesBefore + 1: # print as 1 block - yield Output(kind: BlockNextMatch, - pre: getSubLinesBetween(buffer, prevMi, curMi), - match: curMi) + let pre = getSubLinesBetween(buffer, prevMi, curMi) + prevMi = curMi + yield Output(kind: BlockNextMatch, pre: pre, match: move(curMi)) else: # finalize previous block and then print next block - yield Output(kind: BlockEnd, - blockEnding: getSubLinesAfter(buffer, prevMi), + let after = getSubLinesAfter(buffer, prevMi) + yield Output(kind: BlockEnd, blockEnding: after, firstLine: prevMi.lineEnd) + let pre = getSubLinesBefore(buffer, curMi) + prevMi = curMi yield Output(kind: BlockFirstMatch, - pre: getSubLinesBefore(buffer, curMi), - match: curMi) - + pre: pre, + match: move(curMi)) i = t.last+1 - prevMi = curMi func detectBin(buffer: string): bool = for i in 0 ..< min(1024, buffer.len): @@ -779,7 +779,7 @@ iterator processFile(searchOptC: SearchOptComp[Pattern], filename: string, reason = "contains a forbidden match" if reject: - yield Output(kind: Rejected, reason: reason) + yield Output(kind: Rejected, reason: move(reason)) else: var found = false var cnt = 0 @@ -793,7 +793,7 @@ iterator processFile(searchOptC: SearchOptComp[Pattern], filename: string, if optCount in options and cnt > 0: yield Output(kind: JustCount, matches: cnt) if yieldContents and found and optCount notin options: - yield Output(kind: FileContents, buffer: buffer) + yield Output(kind: FileContents, buffer: move(buffer)) proc hasRightFileName(path: string, walkOptC: WalkOptComp[Pattern]): bool = let filename = path.lastPathPart diff --git a/tools/nimgrep.nim.cfg b/tools/nimgrep.nim.cfg index e08ed849ad0e8..2609eb81f1bce 100644 --- a/tools/nimgrep.nim.cfg +++ b/tools/nimgrep.nim.cfg @@ -1,2 +1,2 @@ # using markandsweep because of bug https://github.com/nim-lang/Nim/issues/14138 ---threads:on --gc:markandsweep +--threads:on --gc:orc From 84c66da58faa3c12cb442397e0b2d839c6ecd193 Mon Sep 17 00:00:00 2001 From: Andrey Makarov Date: Mon, 2 Nov 2020 21:29:14 +0300 Subject: [PATCH 18/20] address review: implement cropping in matches,... --- testament/categories.nim | 3 +- tools/nimgrep.nim | 442 ++++++++++++++++++++++++--------------- tools/nimgrep.nim.cfg | 4 +- 3 files changed, 283 insertions(+), 166 deletions(-) diff --git a/testament/categories.nim b/testament/categories.nim index a95a76357b622..fa9391055b270 100644 --- a/testament/categories.nim +++ b/testament/categories.nim @@ -262,7 +262,8 @@ proc debuggerTests(r: var TResults, cat: Category, options: string) = if fileExists("tools/nimgrep.nim"): var t = makeTest("tools/nimgrep", options & " --debugger:on", cat) t.spec.action = actionCompile - # force target to C because of MacOS 10.15 clang++ bug (see #15612) + # force target to C because of MacOS 10.15 SDK headers bug + # https://github.com/nim-lang/Nim/pull/15612#issuecomment-712471879 t.spec.targets = { targetC } testSpec r, t diff --git a/tools/nimgrep.nim b/tools/nimgrep.nim index b6ed741ed642c..24206993fc0de 100644 --- a/tools/nimgrep.nim +++ b/tools/nimgrep.nim @@ -11,7 +11,7 @@ import os, strutils, parseopt, pegs, re, terminal, osproc, tables, algorithm, times const - Version = "1.6" + Version = "1.6.0" Usage = "nimgrep - Nim Grep Utility Version " & Version & """ (c) 2012 Andreas Rumpf @@ -33,20 +33,20 @@ Options: --word, -w the match should have word boundaries (buggy for pegs!) --ignoreCase, -i be case insensitive --ignoreStyle, -y be style insensitive - --nWorkers:N, -n:N speed up search by N additional workers (threads) + --threads:N, -j:N speed up search by N additional workers (threads) --ext:EX1|EX2|... only search the files with the given extension(s), empty one ("--ext") means files with missing extension --noExt:EX1|... exclude files having given extension(s), use empty one to skip files with no extension (like some binary files are) - --includeFile:PAT search only files whose names match the given PATttern + --includeFile:PAT search only files whose names match the given PATtern --excludeFile:PAT skip files whose names match the given pattern PAT --includeDir:PAT search only files with full directory name matching PAT --excludeDir:PAT skip directories whose names match the given pattern PAT --if,--ef,--id,--ed abbreviations of 4 options above --match:PAT select files containing a (not displayed) match of PAT --noMatch:PAT select files not containing any match of PAT - --bin:yes|no|only process binary files? (detected by \0 in first 1K bytes) - --text, -t process only text files, the same as --bin:no + --bin:on|off|only process binary files? (detected by \0 in first 1K bytes) + --text, -t process only text files, the same as --bin:off --count only print counts of matches for files that matched --nocolor output will be given without any colours --color[:always] force color even if output is redirected @@ -62,10 +62,11 @@ Options: -s[:asc|desc] - ascending (default: recent files go last) or descending --group, -g group matches by file --newLine, -l display every matching line starting from a new line - --limit[:N], -m[:N] limit max width of lines from files by N characters (80) - --fit calculate --limit from terminal width for every line - --onlyAscii, -o use only printable ASCII Latin characters 0x20-0x7E - (substitutions: 0 -> @, 1-0x1F -> A-_, 0x7F-0xFF -> !) + --cols[:N] limit max width of lines from files by N characters (off) + --cols:auto, -% calculate columns from terminal width for every line + --onlyAscii, -@ use only printable ASCII Latin characters 0x20-0x7E + substitutions: 0 -> ^@, 1 -> ^A, ... 0x1F -> ^_, + 0x7F -> '7F, ..., 0xFF -> 'FF --verbose be verbose: list every processed file --filenames find the pattern in the filenames, not in the contents of the file @@ -103,12 +104,12 @@ type TOption = enum optFind, optReplace, optPeg, optRegex, optRecursive, optConfirm, optStdin, optWord, optIgnoreCase, optIgnoreStyle, optVerbose, optFilenames, - optRex, optFollow, optCount, optLimitChars, optFit + optRex, optFollow, optCount, optLimitChars TOptions = set[TOption] TConfirmEnum = enum ceAbort, ceYes, ceAll, ceNo, ceNone Bin = enum - biYes, biOnly, biNo + biOn, biOnly, biOff Pattern = Regex | Peg MatchInfo = tuple[first: int, last: int; lineBeg: int, lineEnd: int, match: string] @@ -156,8 +157,8 @@ type SinglePattern[PAT] = tuple # compile single pattern for replacef pattern: PAT Column = tuple # current column info for the cropping (--limit) feature - terminal: int - file: int + terminal: int # column in terminal emulator + file: int # column in file (for correct Tab processing) overflowMatches: int var @@ -169,7 +170,8 @@ var sortTime = false sortTimeOrder = SortOrder.Ascending useWriteStyled = true - oneline = true + oneline = true # turned off by --group + expandTabs = true # Tabs are expanded in oneline mode linesBefore = 0 linesAfter = 0 linesContext = 0 @@ -180,10 +182,11 @@ var searchRequestsChan: Channel[Trequest] resultsChan: Channel[Tresult] colorTheme: string = "simple" - limitChar = high(int) # don't limit line width by default - optOnlyAscii: bool + limitCharUsr = high(int) # don't limit line width by default + termWidth = 80 + optOnlyAscii = false -searchOpt.checkBin = biYes +searchOpt.checkBin = biOn proc ask(msg: string): string = stdout.write(msg) @@ -273,7 +276,8 @@ proc printBold(s: string) = proc printSpecial(s: string) = whenColors: case colorTheme - of "simple", "bnw": stdout.styledWrite(styleBright, s) + of "simple", "bnw": + stdout.styledWrite(if s == " ": styleReverse else: styleBright, s) of "ack", "gnu": stdout.styledWrite(styleReverse, fgBlue, bgDefault, s) proc printError(s: string) = @@ -283,8 +287,6 @@ proc printError(s: string) = of "ack", "gnu": stdout.styledWriteLine(styleReverse, fgRed, bgDefault, s) stdout.flushFile() -const alignment = 6 - proc printLineN(s: string, isMatch: bool) = whenColors: case colorTheme @@ -317,10 +319,19 @@ proc writeColored(s: string) = of "ack": stdout.styledWrite(styleReverse, fgYellow, bgDefault, s) of "gnu": stdout.styledWrite(fgRed, s) +proc printContents(s: string, isMatch: bool) = + if isMatch: + writeColored(s) + else: + stdout.write(s) + proc writeArrow(s: string) = whenColors: stdout.styledWrite(styleReverse, s) +const alignment = 6 # selected so that file contents start at 8, i.e. + # Tabs expand correctly without additional care + proc blockHeader(filename: string, line: int|string, replMode=false) = if replMode: writeArrow(" ->\n") @@ -335,7 +346,14 @@ proc blockHeader(filename: string, line: int|string, replMode=false) = proc newLn(curCol: var Column) = stdout.write("\n") curCol.file = 0 - curcol.terminal = 0 + curCol.terminal = 0 + +# We reserve 10+3 chars on the right in --cols mode (optLimitChars). +# If the current match touches this right margin, subLine before it will +# be cropped (even if space is enough for subLine after the match — we +# currently don't have a way to know it since we get it afterwards). +const matchPaddingFromRight = 10 +const ellipsis = "..." proc lineHeader(filename: string, line: int|string, isMatch: bool, curCol: var Column) = @@ -351,67 +369,109 @@ proc lineHeader(filename: string, line: int|string, isMatch: bool, printLineN(lineSym.align(alignment+1), isMatch) curcol.terminal += lineSym.align(alignment+1).len stdout.write(" "); curCol.terminal += 1 - -proc printMatch(fileName: string, mi: MatchInfo, curCol: var Column) = - let sLines = mi.match.splitLines() - for i, l in sLines: - if i > 0: - lineHeader(filename, mi.lineBeg + i, isMatch = true, curCol) - if curCol.terminal < limitChar: - writeColored(l) - else: - curCol.overflowMatches += 1 - if i < sLines.len - 1: + curCol.terminal = curCol.terminal mod termWidth + if optLimitChars in options and + curCol.terminal > limitCharUsr - matchPaddingFromRight - ellipsis.len: newLn(curCol) - curCol.terminal += mi.match.len - curCol.file += mi.match.len - -const matchPaddingFromRight = 10 -let ellipsis = "..." proc reserveChars(mi: MatchInfo): int = - if optLimitChars in options or optFit in options: + if optLimitChars in options: let patternChars = afterPattern(mi.match, 0) + 1 result = patternChars + ellipsis.len + matchPaddingFromRight else: result = 0 -proc printRaw(c: char, curCol: var Column, allowTabs = true) = - # print taking into account tabs and optOnlyAscii - if c == '\t': - if allowTabs: +# Our substitutions of non-printable symbol to ASCII character are similar to +# those of programm 'less'. +const lowestAscii = 0x20 # lowest ASCII Latin printable symbol (@) +const largestAscii = 0x7e +const by2ascii = 2 # number of ASCII chars to represent chars < lowestAscii +const by3ascii = 3 # number of ASCII chars to represent chars > largestAscii + +proc printExpanded(s: string, curCol: var Column, isMatch: bool, + limitChar: int) = + # Print taking into account tabs and optOnlyAscii (and also optLimitChar: + # the proc called from printCropped but we need to check column < limitChar + # also here, since exact cut points are known only after tab expansion). + # With optOnlyAscii non-ascii chars are highlighted even in matches. + # + # use buffer because: + # 1) we need to print non-ascii character inside matches while keeping the + # amount of color escape sequences minimal. + # 2) there is a report that fwrite buffering is slow on MacOS + # https://github.com/nim-lang/Nim/pull/15612#discussion_r510538326 + const bufSize = 8192 # typical for fwrite too + var buffer: string + const normal = 0 + const special = 1 + var lastAdded = normal + template dumpBuf() = + if lastAdded == normal: + printContents(buffer, isMatch) + else: + printSpecial(buffer) + template addBuf(i: int, s: char|string, size: int) = + if lastAdded != i or buffer.len + size > bufSize: + dumpBuf() + buffer.setlen(0) + buffer.add s + lastAdded = i + for c in s: + let charsAllowed = limitChar - curCol.terminal + if charsAllowed <= 0: + break + if lowestAscii <= int(c) and int(c) <= largestAscii: # ASCII latin + addBuf(normal, c, 1) + curCol.file += 1; curCol.terminal += 1 + elif (not optOnlyAscii) and c != '\t': # the same, print raw + addBuf(normal, c, 1) + curCol.file += 1; curCol.terminal += 1 + elif c == '\t': let spaces = 8 - (curCol.file mod 8) + let spacesAllowed = min(spaces, charsAllowed) curCol.file += spaces - curCol.terminal += spaces - if optOnlyAscii: - printSpecial " " - stdout.write " ".repeat(spaces-1) + curCol.terminal += spacesAllowed + if expandTabs: + if optOnlyAscii: # print a nice box for tab + addBuf(special, " ", 1) + addBuf(normal, " ".repeat(spacesAllowed-1), spacesAllowed-1) + else: + addBuf(normal, " ".repeat(spacesAllowed), spacesAllowed) else: - stdout.write " ".repeat(spaces) - else: + addBuf(normal, '\t', 1) + else: # substitute characters that are not ACSII Latin + if int(c) < lowestAscii: + let substitute = char(int(c) + 0x40) # use common "control codes" + addBuf(special, "^" & substitute, by2ascii) + curCol.terminal += by2ascii + else: # int(c) > largestAscii + curCol.terminal += by3ascii + let substitute = '\'' & c.BiggestUInt.toHex(2) + addBuf(special, substitute, by3ascii) curCol.file += 1 - curCol.terminal += 1 - if optOnlyAscii: - printSpecial " " - else: - stdout.write " " - elif not optOnlyAscii or (0x20 <= int(c) and int(c) <= 0x7e): - stdout.write c - curCol.file += 1 - curCol.terminal += 1 - else: # substitute characters that are not ACSII Latin - let substitute = - if int(c) < 0x20: - char(int(c) + 0x40) # use common "control codes" - else: '!' - printSpecial $substitute - curCol.file += 1 - curCol.terminal += 1 - -proc calcTabLen(s: string, chars: int, fromLeft: bool): int = - if chars < 0: - return 0 - var col = 0 + if buffer.len > 0: + dumpBuf() + +template nextCharacter(c: char, file: var int, term: var int) = + if lowestAscii <= int(c) and int(c) <= largestAscii: # ASCII latin + file += 1 + term += 1 + elif (not optOnlyAscii) and c != '\t': # the same, print raw + file += 1 + term += 1 + elif c == '\t': + term += 8 - (file mod 8) + file += 8 - (file mod 8) + elif int(c) < lowestAscii: + file += 1 + term += by2ascii + else: # int(c) > largestAscii: + file += 1 + term += by3ascii + +proc calcTermLen(s: string, firstCol: int, chars: int, fromLeft: bool): int = + # calculate additional length added by Tabs expansion and substitutions + var col = firstCol var first, last: int if fromLeft: first = max(0, s.len - chars) @@ -420,37 +480,78 @@ proc calcTabLen(s: string, chars: int, fromLeft: bool): int = first = 0 last = min(s.len - 1, chars - 1) for c in s[first .. last]: - if c == '\t': - result += 8 - (col mod 8) - 1 - col += 8 - (col mod 8) - -proc printCropped(s: string, curCol: var Column, fromLeft: bool) = - let eL = ellipsis.len - let charsAllowed = limitChar - curCol.terminal - let tabLen = calcTabLen(s, charsAllowed, fromLeft) - if s.len + tabLen <= charsAllowed: - for c in s: - printRaw(c, curCol) - elif charsAllowed <= eL: - if curCol.overflowMatches == 0: + nextCharacter(c, col, result) + +proc printCropped(s: string, curCol: var Column, fromLeft: bool, + limitChar: int, isMatch = false) = + # print line `s`, may be cropped if option --cols was set + const eL = ellipsis.len + if optLimitChars notin options: + if not expandTabs and not optOnlyAscii: # for speed mostly + printContents(s, isMatch) + else: + printExpanded(s, curCol, isMatch, limitChar) + elif optFilenames in options: + printExpanded(s, curCol, isMatch, limitChar - eL) + if curCol.terminal == limitChar - eL: printBold ellipsis curCol.terminal += eL - else: - if fromLeft: - printBold ellipsis - curCol.terminal += 3 - # don't expand tabs when cropped from left - let first = max(0, s.len - (charsAllowed - eL)) - for c in s[first .. s.len - 1]: - printRaw(c, curCol, allowTabs=false) + else: # limit columns, expand Tabs is also forced + var charsAllowed = limitChar - curCol.terminal + if fromLeft and charsAllowed < eL: + charsAllowed = eL + if (not fromLeft) and charsAllowed <= 0: + # already overflown and ellipsis shold be in place + return + let fullLenWithin = calcTermLen(s, curCol.file, charsAllowed, fromLeft) + # additional length from Tabs and special symbols + let addLen = fullLenWithin - min(s.len, charsAllowed) + # determine that the string is guaranteed to fit within `charsAllowed` + let fits = + if s.len > charsAllowed: + false + else: + if isMatch: fullLenWithin <= charsAllowed - eL + else: fullLenWithin <= charsAllowed + if fits: + printExpanded(s, curCol, isMatch, limitChar = high(int)) else: - let last = min(s.len - 1, charsAllowed - eL - 1) - for c in s[0 .. last]: - printRaw(c, curCol, allowTabs=true) - if curCol.terminal >= limitChar - eL: - break - printBold ellipsis - curCol.terminal += 3 + if fromLeft: + printBold ellipsis + curCol.terminal += eL + # find position `pos` where the right side of line will fit charsAllowed + var col = 0 + var term = 0 + var pos = min(s.len, max(0, s.len - (charsAllowed - eL))) + while pos <= s.len - 1: + let c = s[pos] + nextCharacter(c, col, term) + if term >= addLen: + break + inc pos + curCol.file = pos + # TODO don't expand tabs when cropped from the left - difficult, meaningless + printExpanded(s[pos .. s.len - 1], curCol, isMatch, + limitChar = high(int)) + else: + let last = max(-1, min(s.len - 1, charsAllowed - eL - 1)) + printExpanded(s[0 .. last], curCol, isMatch, limitChar-eL) + let numDots = limitChar - curCol.terminal + printBold ".".repeat(numDots) + curCol.terminal = limitChar + +proc printMatch(fileName: string, mi: MatchInfo, curCol: var Column) = + let sLines = mi.match.splitLines() + for i, l in sLines: + if i > 0: + lineHeader(filename, mi.lineBeg + i, isMatch = true, curCol) + let charsAllowed = limitCharUsr - curCol.terminal + if charsAllowed > 0: + printCropped(l, curCol, fromLeft = false, limitCharUsr, isMatch = true) + else: + curCol.overflowMatches += 1 + if i < sLines.len - 1: + newLn(curCol) proc getSubLinesBefore(buf: string, curMi: MatchInfo): string = let first = beforePattern(buf, curMi.first-1, linesBefore+1) @@ -466,9 +567,8 @@ proc printSubLinesBefore(filename: string, beforeMatch: string, lineBeg: int, for i, l in sLines: let isLastLine = i == sLines.len - 1 lineHeader(filename, startLine + i, isMatch = isLastLine, curCol) - if isLastLine: limitChar -= reserveChars - l.printCropped(curCol, fromLeft = isLastLine) - if isLastLine: limitChar += reserveChars + let limit = if isLastLine: limitCharUsr - reserveChars else: limitCharUsr + l.printCropped(curCol, fromLeft = isLastLine, limitChar = limit) if not isLastLine: newLn(curCol) @@ -483,7 +583,7 @@ proc getSubLinesAfter(buf: string, mi: MatchInfo): string = proc printOverflow(filename: string, line: int, curCol: var Column) = if curCol.overflowMatches > 0: lineHeader(filename, line, isMatch = true, curCol) - printBold("(" & $curCol.overflowMatches & " more matches skipped)") + printBold("(" & $curCol.overflowMatches & " matches skipped)") newLn(curCol) curCol.overflowMatches = 0 @@ -494,13 +594,13 @@ proc printSubLinesAfter(filename: string, afterMatch: string, matchLineEnd: int, if sLines.len == 0: # EOF newLn(curCol) else: - sLines[0].printCropped(curCol, fromLeft = false) + sLines[0].printCropped(curCol, fromLeft = false, limitCharUsr) # complete the line after the match itself newLn(curCol) printOverflow(filename, matchLineEnd, curCol) for i in 1 ..< sLines.len: lineHeader(filename, matchLineEnd + i, isMatch = false, curCol) - sLines[i].printCropped(curCol, fromLeft = false) + sLines[i].printCropped(curCol, fromLeft = false, limitCharUsr) newLn(curCol) proc getSubLinesBetween(buf: string, prevMi: MatchInfo, @@ -512,7 +612,7 @@ proc printBetweenMatches(filename: string, betweenMatches: string, curCol: var Column, reserveChars: int) = # continue block: print between `prevMi` and `curMi` let sLines = betweenMatches.splitLines() - sLines[0].printCropped(curCol, fromLeft = false) + sLines[0].printCropped(curCol, fromLeft = false, limitCharUsr) # finish the line of previous Match if sLines.len > 1: newLn(curCol) @@ -521,9 +621,8 @@ proc printBetweenMatches(filename: string, betweenMatches: string, let isLastLine = i == sLines.len - 1 lineHeader(filename, lastLineBeg - sLines.len + i + 1, isMatch = isLastLine, curCol) - if isLastLine: limitChar -= reserveChars - sLines[i].printCropped(curCol, fromLeft = isLastLine) - if isLastLine: limitChar += reserveChars + let limit = if isLastLine: limitCharUsr - reserveChars else: limitCharUsr + sLines[i].printCropped(curCol, fromLeft = isLastLine, limitChar = limit) if not isLastLine: newLn(curCol) @@ -749,6 +848,7 @@ iterator processFile(searchOptC: SearchOptComp[Pattern], filename: string, yieldContents=false): Output = var buffer: string + var error = false if optFilenames in options: buffer = filename else: @@ -756,44 +856,47 @@ iterator processFile(searchOptC: SearchOptComp[Pattern], filename: string, buffer = system.readFile(filename) except IOError as e: yield Output(kind: OpenError, msg: "readFile failed") + error = true + + if not error: + var reject = false + var reason: string + if searchOpt.checkBin in {biOff, biOnly}: + let isBin = detectBin(buffer) + if isBin and searchOpt.checkBin == biOff: + reject = true + reason = "binary file" + if (not isBin) and searchOpt.checkBin == biOnly: + reject = true + reason = "text file" + + if not reject: + if searchOpt.checkMatch != "": + reject = not contains(buffer, searchOptC.checkMatch, 0) + reason = "doesn't contain a requested match" + + if not reject: + if searchOpt.checkNoMatch != "": + reject = contains(buffer, searchOptC.checkNoMatch, 0) + reason = "contains a forbidden match" + + if reject: + yield Output(kind: Rejected, reason: move(reason)) + else: + var found = false + var cnt = 0 + for output in searchFile(searchOptC.pattern, filename, buffer): + found = true + if optCount notin options: + yield output + else: + if output.kind in {BlockFirstMatch, BlockNextMatch}: + inc(cnt) + if optCount in options and cnt > 0: + yield Output(kind: JustCount, matches: cnt) + if yieldContents and found and optCount notin options: + yield Output(kind: FileContents, buffer: move(buffer)) - var reject = false - var reason: string - if searchOpt.checkBin in {biNo, biOnly}: - let isBin = detectBin(buffer) - if isBin and searchOpt.checkBin == biNo: - reject = true - reason = "binary file" - if (not isBin) and searchOpt.checkBin == biOnly: - reject = true - reason = "text file" - - if not reject: - if searchOpt.checkMatch != "": - reject = not contains(buffer, searchOptC.checkMatch, 0) - reason = "doesn't contain a requested match" - - if not reject: - if searchOpt.checkNoMatch != "": - reject = contains(buffer, searchOptC.checkNoMatch, 0) - reason = "contains a forbidden match" - - if reject: - yield Output(kind: Rejected, reason: move(reason)) - else: - var found = false - var cnt = 0 - for output in searchFile(searchOptC.pattern, filename, buffer): - found = true - if optCount notin options: - yield output - else: - if output.kind in {BlockFirstMatch, BlockNextMatch}: - inc(cnt) - if optCount in options and cnt > 0: - yield Output(kind: JustCount, matches: cnt) - if yieldContents and found and optCount notin options: - yield Output(kind: FileContents, buffer: move(buffer)) proc hasRightFileName(path: string, walkOptC: WalkOptComp[Pattern]): bool = let filename = path.lastPathPart @@ -1110,7 +1213,7 @@ for kind, key, val in getopt(): of "word", "w": incl(options, optWord) of "ignorecase", "ignore-case", "i": incl(options, optIgnoreCase) of "ignorestyle", "ignore-style", "y": incl(options, optIgnoreStyle) - of "nworkers", "n": + of "threads", "j": if val == "": nWorkers = countProcessors() else: @@ -1126,11 +1229,11 @@ for kind, key, val in getopt(): searchOpt.checkNoMatch = val of "bin": case val - of "no": searchOpt.checkBin = biNo - of "yes": searchOpt.checkBin = biYes + of "on": searchOpt.checkBin = biOn + of "off": searchOpt.checkBin = biOff of "only": searchOpt.checkBin = biOnly else: reportError("unknown value for --bin") - of "text", "t": searchOpt.checkBin = biNo + of "text", "t": searchOpt.checkBin = biOff of "count": incl(options, optCount) of "sorttime", "sort-time", "s": sortTime = true @@ -1156,23 +1259,35 @@ for kind, key, val in getopt(): linesAfter = parseNonNegative(val, key) of "context", "c": linesContext = parseNonNegative(val, key) - of "newline", "l": newLine = true - of "oneline": oneline = true - of "group", "g": oneline = false - of "limit", "m": + of "newline", "l": + newLine = true + # Tabs are aligned automatically for --group, --newLine, --filenames + expandTabs = false + of "group", "g": + oneline = false + expandTabs = false + of "cols", "%": incl(options, optLimitChars) - if val == "": - limitChar = 80 + termWidth = terminalWidth() + if val == "auto" or key == "%": + limitCharUsr = termWidth + when defined(windows): # Windows cmd & powershell add an empty line + limitCharUsr -= 1 # when printing '\n' right after the last column + elif val == "": + limitCharUsr = 80 + else: + limitCharUsr = parseNonNegative(val, key) + of "onlyascii", "only-ascii", "@": + if val == "" or val == "on" or key == "@": + optOnlyAscii = true + elif val == "off": + optOnlyAscii = false else: - limitChar = parseNonNegative(val, key) - of "fit": - incl(options, optFit) - limitChar = terminalWidth() - when defined(windows): # Windows cmd&powershell add an empty line when - limitChar -= 1 # printing '\n' right after the last column - of "onlyascii", "only-ascii", "o": optOnlyAscii = true + printError("unknown value for --onlyAscii option") of "verbose": incl(options, optVerbose) - of "filenames": incl(options, optFilenames) + of "filenames": + incl(options, optFilenames) + expandTabs = false of "help", "h": writeHelp() of "version", "v": writeVersion() else: reportError("unrecognized option '" & key & "'") @@ -1183,7 +1298,6 @@ checkOptions({optCount, optReplace}, "count", "replace") checkOptions({optPeg, optRegex}, "peg", "re") checkOptions({optIgnoreCase, optIgnoreStyle}, "ignore_case", "ignore_style") checkOptions({optFilenames, optReplace}, "filenames", "replace") -checkOptions({optFit, optLimitChars}, "fit", "limit") linesBefore = max(linesBefore, linesContext) linesAfter = max(linesAfter, linesContext) diff --git a/tools/nimgrep.nim.cfg b/tools/nimgrep.nim.cfg index 2609eb81f1bce..64d3edc7ae791 100644 --- a/tools/nimgrep.nim.cfg +++ b/tools/nimgrep.nim.cfg @@ -1,2 +1,4 @@ -# using markandsweep because of bug https://github.com/nim-lang/Nim/issues/14138 +# don't use --gc:refc because of bug +# https://github.com/nim-lang/Nim/issues/14138 . +# --gc:orc and --gc:markandsweep work well. --threads:on --gc:orc From 2c691d02ee7f027250dc066633d988ad76db69b6 Mon Sep 17 00:00:00 2001 From: Andrey Makarov Date: Sat, 7 Nov 2020 17:00:12 +0300 Subject: [PATCH 19/20] implement stdin/pipe & revise --help --- tools/nimgrep.nim | 227 +++++++++++++++++++++++++++++++--------------- 1 file changed, 152 insertions(+), 75 deletions(-) diff --git a/tools/nimgrep.nim b/tools/nimgrep.nim index 24206993fc0de..8cc97dd668ac4 100644 --- a/tools/nimgrep.nim +++ b/tools/nimgrep.nim @@ -12,64 +12,106 @@ import const Version = "1.6.0" - Usage = "nimgrep - Nim Grep Utility Version " & Version & """ + Usage = "nimgrep - Nim Grep Searching and Replacement Utility Version " & + Version & """ + + (c) 2012-2020 Andreas Rumpf - (c) 2012 Andreas Rumpf Usage: - nimgrep [options] pattern [replacement] (file/directory)* +* To search: + nimgrep [options] PATTERN [(FILE/DIRECTORY)*/-] +* To replace: + nimgrep [options] PATTERN --replace REPLACEMENT (FILE/DIRECTORY)*/- +* To list file names: + nimgrep [options] --filenames [PATTERN] [(FILE/DIRECTORY)*] + +Positional arguments, from left to right: +* PATERN is either Regex (default) or Peg if --peg is specified. + PATTERN and REPLACEMENT should be skipped when --stdin is specified. +* REPLACEMENT supports $1, $# notations for captured groups in PATTERN. + Note: --replace mode DOES NOT ask confirmation unless --confirm is specified! +* Final arguments are a list of paths (FILE/DIRECTORY) or a standalone + minus '-' (pipe) or not specified (empty). Note for the empty case: when + no FILE/DIRECTORY/- is specified nimgrep DOES NOT read the pipe, but + searches files in the current dir instead! + - read buffer once from stdin: pipe or terminal input; + in --replace mode the result is directed to stdout. + (empty) current directory '.' is assumed + For any given DIRECTORY nimgrep searches only its immediate files without + traversing sub-directories unless --recursive is specified. + In replacement mode all 3 positional arguments are required to avoid damage. + Options: - --find, -f find the pattern (default) - --replace, -! replace the pattern - --peg pattern is a peg - --re pattern is a regular expression (default) +* Mode of operation: + --find, -f find the PATTERN (default) + --replace, -! replace the PATTERN to REPLACEMENT, rewriting the files + --confirm confirm each occurrence/replacement; there is a chance + to abort any time without touching the file + --filenames just list filenames. Provide a PATTERN to find it in + the filenames (not in the contents of a file) or run + with empty pattern to just list all files: + nimgrep --filenames # In current directory + nimgrep --filenames "" DIRECTORY # Note empty pattern "" + +* Interprete patterns: + --peg PATTERN and PAT are Peg + --re PATTERN and PAT are regular expressions (default) --rex, -x use the "extended" syntax for the regular expression so that whitespace is not significant + --word, -w matches should have word boundaries (buggy for pegs!) + --ignoreCase, -i be case insensitive in PATTERN and PAT + --ignoreStyle, -y be style insensitive in PATTERN and PAT + NOTE: PATERN and patterns PAT (see below in other options) are all either + Regex or Peg simultaneously and options --rex, --word, --ignoreCase, + --ignoreStyle are applied to all of them. + +* File system walk: --recursive, -r process directories recursively --follow follow all symlinks when processing recursively - --confirm confirm each occurrence/replacement; there is a chance - to abort any time without touching the file - --stdin read pattern from stdin (to avoid the shell's confusing - quoting rules) - --word, -w the match should have word boundaries (buggy for pegs!) - --ignoreCase, -i be case insensitive - --ignoreStyle, -y be style insensitive - --threads:N, -j:N speed up search by N additional workers (threads) --ext:EX1|EX2|... only search the files with the given extension(s), empty one ("--ext") means files with missing extension --noExt:EX1|... exclude files having given extension(s), use empty one to skip files with no extension (like some binary files are) - --includeFile:PAT search only files whose names match the given PATtern - --excludeFile:PAT skip files whose names match the given pattern PAT - --includeDir:PAT search only files with full directory name matching PAT - --excludeDir:PAT skip directories whose names match the given pattern PAT + --includeFile:PAT search only files whose names contain pattern PAT + --excludeFile:PAT skip files whose names contain pattern PAT + --includeDir:PAT search only files with whole directory path containing PAT + --excludeDir:PAT skip directories whose name (not path) contain pattern PAT --if,--ef,--id,--ed abbreviations of 4 options above + --sortTime order files by the last modification time (default: off): + -s[:asc|desc] ascending (recent files go last) or descending + +* Filter file content: --match:PAT select files containing a (not displayed) match of PAT --noMatch:PAT select files not containing any match of PAT --bin:on|off|only process binary files? (detected by \0 in first 1K bytes) + (default: on - binary and text files treated the same way) --text, -t process only text files, the same as --bin:off - --count only print counts of matches for files that matched + +* Represent results: --nocolor output will be given without any colours - --color[:always] force color even if output is redirected + --color[:on] force color even if output is redirected (default: auto) --colorTheme:THEME select color THEME from 'simple' (default), 'bnw' (black and white) ,'ack', or 'gnu' (GNU grep) + --count only print counts of matches for files that matched + --context:N, -c:N print N lines of leading context before every match and + N lines of trailing context after it (default N: 0) --afterContext:N, -a:N print N lines of trailing context after every match --beforeContext:N, -b:N print N lines of leading context before every match - --context:N, -c:N print N lines of leading context before every match and - N lines of trailing context after it - --sortTime order files by the last modification time - - -s[:asc|desc] - ascending (default: recent files go last) or descending --group, -g group matches by file --newLine, -l display every matching line starting from a new line - --cols[:N] limit max width of lines from files by N characters (off) + --cols[:N] limit max displayed columns/width of output lines from + files by N characters, cropping overflows (default: off) --cols:auto, -% calculate columns from terminal width for every line --onlyAscii, -@ use only printable ASCII Latin characters 0x20-0x7E substitutions: 0 -> ^@, 1 -> ^A, ... 0x1F -> ^_, 0x7F -> '7F, ..., 0xFF -> 'FF +* Miscellaneous: + --threads:N, -j:N speed up search by N additional workers (default N: 0) + --stdin read PATTERN from stdin (to avoid the shell's confusing + quoting rules) and, if --replace given, REPLACEMENT --verbose be verbose: list every processed file - --filenames find the pattern in the filenames, not in the contents - of the file --help, -h shows this help --version, -v shows the version """ @@ -104,7 +146,7 @@ type TOption = enum optFind, optReplace, optPeg, optRegex, optRecursive, optConfirm, optStdin, optWord, optIgnoreCase, optIgnoreStyle, optVerbose, optFilenames, - optRex, optFollow, optCount, optLimitChars + optRex, optFollow, optCount, optLimitChars, optPipe TOptions = set[TOption] TConfirmEnum = enum ceAbort, ceYes, ceAll, ceNo, ceNone @@ -115,20 +157,23 @@ type lineBeg: int, lineEnd: int, match: string] outputKind = enum OpenError, Rejected, JustCount, - BlockFirstMatch, BlockNextMatch, BlockEnd, FileContents + BlockFirstMatch, BlockNextMatch, BlockEnd, FileContents, FileName Output = object case kind: outputKind - of OpenError: msg: string - of Rejected: reason: string - of JustCount: matches: int - of BlockFirstMatch, BlockNextMatch: + of OpenError: msg: string # file/directory not found + of Rejected: reason: string # when the file contents do not pass + of JustCount: matches: int # the only output for option --count + of BlockFirstMatch, BlockNextMatch: # the normal case: match itself pre: string match: MatchInfo - of BlockEnd: + of BlockEnd: # block ending right after prev. match blockEnding: string - firstLine: int # = last lineNo of last match - of FileContents: + firstLine: int + # == last lineN of last match + of FileContents: # yielded for --replace only buffer: string + of FileName: # yielded for --filenames when no + name: string # PATTERN was provided Trequest = (int, string) FileResult = seq[Output] Tresult = tuple[finished: bool, fileNo: int, @@ -146,10 +191,11 @@ type includeDir : seq[Pat] excludeDir : seq[Pat] SearchOpt = tuple # used for searching inside a file - pattern: string - checkMatch: string - checkNoMatch: string - checkBin: Bin + patternSet: bool # to distinguish uninitialized 'pattern' and empty one + pattern: string # main PATTERN + checkMatch: string # --match + checkNoMatch: string # --nomatch + checkBin: Bin # --bin SearchOptComp[Pat] = tuple # a compiled version of the previous pattern: Pat checkMatch: Pat @@ -164,6 +210,8 @@ type var paths: seq[string] = @[] replacement = "" + replacementSet = false + # to distinguish between uninitialized 'replacement' and empty one options: TOptions = {optRegex} walkOpt {.threadvar.}: WalkOpt searchOpt {.threadvar.}: SearchOpt @@ -335,7 +383,7 @@ const alignment = 6 # selected so that file contents start at 8, i.e. proc blockHeader(filename: string, line: int|string, replMode=false) = if replMode: writeArrow(" ->\n") - elif newLine and optFilenames notin options: + elif newLine and optFilenames notin options and optPipe notin options: if oneline: printBlockFile(filename) printBlockLineN(":" & $line & ":") @@ -360,7 +408,7 @@ proc lineHeader(filename: string, line: int|string, isMatch: bool, let lineSym = if isMatch: $line & ":" else: $line & " " - if not newLine and optFilenames notin options: + if not newLine and optFilenames notin options and optPipe notin options: if oneline: printFile(filename) printLineN(":" & lineSym, isMatch) @@ -491,11 +539,6 @@ proc printCropped(s: string, curCol: var Column, fromLeft: bool, printContents(s, isMatch) else: printExpanded(s, curCol, isMatch, limitChar) - elif optFilenames in options: - printExpanded(s, curCol, isMatch, limitChar - eL) - if curCol.terminal == limitChar - eL: - printBold ellipsis - curCol.terminal += eL else: # limit columns, expand Tabs is also forced var charsAllowed = limitChar - curCol.terminal if fromLeft and charsAllowed < eL: @@ -671,7 +714,7 @@ proc replace1match(filename: string, buf: string, mi: MatchInfo, i: int, of ceNone: gVar.reallyReplace = false options.excl(optConfirm) - else: + elif optPipe notin options: printReplacement(filename, buf, mi, r, showRepl=gVar.reallyReplace, i, newBuf, curLine) if gVar.reallyReplace: @@ -687,7 +730,7 @@ template updateCounters(output: Output) = of BlockFirstMatch, BlockNextMatch: inc(gVar.matches) of JustCount: inc(gVar.matches, output.matches) of OpenError: inc(gVar.errors) - of Rejected, BlockEnd, FileContents: discard + of Rejected, BlockEnd, FileContents, FileName: discard proc printInfo(filename:string, output: Output) = case output.kind @@ -698,12 +741,16 @@ proc printInfo(filename:string, output: Output) = echo "(rejected: ", output.reason, ")" of JustCount: echo " (" & $output.matches & " matches)" - else: discard # impossible + of BlockFirstMatch, BlockNextMatch, BlockEnd, FileContents, FileName: + discard proc printOutput(filename: string, output: Output, curCol: var Column) = case output.kind of OpenError, Rejected, JustCount: printInfo(filename, output) of FileContents: discard # impossible + of FileName: + printCropped(output.name, curCol, fromLeft=false, limitCharUsr) + newLn(curCol) of BlockFirstMatch: printSubLinesBefore(filename, output.pre, output.match.lineBeg, curCol, reserveChars(output.match)) @@ -717,8 +764,7 @@ proc printOutput(filename: string, output: Output, curCol: var Column) = if linesAfter + linesBefore >= 2 and not newLine and optFilenames notin options: stdout.write("\n") -iterator searchFile(pattern: Pattern; filename: string; - buffer: string): Output = +iterator searchFile(pattern: Pattern; buffer: string): Output = var prevMi, curMi: MatchInfo prevMi.lineEnd = 1 var i = 0 @@ -851,6 +897,8 @@ iterator processFile(searchOptC: SearchOptComp[Pattern], filename: string, var error = false if optFilenames in options: buffer = filename + elif optPipe in options: + buffer = stdin.readAll() else: try: buffer = system.readFile(filename) @@ -882,10 +930,12 @@ iterator processFile(searchOptC: SearchOptComp[Pattern], filename: string, if reject: yield Output(kind: Rejected, reason: move(reason)) + elif optFilenames in options and searchOpt.pattern == "": + yield Output(kind: FileName, name: move(buffer)) else: var found = false var cnt = 0 - for output in searchFile(searchOptC.pattern, filename, buffer): + for output in searchFile(searchOptC.pattern, buffer): found = true if optCount notin options: yield output @@ -913,17 +963,17 @@ proc hasRightFileName(path: string, walkOptC: WalkOptComp[Pattern]): bool = if walkOptC.includeFile.len != 0: var matched = false for pat in walkOptC.includeFile: - if filename.match(pat): + if filename.contains(pat): matched = true break if not matched: return false for pat in walkOptC.excludeFile: - if filename.match(pat): return false + if filename.contains(pat): return false let dirname = path.parentDir if walkOptC.includeDir.len != 0: var matched = false for pat in walkOptC.includeDir: - if dirname.match(pat): + if dirname.contains(pat): matched = true break if not matched: return false @@ -932,7 +982,7 @@ proc hasRightFileName(path: string, walkOptC: WalkOptComp[Pattern]): bool = proc hasRightDirectory(path: string, walkOptC: WalkOptComp[Pattern]): bool = let dirname = path.lastPathPart for pat in walkOptC.excludeDir: - if dirname.match(pat): return false + if dirname.contains(pat): return false result = true iterator walkDirBasic(dir: string, walkOptC: WalkOptComp[Pattern]): string = @@ -1006,7 +1056,7 @@ proc replaceMatches(pattern: Pattern; filename: string, buffer: string, if replace1match(filename, buffer, curMi, i, r, newBuf, lineRepl): changed = true i = curMi.last + 1 - if changed: + if changed and optPipe notin options: newBuf.add(substr(buffer, i)) # finalize new buffer after last match var f: File if open(f, filename, fmWrite): @@ -1015,6 +1065,9 @@ proc replaceMatches(pattern: Pattern; filename: string, buffer: string, else: printError "cannot open file for overwriting: " & filename inc(gVar.errors) + elif optPipe in options: # always print new buffer to stdout in pipe mode + newBuf.add(substr(buffer, i)) # finalize new buffer after last match + stdout.write(newBuf) template processFileResult(pattern: Pattern; filename: string, fileResult: untyped) = @@ -1047,7 +1100,7 @@ template processFileResult(pattern: Pattern; filename: string, for output in fileResult: updateCounters(output) case output.kind - of Rejected, OpenError, JustCount: printInfo(filename, output) + of Rejected, OpenError, JustCount, FileName: printInfo(filename, output) of BlockFirstMatch, BlockNextMatch, BlockEnd: matches.add(output) of FileContents: buffer = output.buffer @@ -1059,6 +1112,10 @@ proc run1Thread() = compile1Pattern(searchOpt.pattern, searchOptC.pattern) compile1Pattern(searchOpt.checkMatch, searchOptC.checkMatch) compile1Pattern(searchOpt.checkNoMatch, searchOptC.checkNoMatch) + if optPipe in options: + processFileResult(searchOptC.pattern, "-", + processFile(searchOptC, "-", + yieldContents=optReplace in options)) for (err, filename) in walkRec(paths): if err != "": inc(gVar.errors) @@ -1076,7 +1133,7 @@ proc run1Thread() = # | Main thread |----------------->| pathProducer | # ------------------ ------------------- # ^ | | -# resultsChan | | | searchRequestsChan +# resultsChan | walking errors, | | searchRequestsChan # | number of files | -----+----- # ----+--------------------------- | | # | | (when walking finished) |a path |a path to file @@ -1186,10 +1243,12 @@ for kind, key, val in getopt(): of cmdArgument: if options.contains(optStdin): paths.add(key) - elif searchOpt.pattern.len == 0: + elif not searchOpt.patternSet: searchOpt.pattern = key - elif options.contains(optReplace) and replacement.len == 0: + searchOpt.patternSet = true + elif options.contains(optReplace) and not replacementSet: replacement = key + replacementSet = true else: paths.add(key) of cmdLongOption, cmdShortOption: @@ -1225,7 +1284,7 @@ for kind, key, val in getopt(): of "includefile", "include-file", "if": walkOpt.includeFile.add val of "excludefile", "exclude-file", "ef": walkOpt.excludeFile.add val of "match": searchOpt.checkMatch = val - of "nomatch", "notmatch", "not-match", "no-match": + of "nomatch": searchOpt.checkNoMatch = val of "bin": case val @@ -1236,18 +1295,21 @@ for kind, key, val in getopt(): of "text", "t": searchOpt.checkBin = biOff of "count": incl(options, optCount) of "sorttime", "sort-time", "s": - sortTime = true case normalize(val) - of "": discard - of "asc", "ascending": sortTimeOrder = SortOrder.Ascending - of "desc", "descending": sortTimeOrder = SortOrder.Descending + of "off": sortTime = false + of "", "on", "asc", "ascending": + sortTime = true + sortTimeOrder = SortOrder.Ascending + of "desc", "descending": + sortTime = true + sortTimeOrder = SortOrder.Descending else: reportError("invalid value '" & val & "' for --sortTime") of "nocolor", "no-color": useWriteStyled = false of "color": case val of "auto": discard - of "never", "false": useWriteStyled = false - of "", "always", "true": useWriteStyled = true + of "off", "never", "false": useWriteStyled = false + of "", "on", "always", "true": useWriteStyled = true else: reportError("invalid value '" & val & "' for --color") of "colortheme", "color-theme": colortheme = normalize(val) @@ -1290,6 +1352,7 @@ for kind, key, val in getopt(): expandTabs = false of "help", "h": writeHelp() of "version", "v": writeVersion() + of "": incl(options, optPipe) else: reportError("unrecognized option '" & key & "'") of cmdEnd: assert(false) # cannot happen @@ -1298,28 +1361,42 @@ checkOptions({optCount, optReplace}, "count", "replace") checkOptions({optPeg, optRegex}, "peg", "re") checkOptions({optIgnoreCase, optIgnoreStyle}, "ignore_case", "ignore_style") checkOptions({optFilenames, optReplace}, "filenames", "replace") +checkOptions({optPipe, optStdin}, "-", "stdin") +checkOptions({optPipe, optFilenames}, "-", "filenames") +checkOptions({optPipe, optConfirm}, "-", "confirm") +checkOptions({optPipe, optRecursive}, "-", "recursive") linesBefore = max(linesBefore, linesContext) linesAfter = max(linesAfter, linesContext) +if optPipe in options and paths.len != 0: + reportError("both - and paths are specified") + if optStdin in options: searchOpt.pattern = ask("pattern [ENTER to exit]: ") if searchOpt.pattern.len == 0: quit(0) if optReplace in options: replacement = ask("replacement [supports $1, $# notations]: ") -if searchOpt.pattern.len == 0: +if optReplace in options and not replacementSet: + reportError("provide REPLACEMENT as second argument (use \"\" for empty one)") +if optReplace in options and paths.len == 0 and optPipe notin options: + reportError("provide paths for replacement explicitly (use . for current directory)") + +if searchOpt.pattern == "" and optFilenames notin options: reportError("empty pattern was given") else: - if paths.len == 0: + if paths.len == 0 and optPipe notin options: paths.add(".") - if nWorkers == 0: + if optPipe in options or nWorkers == 0: run1Thread() else: runMultiThread() if gVar.errors != 0: printError $gVar.errors & " errors" - printBold($gVar.matches & " matches") - stdout.write("\n") + if searchOpt.pattern != "": + # PATTERN allowed to be empty if --filenames is given + printBold($gVar.matches & " matches") + stdout.write("\n") if gVar.errors != 0: quit(1) From 9ace3c17f7303b257b582e91ae3f074e190cd060 Mon Sep 17 00:00:00 2001 From: Andrey Makarov Date: Sun, 8 Nov 2020 16:19:56 +0300 Subject: [PATCH 20/20] address stylistic review & add limitations --- tools/nimgrep.nim | 124 ++++++++++++++++++++++++++-------------------- 1 file changed, 69 insertions(+), 55 deletions(-) diff --git a/tools/nimgrep.nim b/tools/nimgrep.nim index 8cc97dd668ac4..1e563435c2989 100644 --- a/tools/nimgrep.nim +++ b/tools/nimgrep.nim @@ -34,12 +34,13 @@ Positional arguments, from left to right: minus '-' (pipe) or not specified (empty). Note for the empty case: when no FILE/DIRECTORY/- is specified nimgrep DOES NOT read the pipe, but searches files in the current dir instead! - - read buffer once from stdin: pipe or terminal input; - in --replace mode the result is directed to stdout. - (empty) current directory '.' is assumed + - read buffer once from stdin: pipe or terminal input; + in --replace mode the result is directed to stdout; + it's not compatible with --stdin, --filenames, --confirm + (empty) current directory '.' is assumed (not with --replace) For any given DIRECTORY nimgrep searches only its immediate files without traversing sub-directories unless --recursive is specified. - In replacement mode all 3 positional arguments are required to avoid damage. + In replacement mode all 3 positional arguments are required to avoid damaging. Options: * Mode of operation: @@ -88,7 +89,7 @@ Options: --text, -t process only text files, the same as --bin:off * Represent results: - --nocolor output will be given without any colours + --nocolor output will be given without any colors --color[:on] force color even if output is redirected (default: auto) --colorTheme:THEME select color THEME from 'simple' (default), 'bnw' (black and white) ,'ack', or 'gnu' (GNU grep) @@ -104,11 +105,11 @@ Options: --cols[:N] limit max displayed columns/width of output lines from files by N characters, cropping overflows (default: off) --cols:auto, -% calculate columns from terminal width for every line - --onlyAscii, -@ use only printable ASCII Latin characters 0x20-0x7E + --onlyAscii, -@ display only printable ASCII Latin characters 0x20-0x7E substitutions: 0 -> ^@, 1 -> ^A, ... 0x1F -> ^_, 0x7F -> '7F, ..., 0xFF -> 'FF * Miscellaneous: - --threads:N, -j:N speed up search by N additional workers (default N: 0) + --threads:N, -j:N speed up search by N additional workers (default: 0, off) --stdin read PATTERN from stdin (to avoid the shell's confusing quoting rules) and, if --replace given, REPLACEMENT --verbose be verbose: list every processed file @@ -116,6 +117,19 @@ Options: --version, -v shows the version """ +# Limitations / ideas / TODO: +# * No unicode support with --cols +# * Consider making --onlyAscii default, since dumping binary data has +# stability and security repercussions +# * Mode - reads entire buffer by whole from stdin, which is bad for streaming. +# To implement line-by-line reading after adding option to turn off +# multiline matches +# * Add some form of file pre-processing, e.g. feed binary files to utility +# `strings` and then do the search inside these strings +# * Add --showCol option to also show column (of match), not just line; it +# makes it easier when jump to line+col in an editor or on terminal + + # Search results for a file are modelled by these levels: # FileResult -> Block -> Output/Chunk -> SubLine # @@ -123,7 +137,7 @@ Options: # # 2. Chunk, which is a sequence of SubLine, represents a match and its # surrounding context. -# Output is a Chunk or one of auxiliary results like an OpenError. +# Output is a Chunk or one of auxiliary results like an openError. # # 3. Block, which is a sequence of Chunks, is not present as a separate type. # It will just be separated from another Block by newline when there is @@ -156,23 +170,23 @@ type MatchInfo = tuple[first: int, last: int; lineBeg: int, lineEnd: int, match: string] outputKind = enum - OpenError, Rejected, JustCount, - BlockFirstMatch, BlockNextMatch, BlockEnd, FileContents, FileName + openError, rejected, justCount, + blockFirstMatch, blockNextMatch, blockEnd, fileContents, outputFileName Output = object case kind: outputKind - of OpenError: msg: string # file/directory not found - of Rejected: reason: string # when the file contents do not pass - of JustCount: matches: int # the only output for option --count - of BlockFirstMatch, BlockNextMatch: # the normal case: match itself + of openError: msg: string # file/directory not found + of rejected: reason: string # when the file contents do not pass + of justCount: matches: int # the only output for option --count + of blockFirstMatch, blockNextMatch: # the normal case: match itself pre: string match: MatchInfo - of BlockEnd: # block ending right after prev. match + of blockEnd: # block ending right after prev. match blockEnding: string firstLine: int # == last lineN of last match - of FileContents: # yielded for --replace only + of fileContents: # yielded for --replace only buffer: string - of FileName: # yielded for --filenames when no + of outputFileName: # yielded for --filenames when no name: string # PATTERN was provided Trequest = (int, string) FileResult = seq[Output] @@ -669,10 +683,9 @@ proc printBetweenMatches(filename: string, betweenMatches: string, if not isLastLine: newLn(curCol) -proc printReplacement(filename: string, buf: string, mi: MatchInfo, +proc printReplacement(fileName: string, buf: string, mi: MatchInfo, repl: string, showRepl: bool, curPos: int, newBuf: string, curLine: int) = - let filename = fileName var curCol: Column printSubLinesBefore(fileName, getSubLinesBefore(buf, mi), mi.lineBeg, curCol, reserveChars(mi)) @@ -727,39 +740,39 @@ proc replace1match(filename: string, buf: string, mi: MatchInfo, i: int, template updateCounters(output: Output) = case output.kind - of BlockFirstMatch, BlockNextMatch: inc(gVar.matches) - of JustCount: inc(gVar.matches, output.matches) - of OpenError: inc(gVar.errors) - of Rejected, BlockEnd, FileContents, FileName: discard + of blockFirstMatch, blockNextMatch: inc(gVar.matches) + of justCount: inc(gVar.matches, output.matches) + of openError: inc(gVar.errors) + of rejected, blockEnd, fileContents, outputFileName: discard proc printInfo(filename:string, output: Output) = case output.kind - of OpenError: + of openError: printError("can not open path " & filename & " " & output.msg) - of Rejected: + of rejected: if optVerbose in options: echo "(rejected: ", output.reason, ")" - of JustCount: + of justCount: echo " (" & $output.matches & " matches)" - of BlockFirstMatch, BlockNextMatch, BlockEnd, FileContents, FileName: + of blockFirstMatch, blockNextMatch, blockEnd, fileContents, outputFileName: discard proc printOutput(filename: string, output: Output, curCol: var Column) = case output.kind - of OpenError, Rejected, JustCount: printInfo(filename, output) - of FileContents: discard # impossible - of FileName: + of openError, rejected, justCount: printInfo(filename, output) + of fileContents: discard # impossible + of outputFileName: printCropped(output.name, curCol, fromLeft=false, limitCharUsr) newLn(curCol) - of BlockFirstMatch: + of blockFirstMatch: printSubLinesBefore(filename, output.pre, output.match.lineBeg, curCol, reserveChars(output.match)) printMatch(filename, output.match, curCol) - of BlockNextMatch: + of blockNextMatch: printBetweenMatches(filename, output.pre, output.match.lineBeg, curCol, reserveChars(output.match)) printMatch(filename, output.match, curCol) - of BlockEnd: + of blockEnd: printSubLinesAfter(filename, output.blockEnding, output.firstLine, curCol) if linesAfter + linesBefore >= 2 and not newLine and optFilenames notin options: stdout.write("\n") @@ -774,7 +787,7 @@ iterator searchFile(pattern: Pattern; buffer: string): Output = let t = findBounds(buffer, pattern, matches, i) if t.first < 0 or t.last < t.first: if prevMi.lineBeg != 0: # finalize last match - yield Output(kind: BlockEnd, + yield Output(kind: blockEnd, blockEnding: getSubLinesAfter(buffer, prevMi), firstLine: prevMi.lineEnd) break @@ -788,20 +801,20 @@ iterator searchFile(pattern: Pattern; buffer: string): Output = if prevMi.lineBeg == 0: # no prev. match, so no prev. block to finalize let pre = getSubLinesBefore(buffer, curMi) prevMi = curMi - yield Output(kind: BlockFirstMatch, pre: pre, match: move(curMi)) + yield Output(kind: blockFirstMatch, pre: pre, match: move(curMi)) else: let nLinesBetween = curMi.lineBeg - prevMi.lineEnd if nLinesBetween <= linesAfter + linesBefore + 1: # print as 1 block let pre = getSubLinesBetween(buffer, prevMi, curMi) prevMi = curMi - yield Output(kind: BlockNextMatch, pre: pre, match: move(curMi)) + yield Output(kind: blockNextMatch, pre: pre, match: move(curMi)) else: # finalize previous block and then print next block let after = getSubLinesAfter(buffer, prevMi) - yield Output(kind: BlockEnd, blockEnding: after, + yield Output(kind: blockEnd, blockEnding: after, firstLine: prevMi.lineEnd) let pre = getSubLinesBefore(buffer, curMi) prevMi = curMi - yield Output(kind: BlockFirstMatch, + yield Output(kind: blockFirstMatch, pre: pre, match: move(curMi)) i = t.last+1 @@ -903,7 +916,7 @@ iterator processFile(searchOptC: SearchOptComp[Pattern], filename: string, try: buffer = system.readFile(filename) except IOError as e: - yield Output(kind: OpenError, msg: "readFile failed") + yield Output(kind: openError, msg: "readFile failed") error = true if not error: @@ -929,9 +942,9 @@ iterator processFile(searchOptC: SearchOptComp[Pattern], filename: string, reason = "contains a forbidden match" if reject: - yield Output(kind: Rejected, reason: move(reason)) + yield Output(kind: rejected, reason: move(reason)) elif optFilenames in options and searchOpt.pattern == "": - yield Output(kind: FileName, name: move(buffer)) + yield Output(kind: outputFileName, name: move(buffer)) else: var found = false var cnt = 0 @@ -940,12 +953,12 @@ iterator processFile(searchOptC: SearchOptComp[Pattern], filename: string, if optCount notin options: yield output else: - if output.kind in {BlockFirstMatch, BlockNextMatch}: + if output.kind in {blockFirstMatch, blockNextMatch}: inc(cnt) if optCount in options and cnt > 0: - yield Output(kind: JustCount, matches: cnt) + yield Output(kind: justCount, matches: cnt) if yieldContents and found and optCount notin options: - yield Output(kind: FileContents, buffer: move(buffer)) + yield Output(kind: fileContents, buffer: move(buffer)) proc hasRightFileName(path: string, walkOptC: WalkOptComp[Pattern]): bool = @@ -1037,10 +1050,10 @@ iterator walkRec(paths: seq[string]): (string, string) = if dirExists(path): for p in walkDirBasic(path, walkOptC): yield ("", p) - elif fileExists(path): - yield ("", path) else: - yield ("Error: no such file or directory: ", path) + yield ( + if fileExists(path): ("", path) + else: ("Error: no such file or directory: ", path)) proc replaceMatches(pattern: Pattern; filename: string, buffer: string, fileResult: FileResult) = @@ -1050,7 +1063,7 @@ proc replaceMatches(pattern: Pattern; filename: string, buffer: string, var lineRepl = 1 var i = 0 for output in fileResult: - if output.kind in {BlockFirstMatch, BlockNextMatch}: + if output.kind in {blockFirstMatch, blockNextMatch}: let curMi = output.match let r = replacef(curMi.match, pattern, replacement) if replace1match(filename, buffer, curMi, i, r, newBuf, lineRepl): @@ -1086,12 +1099,12 @@ template processFileResult(pattern: Pattern; filename: string, for output in fileResult: updateCounters(output) toFlush = true - if output.kind notin {Rejected, OpenError, JustCount} and not oneline: + if output.kind notin {rejected, openError, justCount} and not oneline: showFilename - if output.kind == JustCount and oneline: + if output.kind == justCount and oneline: printFile(filename & ":") printOutput(filename, output, curCol) - if nWorkers == 0 and output.kind in {BlockFirstMatch, BlockNextMatch}: + if nWorkers == 0 and output.kind in {blockFirstMatch, blockNextMatch}: stdout.flushFile() # flush immediately in single thread mode if toFlush: stdout.flushFile() else: @@ -1100,10 +1113,11 @@ template processFileResult(pattern: Pattern; filename: string, for output in fileResult: updateCounters(output) case output.kind - of Rejected, OpenError, JustCount, FileName: printInfo(filename, output) - of BlockFirstMatch, BlockNextMatch, BlockEnd: + of rejected, openError, justCount, outputFileName: + printInfo(filename, output) + of blockFirstMatch, blockNextMatch, blockEnd: matches.add(output) - of FileContents: buffer = output.buffer + of fileContents: buffer = output.buffer if matches.len > 0: replaceMatches(pattern, filename, buffer, matches) @@ -1173,7 +1187,7 @@ proc pathProducer(arg: (seq[string], WalkOpt)) {.thread.} = searchRequestsChan.send((nextFileN,filename)) else: resultsChan.send((false, nextFileN, - filename, @[Output(kind: OpenError, msg: err)])) + filename, @[Output(kind: openError, msg: err)])) nextFileN += 1 resultsChan.send((true, nextFileN, "", @[])) # pass total number of files