Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 11 additions & 9 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ const openAsync = promisify(fs.open);
const closeAsync = promisify(fs.close);

const MAX_BYTES: number = 512;
const UTF8_BOUNDARY_RESERVE: number = 3;

// A very basic non-exception raising reader. Read bytes and
// at the end use hasError() to check whether this worked.
Expand Down Expand Up @@ -120,12 +121,12 @@ export async function isBinaryFile(file: string | Buffer, size?: number): Promis

const fileDescriptor = await openAsync(file, 'r');

const allocBuffer = Buffer.alloc(MAX_BYTES);
const allocBuffer = Buffer.alloc(MAX_BYTES + UTF8_BOUNDARY_RESERVE);

// Read the file with no encoding for raw buffer access.
// NB: something is severely wrong with promisify, had to construct my own Promise
return new Promise((fulfill, reject) => {
fs.read(fileDescriptor, allocBuffer, 0, MAX_BYTES, 0, (err, bytesRead, _) => {
fs.read(fileDescriptor, allocBuffer, 0, MAX_BYTES + UTF8_BOUNDARY_RESERVE, 0, (err, bytesRead, _) => {
closeAsync(fileDescriptor);
if (err) {
reject(err);
Expand Down Expand Up @@ -154,9 +155,9 @@ export function isBinaryFileSync(file: string | Buffer, size?: number): boolean

const fileDescriptor = fs.openSync(file, 'r');

const allocBuffer = Buffer.alloc(MAX_BYTES);
const allocBuffer = Buffer.alloc(MAX_BYTES + UTF8_BOUNDARY_RESERVE);

const bytesRead = fs.readSync(fileDescriptor, allocBuffer, 0, MAX_BYTES, 0);
const bytesRead = fs.readSync(fileDescriptor, allocBuffer, 0, MAX_BYTES + UTF8_BOUNDARY_RESERVE, 0);
fs.closeSync(fileDescriptor);

return isBinaryCheck(allocBuffer, bytesRead);
Expand All @@ -175,7 +176,8 @@ function isBinaryCheck(fileBuffer: Buffer, bytesRead: number): boolean {
}

let suspiciousBytes = 0;
const totalBytes = Math.min(bytesRead, MAX_BYTES);
const totalBytes = Math.min(bytesRead, MAX_BYTES + UTF8_BOUNDARY_RESERVE);
const scanBytes = Math.min(totalBytes, MAX_BYTES);

// UTF-8 BOM
if (bytesRead >= 3 && fileBuffer[0] === 0xef && fileBuffer[1] === 0xbb && fileBuffer[2] === 0xbf) {
Expand Down Expand Up @@ -230,7 +232,7 @@ function isBinaryCheck(fileBuffer: Buffer, bytesRead: number): boolean {
return false;
}

for (let i = 0; i < totalBytes; i++) {
for (let i = 0; i < scanBytes; i++) {
if (fileBuffer[i] === 0) {
// NULL byte--it's binary!
return true;
Expand Down Expand Up @@ -264,17 +266,17 @@ function isBinaryCheck(fileBuffer: Buffer, bytesRead: number): boolean {

suspiciousBytes++;
// Read at least 32 bytes before making a decision
if (i >= 32 && (suspiciousBytes * 100) / totalBytes > 10) {
if (i >= 32 && (suspiciousBytes * 100) / (scanBytes) > 10) {
return true;
}
}
}

if ((suspiciousBytes * 100) / totalBytes > 10) {
if ((suspiciousBytes * 100) / (scanBytes) > 10) {
return true;
}

if (suspiciousBytes > 1 && isBinaryProto(fileBuffer, totalBytes)) {
if (suspiciousBytes > 1 && isBinaryProto(fileBuffer, scanBytes)) {
return true;
}

Expand Down
1 change: 1 addition & 0 deletions test/fixtures/508A-4byte.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA😀
1 change: 1 addition & 0 deletions test/fixtures/509A-3byte.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
测
1 change: 1 addition & 0 deletions test/fixtures/509A-4byte.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
😀
1 change: 1 addition & 0 deletions test/fixtures/510A-2byte.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ã
1 change: 1 addition & 0 deletions test/fixtures/510A-3byte.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
测
1 change: 1 addition & 0 deletions test/fixtures/510A-4byte.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
😀
32 changes: 32 additions & 0 deletions test/fixtures/utf8-boundary-truncation_case.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""
测试脚本 - DDD增强网络推理
只保存预测结果,不计算指

作者: Dxxx Dexxx
日期: 2025
"""

import os
import sys
import argparse
import torch
import cv2
import numpy as np
from tqdm import tqdm
from pathlib import Path

# 添加上级目录到路径

from data import DDDEnhancerDataset


def function():
"""
保存预测结果

Args:
pred: 预测结果张量 [1, H, W] 或 [H, W],值在[0, 1]
save_path: 保存路径
original_size: 原始图像尺寸 (height, width),如果提供则调整到此尺寸
"""
pass
42 changes: 42 additions & 0 deletions test/index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -316,3 +316,45 @@ it("should return false on a UTF-8 file with emoji", () => {
const result = isBinaryFileSync(file);
expect(result).toBe(false);
});

// Regression cases for text files whose multi-byte UTF-8 sequence is cut off
// near the end of the sampled window (see each title for the byte offset and
// sequence width). Every fixture is text, so the synchronous detector must
// report `false` for all of them.
//
// Each entry is [test title, fixture file name under FIXTURE_PATH]. Titles are
// kept verbatim so test reports and name filters continue to match.
const utf8BoundaryCases: ReadonlyArray<readonly [string, string]> = [
  ["should return false on UTF-8 file with 4-byte sequence truncated at byte 508", "508A-4byte.txt"],
  ["should return false on UTF-8 file with 3-byte sequence truncated at byte 509", "509A-3byte.txt"],
  ["should return false on UTF-8 file with 4-byte sequence truncated at byte 509", "509A-4byte.txt"],
  ["should return false on UTF-8 file with 2-byte sequence truncated at byte 510", "510A-2byte.txt"],
  ["should return false on UTF-8 file with 3-byte sequence truncated at byte 510", "510A-3byte.txt"],
  ["should return false on UTF-8 file with 4-byte sequence truncated at byte 510", "510A-4byte.txt"],
  [
    "should return false on real-world Python file with UTF-8 at boundary (utf8-boundary-truncation bug case)",
    "utf8-boundary-truncation_case.py",
  ],
];

// Register one test per table row, in table order, so the suite layout is the
// same as writing each `it(...)` out by hand.
for (const [title, fixtureName] of utf8BoundaryCases) {
  it(title, () => {
    const fixturePath = path.join(FIXTURE_PATH, fixtureName);
    expect(isBinaryFileSync(fixturePath)).toBe(false);
  });
}