
Commit a46d851

Fix(jupyter): use non-blocking stdin check to prevent hanging (#380)

* Implement non-blocking stdin check to prevent hanging
* Add Jupyter notebook tests and run them in CI
* Add more Jupyter notebook tests
* Generate temporary CSV for testing

1 parent f5915d2 commit a46d851
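
The hang this commit fixes surfaced when chdb ran inside a Jupyter kernel, where fd 0 is not an interactive TTY (typically an open pipe that never delivers data), so a blocking stdin probe waits forever. A hypothetical minimal reproduction, assuming chdb is installed:

    # In a Jupyter cell: stdin never delivers data here, so prior to this
    # commit the client's stdin-not-empty check could block indefinitely.
    from chdb import session

    chs = session.Session()
    print(chs.query("SELECT 1"))  # with the fix this returns instead of hanging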

File tree

6 files changed: +220 −4 lines changed

.github/workflows/build_linux_arm64_wheels-gh.yml

Lines changed: 10 additions & 1 deletion

@@ -117,7 +117,7 @@ jobs:
           echo "Installing dependencies for Python $version"
           pyenv shell $version
           python -m pip install --upgrade pip
-          python -m pip install setuptools tox pandas pyarrow twine psutil deltalake wheel
+          python -m pip install setuptools tox pandas pyarrow twine psutil deltalake wheel jupyter nbconvert
           pyenv shell --unset
         done
     - name: Upgrade Rust toolchain
@@ -281,6 +281,15 @@ jobs:
           pyenv shell --unset
         done
       continue-on-error: false
+    - name: Run notebook tests
+      run: |
+        export PATH="$HOME/.pyenv/bin:$PATH"
+        eval "$(pyenv init -)"
+        pyenv shell 3.8
+        python -m pip install dist/*.whl --force-reinstall
+        jupyter nbconvert --to notebook --execute tests/test_data_insertion.ipynb --output test_data_insertion_output.ipynb
+        pyenv shell --unset
+      continue-on-error: false
     - name: Check and upload core files if present
       if: always()
       run: |

.github/workflows/build_linux_x86_wheels.yml

Lines changed: 10 additions & 1 deletion

@@ -117,7 +117,7 @@ jobs:
           echo "Installing dependencies for Python $version"
           pyenv shell $version
           python -m pip install --upgrade pip
-          python -m pip install setuptools tox pandas pyarrow twine psutil deltalake wheel
+          python -m pip install setuptools tox pandas pyarrow twine psutil deltalake wheel jupyter nbconvert
           pyenv shell --unset
         done
     - name: Upgrade Rust toolchain
@@ -280,6 +280,15 @@ jobs:
           pyenv shell --unset
         done
       continue-on-error: false
+    - name: Run notebook tests
+      run: |
+        export PATH="$HOME/.pyenv/bin:$PATH"
+        eval "$(pyenv init -)"
+        pyenv shell 3.8
+        python -m pip install dist/*.whl --force-reinstall
+        jupyter nbconvert --to notebook --execute tests/test_data_insertion.ipynb --output test_data_insertion_output.ipynb
+        pyenv shell --unset
+      continue-on-error: false
     - name: Check and upload core files if present
       if: always()
       run: |

.github/workflows/build_macos_arm64_wheels.yml

Lines changed: 10 additions & 1 deletion

@@ -102,7 +102,7 @@ jobs:
           echo "Installing dependencies for Python $version"
           pyenv shell $version
           python -m pip install --upgrade pip
-          python -m pip install setuptools wheel tox pandas pyarrow twine psutil deltalake wheel>=0.40.0
+          python -m pip install setuptools wheel tox pandas pyarrow twine psutil deltalake wheel>=0.40.0 jupyter nbconvert
           pyenv shell --unset
         done
     - name: Remove /usr/local/bin/python3
@@ -276,6 +276,15 @@ jobs:
           pyenv shell --unset
         done
       continue-on-error: false
+    - name: Run notebook tests
+      run: |
+        export PATH="$HOME/.pyenv/bin:$PATH"
+        eval "$(pyenv init -)"
+        pyenv shell 3.8
+        python -m pip install dist/*.whl --force-reinstall
+        jupyter nbconvert --to notebook --execute tests/test_data_insertion.ipynb --output test_data_insertion_output.ipynb
+        pyenv shell --unset
+      continue-on-error: false
     - name: Check and upload core files if present
       if: always()
       run: |

.github/workflows/build_macos_x86_wheels.yml

Lines changed: 10 additions & 1 deletion

@@ -91,7 +91,7 @@ jobs:
           echo "Installing dependencies for Python $version"
           pyenv shell $version
           python -m pip install --upgrade pip
-          python -m pip install setuptools tox pandas pyarrow twine psutil deltalake wheel>=0.40.0
+          python -m pip install setuptools tox pandas pyarrow twine psutil deltalake wheel>=0.40.0 jupyter nbconvert
           pyenv shell --unset
         done
     - name: Remove /usr/local/bin/python3
@@ -277,6 +277,15 @@ jobs:
           pyenv shell --unset
         done
       continue-on-error: false
+    - name: Run notebook tests
+      run: |
+        export PATH="$HOME/.pyenv/bin:$PATH"
+        eval "$(pyenv init -)"
+        pyenv shell 3.8
+        python -m pip install dist/*.whl --force-reinstall
+        jupyter nbconvert --to notebook --execute tests/test_data_insertion.ipynb --output test_data_insertion_output.ipynb
+        pyenv shell --unset
+      continue-on-error: false
     - name: Check and upload core files if present
       if: always()
      run: |
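
Each of the four wheel-build workflows gains the same "Run notebook tests" step: install the freshly built wheel, then execute the test notebook headlessly with nbconvert. For local debugging, a sketch of the equivalent check through nbconvert's Python API (same paths and kernel as the CI step; assumes nbconvert and nbformat are installed, as the updated dependency lines ensure in CI):

    # Execute tests/test_data_insertion.ipynb the way the CI step does,
    # but through nbconvert's Python API rather than the jupyter CLI.
    import nbformat
    from nbconvert.preprocessors import ExecutePreprocessor

    nb = nbformat.read("tests/test_data_insertion.ipynb", as_version=4)
    ep = ExecutePreprocessor(timeout=600, kernel_name="python3")
    ep.preprocess(nb, {"metadata": {"path": "tests/"}})  # raises CellExecutionError on failure

    nbformat.write(nb, "test_data_insertion_output.ipynb")
    print("notebook executed cleanly")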

src/Client/ClientBase.cpp

Lines changed: 15 additions & 0 deletions

@@ -1777,6 +1777,21 @@ bool isStdinNotEmptyAndValid(ReadBuffer & std_in)
 {
     try
     {
+        // Use non-blocking check for stdin to avoid hanging
+        if (auto * fd_buffer = typeid_cast<ReadBufferFromFileDescriptor *>(&std_in))
+        {
+            int fd = fd_buffer->getFD();
+            if (fd == STDIN_FILENO)
+            {
+                int flags = fcntl(fd, F_GETFL);
+                if (flags != -1)
+                {
+                    fcntl(fd, F_SETFL, flags | O_NONBLOCK);
+                    SCOPE_EXIT({ fcntl(fd, F_SETFL, flags); });
+                    return !std_in.eof();
+                }
+            }
+        }
         return !std_in.eof();
     }
     catch (const Exception & e)
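
The helper previously relied on a blocking eof() probe, which hangs when stdin is an open-but-idle pipe, as under a Jupyter kernel. The fix temporarily sets O_NONBLOCK on the real stdin descriptor so the probe returns (or errors) immediately, and SCOPE_EXIT restores the original flags on every exit path; the surrounding try/catch can then treat a would-block error as unusable stdin. A minimal Python sketch of the same fcntl technique (an illustration only, not the shipped C++; unlike the buffered C++ reader, this probe consumes one byte when data exists):

    import fcntl
    import os
    import sys

    def stdin_has_data() -> bool:
        """Probe stdin without blocking, mirroring the fcntl dance above."""
        fd = sys.stdin.fileno()
        flags = fcntl.fcntl(fd, fcntl.F_GETFL)
        fcntl.fcntl(fd, fcntl.F_SETFL, flags | os.O_NONBLOCK)
        try:
            return bool(os.read(fd, 1))   # b'' means EOF, i.e. empty stdin
        except BlockingIOError:
            return False                  # nothing queued right now; don't wait
        finally:
            fcntl.fcntl(fd, fcntl.F_SETFL, flags)  # restore flags, like SCOPE_EXIT

Restoring the saved flags matters because O_NONBLOCK is a property of the open file description, so leaving it set would change stdin's behavior for the rest of the process.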

tests/test_data_insertion.ipynb

Lines changed: 165 additions & 0 deletions

@@ -0,0 +1,165 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "jupyter": {
+     "is_executing": true
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from chdb import session\n",
+    "import time\n",
+    "import tempfile\n",
+    "import os\n",
+    "\n",
+    "print(\"Connecting to chdb session...\")\n",
+    "chs = session.Session()\n",
+    "\n",
+    "temp_csv = tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False)\n",
+    "temp_csv.write(\"movieId,embedding\\n\") # Header\n",
+    "\n",
+    "# Generate 10,000 rows of test data\n",
+    "for i in range(1, 10001):\n",
+    "    embedding = [float(i + j * 0.1) for j in range(10)]\n",
+    "    embedding_str = '[' + ','.join(map(str, embedding)) + ']'\n",
+    "    temp_csv.write(f'{i},\"{embedding_str}\"\\n')\n",
+    "\n",
+    "temp_csv.close()\n",
+    "csv_path = temp_csv.name\n",
+    "\n",
+    "# Setup database and table\n",
+    "print(\"\\n=== Setup Phase ===\")\n",
+    "chs.query(\"CREATE DATABASE IF NOT EXISTS test ENGINE = Atomic\")\n",
+    "chs.query(\"USE test\")\n",
+    "chs.query('DROP TABLE IF EXISTS embeddings')\n",
+    "\n",
+    "chs.query(\"\"\"CREATE TABLE embeddings (\n",
+    "    movieId UInt32 NOT NULL,\n",
+    "    embedding Array(Float32) NOT NULL\n",
+    "    ) ENGINE = MergeTree()\n",
+    "    ORDER BY movieId\"\"\")\n",
+    "\n",
+    "# Test 1: INFILE insertion (10k rows)\n",
+    "print(\"\\n=== Test 1: INFILE Insertion (10k rows) ===\")\n",
+    "start_time = time.time()\n",
+    "try:\n",
+    "    result = chs.query(f\"INSERT INTO embeddings FROM INFILE '{csv_path}' FORMAT CSV\")\n",
+    "    infile_time = time.time() - start_time\n",
+    "    print(f\"✓ INFILE insertion successful! Time: {infile_time:.3f}s\")\n",
+    "    \n",
+    "    count = chs.query('SELECT COUNT(*) as count FROM embeddings')\n",
+    "    print(f\"Records inserted via INFILE: {count}\")\n",
+    "    \n",
+    "    if count != '0':\n",
+    "        print(\"Sample data from INFILE:\")\n",
+    "        sample = chs.query('SELECT movieId, embedding FROM embeddings ORDER BY movieId LIMIT 3')\n",
+    "        print(sample)\n",
+    "    \n",
+    "except Exception as e:\n",
+    "    print(f\"✗ INFILE insertion failed: {e}\")\n",
+    "    infile_time = 0\n",
+    "\n",
+    "# Test 2: Regular insertion (10 additional rows)\n",
+    "print(\"\\n=== Test 2: Regular VALUES Insertion (10 rows) ===\")\n",
+    "start_time = time.time()\n",
+    "try:\n",
+    "    # Insert 10 additional rows with movieId starting from 20001\n",
+    "    for i in range(20001, 20011):\n",
+    "        embedding = [float(i + j * 0.1) for j in range(10)]\n",
+    "        embedding_str = '[' + ','.join(map(str, embedding)) + ']'\n",
+    "        chs.query(f\"INSERT INTO embeddings VALUES ({i}, {embedding_str})\")\n",
+    "    \n",
+    "    values_time = time.time() - start_time\n",
+    "    print(f\"✓ VALUES insertion successful! Time: {values_time:.3f}s\")\n",
+    "    \n",
+    "except Exception as e:\n",
+    "    print(f\"✗ VALUES insertion failed: {e}\")\n",
+    "    values_time = 0\n",
+    "\n",
+    "# Test 3: Verify total count\n",
+    "print(\"\\n=== Test 3: Count Verification ===\")\n",
+    "try:\n",
+    "    total_count = chs.query('SELECT COUNT(*) as total FROM embeddings')\n",
+    "    print(f\"Total records in embeddings table: {total_count}\")\n",
+    "    \n",
+    "    # Count by range\n",
+    "    infile_count = chs.query('SELECT COUNT(*) as infile_count FROM embeddings WHERE movieId <= 10000')\n",
+    "    values_count = chs.query('SELECT COUNT(*) as values_count FROM embeddings WHERE movieId >= 20001')\n",
+    "    \n",
+    "    print(f\"Records from INFILE (movieId <= 10000): {infile_count}\")\n",
+    "    print(f\"Records from VALUES (movieId >= 20001): {values_count}\")\n",
+    "    \n",
+    "    # Sample from both ranges\n",
+    "    print(\"\\nSample from INFILE data:\")\n",
+    "    print(chs.query('SELECT movieId, embedding FROM embeddings WHERE movieId <= 10000 ORDER BY movieId LIMIT 2'))\n",
+    "    \n",
+    "    print(\"Sample from VALUES data:\")\n",
+    "    print(chs.query('SELECT movieId, embedding FROM embeddings WHERE movieId >= 20001 ORDER BY movieId LIMIT 2'))\n",
+    "    \n",
+    "except Exception as e:\n",
+    "    print(f\"Count verification error: {e}\")\n",
+    "\n",
+    "# Test 4: Direct CSV engine reading\n",
+    "print(\"\\n=== Test 4: CSV Engine Direct Reading ===\")\n",
+    "try:\n",
+    "    print(\"Reading generated CSV file directly using CSV engine:\")\n",
+    "    \n",
+    "    # Method 1: Using file() function\n",
+    "    csv_count1 = chs.query(f\"SELECT COUNT(*) as csv_count FROM file('{csv_path}', 'CSV', 'movieId UInt32, embedding String')\")\n",
+    "    print(f\"CSV file rows (via file() function): {csv_count1}\")\n",
+    "    \n",
+    "    # Method 2: Using CSV table engine directly\n",
+    "    print(\"Sample rows from CSV file:\")\n",
+    "    csv_sample = chs.query(f\"SELECT movieId, embedding FROM file('{csv_path}', 'CSV', 'movieId UInt32, embedding String') ORDER BY movieId LIMIT 3\")\n",
+    "    print(csv_sample)\n",
+    "    \n",
+    "    print(\"Last few rows from CSV file:\")\n",
+    "    csv_tail = chs.query(f\"SELECT movieId, embedding FROM file('{csv_path}', 'CSV', 'movieId UInt32, embedding String') ORDER BY movieId DESC LIMIT 3\")\n",
+    "    print(csv_tail)\n",
+    "    \n",
+    "except Exception as e:\n",
+    "    print(f\"CSV engine reading error: {e}\")\n",
+    "\n",
+    "# Cleanup\n",
+    "print(\"\\n=== Cleanup ===\")\n",
+    "try:\n",
+    "    os.unlink(csv_path)\n",
+    "    print(\"✓ Temporary CSV file cleaned up\")\n",
+    "except Exception as e:\n",
+    "    print(f\"Warning: Could not clean up temporary file: {e}\")\n",
+    "\n",
+    "print(f\"\\n=== Performance Summary ===\")\n",
+    "if infile_time > 0:\n",
+    "    print(f\"INFILE insertion (10k rows): {infile_time:.3f}s\")\n",
+    "if values_time > 0:\n",
+    "    print(f\"VALUES insertion (10 rows): {values_time:.3f}s\")\n",
+    "    if infile_time > 0:\n",
+    "        print(f\"INFILE is {values_time/infile_time*1000:.1f}x faster per 1000 rows\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
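
By default, jupyter nbconvert --execute aborts on the first uncaught exception, which is what gates CI; note, though, that this notebook catches most of its own failures and prints ✓/✗ markers instead of raising. A small follow-up sketch (assuming the executed copy from the CI step exists) that scans the output notebook for both kinds of failure:

    # Inspect the executed notebook produced by the CI step for failures.
    import nbformat

    nb = nbformat.read("test_data_insertion_output.ipynb", as_version=4)
    for cell in nb.cells:
        if cell.cell_type != "code":
            continue
        for out in cell.get("outputs", []):
            # uncaught exceptions are recorded as "error" outputs
            assert out.get("output_type") != "error", (out["ename"], out["evalue"])
            # this notebook catches its own failures and prints a ✗ marker
            assert "✗" not in out.get("text", ""), out["text"]
    print("no failures recorded in executed notebook")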
