File tree Expand file tree Collapse file tree 2 files changed +52
-0
lines changed Expand file tree Collapse file tree 2 files changed +52
-0
# Summary
2+
3+ Used to run the llama.cpp OpenAI-compatible server.
4+
5+ ## Usage
6+
```yaml
steps:
  - name: Run llama.cpp server
    uses: neuralmagic/nm-actions/actions/llama-cpp@main
    with:
      port: 8000
      model: "aminkhalafi/Phi-3-mini-4k-instruct-Q4_K_M-GGUF"
      context-size: 2048
```
# Composite action: installs llama.cpp via Homebrew and starts its
# OpenAI-compatible web server (llama-server) in the background.
name: "Run llama.cpp"
description: "Run llama.cpp OpenAI compatible web server"

inputs:
  port:
    description: "The port of running service"
    required: false
    # Quoted: GitHub Actions input values are always strings.
    default: "8080"
  model:
    description: "The Hugging Face model"
    required: false
    default: "aminkhalafi/Phi-3-mini-4k-instruct-Q4_K_M-GGUF"
  context-size:
    description: "The size of input context size (tokens)"
    required: false
    default: "2048"

runs:
  using: "composite"
  steps:
    # NOTE(review): brew-based install assumes a macOS runner (or Linux
    # with Homebrew preinstalled) — confirm against the calling workflows.
    - name: Install llama.cpp
      id: install
      shell: bash
      run: |
        brew install llama.cpp

    # Fix: --hf-repo must receive the model input (was inputs.port, which
    # ignored the model input entirely), and the context flag is
    # --ctx-size (double dash), not -ctx-size.
    # Backgrounded with & so the workflow step returns immediately.
    - name: Start llama.cpp web server
      id: start
      shell: bash
      run: |
        llama-server --hf-repo "${{ inputs.model }}" --ctx-size "${{ inputs.context-size }}" --port "${{ inputs.port }}" &

    # Crude readiness wait; the server has no healthcheck polling here.
    - name: Wait llama server to be started
      id: wait
      shell: bash
      run: |
        sleep 10
You can’t perform that action at this time.
0 commit comments