From 9a8345bcd5cbe636e1460bda92e4f2b88148dcc7 Mon Sep 17 00:00:00 2001 From: Taylor Bockman Date: Mon, 10 Jul 2017 01:12:26 -0700 Subject: [PATCH] Initial Pass at the Virtual Machine --- .gitignore | 3 ++ CMakeLists.txt | 4 +- README.md | 75 ++++++++++++++++++-------- include/cpu.h | 81 +++++++++++++++++++++++----- include/logging.h | 18 +++++++ include/opcodes.h | 32 ----------- src/cpu.cc | 158 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- src/logging.cc | 25 +++++++++ src/uvm.cc | 112 ++++++++++++++++++++++++++++++++++++-- 9 files changed, 432 insertions(+), 76 deletions(-) create mode 100644 include/logging.h delete mode 100644 include/opcodes.h create mode 100644 src/logging.cc diff --git a/.gitignore b/.gitignore index e94c98f..9c6a630 100644 --- a/.gitignore +++ b/.gitignore @@ -41,3 +41,6 @@ cmake_install.cmake install_manifest.txt compile_commands.json CTestTestfile.cmake + +# Binaries lol +bin/* diff --git a/CMakeLists.txt b/CMakeLists.txt index 61e2649..1509904 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,8 +16,8 @@ set (uvm_VERSION_PATCH 0) set (EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin) -file(GLOB uvm "include/*") -file(GLOB uvm "src/*") +file(GLOB uvm_INCLUDE "include/*") +file(GLOB uvm_SRC "src/*") add_executable(${PROJECT_NAME} ${uvm_SRC} ${uvm_INCLUDE}) target_link_libraries( diff --git a/README.md b/README.md index 1d1171e..b6b1fce 100644 --- a/README.md +++ b/README.md @@ -2,10 +2,16 @@ uVM is simply an experiment in developing a working virtual machine with it's own bytecode. +## Contributing + +I am taking any and all contributions! This is a fun project for me and I'd like to see other people throw +their ideas into this. + ## Requirements * CMake version 3.8 or higher * Clang and Clang++ +* Boost (because im lazy) ## Building @@ -15,6 +21,8 @@ uVM is simply an experiment in developing a working virtual machine with it's ow ## Usage +_NOTE_: This is unimplemented - Looking for contributors + `uvm [-v] [-h] [-d ] -f .uc` * `-v`: Enable verbose logging mode @@ -35,37 +43,62 @@ uVM is a _stack machine_. That is, it gets all of it's arguments from the stack, of the computation back to the stack when it's done. There are some exceptions to this. In particular the current version of uvm supports a single variable, -called `0`, that can be written to using `xpop0` where x is the datatype of the thing getting taken off +called `t0`, that can be written to using `xpop0` where x is the datatype of the thing getting taken off the stack. -Additionally `0` can be placed on the top of the stack using `xpush0`. +Additionally the contents of `t0` can be placed on the top of the stack using `xload0`. ## uVM Instruction Set uVM possesses the standard issue instruction set you might expect in a simple VM: -| Instruction | Opcode | Action | -| ------------ | ------ | ------------------------------------------------------------------------------ | -| ipush _X_ | 0 | Pushes _integer_ X onto the stack | -| ipush0 | 1 | Pushes the _integer_ in `0` onto the stack | -| ipop0 | 2 | Pops the top _integer_ of the stack off and into variable 0 | -| icmp | 3 | Compares the top two items on the stack together and returns a boolean result | -| iadd | 4 | Adds the top two _integer_ arguments of the stack together | -| isub | 5 | Subtracts the top two _integer_ arguments of the stack from each other | -| jmp _LABEL_ | 6 | Unconditional jump to _label_ | -| jc _LABEL_ | 7 | Jump if the top of the stack is a 1 | -| halt | 8 | Halts the VM | +| Instruction | Opcode | Action | +| ------------ | ------ | -------------------------------------------------------------------------------------- | +| ipush _X_ | 0 | Pushes _integer_ X onto the stack | +| isave0 | 1 | Takes the top of the stack and stores it in `t0` | +| isave1 | 2 | Takes the top of the stack and stores it in `t1` | +| isave2 | 3 | Takes the top of the stack and stores it in `t2` | +| iload0 | 4 | Puts the value in `t0` onto the top of the stack | +| iload1 | 5 | Puts the value in `t1` onto the top of the stack | +| iload2 | 6 | Puts the value in `t2` onto the top of the stack | +| icmp | 7 | Compares the top two items on the stack together and returns a boolean result | +| iadd | 8 | Adds the top two _integer_ arguments of the stack together | +| isub | 9 | Subtracts the top two _integer_ arguments of the stack from each other | +| jmp _BYTE_ | A | Unconditional jump to _BYTE_ | +| jc _BYTE_ | B | Jump to _BYTE_ if the top of the stack is a 1 | +| halt | C | Halts the VM | +| print | D | Prints the current top of the stack as an integer | +| imul | E | Multiples the top two integers on the stack | +| call | F | TBD | +| idiv | 10 | Takes the top two values on the stack and performs integer division on them | +| irem | 11 | Takes the integer remainder of the division of the top two integer values on the stack | + +Additionally some registers exist to ease computation: + +| Register | Use | +| -------- | ------------------------ | +| t0 | Temporary register 0 | +| t1 | Temporary register 1 | +| t2 | Temporary register 2 | +| cf | Comparison flag register | As I learn more about VM development this instruction set will likely become much more robust. +## uVM Calling Convention + +uVM borrows heavily from past work - so a CDECL convention modified to +work with stack machines (result is store on the top of the stack instead of +EAX) is used. + ## TODO -- [] Tests for good paths for all instructions -- [] Tests for uncompilable code -- [] Tests for maximum stack size reached -- [] Tests to make sure the maximum stack depth is always greater than 0 -- [] Logging out current stack position, etc when verbose mode is enabled -- [] If verbose mode isn't enabled it shows the ascii loading while processing -- [] Come up with a way to allow the user to echo to the screen -- [] It would be cool to eventually write a high level language compiler that compiles down to the uvm +- [ ] Tests for good paths for all instructions +- [ ] Come up with a clever way to support floating point operations +- [ ] Tests for uncompilable code +- [ ] Tests for maximum stack size reached +- [ ] Tests to make sure the maximum stack depth is always greater than 0 +- [ ] Test to make sure that run doesn't run when the byte code array is empty +- [ ] Logging out current stack position, etc when verbose mode is enabled +- [ ] If verbose mode isn't enabled it shows the ascii loading while processing +- [ ] It would be cool to eventually write a high level language compiler that compiles down to the uvm diff --git a/include/cpu.h b/include/cpu.h index e601ccb..06a3823 100644 --- a/include/cpu.h +++ b/include/cpu.h @@ -1,36 +1,91 @@ -// This file is part of UVM. +// This file is part of uVM. // -// UVM is free software: you can redistribute it and/or modify +// uVM is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // -// UVM is distributed in the hope that it will be useful, +// uVM is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License -// along with UVM. If not, see . +// along with uVM. If not, see . // +#include + #ifndef CPU_H_ #define CPU_H_ +// CPU is a wrapper around our representation of a virtual CPU. class CPU { private: - int sp; // Stack pointer - int ip; // Instruction pointer - int fp; // Frame pointer + int sp = -1; // Stack pointer + int ip = 0; // Instruction pointer + int fp; // Frame pointer + std::vector code; // Our code in memory + + int *stack = NULL; - // Stack goes here + // XXX: FIND A C++ TEST FRAMEWORK OMG LOL public: - CPU(); // The CPU should be initialized with the code and stuff - fetch(); - decode(); - execute(); - run(); // Runs the loaded code...this should have something in args + enum opcode { + IPUSH = 0x0, + ISAVE0 = 0x1, + ISAVE1 = 0x2, + ISAVE2 = 0x3, + ILOAD0 = 0x4, + ILOAD1 = 0x5, + ILOAD2 = 0x6, + CMP = 0x7, + IADD = 0x8, + ISUB = 0x9, + JMP = 0xA, + JC = 0xB, + HALT = 0xC, + PRINT = 0xD, + IMUL = 0xE, + CALL = 0xF, // XXX: IMPLEMENT + IDIV = 0x10, + IREM = 0x11, + }; + + // Temporary registers + int t0; + int t1; + int t2; + + // Comparison flag register + int cf; + + const char* opcode_map[18] = { + "IPUSH", + "ISAVE0", + "ISAVE1", + "ISAVE2", + "ILOAD0", + "ILOAD1", + "ILOAD2", + "CMP", + "IADD", + "ISUB", + "JMP", + "JC", + "HALT", + "PRINT", + "IMUL", + "CALL", + "IDIV", + "IREM", + }; + + CPU(unsigned int); + ~CPU(); + bool load(std::vector code); + void run(); }; #endif // CPU_H_ diff --git a/include/logging.h b/include/logging.h new file mode 100644 index 0000000..3e9cc68 --- /dev/null +++ b/include/logging.h @@ -0,0 +1,18 @@ +// This file is part of uVM. +// +// uVM is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// uVM is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with uVM. If not, see . +// + +void LogMem(int, const char[]); +void LogMemWithArg(int, const char[], int); diff --git a/include/opcodes.h b/include/opcodes.h deleted file mode 100644 index 16be48f..0000000 --- a/include/opcodes.h +++ /dev/null @@ -1,32 +0,0 @@ -// This file is part of UVM. -// -// UVM is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// UVM is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with UVM. If not, see . -// - - -#ifndef OPCODES_H_ -#define OPCODES_H_ - -// Instructions - -#define IPUSH 0x0 -#define IPUSH0 0x1 -#define IPOP0 0x2 -#define CMP 0x3 -#define IADD 0x4 -#define ISUB 0x5 -#define JMP 0x6 -#define JC 0x7 - -#endif // OPCODES_H_ diff --git a/src/cpu.cc b/src/cpu.cc index 7ff3f4b..eb06348 100644 --- a/src/cpu.cc +++ b/src/cpu.cc @@ -1,15 +1,165 @@ -// This file is part of UVM. +// This file is part of uVM. // -// UVM is free software: you can redistribute it and/or modify +// uVM is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // -// UVM is distributed in the hope that it will be useful, +// uVM is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License -// along with UVM. If not, see . +// along with uVM. If not, see . // + +#include +#include +#include +#include + +#include "../include/cpu.h" +#include "../include/logging.h" + +CPU::CPU(unsigned int stack_size) { + // If someone says stack_size 0 they are in for a surprise + stack = (int*)malloc(stack_size * sizeof(int)); +} + +CPU::~CPU() { + if(stack != NULL) { + free(stack); + } +} + +// Load loads the code into CPU memory...and always returns true +bool CPU::load(std::vector c) { + code = c; + return true; +} + +// Run is the main loop of the entire CPU. It handles each opcode individually. +// +// At the end of each run the stack pointer and instruction pointer are reset to get the CPU +// ready to load the next program. +void CPU::run() { + while(code[ip] != CPU::opcode::HALT && ip != code.size()) { + switch(code[ip]) { + case CPU::opcode::IPUSH: { + LogMemWithArg(ip, CPU::opcode_map[code[ip]], code[ip + 1]); + stack[++sp] = code[++ip]; + ip++; + break; + } + case CPU::opcode::ISAVE0: { + LogMem(ip, CPU::opcode_map[code[ip]]); + t0 = stack[sp--]; + ip++; + break; + } + case CPU::opcode::ISAVE1: { + LogMem(ip, CPU::opcode_map[code[ip]]); + t1 = stack[sp--]; + ip++; + break; + } + case CPU::opcode::ISAVE2: { + LogMem(ip, CPU::opcode_map[code[ip]]); + t2 = stack[sp--]; + ip++; + break; + } + case CPU::opcode::ILOAD0: { + LogMem(ip, CPU::opcode_map[code[ip]]); + stack[++sp] = t0; + ip++; + break; + } + case CPU::opcode::ILOAD1: { + LogMem(ip, CPU::opcode_map[code[ip]]); + stack[++sp] = t1; + ip++; + break; + } + case CPU::opcode::ILOAD2: { + LogMem(ip, CPU::opcode_map[code[ip]]); + stack[++sp] = t2; + ip++; + break; + } + case CPU::opcode::IADD: { + LogMem(ip, CPU::opcode_map[code[ip]]); + int a = stack[sp--]; + int b = stack[sp--]; + stack[sp] = a + b; + ip++; + break; + } + case CPU::opcode::IDIV: { + LogMem(ip, CPU::opcode_map[code[ip]]); + int a = stack[sp--]; + int b = stack[sp--]; + stack[sp] = a / b; + ip++; + break; + } + case CPU::opcode::IREM: { + LogMem(ip, CPU::opcode_map[code[ip]]); + int a = stack[sp--]; + int b = stack[sp--]; + stack[sp] = a % b; + ip++; + break; + } + case CPU::opcode::CMP: { + LogMem(ip, CPU::opcode_map[code[ip]]); + int a = stack[sp--]; + int b = stack[sp--]; + cf = a == b; + ip++; + break; + } + case CPU::opcode::JC: { + LogMemWithArg(ip, CPU::opcode_map[code[ip]], code[ip + 1]); + if(cf) { + ip = code[ip + 1]; + } else { + ip += 2; + } + break; + } + case CPU::opcode::ISUB: { + LogMem(ip, CPU::opcode_map[code[ip]]); + int a = stack[sp--]; + int b = stack[sp--]; + stack[sp] = a - b; + ip++; + break; + } + case CPU::opcode::IMUL: { + LogMem(ip, CPU::opcode_map[code[ip]]); + int a = stack[sp--]; + int b = stack[sp--]; + stack[sp] = a * b; + ip++; + break; + } + case CPU::opcode::JMP: { + LogMemWithArg(ip, CPU::opcode_map[code[ip]], code[ip + 1]); + ip = code[ip + 1]; + break; + } + case CPU::opcode::PRINT: { + LogMem(ip, CPU::opcode_map[code[ip]]); + printf("%d\n", stack[sp]); + sp--; + ip++; + break; + } + } + } + + sp = -1; + ip = 0; +} diff --git a/src/logging.cc b/src/logging.cc new file mode 100644 index 0000000..45030ed --- /dev/null +++ b/src/logging.cc @@ -0,0 +1,25 @@ +// This file is part of uVM. +// +// uVM is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// uVM is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with uVM. If not, see . +// + +#include + +void LogMem(int addr, const char opcode[]) { + printf("[%.8d]\t%s\n", addr, opcode); +} + +void LogMemWithArg(int addr, const char opcode[], int arg) { + printf("[%.8d]\t%s\t%x\n", addr, opcode, arg); +} diff --git a/src/uvm.cc b/src/uvm.cc index cc7154b..5f88d9b 100644 --- a/src/uvm.cc +++ b/src/uvm.cc @@ -1,16 +1,120 @@ -// This file is part of UVM. +// This file is part of uVM. // -// UVM is free software: you can redistribute it and/or modify +// uVM is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // -// UVM is distributed in the hope that it will be useful, +// uVM is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License -// along with UVM. If not, see . +// along with uVM. If not, see . // // + +#include + +#include "../include/cpu.h" + +int main(int argc, char* argv[]) { + // Allocate an 800 word stack size to the CPU on initialization + CPU c(800); + + // XXX: + // Until I find a unit testing framework these vectors of bytecode will + // serve as (terrible) tests. + + // Answer: 3 + std::vector code_iadd { + CPU::opcode::IPUSH, 0x1, + CPU::opcode::IPUSH, 0x2, + CPU::opcode::IADD, + CPU::opcode::PRINT, + CPU::opcode::HALT + }; + + // Answer: 1 + std::vector code_isub { + CPU::opcode::IPUSH, 0x1, + CPU::opcode::IPUSH, 0x2, + CPU::opcode::ISUB, + CPU::opcode::PRINT, + CPU::opcode::HALT + }; + + // Answer: 16 + std::vector code_imul { + CPU::opcode::IPUSH, 0x4, + CPU::opcode::IPUSH, 0x4, + CPU::opcode::IMUL, + CPU::opcode::PRINT, + CPU::opcode::HALT, + }; + + // Answer: 1 + std::vector code_idiv { + CPU::opcode::IPUSH, 0x4, + CPU::opcode::IPUSH, 0x4, + CPU::opcode::IDIV, + CPU::opcode::PRINT, + CPU::opcode::HALT, + }; + + + // Answer: 3 % 4 = 3 (0 remainder 3) + std::vector code_irem { + CPU::opcode::IPUSH, 0x4, + CPU::opcode::IPUSH, 0x3, + CPU::opcode::IREM, + CPU::opcode::PRINT, + CPU::opcode::HALT, + }; + + // Answer: 8 + // This tests unconditional jumping. If the JMP instruction fails the code will fail to + // answer correctly. + std::vector code_jmp { + CPU::opcode::IPUSH, 0x4, + CPU::opcode::IPUSH, 0x4, + CPU::opcode::JMP, 0x7, // JMP to 0x7 - the 8th line of code in memory + CPU::opcode::ISUB, + CPU::opcode::IADD, + CPU::opcode::PRINT, + CPU::opcode::HALT, + }; + + // Answer: 10 + // This tests the use of conditional jumps to implement a simple while loop. + std::vector code_jc { + CPU::opcode::IPUSH, 0xA, // Limit + CPU::opcode::ISAVE1, // Store the limit + CPU::opcode::IPUSH, 0x0, // Counter + CPU::opcode::ISAVE2, // Store the counter + CPU::opcode::IPUSH, 0x0, // Accumulator (for our print) + CPU::opcode::ISAVE0, // Store the accumulator + CPU::opcode::ILOAD1, // Load the limit + CPU::opcode::ILOAD2, // Load the counter + CPU::opcode::CMP, // Compare limit to counter + CPU::opcode::JC, 0x1A, // Jump to PRINT if true + CPU::opcode::ILOAD0, // Load the accumulator + CPU::opcode::IPUSH, 0x1, // Push 1 onto the stack to be added to the accumulator + CPU::opcode::IADD, // Add 1 to the accumulator + CPU::opcode::ISAVE0, // Take the top of the stack, store it in t0 and pop it off + CPU::opcode::ILOAD2, // Reload the counter + CPU::opcode::IPUSH, 0x1, // Push 1 onto the stack to be added to the counter + CPU::opcode::IADD, // Add one to the accumulator + CPU::opcode::ISAVE2, // Store the counter + CPU::opcode::JMP, 0x9, // Jump to the CMP statement + CPU::opcode::ILOAD0, // Load the accumulator for PRINT + CPU::opcode::PRINT, + CPU::opcode::HALT, + }; + + c.load(code_jc); + c.run(); + + return 0; +}