Move sehn to sehn-serde

This commit is contained in:
James Dyson 2019-01-20 14:24:19 +00:00
parent 9d4a63a405
commit 05ca67db3b
Signed by: avitex
GPG Key ID: 38C76CBF3749D62C
23 changed files with 1632 additions and 0 deletions

12
sehn-serde/Cargo.toml Normal file
View File

@ -0,0 +1,12 @@
[package]
name = "sehn-serde"
version = "0.1.0"
authors = ["avitex <theavitex@gmail.com>"]
edition = "2018"
[dependencies]
itoa = "0.4"
dtoa = "0.4"
serde = "1.0"
serde_derive = "1.0"
error-chain = "0.12"

View File

@ -0,0 +1,19 @@
use serde_derive::Serialize;
// Example newtype wrapper around an arbitrary serializable value.
// The single field is private; the example constructs it in the same file.
#[derive(Serialize)]
pub struct UserType<T>(T);
// Example struct with one plain string field and one newtype-wrapped field,
// both borrowing for the lifetime `'a`.
#[derive(Serialize)]
pub struct Test<'a> {
// Plain borrowed string field.
foo: &'a str,
// Borrowed string wrapped in the `UserType` newtype.
bar: UserType<&'a str>
}
/// Builds a sample `Test` value, serializes it with `sehn_serde::to_string`
/// and prints the result. Panics if serialization fails.
fn main() {
    let bar = UserType("world");
    let val = Test { foo: "hello", bar };
    let rendered = sehn_serde::to_string(&val).unwrap();
    println!("{}", rendered);
}

View File

@ -0,0 +1,32 @@
use error_chain::{
error_chain,
error_chain_processing,
impl_error_chain_processed,
impl_error_chain_kind,
impl_extract_backtrace
};
// Deserialization error types generated by `error_chain!`
// (`Error`, `ErrorKind`, `Result`, ...).
error_chain! {
errors {
// Input ended while more data was expected.
Eof {
description("unexpected end of input")
}
// The nesting limit was reached.
NestingLimit {
description("nesting limit reached")
}
// A character was found where it is not allowed; carries the character.
Unexpected(c: char) {
description("unexpected character")
display("unexpected character: '{}'", c)
}
// Free-form message; this is what `serde::de::Error::custom` produces.
Message(t: String) {
description(t)
display("internal error: '{}'", t)
}
}
}
impl serde::de::Error for Error {
fn custom<T: std::fmt::Display>(msg: T) -> Self {
ErrorKind::Message(msg.to_string()).into()
}
}

3
sehn-serde/src/de/mod.rs Normal file
View File

@ -0,0 +1,3 @@
mod error;
pub use self::error::*;

14
sehn-serde/src/error.rs Normal file
View File

@ -0,0 +1,14 @@
use error_chain::{
error_chain,
error_chain_processing,
impl_error_chain_processed,
impl_error_chain_kind,
impl_extract_backtrace
};
// Crate-level error type: links the serializer and deserializer error chains
// so either one can be converted into the crate-level `Error`.
error_chain! {
links {
// Errors coming from `crate::ser`.
Serialization(crate::ser::Error, crate::ser::ErrorKind);
// Errors coming from `crate::de`.
Deserialization(crate::de::Error, crate::de::ErrorKind);
}
}

14
sehn-serde/src/lib.rs Normal file
View File

@ -0,0 +1,14 @@
mod error;
pub mod de;
pub mod ser;
pub mod syntax;
pub mod value;
//pub use self::de::{from_str, Deserializer};
pub use self::ser::{to_string, Serializer};
pub use self::error::{Error, ErrorKind, Result};
// Private module holding a marker trait.
// NOTE(review): presumably intended for the "sealed trait" pattern; nothing
// visible in this file uses it yet.
mod private {
pub trait Sealed {}
}

View File

@ -0,0 +1,33 @@
use super::{Format, StandardFormat};
/// Compile-time configuration of the serializer.
///
/// Every option is an associated constant with a default, so an implementor
/// only overrides what it needs.
pub trait Config {
/// Output format used to write bytes and real numbers.
type Format: Format;
/// Tag structs with their name: `struct $name { ..., ... }`
const TAG_STRUCTS: bool = false;
/// Tag newtype structs with their name: `struct $name ( ... )`
const TAG_NEWTYPE_STRUCTS: bool = false;
/// Tag tuple structs with their name: `struct $name ( ..., ... )`
const TAG_TUPLE_STRUCTS: bool = false;
/// Tag tuple variants with the variant name: `enum { $name ( ..., ... ) }`
const TAG_TUPLE_VARIANTS: bool = false;
/// Tag struct variants with the variant name: `enum { $name { ..., ... } }`
const TAG_STRUCT_VARIANTS: bool = false;
/// Write a unit struct (`struct $name`) as its bare name.
/// When `false`, the serializer writes nothing for unit structs.
const UNIT_STRUCT_TO_KIND: bool = false;
/// The initial capacity of the stack used for
/// checking if a value was tagged or not
/// as it ascends nesting.
// NOTE(review): 265 looks like it may be a typo for 256 — confirm.
const INITIAL_TAG_STACK_SIZE: usize = 265;
/// Disable string escaping (DANGEROUS!)
// NOTE(review): the serializer currently writes strings unescaped in both
// cases (escaping is still a TODO there).
const DISABLE_STRING_ESCAPING: bool = false;
}
/// Configuration used by `to_string`: standard (compact) format, with
/// newtype structs tagged and unit structs written as their name.
pub struct DefaultConfig;

impl Config for DefaultConfig {
    type Format = StandardFormat;

    // Overrides are listed in the same order as the trait declares them.
    const TAG_NEWTYPE_STRUCTS: bool = true;
    const UNIT_STRUCT_TO_KIND: bool = true;
}

View File

@ -0,0 +1,25 @@
use error_chain::{
error_chain,
error_chain_processing,
impl_error_chain_processed,
impl_error_chain_kind,
impl_extract_backtrace
};
// Serialization error types generated by `error_chain!`
// (`Error`, `ErrorKind`, `Result`, ...).
error_chain! {
foreign_links {
// I/O failures from the underlying writer convert into `Error`.
Io(::std::io::Error);
}
errors {
// Free-form message; this is what `serde::ser::Error::custom` produces.
Message(t: String) {
description(t)
display("internal error: '{}'", t)
}
}
}
impl serde::ser::Error for Error {
fn custom<T: std::fmt::Display>(msg: T) -> Self {
ErrorKind::Message(msg.to_string()).into()
}
}

View File

@ -0,0 +1,12 @@
use super::{Format, RealFormat, WriteReal};
// Real-number formatter for the deterministic format.
// NOTE(review): `RealFormat` requires `WriteReal<_>` for every primitive and
// no such impls are visible here; the module is also commented out in
// `format/mod.rs`, so this looks like work in progress.
pub struct DeterministicRealFormat;
impl RealFormat for DeterministicRealFormat {}
// Format with no per-write state (`Engine = ()`) that uses
// `DeterministicRealFormat` for numbers.
pub struct DeterministicFormat;
impl Format for DeterministicFormat {
type Engine = ();
type RealFormat = DeterministicRealFormat;
}

View File

@ -0,0 +1,30 @@
use std::io::Write;
use crate::ser::error::Result;
use super::{RealFormat, WriteReal};
// Generates `WriteReal<$prim>` for `FastRealFormat`, one impl per
// `<primitive> with <crate>` pair. The named crate's `write` function does
// the formatting; the byte count it returns is discarded and an I/O error is
// converted into this module's `Error`.
macro_rules! impl_write_real_for_fast_format {
    ($($prim:ident with $xtoa:ident),*) => {
        $(
            impl WriteReal<$prim> for FastRealFormat {
                fn write_real<W: Write>(w: &mut W, r: $prim) -> Result<()> {
                    $xtoa::write(w, r)?;
                    Ok(())
                }
            }
        )*
    }
}
// Real-number formatter backed by the `itoa` (integers) and `dtoa` (floats)
// crates.
pub struct FastRealFormat;
// One `WriteReal` impl per supported primitive.
impl_write_real_for_fast_format!(
i8 with itoa,
i16 with itoa,
i32 with itoa,
i64 with itoa,
u8 with itoa,
u16 with itoa,
u32 with itoa,
u64 with itoa,
f32 with dtoa,
f64 with dtoa
);
// All required `WriteReal` impls exist above, so the marker trait applies.
impl RealFormat for FastRealFormat {}

View File

@ -0,0 +1,40 @@
mod fast_real;
mod pretty;
mod standard;
//mod deterministic;
use super::{Write, Result};
pub use self::fast_real::*;
pub use self::pretty::*;
pub use self::standard::*;
//pub use self::deterministic::*;
/// Writes a real number of primitive type `T` to a writer.
pub trait WriteReal<T> {
/// Writes `i` to `w`, returning an error if the write fails.
fn write_real<W: Write>(w: &mut W, i: T) -> Result<()>;
}
/// Marker trait for a formatter that can write every supported numeric
/// primitive (8- to 64-bit signed and unsigned integers, `f32` and `f64`).
pub trait RealFormat:
WriteReal<i8> + WriteReal<i16> +
WriteReal<i32> + WriteReal<i64> +
WriteReal<u8> + WriteReal<u16> +
WriteReal<u32> + WriteReal<u64> +
WriteReal<f32> + WriteReal<f64> {}
/// Per-serializer state owned by a `Format`.
pub trait FormatEngine: Default {
/// Called with a delimiter byte just before that byte is written.
/// The default implementation ignores it.
fn mark_delim(&mut self, _d: u8) {}
}
// `()` is the engine for formats that keep no state.
impl FormatEngine for () {}
/// An output format: decides how raw bytes reach the writer and which
/// `RealFormat` renders numbers.
pub trait Format {
    /// State carried between writes (for example indentation level).
    type Engine: FormatEngine;
    /// Formatter used for numeric primitives.
    type RealFormat: RealFormat;

    /// Writes `bytes` to `w`.
    ///
    /// The default implementation passes the bytes through unchanged and
    /// ignores the engine.
    ///
    /// # Errors
    /// Returns an error if the underlying writer fails.
    #[inline]
    fn write<W: Write>(_e: &mut Self::Engine, w: &mut dyn Write, bytes: &[u8]) -> Result<()> {
        // `write_all` rather than `write`: a single `write` call is allowed
        // to accept only part of the buffer, which would silently drop output.
        w.write_all(bytes)?;
        Ok(())
    }
}

View File

@ -0,0 +1,117 @@
use std::io::Write;
use std::marker::PhantomData;
use crate::syntax::*;
use crate::ser::error::Result;
use super::{Format, FormatEngine, FastRealFormat};
pub type DefaultPrettyFormat = PrettyFormat<TwoSpaces>;
/// Describes the bytes written for one level of indentation.
pub trait IndentStyle {
/// Bytes emitted once per nesting level.
const LEVEL: &'static [u8];
}
/// Indent each level with a single tab.
pub struct OneTab;

/// Indent each level with two spaces.
pub struct TwoSpaces;

/// Indent each level with four spaces.
pub struct FourSpaces;

impl IndentStyle for OneTab {
    const LEVEL: &'static [u8] = &[WHITESPACE_TAB_CHAR; 1];
}

impl IndentStyle for TwoSpaces {
    const LEVEL: &'static [u8] = &[WHITESPACE_SPACE_CHAR; 2];
}

impl IndentStyle for FourSpaces {
    const LEVEL: &'static [u8] = &[WHITESPACE_SPACE_CHAR; 4];
}
/// Pretty-printing format, parameterised by the indentation style `I`.
pub struct PrettyFormat<I: IndentStyle> {
// Only records the style type; no value of `I` is stored.
indent_style: PhantomData<I>
}
/// Whitespace helpers used by the `Format` implementation for `PrettyFormat`.
///
/// All writes use `write_all`: a plain `write` may accept only part of the
/// buffer, which would silently truncate the output.
impl<I: IndentStyle> PrettyFormat<I> {
    /// Emits the whitespace that precedes a closing `}` or `]`: dedent one
    /// level, then start a fresh indented line. Any other pending delimiter
    /// writes nothing.
    fn style_before_write(e: &mut PrettyFormatEngine, w: &mut dyn Write) -> Result<()> {
        match e.delim {
            DICT_END_CHAR | LIST_END_CHAR => {
                e.level -= 1;
                Self::write_newline(w)?;
                Self::write_indentation(e, w)?;
            }
            _ => (),
        }
        Ok(())
    }

    /// Emits the whitespace that follows the delimiter just written, then
    /// clears the pending delimiter.
    ///
    /// * `{` / `[`: indent one level and start a new line.
    /// * `:`: a single space.
    /// * `,`: a new line at the current level.
    fn style_after_write(e: &mut PrettyFormatEngine, w: &mut dyn Write) -> Result<()> {
        match e.delim {
            DICT_START_CHAR | LIST_START_CHAR => {
                e.level += 1;
                Self::write_newline(w)?;
                Self::write_indentation(e, w)?;
            }
            DICT_KV_SEPARATOR_CHAR => {
                Self::write_space(w)?;
            }
            COMMA_CHAR => {
                Self::write_newline(w)?;
                Self::write_indentation(e, w)?;
            }
            _ => (),
        }
        // The delimiter has been handled; later non-delimiter writes must not
        // be styled again.
        e.delim = 0;
        Ok(())
    }

    /// Writes a single space.
    fn write_space(w: &mut dyn Write) -> Result<()> {
        w.write_all(&[WHITESPACE_SPACE_CHAR])?;
        Ok(())
    }

    /// Writes a single line feed.
    fn write_newline(w: &mut dyn Write) -> Result<()> {
        w.write_all(&[NEWLINE_LF_CHAR])?;
        Ok(())
    }

    /// Writes `I::LEVEL` once per current nesting level.
    fn write_indentation(e: &mut PrettyFormatEngine, w: &mut dyn Write) -> Result<()> {
        for _ in 0..e.level {
            w.write_all(I::LEVEL)?;
        }
        Ok(())
    }
}
/// State for `PrettyFormat`: the current nesting depth and the delimiter that
/// is about to be (or was just) written.
#[derive(Default)]
pub struct PrettyFormatEngine {
// Current indentation depth.
level: usize,
// Pending delimiter byte; 0 means "none".
delim: u8
}
impl FormatEngine for PrettyFormatEngine {
// Remembers the delimiter so the next write can be styled around it.
fn mark_delim(&mut self, delim: u8) {
self.delim = delim;
}
}
/// Pretty format: every write is wrapped with the whitespace required by the
/// delimiter currently marked on the engine.
impl<I: IndentStyle> Format for PrettyFormat<I> {
    type Engine = PrettyFormatEngine;
    type RealFormat = FastRealFormat;

    /// Writes leading whitespace, then `bytes`, then trailing whitespace.
    ///
    /// # Errors
    /// Returns an error if the underlying writer fails.
    fn write<W: Write>(e: &mut PrettyFormatEngine, w: &mut dyn Write, bytes: &[u8]) -> Result<()> {
        Self::style_before_write(e, w)?;
        // `write_all` guarantees the whole payload is written; `write` may
        // stop after a partial write.
        w.write_all(bytes)?;
        Self::style_after_write(e, w)
    }
}

View File

@ -0,0 +1,8 @@
use super::{Format, FastRealFormat};
/// Compact format: no engine state, the default pass-through `write`, and
/// `FastRealFormat` for numbers.
pub struct StandardFormat;
impl Format for StandardFormat {
type Engine = ();
type RealFormat = FastRealFormat;
}

View File

@ -0,0 +1,35 @@
// Generates one numeric `serialize_*` method that forwards the value to the
// serializer's `write_real` for the given primitive type.
macro_rules! impl_ser_real {
    ($f:ident, $t:ident) => {
        fn $f(self, value: $t) -> Result<()> {
            self.write_real::<$t>(value)
        }
    };
}
// Opens a tag (`name(`) when tagging is enabled for this kind of value.
//
// With tagging enabled, one bool is pushed on the serializer's tag stack:
// `true` if a tag was written, `false` if the name was empty and nothing was
// written. With tagging disabled, nothing happens. A failed write returns
// early from the enclosing function before anything is pushed.
macro_rules! tag_start {
    ($s:expr, $opt_enabled:expr, $name:expr) => {
        if $opt_enabled {
            let tagged = !$name.is_empty();
            if tagged {
                $s.write_bytes($name.as_bytes())?;
                $s.write_delim(syntax::TAG_START_CHAR)?;
            }
            $s.tag_stack.push(tagged);
        }
    };
}
// Closes a tag opened by `tag_start!`. Evaluates to a `Result<()>`.
//
// With tagging enabled, the matching entry is popped from the tag stack
// (panicking with "stack tag value" if the stack is empty) and `)` is written
// only if that entry says a tag was opened. Otherwise it yields `Ok(())`.
macro_rules! tag_end {
    ($s:expr, $opt_enabled:expr) => {
        // `&&` short-circuits, so the stack is only popped when enabled.
        if $opt_enabled && $s.tag_stack.pop().expect("stack tag value") {
            $s.write_delim(syntax::TAG_END_CHAR)
        } else {
            Ok(())
        }
    };
}

491
sehn-serde/src/ser/mod.rs Normal file
View File

@ -0,0 +1,491 @@
mod error;
mod string_writer;
mod format;
mod config;
#[macro_use]
mod macros;
use std::io::Write;
use std::marker::PhantomData;
use serde::ser::{self, Serialize};
use crate::syntax;
pub use self::format::*;
pub use self::config::*;
pub use self::string_writer::StringWriter;
pub use self::error::{Error, ErrorKind, Result};
/// Serde serializer writing to `W`, configured at compile time by `C`.
pub struct Serializer<W: Write, C: Config> {
// Destination of all output.
out: W,
// One entry per enabled tagged value in progress: whether a tag was opened.
tag_stack: Vec<bool>,
// True right after a container was opened, until its first element or key
// is written; used to decide whether a separator is needed.
first_element: bool,
// Records the configuration type; no value is stored.
config: PhantomData<C>,
// State owned by the configured format.
format_engine: <C::Format as Format>::Engine
}
/// Low-level output helpers shared by all the serde trait implementations.
impl<W: Write, C: Config> Serializer<W, C> {
    /// Marks that the next element or key is the first one in its container.
    #[inline]
    fn set_first_element(&mut self) {
        self.first_element = true;
    }

    /// Returns `true` if a separator is needed before the next element.
    ///
    /// Clears the "first element" marker as a side effect.
    #[inline]
    fn check_not_first_element(&mut self) -> bool {
        let was_first = std::mem::replace(&mut self.first_element, false);
        !was_first
    }

    /// Writes a single delimiter byte, telling the format engine first.
    #[inline]
    fn write_delim(&mut self, delim: u8) -> Result<()> {
        self.format_engine.mark_delim(delim);
        let single = [delim];
        self.write_bytes(&single)
    }

    /// Writes raw bytes through the configured format.
    #[inline]
    fn write_bytes(&mut self, bytes: &[u8]) -> Result<()> {
        <C::Format as Format>::write::<W>(&mut self.format_engine, &mut self.out, bytes)
    }

    /// Writes a numeric primitive straight to the output using the format's
    /// `RealFormat`.
    #[inline]
    fn write_real<R>(&mut self, r: R) -> Result<()>
    where
        <C::Format as Format>::RealFormat: WriteReal<R>,
    {
        <<C::Format as Format>::RealFormat as WriteReal<R>>::write_real::<W>(&mut self.out, r)
    }
}
pub fn to_string_with_config<T, C>(value: &T) -> Result<String>
where
T: Serialize,
C: Config
{
let mut serializer = Serializer {
out: StringWriter::new(),
config: PhantomData::<C>,
tag_stack: Vec::with_capacity(C::INITIAL_TAG_STACK_SIZE),
first_element: false,
format_engine: Default::default()
};
value.serialize(&mut serializer)?;
Ok(serializer.out.to_string().expect("valid utf8"))
}
/// Serializes `value` to a `String` using `DefaultConfig`.
///
/// See `to_string_with_config` for the error and panic conditions.
pub fn to_string<T>(value: &T) -> Result<String>
where
    T: Serialize,
{
    to_string_with_config::<_, DefaultConfig>(value)
}
/// Serde `Serializer` implementation.
///
/// Compound values are serialized by the same `&mut Serializer`, so every
/// `Serialize*` associated type is `Self`.
impl<'a, W: Write, C: Config> ser::Serializer for &'a mut Serializer<W, C> {
    type Ok = ();
    type Error = Error;
    type SerializeSeq = Self;
    type SerializeTuple = Self;
    type SerializeTupleStruct = Self;
    type SerializeTupleVariant = Self;
    type SerializeMap = Self;
    type SerializeStruct = Self;
    type SerializeStructVariant = Self;

    // Numeric primitives go straight to the format's `RealFormat`.
    impl_ser_real!(serialize_i8, i8);
    impl_ser_real!(serialize_i16, i16);
    impl_ser_real!(serialize_i32, i32);
    impl_ser_real!(serialize_i64, i64);
    impl_ser_real!(serialize_u8, u8);
    impl_ser_real!(serialize_u16, u16);
    impl_ser_real!(serialize_u32, u32);
    impl_ser_real!(serialize_u64, u64);
    impl_ser_real!(serialize_f32, f32);
    impl_ser_real!(serialize_f64, f64);

    /// Serializes a `char` as a one-character text value.
    fn serialize_char(self, v: char) -> Result<()> {
        // A zeroed stack buffer replaces `mem::uninitialized()`, which is
        // deprecated and undefined behaviour; four bytes hold any UTF-8 char.
        let mut char_bytes = [0u8; 4];
        let char_str = v.encode_utf8(&mut char_bytes);
        self.serialize_str(char_str)
    }

    /// Serializes a string between text delimiters.
    fn serialize_str(self, v: &str) -> Result<()> {
        self.write_delim(syntax::TEXT_CHAR)?;
        if C::DISABLE_STRING_ESCAPING {
            self.write_bytes(v.as_bytes())?;
        } else {
            // TODO: Escape characters
            self.write_bytes(v.as_bytes())?;
        }
        self.write_delim(syntax::TEXT_CHAR)
    }

    /// Serializes a boolean as the `true` / `false` keyword.
    fn serialize_bool(self, v: bool) -> Result<()> {
        self.write_bytes(if v {
            syntax::BOOLEAN_TRUE_KEYWORD
        } else {
            syntax::BOOLEAN_FALSE_KEYWORD
        })
    }

    /// Serializes a byte slice as a list of numbers.
    fn serialize_bytes(self, v: &[u8]) -> Result<()> {
        use serde::ser::SerializeSeq;
        let mut seq = self.serialize_seq(Some(v.len()))?;
        for byte in v {
            seq.serialize_element(byte)?;
        }
        seq.end()
    }

    /// Serializes `Some(value)` as the bare value.
    fn serialize_some<T>(self, value: &T) -> Result<()>
    where
        T: ?Sized + Serialize,
    {
        value.serialize(self)
    }

    /// Serializes `None` the same way as the unit value.
    fn serialize_none(self) -> Result<()> {
        self.serialize_unit()
    }

    /// Serializes `()` as the literal `none`.
    fn serialize_unit(self) -> Result<()> {
        self.write_bytes(b"none")
    }

    /// Serializes a unit struct as its name when `UNIT_STRUCT_TO_KIND` is
    /// set; otherwise writes nothing.
    fn serialize_unit_struct(self, name: &'static str) -> Result<()> {
        if C::UNIT_STRUCT_TO_KIND {
            self.write_bytes(name.as_bytes())
        } else {
            Ok(())
        }
    }

    /// Serializes a unit variant as its name in text form.
    fn serialize_unit_variant(
        self,
        _name: &'static str,
        _variant_index: u32,
        variant: &'static str,
    ) -> Result<()> {
        self.serialize_str(variant)
    }

    /// Serializes a newtype struct as its inner value, wrapped in a tag when
    /// `TAG_NEWTYPE_STRUCTS` is set and the name is not empty.
    fn serialize_newtype_struct<T>(
        self,
        name: &'static str,
        value: &T,
    ) -> Result<()>
    where
        T: ?Sized + Serialize,
    {
        tag_start!(self, C::TAG_NEWTYPE_STRUCTS, name);
        value.serialize(&mut *self)?;
        tag_end!(self, C::TAG_NEWTYPE_STRUCTS)
    }

    /// Serializes a newtype variant as `{ "variant": value }`.
    fn serialize_newtype_variant<T>(
        self,
        _name: &'static str,
        _variant_index: u32,
        variant: &'static str,
        value: &T,
    ) -> Result<()>
    where
        T: ?Sized + Serialize,
    {
        self.write_delim(syntax::DICT_START_CHAR)?;
        variant.serialize(&mut *self)?;
        self.write_delim(syntax::DICT_KV_SEPARATOR_CHAR)?;
        value.serialize(&mut *self)?;
        self.write_delim(syntax::DICT_END_CHAR)
    }

    /// Opens a list; elements follow through `SerializeSeq`.
    fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq> {
        self.write_delim(syntax::LIST_START_CHAR)?;
        self.set_first_element();
        Ok(self)
    }

    /// Tuples are serialized exactly like lists.
    fn serialize_tuple(self, len: usize) -> Result<Self::SerializeTuple> {
        self.serialize_seq(Some(len))
    }

    /// Opens a (possibly tagged) list for a tuple struct.
    fn serialize_tuple_struct(
        self,
        name: &'static str,
        len: usize,
    ) -> Result<Self::SerializeTupleStruct> {
        tag_start!(self, C::TAG_TUPLE_STRUCTS, name);
        self.serialize_seq(Some(len))
    }

    /// Opens `{ "variant": [` for a tuple variant, optionally tagged.
    fn serialize_tuple_variant(
        self,
        _name: &'static str,
        _variant_index: u32,
        variant: &'static str,
        _len: usize,
    ) -> Result<Self::SerializeTupleVariant> {
        tag_start!(self, C::TAG_TUPLE_VARIANTS, variant);
        self.write_delim(syntax::DICT_START_CHAR)?;
        variant.serialize(&mut *self)?;
        self.write_delim(syntax::DICT_KV_SEPARATOR_CHAR)?;
        self.write_delim(syntax::LIST_START_CHAR)?;
        self.set_first_element();
        Ok(self)
    }

    /// Opens a dict; entries follow through `SerializeMap`.
    fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap> {
        self.write_delim(syntax::DICT_START_CHAR)?;
        self.set_first_element();
        Ok(self)
    }

    /// Opens a (possibly tagged) dict for a struct.
    fn serialize_struct(
        self,
        name: &'static str,
        len: usize,
    ) -> Result<Self::SerializeStruct> {
        tag_start!(self, C::TAG_STRUCTS, name);
        self.serialize_map(Some(len))?;
        self.set_first_element();
        Ok(self)
    }

    /// Opens `{ "variant": {` for a struct variant, optionally tagged.
    fn serialize_struct_variant(
        self,
        _name: &'static str,
        _variant_index: u32,
        variant: &'static str,
        _len: usize,
    ) -> Result<Self::SerializeStructVariant> {
        tag_start!(self, C::TAG_STRUCT_VARIANTS, variant);
        self.write_delim(syntax::DICT_START_CHAR)?;
        variant.serialize(&mut *self)?;
        self.write_delim(syntax::DICT_KV_SEPARATOR_CHAR)?;
        self.write_delim(syntax::DICT_START_CHAR)?;
        self.set_first_element();
        Ok(self)
    }
}
/// Element-by-element serialization of a list.
impl<'a, W: Write, C: Config> ser::SerializeSeq for &'a mut Serializer<W, C> {
    type Ok = ();
    type Error = Error;

    /// Writes one element, preceded by a separator unless it is the first.
    fn serialize_element<T>(&mut self, value: &T) -> Result<()>
    where
        T: ?Sized + Serialize,
    {
        if self.check_not_first_element() {
            self.write_delim(syntax::LIST_SEPARATOR_CHAR)?;
        }
        value.serialize(&mut **self)
    }

    /// Closes the list.
    fn end(self) -> Result<()> {
        // An empty list never consumes the "first element" marker. Clear it
        // here so the enclosing container still writes its next separator
        // (otherwise `[[],3]` would be emitted as `[[]3]`).
        self.first_element = false;
        self.write_delim(syntax::LIST_END_CHAR)
    }
}
/// Element-by-element serialization of a tuple (written as a list).
impl<'a, W: Write, C: Config> ser::SerializeTuple for &'a mut Serializer<W, C> {
    type Ok = ();
    type Error = Error;

    /// Writes one element, preceded by a separator unless it is the first.
    fn serialize_element<T>(&mut self, value: &T) -> Result<()>
    where
        T: ?Sized + Serialize,
    {
        if self.check_not_first_element() {
            self.write_delim(syntax::LIST_SEPARATOR_CHAR)?;
        }
        value.serialize(&mut **self)
    }

    /// Closes the list.
    fn end(self) -> Result<()> {
        // An empty tuple never consumes the "first element" marker; clear it
        // so the enclosing container still writes its next separator.
        self.first_element = false;
        self.write_delim(syntax::LIST_END_CHAR)
    }
}
/// Field-by-field serialization of a tuple struct (a list, optionally tagged).
impl<'a, W: Write, C: Config> ser::SerializeTupleStruct for &'a mut Serializer<W, C> {
    type Ok = ();
    type Error = Error;

    /// Writes one field, preceded by a separator unless it is the first.
    fn serialize_field<T>(&mut self, value: &T) -> Result<()>
    where
        T: ?Sized + Serialize,
    {
        if self.check_not_first_element() {
            self.write_delim(syntax::LIST_SEPARATOR_CHAR)?;
        }
        value.serialize(&mut **self)
    }

    /// Closes the list and, if one was opened, the surrounding tag.
    fn end(self) -> Result<()> {
        // A field-less tuple struct never consumes the "first element"
        // marker; clear it so the enclosing container still writes its next
        // separator.
        self.first_element = false;
        self.write_delim(syntax::LIST_END_CHAR)?;
        tag_end!(self, C::TAG_TUPLE_STRUCTS)
    }
}
/// Field-by-field serialization of a tuple variant: `{ "variant": [ ... ] }`.
impl<'a, W: Write, C: Config> ser::SerializeTupleVariant for &'a mut Serializer<W, C> {
    type Ok = ();
    type Error = Error;

    /// Writes one field, preceded by a separator unless it is the first.
    fn serialize_field<T>(&mut self, value: &T) -> Result<()>
    where
        T: ?Sized + Serialize,
    {
        if self.check_not_first_element() {
            self.write_delim(syntax::LIST_SEPARATOR_CHAR)?;
        }
        value.serialize(&mut **self)
    }

    /// Closes the inner list, the outer dict and, if opened, the tag.
    fn end(self) -> Result<()> {
        // A field-less variant never consumes the "first element" marker;
        // clear it so the enclosing container still writes its next
        // separator.
        self.first_element = false;
        self.write_delim(syntax::LIST_END_CHAR)?;
        self.write_delim(syntax::DICT_END_CHAR)?;
        tag_end!(self, C::TAG_TUPLE_VARIANTS)
    }
}
/// Entry-by-entry serialization of a map (written as a dict).
impl<'a, W: Write, C: Config> ser::SerializeMap for &'a mut Serializer<W, C> {
    type Ok = ();
    type Error = Error;

    /// Writes a key, preceded by a pair separator unless it is the first.
    fn serialize_key<T>(&mut self, key: &T) -> Result<()>
    where
        T: ?Sized + Serialize,
    {
        if self.check_not_first_element() {
            self.write_delim(syntax::DICT_PAIR_SEPARATOR_CHAR)?;
        }
        key.serialize(&mut **self)
    }

    /// Writes the key/value separator followed by the value.
    fn serialize_value<T>(&mut self, value: &T) -> Result<()>
    where
        T: ?Sized + Serialize,
    {
        self.write_delim(syntax::DICT_KV_SEPARATOR_CHAR)?;
        value.serialize(&mut **self)
    }

    /// Closes the dict.
    fn end(self) -> Result<()> {
        // An empty map never consumes the "first element" marker; clear it
        // so the enclosing container still writes its next separator.
        self.first_element = false;
        self.write_delim(syntax::DICT_END_CHAR)
    }
}
/// Field-by-field serialization of a struct (a dict, optionally tagged).
impl<'a, W: Write, C: Config> ser::SerializeStruct for &'a mut Serializer<W, C> {
    type Ok = ();
    type Error = Error;

    /// Writes `key: value`, preceded by a pair separator unless it is the
    /// first field.
    fn serialize_field<T>(&mut self, key: &'static str, value: &T) -> Result<()>
    where
        T: ?Sized + Serialize,
    {
        if self.check_not_first_element() {
            self.write_delim(syntax::DICT_PAIR_SEPARATOR_CHAR)?;
        }
        key.serialize(&mut **self)?;
        self.write_delim(syntax::DICT_KV_SEPARATOR_CHAR)?;
        value.serialize(&mut **self)
    }

    /// Closes the dict and, if one was opened, the surrounding tag.
    fn end(self) -> Result<()> {
        // A field-less struct never consumes the "first element" marker;
        // clear it so the enclosing container still writes its next
        // separator.
        self.first_element = false;
        self.write_delim(syntax::DICT_END_CHAR)?;
        tag_end!(self, C::TAG_STRUCTS)
    }
}
/// Field-by-field serialization of a struct variant:
/// `{ "variant": { ... } }`.
impl<'a, W: Write, C: Config> ser::SerializeStructVariant for &'a mut Serializer<W, C> {
    type Ok = ();
    type Error = Error;

    /// Writes `key: value`, preceded by a pair separator unless it is the
    /// first field.
    fn serialize_field<T>(&mut self, key: &'static str, value: &T) -> Result<()>
    where
        T: ?Sized + Serialize,
    {
        if self.check_not_first_element() {
            self.write_delim(syntax::DICT_PAIR_SEPARATOR_CHAR)?;
        }
        key.serialize(&mut **self)?;
        self.write_delim(syntax::DICT_KV_SEPARATOR_CHAR)?;
        value.serialize(&mut **self)
    }

    /// Closes the inner dict, the outer dict and, if opened, the tag.
    fn end(self) -> Result<()> {
        // A field-less variant never consumes the "first element" marker;
        // clear it so the enclosing container still writes its next
        // separator.
        self.first_element = false;
        self.write_delim(syntax::DICT_END_CHAR)?;
        self.write_delim(syntax::DICT_END_CHAR)?;
        tag_end!(self, C::TAG_STRUCT_VARIANTS)
    }
}
////////////////////////////////////////////////////////////////////////////////
// A plain struct serializes to an untagged dict with the default config.
#[test]
fn test_struct() {
    #[derive(Serialize)]
    struct Test {
        int: u32,
        seq: Vec<&'static str>,
    }

    let value = Test {
        int: 1,
        seq: vec!["a", "b"],
    };
    let rendered = to_string(&value).unwrap();
    assert_eq!(rendered, r#"{"int":1,"seq":["a","b"]}"#);
}
// Each enum variant shape and the text it is expected to serialize to.
#[test]
fn test_enum() {
    #[derive(Serialize)]
    enum E {
        Unit,
        Newtype(u32),
        Tuple(u32, u32),
        Struct { a: u32 },
    }

    let cases = [
        (E::Unit, r#""Unit""#),
        (E::Newtype(1), r#"{"Newtype":1}"#),
        (E::Tuple(1, 2), r#"{"Tuple":[1,2]}"#),
        (E::Struct { a: 1 }, r#"{"Struct":{"a":1}}"#),
    ];
    for (value, expected) in cases.iter() {
        assert_eq!(to_string(value).unwrap(), *expected);
    }
}
// Unit and newtype structs: renamed, empty-named and transparent forms.
#[test]
fn test_simple() {
    #[derive(Serialize)]
    #[serde(rename = "Test/Unit")]
    struct TestUnitStruct;

    #[derive(Serialize)]
    #[serde(rename = "Test/NewtypeStruct")]
    struct TestNewtypeStruct(i8);

    #[derive(Serialize)]
    #[serde(rename = "")]
    struct TestTransparentUnitStruct;

    #[derive(Serialize)]
    #[serde(transparent)]
    struct TestTransparentNewtypeStruct(i8);

    let unit = to_string(&TestUnitStruct).unwrap();
    assert_eq!(unit, "Test/Unit");

    let newtype = to_string(&TestNewtypeStruct(1)).unwrap();
    assert_eq!(newtype, "Test/NewtypeStruct(1)");

    let empty_named_unit = to_string(&TestTransparentUnitStruct).unwrap();
    assert_eq!(empty_named_unit, "");

    let transparent_newtype = to_string(&TestTransparentNewtypeStruct(1)).unwrap();
    assert_eq!(transparent_newtype, "1");
}

View File

@ -0,0 +1,29 @@
use std::io;
use std::string::FromUtf8Error;
/// An in-memory `io::Write` sink whose contents can be turned into a `String`.
#[derive(Debug, Default)]
pub struct StringWriter {
    /// Every byte written so far.
    buf: Vec<u8>
}

impl StringWriter {
    /// Creates an empty writer (same as `StringWriter::default()`).
    pub fn new() -> Self {
        Self::default()
    }

    /// Consumes the writer and returns its contents as a `String`.
    ///
    /// # Errors
    /// Returns `FromUtf8Error` if the written bytes are not valid UTF-8.
    pub fn to_string(self) -> Result<String, FromUtf8Error> {
        String::from_utf8(self.buf)
    }
}

impl io::Write for StringWriter {
    /// Appends all of `bytes` to the buffer; never fails and never writes
    /// partially.
    fn write(&mut self, bytes: &[u8]) -> io::Result<usize> {
        self.buf.extend_from_slice(bytes);
        Ok(bytes.len())
    }

    /// Nothing is buffered elsewhere, so flushing is a no-op.
    fn flush(&mut self) -> io::Result<()> {
        Ok(())
    }
}

View File

@ -0,0 +1,115 @@
use std::fmt::{self, Display};
// Byte-level syntax constants, grouped by the construct they belong to.
// Shared (used as both the list and the dict pair separator)
pub const COMMA_CHAR: u8 = b',';
// Newline
pub const NEWLINE_LF_CHAR: u8 = b'\n';
pub const NEWLINE_CR_CHAR: u8 = b'\r';
// Whitespace
pub const WHITESPACE_TAB_CHAR: u8 = b'\t';
pub const WHITESPACE_SPACE_CHAR: u8 = b' ';
// Real (numbers)
pub const REAL_ZERO_CHAR: u8 = b'0';
pub const REAL_NEG_CHAR: u8 = b'-';
pub const REAL_SIGEXP_SEPARATOR_CHAR: u8 = b'e';
// Text (string delimiters)
pub const TEXT_CHAR: u8 = b'"';
pub const MULTI_TEXT_CHAR: u8 = b'`';
// Kind
pub const KIND_NS_SEPARATOR_CHAR: u8 = b'/';
// Tag (`name( ... )`)
pub const TAG_START_CHAR: u8 = b'(';
pub const TAG_END_CHAR: u8 = b')';
// Dict
pub const DICT_START_CHAR: u8 = b'{';
pub const DICT_END_CHAR: u8 = b'}';
pub const DICT_KV_SEPARATOR_CHAR: u8 = b':';
pub const DICT_PAIR_SEPARATOR_CHAR: u8 = COMMA_CHAR;
// List
pub const LIST_START_CHAR: u8 = b'[';
pub const LIST_END_CHAR: u8 = b']';
pub const LIST_SEPARATOR_CHAR: u8 = COMMA_CHAR;
// Comment
pub const COMMENT_CHAR: u8 = b'#';
// Boolean keywords
pub const BOOLEAN_TRUE_KEYWORD: &[u8] = b"true";
pub const BOOLEAN_FALSE_KEYWORD: &[u8] = b"false";
/// Lexical token kinds of the syntax.
///
/// The enum is field-less, so it is also `Copy` and `Eq`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Token {
    // Non-value types
    Newline,
    Whitespace,
    Comment,
    AlphaLower,
    AlphaUpper,
    // Kinds
    KindNsSep,
    // Real
    RealMinus,
    RealZero,
    RealNumeric,
    RealSigExpSep,
    // Text
    TextStart,
    TextEnd,
    MultiTextStart,
    MultiTextEnd,
    // Structures
    TagStart,
    TagEnd,
    DictStart,
    DictEnd,
    DictKvSep,
    DictPairSep,
    ListStart,
    ListEnd,
    ListSep
}

impl Token {
    /// Returns a short human-readable description of the token, including
    /// the character(s) it stands for.
    pub fn description(&self) -> &'static str {
        match self {
            // Previously "'<lf/<crlf>'", which had an unbalanced bracket.
            Token::Newline => "newline '<lf/crlf>'",
            Token::Whitespace => "whitespace '<tab/space>'",
            Token::Comment => "comment '#'",
            Token::AlphaLower => "alpha-lower '<a-z>'",
            Token::AlphaUpper => "alpha-upper '<A-Z>'",
            Token::KindNsSep => "kind ns separator '/'",
            Token::RealMinus => "minus '-'",
            Token::RealZero => "zero '0'",
            Token::RealNumeric => "number '<0-9>'",
            Token::RealSigExpSep => "sig-exp separator 'e'",
            Token::TextStart => "text '\"'",
            Token::TextEnd => "text end '\"'",
            Token::MultiTextStart => "multi text '`'",
            Token::MultiTextEnd => "multi text end '`'",
            Token::TagStart => "tag '('",
            Token::TagEnd => "tag end ')'",
            Token::DictStart => "dict '{'",
            Token::DictEnd => "dict end '}'",
            Token::DictKvSep => "dict kv separator ':'",
            Token::DictPairSep => "dict pair separator ','",
            Token::ListStart => "list '['",
            Token::ListEnd => "list end ']'",
            Token::ListSep => "list separator ','",
        }
    }
}

/// Displays a token as its `description()`.
impl Display for Token {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        fmt.write_str(self.description())
    }
}

View File

@ -0,0 +1,156 @@
// Opaque position marker produced by `Read::mark` and consumed by
// `Read::from_mark`.
pub struct Mark {
// NOTE(review): presumably an offset into the input — confirm against the
// `Read` implementations.
i: usize
}
// Input source for the parser; `'a` is the lifetime of the data it hands out.
// NOTE(review): `Result` is not imported in the visible part of this file —
// presumably the deserializer's `Result`; confirm.
pub trait Read<'a> {
// Per char reading: consume the next character / look at it without
// consuming.
fn next_char(&mut self) -> Result<char>;
fn peek_char(&mut self) -> Result<char>;
// Sectioning off data: remember the current position.
fn mark(&self) -> Mark;
/// Returns reference to the section.
fn from_mark(&mut self, m: Mark) -> &'a str;
/// Attempt to enter a new level of nesting.
fn enter_nest(&mut self) -> Result<()>;
/// Leave a nest.
fn leave_nest(&mut self);
/// Called when the parser passes a line.
fn passed_line(&mut self);
}
//pub trait Visitor {}
/// Parser driven by a borrowed reader.
///
/// `Read` takes a lifetime parameter, so the bound must name it: `Read<'a>`.
pub struct Parser<'a, R: Read<'a> + 'a> {
    /// The reader the parser pulls characters from.
    r: &'a mut R
}
// Value parser (work in progress).
// NOTE(review): this impl does not match the declarations visible in this
// file and is not expected to compile as-is:
//   * the bound `R: Read` omits the lifetime parameter of `Read<'a>`;
//   * it calls `next`, `peek`, `mark_line` and `enter_ctx`, while the `Read`
//     trait above declares `next_char`, `peek_char`, `passed_line` and
//     `enter_nest`;
//   * `LF_CHAR`, `CR_CHAR`, `SPACE_CHAR`, `TAB_CHAR`, `TEXT_DELIM_CHAR`,
//     `MULTI_TEXT_DELIM_CHAR` and `ParseError` are not defined in the
//     visible source;
//   * `parse_value` returns `Result<()>` but several arms return
//     `Option<ParseError>` values.
impl<'a, R: Read> Parser<'a, R> {
// Skips comments and whitespace, then dispatches on the first character of
// a value. Returns `Ok(())` when the input ends.
fn parse_value(&mut self) -> Result<()> {
loop {
match self.r.next() {
Some(c) => match c {
// A comment runs to the end of the line; stop if the input ends there.
COMMENT_CHAR => if !self.skip_to_next_line() {
return Ok(())
},
// Whitespace
LF_CHAR => self.r.mark_line(),
CR_CHAR => {
// Treat CR LF as a single line break.
self.skip_char(LF_CHAR);
self.r.mark_line();
},
SPACE_CHAR | TAB_CHAR => {
// NOTE(review): the whitespace character was already consumed by `next()`
// above, so this skips one additional character — confirm intent.
self.skip_one();
},
// Values
DICT_START_CHAR => return self.parse_dict(),
LIST_START_CHAR => return self.parse_list(),
TEXT_DELIM_CHAR => return self.parse_text(),
MULTI_TEXT_DELIM_CHAR => return self.parse_multi_text(),
REAL_NEG_CHAR => return self.parse_real_negative(),
REAL_ZERO_CHAR => return self.parse_real_positive_decimal(),
// NOTE(review): `...` range patterns are deprecated in favour of `..=`.
'1'...'9' => return self.parse_real_positive_int(),
'a'...'z' => return self.parse_initial_lowercase(c),
'A'...'Z' => return self.parse_initial_uppercase(c),
// Unexpected
c => return Some(ParseError::Unexpected(c))
},
None => return Ok(())
}
}
}
// Consumes one character; returns false if the input has ended.
#[inline]
fn skip_one(&mut self) -> bool {
match self.r.next() {
Some(_) => true,
None => false
}
}
// Consumes the next character only if it equals `c`; returns false if the
// input has ended.
#[inline]
fn skip_char(&mut self, c: char) -> bool {
match self.r.peek() {
Some(peeked_c) if c == peeked_c => self.skip_one(),
Some(_) => true,
_ => false
}
}
// Skips to the start of the next line; returns false if the input ended.
// NOTE(review): the `Some(_) => return true` arm returns after the first
// ordinary character instead of continuing the loop — this looks
// unintended; confirm.
#[inline]
fn skip_to_next_line(&mut self) -> bool {
loop {
match self.r.next() {
Some(LF_CHAR) => {
self.r.mark_line();
return true
},
Some(CR_CHAR) => {
if self.skip_char(LF_CHAR) {
self.r.mark_line();
return true
} else {
return false
}
},
Some(_) => return true,
None => return false
}
}
}
// Enters a nesting context for a dict; fails with `NestingLimit` if the
// reader refuses. Parsing of the contents is not implemented yet.
#[inline]
fn parse_dict(&mut self) -> Option<ParseError> {
if self.r.enter_ctx() {
None
} else {
Some(ParseError::NestingLimit)
}
}
// Enters a nesting context for a list; fails with `NestingLimit` if the
// reader refuses. Parsing of the contents is not implemented yet.
#[inline]
fn parse_list(&mut self) -> Option<ParseError> {
if self.r.enter_ctx() {
None
} else {
Some(ParseError::NestingLimit)
}
}
// The remaining parsers are stubs that report no error and consume nothing.
#[inline]
fn parse_text(&mut self) -> Option<ParseError> {
None
}
#[inline]
fn parse_multi_text(&mut self) -> Option<ParseError> {
None
}
#[inline]
fn parse_real_negative(&mut self) -> Option<ParseError> {
None
}
#[inline]
fn parse_real_positive_int(&mut self) -> Option<ParseError> {
None
}
#[inline]
fn parse_real_positive_decimal(&mut self) -> Option<ParseError> {
None
}
#[inline]
fn parse_initial_lowercase(&mut self, c: char) -> Option<ParseError> {
None
}
#[inline]
fn parse_initial_uppercase(&mut self, c: char) -> Option<ParseError> {
None
}
}

View File

View File

@ -0,0 +1,24 @@
//mod sig_exp;
//mod str_int;
// //pub use self::sig_exp::*;
// pub use self::str_int::*;
// pub type PrimaryInt = i64;
// pub type PrimaryNat = u64;
// #[derive(Debug, PartialEq)]
// pub enum ToPrimitiveError {
// Overflow,
// Underflow
//}
// pub enum Real {
// /// Zero to the natural primitive max.
// Nat(PrimitiveNatValue),
// /// Negative to the integer primitive min.
// Int(PrimitiveIntValue),
// /// SigExp
// SigExp(SigExp)
// }

View File

@ -0,0 +1,125 @@
use super::{
StrInt,
StringInt,
PrimitiveInt,
PrimitiveIntValue
};
/// A significand/exponent pair whose parts are still borrowed digit strings.
pub struct StrSigExp<'a> {
    /// Significand.
    sig: StrInt<'a>,
    /// Exponent.
    exp: StrInt<'a>,
}

impl<'a> StrSigExp<'a> {
    /// Builds a `StrSigExp` from an already-parsed significand and exponent.
    #[inline]
    pub fn from_parts(sig: StrInt<'a>, exp: StrInt<'a>) -> Self {
        StrSigExp { sig, exp }
    }
}
// Converts a borrowed significand/exponent pair into the smallest `SigExp`
// representation that can hold it.
// NOTE(review): work in progress, not expected to compile as written:
//   * `I16_MAX_DIGITS` is not defined in the visible source;
//   * the `if ... { raw_exp_digits }` blocks evaluate to a value that is
//     never used;
//   * `PrimitiveInt::from_str_int()` is called without an argument for the
//     exponent;
//   * `SigExp::Fat` declares `exp: i16` but is built with a `StringInt`;
//   * both fallback branches build `exp` from `raw.sig` — presumably
//     `raw.exp` was intended; confirm.
impl<'a> From<StrSigExp<'a>> for SigExp {
fn from(raw: StrSigExp<'a>) -> SigExp {
let raw_exp_digits = raw.exp.digits();
if raw_exp_digits.len() <= I16_MAX_DIGITS {
raw_exp_digits
}
// Try to fit the significand into a primitive first.
if let PrimitiveInt(sig) = PrimitiveInt::from_str_int(raw.sig) {
let raw_exp_digits = raw.exp.digits();
if raw_exp_digits.len() <= I16_MAX_DIGITS {
raw_exp_digits
}
// Then try to fit the exponent.
if let PrimitiveInt(exp) = PrimitiveInt::from_str_int() {
SigExp::Fit {
sig,
exp
}
} else {
SigExp::Fat {
sig: raw.sig.to_string_int(),
exp: raw.sig.to_string_int()
}
}
} else {
SigExp::Massive {
sig: raw.sig.to_string_int(),
exp: raw.sig.to_string_int()
}
}
}
}
/// A number in significand/exponent form, stored in the cheapest
/// representation that keeps full precision.
#[derive(Debug, PartialEq)]
pub enum SigExp {
/// Both `sig` and `exp` can be
/// represented by fast primitive
/// types without loss of precision.
Fit {
sig: PrimitiveIntValue,
exp: i16
},
/// `sig` can NOT be represented
/// by a fast primitive type
/// without loss of precision;
/// `exp` still fits an `i16`.
Fat {
sig: StringInt,
exp: i16
},
/// Both `sig` and `exp` are kept as
/// string integers (arbitrary size).
// NOTE(review): the original wording ("both `sig` OR `exp`") does not say
// whether one or both parts must overflow to end up here — confirm.
// #[feature(massive_sig_exp)]
Massive {
sig: StringInt,
exp: StringInt
}
}
// Placeholder impl: the methods sketched below are still commented out.
impl SigExp {
// pub fn is_fit(&self) -> f64 {
// }
// pub fn map_approx_f64(&self) -> f64 {
// match self {
// SigExp::Fit { sig, exp } => {
// }
// }
// }
}
// Tests for the `StrSigExp` -> `SigExp` conversion, one per representation.
#[cfg(test)]
mod tests {
use super::*;
// Small significand and exponent: both fit primitives.
#[test]
fn test_str_sig_exp_to_sig_exp_fit() {
let sig_str_int = StrInt::new("+1").unwrap();
let exp_str_int = StrInt::new("+1").unwrap();
let sig_exp = SigExp::from(StrSigExp::from_parts(sig_str_int, exp_str_int));
assert_eq!(sig_exp, SigExp::Fit{sig: 1, exp: 1});
}
// 20-digit significand, small exponent.
#[test]
fn test_str_sig_exp_to_sig_exp_fat() {
let sig_str_int = StrInt::new("+10000000000000000000").unwrap();
let exp_str_int = StrInt::new("+1").unwrap();
let sig_exp = SigExp::from(StrSigExp::from_parts(sig_str_int, exp_str_int));
assert_eq!(sig_exp, SigExp::Fat{sig: sig_str_int.to_string_int(), exp: 1});
}
// 20-digit significand and exponent.
#[test]
fn test_str_sig_exp_to_sig_exp_massive() {
let sig_str_int = StrInt::new("+10000000000000000000").unwrap();
let exp_str_int = StrInt::new("+10000000000000000000").unwrap();
let sig_exp = SigExp::from(StrSigExp::from_parts(sig_str_int, exp_str_int));
assert_eq!(sig_exp, SigExp::Massive{sig: sig_str_int.to_string_int(), exp: exp_str_int.to_string_int()});
}
}

View File

@ -0,0 +1,295 @@
use super::ToPrimitiveError;
const EMPTY_STR: &str = "";
const ZERO_STR: &str = "0";
const RADIX_10: u8 = 10;
// Number of decimal digits in the maximum value of the primitive type `$x`,
// computed as floor(log10(max)) + 1 in `f64` arithmetic.
macro_rules! max_digits (
($x:ident) => (($x::max_value() as f64).log10().floor() as usize + 1)
);
// Digit-by-digit accumulation with overflow checks.
// NOTE(review): appears unfinished and is not invoked in the visible source.
// It refers to `digits`, `result`, `PIE` and `Overflow`, none of which are
// defined by the macro or visible here; `$n` is unused; and `RADIX_10` is a
// `u8` although `char::to_digit` takes a `u32`.
macro_rules! unchecked_str_to_primitive {
($n:ident) => {
for &c in digits {
let x = match (c as char).to_digit(RADIX_10) {
Some(x) => x,
None => panic!("invalid char")
};
result = match result.checked_mul(RADIX_10) {
Some(result) => result,
None => return Err(PIE { kind: Overflow }),
};
result = match result.checked_add(x) {
Some(result) => result,
None => return Err(PIE { kind: Overflow }),
};
}
}
}
// Generates a `StrInt` -> signed primitive conversion method named `$n`
// returning `$p`.
//
// The value is accumulated digit by digit with checked arithmetic, directly
// in the direction of the sign. This fixes three problems in the previous
// version:
//   * a stray `for digit.` fragment that made the expansion unparsable;
//   * the minimum value (e.g. `-128` for `i8`) was rejected, because the
//     magnitude was parsed as a positive number before the sign was applied;
//   * digit strings with leading zeros were rejected by the digit-count
//     guard.
//
// Returns `Overflow` if a non-negative value does not fit and `Underflow` if
// a negative value does not fit. A non-digit character (only possible via
// `from_parts_unchecked`) is reported the same way, as before.
macro_rules! impl_str_int_to_primitive_int {
    ($n:ident, $p:ident) => {
        pub fn $n(&self) -> Result<$p, ToPrimitiveError> {
            let negative = self.is_negative();
            let out_of_range = || {
                if negative {
                    ToPrimitiveError::Underflow
                } else {
                    ToPrimitiveError::Overflow
                }
            };

            // `digits()` yields "0" for zero, so zero needs no special case.
            let mut acc: $p = 0;
            for byte in self.digits().bytes() {
                let digit = match (byte as char).to_digit(10) {
                    // A decimal digit is 0..=9 and fits every integer type.
                    Some(d) => d as $p,
                    None => return Err(out_of_range()),
                };
                let next = acc.checked_mul(10).and_then(|shifted| {
                    if negative {
                        shifted.checked_sub(digit)
                    } else {
                        shifted.checked_add(digit)
                    }
                });
                acc = match next {
                    Some(value) => value,
                    None => return Err(out_of_range()),
                };
            }
            Ok(acc)
        }
    };
}
// Generates a `StrInt` -> unsigned primitive conversion method named `$n`
// returning `$p`.
//
// Only the digits are converted; the sign is ignored, so a negative value
// yields its magnitude. Returns `Overflow` if the digits have more places
// than `$p::max_value()` or do not parse as a `$p`.
macro_rules! impl_str_int_to_primitive_nat {
    ($n:ident, $p:ident) => {
        pub fn $n(&self) -> Result<$p, ToPrimitiveError> {
            if self.is_zero() {
                return Ok(0);
            }
            let digits = self.digits();
            if digits.len() > max_digits!($p) {
                return Err(ToPrimitiveError::Overflow);
            }
            $p::from_str_radix(digits, 10).map_err(|_| ToPrimitiveError::Overflow)
        }
    };
}
/// Reasons a string cannot be turned into a `StrInt`.
#[derive(Debug)]
pub enum StrIntParseError {
/// A non-digit character was found among the digits.
InvalidChar,
/// A non-zero sign was given with no digits.
NonZeroEmpty,
/// A zero sign was given together with digits (e.g. `"01"`).
ZeroWithTrailing
}
/// An integer of arbitrary size, stored as a sign plus borrowed decimal
/// digits.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct StrInt<'a> {
// -1, 0 or 1 when built through `new`; 0 means the value is zero.
signum: i8,
// Decimal digits without sign; empty when `signum` is 0.
digits: &'a str
}
impl<'a> StrInt<'a> {
    /// Parses a decimal integer with an optional leading `+` or `-`.
    ///
    /// A leading `0` means the value zero and must not be followed by
    /// anything.
    ///
    /// # Errors
    /// See `from_parts`.
    pub fn new(str_int: &'a str) -> Result<Self, StrIntParseError> {
        // The matched first characters are all ASCII, so slicing at byte 1
        // is always on a character boundary.
        match str_int.chars().next() {
            Some('0') => Self::from_parts(0, &str_int[1..]),
            Some('-') => Self::from_parts(-1, &str_int[1..]),
            Some('+') => Self::from_parts(1, &str_int[1..]),
            _ => Self::from_parts(1, str_int),
        }
    }

    /// Returns the value zero.
    pub fn new_zero() -> Self {
        Self {
            signum: 0,
            digits: EMPTY_STR,
        }
    }

    /// Builds a `StrInt` from a sign and a digit string, validating both.
    ///
    /// # Errors
    /// * `ZeroWithTrailing` if `signum` is 0 and `digits` is not empty.
    /// * `NonZeroEmpty` if `signum` is not 0 and `digits` is empty.
    /// * `InvalidChar` if `digits` contains anything but `0`-`9`.
    pub fn from_parts(signum: i8, digits: &'a str) -> Result<Self, StrIntParseError> {
        if signum == 0 {
            if !digits.is_empty() {
                return Err(StrIntParseError::ZeroWithTrailing);
            }
        } else {
            if digits.is_empty() {
                return Err(StrIntParseError::NonZeroEmpty);
            }
            // Any byte of a multi-byte character is >= 0x80 and therefore
            // not an ASCII digit, so a byte-wise check is sufficient.
            if !digits.bytes().all(|b| b.is_ascii_digit()) {
                return Err(StrIntParseError::InvalidChar);
            }
        }
        Ok(Self { signum, digits })
    }

    /// Builds a `StrInt` without validating its parts.
    ///
    /// # Safety
    /// The caller must uphold what `from_parts` checks: `digits` is empty
    /// when `signum` is 0, and otherwise non-empty and made only of the
    /// characters `0`-`9`.
    #[inline]
    pub unsafe fn from_parts_unchecked(signum: i8, digits: &'a str) -> Self {
        Self { signum, digits }
    }

    /// Returns an owned copy of this integer.
    #[inline]
    pub fn to_string_int(&self) -> StringInt {
        StringInt {
            signum: self.signum,
            digits: self.digits.to_string(),
        }
    }

    /// Returns `true` if the value is not negative (zero included).
    #[inline]
    pub fn is_positive(&self) -> bool {
        self.signum > -1
    }

    /// Returns `true` if the value is negative.
    #[inline]
    pub fn is_negative(&self) -> bool {
        self.signum == -1
    }

    /// Returns `true` if the value is zero.
    #[inline]
    pub fn is_zero(&self) -> bool {
        self.signum == 0
    }

    /// Returns the decimal digits without sign; `"0"` for zero.
    #[inline]
    pub fn digits(&self) -> &str {
        if self.is_zero() {
            ZERO_STR
        } else {
            self.digits
        }
    }

    /// Returns the sign as stored.
    #[inline]
    pub fn signum(&self) -> i8 {
        self.signum
    }

    // Conversions to signed primitives.
    impl_str_int_to_primitive_int!(to_i8, i8);
    impl_str_int_to_primitive_int!(to_i16, i16);
    impl_str_int_to_primitive_int!(to_i32, i32);
    impl_str_int_to_primitive_int!(to_i64, i64);

    // Conversions to unsigned primitives.
    impl_str_int_to_primitive_nat!(to_u8, u8);
    impl_str_int_to_primitive_nat!(to_u16, u16);
    impl_str_int_to_primitive_nat!(to_u32, u32);
    impl_str_int_to_primitive_nat!(to_u64, u64);
}
/// Owned counterpart of `StrInt`: a sign plus an owned string of decimal
/// digits.
#[derive(Debug, Clone, PartialEq)]
pub struct StringInt {
    signum: i8,
    digits: String,
}

impl StringInt {
    /// Returns `true` if the value is not negative (zero included).
    #[inline]
    pub fn is_positive(&self) -> bool {
        self.signum >= 0
    }

    /// Returns `true` if the value is negative.
    #[inline]
    pub fn is_negative(&self) -> bool {
        -1 == self.signum
    }

    /// Returns `true` if the value is zero.
    #[inline]
    pub fn is_zero(&self) -> bool {
        0 == self.signum
    }

    /// Returns the decimal digits without sign; `"0"` for zero.
    #[inline]
    pub fn digits(&self) -> &str {
        match self.signum {
            0 => ZERO_STR,
            _ => &self.digits,
        }
    }

    /// Returns a borrowed view of this integer.
    pub fn to_str_int(&self) -> StrInt {
        let signum = self.signum;
        let digits = self.digits.as_str();
        StrInt { signum, digits }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // Inputs that must parse, with the expected sign and digits.
    const GOOD_INT_STR_CASES: [(&str, StrInt<'static>); 6] = [
        ("0", StrInt { signum: 0, digits: "" }),
        ("1", StrInt { signum: 1, digits: "1" }),
        ("-1", StrInt { signum: -1, digits: "1" }),
        ("123", StrInt { signum: 1, digits: "123" }),
        ("+123", StrInt { signum: 1, digits: "123" }),
        ("-123", StrInt { signum: -1, digits: "123" }),
    ];

    // Inputs that must be rejected.
    const BAD_INT_STR_CASES: [&str; 5] = ["01", "0a", "--", "++", "00"];

    #[test]
    fn test_good_str_int_cases() {
        for (input, expected) in GOOD_INT_STR_CASES.iter() {
            if let Ok(output) = StrInt::new(input) {
                assert_eq!(output, *expected);
            } else {
                panic!("input {:?}", input);
            }
        }
    }

    #[test]
    fn test_bad_str_int_cases() {
        for input in BAD_INT_STR_CASES.iter() {
            assert!(StrInt::new(input).is_err());
        }
    }

    #[test]
    fn test_str_int_to_int_primitives() {
        // Pos: the largest value of each type. (`+128` was used for `i8`
        // before, which is out of range: `i8::max_value()` is 127.)
        assert_eq!(StrInt::new("+127").unwrap().to_i8(), Ok(i8::max_value()));
        assert_eq!(StrInt::new("+32767").unwrap().to_i16(), Ok(i16::max_value()));
        assert_eq!(StrInt::new("+2147483647").unwrap().to_i32(), Ok(i32::max_value()));
        assert_eq!(StrInt::new("+9223372036854775807").unwrap().to_i64(), Ok(i64::max_value()));
        // Neg: these inputs are the negated maximum, not the minimum
        // (`-127` is `-i8::max_value()`, while `i8::min_value()` is -128).
        assert_eq!(StrInt::new("-127").unwrap().to_i8(), Ok(-i8::max_value()));
        assert_eq!(StrInt::new("-32767").unwrap().to_i16(), Ok(-i16::max_value()));
        assert_eq!(StrInt::new("-2147483647").unwrap().to_i32(), Ok(-i32::max_value()));
        assert_eq!(StrInt::new("-9223372036854775807").unwrap().to_i64(), Ok(-i64::max_value()));
    }

    #[test]
    fn test_str_int_to_nat_primitives() {
        assert_eq!(StrInt::new("+255").unwrap().to_u8(), Ok(u8::max_value()));
        assert_eq!(StrInt::new("+65535").unwrap().to_u16(), Ok(u16::max_value()));
        assert_eq!(StrInt::new("+4294967295").unwrap().to_u32(), Ok(u32::max_value()));
        assert_eq!(StrInt::new("+18446744073709551615").unwrap().to_u64(), Ok(u64::max_value()));
        // The unsigned conversions ignore the sign and return the magnitude.
        assert_eq!(StrInt::new("-255").unwrap().to_u8(), Ok(u8::max_value()));
        assert_eq!(StrInt::new("-65535").unwrap().to_u16(), Ok(u16::max_value()));
        assert_eq!(StrInt::new("-4294967295").unwrap().to_u32(), Ok(u32::max_value()));
        assert_eq!(StrInt::new("-18446744073709551615").unwrap().to_u64(), Ok(u64::max_value()));
    }

    #[test]
    fn test_int_parse_overflow_underflow() {
        let definite_overflow = "+256";
        let definite_underflow = "-256";
        assert_eq!(StrInt::new(definite_overflow).unwrap().to_i8(), Err(ToPrimitiveError::Overflow));
        assert_eq!(StrInt::new(definite_underflow).unwrap().to_i8(), Err(ToPrimitiveError::Underflow));
    }

    #[test]
    fn test_nat_parse_overflow() {
        let definite_overflow_1 = "+256";
        let definite_overflow_2 = "-256";
        assert_eq!(StrInt::new(definite_overflow_1).unwrap().to_u8(), Err(ToPrimitiveError::Overflow));
        assert_eq!(StrInt::new(definite_overflow_2).unwrap().to_u8(), Err(ToPrimitiveError::Overflow));
    }
}

View File

@ -0,0 +1,3 @@
// Holds a list of owned lines.
// NOTE(review): presumably the value type for multi-line text — confirm; no
// constructor or accessor is visible yet.
pub struct Multiline {
lines: Vec<String>
}