Skip to content

Commit 9f4e4bf

Browse files
Feature/int float types (#4)
* feat: support for int and float types * feat: support for int and float types * chores: `rubocop -a` * chores: `rubocop -A` * docs: update readme
1 parent 4c1808a commit 9f4e4bf

10 files changed

+160
-64
lines changed

.rubocop.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,3 +70,6 @@ Style/BlockDelimiters:
7070
Exclude:
7171
- spec/factories/**
7272
- Gemfile
73+
74+
Style/HashSyntax:
75+
EnforcedShorthandSyntax: never

README.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,9 +70,8 @@ avg_get: 0.000010 0.000044 0.000
7070
| Persistent | :white_check_mark: |
7171
| Get API | :white_check_mark: |
7272
| Put API | :white_check_mark: |
73-
| string data types for k/v | :white_check_mark: |
73+
| int, float and string types for k/v | :white_check_mark: |
7474
| CRC | :white_check_mark: |
75-
| int, float, hash, array for k/v | :x: |
7675
| Directory Support | :x: |
7776
| Delete API | :x: |
7877
| File Merge | :x: |

benchmark.rb

Lines changed: 25 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,55 @@
1+
# frozen_string_literal: true
2+
13
require 'benchmark'
24
require 'faker'
35
require_relative 'lib/bitcask'
46

5-
include Benchmark
6-
7+
include Benchmark
8+
79
disk_store = Bitcask::DiskStore.new('bitcask_benchmark.db')
810
value = Faker::Lorem.sentence(word_count: 10)
911

1012
puts "Benchmark for value size: #{value.length} bytes"
1113

12-
Benchmark.benchmark(CAPTION, 50, FORMAT, "avg_put:", "avg_get:") do |benchmark|
13-
tt_put_10k = benchmark.report("DiskStore#put : 10k records") do
14+
Benchmark.benchmark(CAPTION, 50, FORMAT, 'avg_put:', 'avg_get:') do |benchmark|
15+
tt_put_10k = benchmark.report('DiskStore#put : 10k records') {
1416
10_000.times do |n_time|
15-
disk_store.put("10_000" + n_time.to_s, value)
17+
disk_store.put("10_000#{n_time}", value)
1618
end
17-
end
19+
}
1820

19-
tt_get_10k = benchmark.report("DiskStore#get : 10k records") do
21+
tt_get_10k = benchmark.report('DiskStore#get : 10k records') {
2022
10_000.times do
21-
disk_store.get("10_000" + rand(1..10_000).to_s)
23+
disk_store.get("10_000#{rand(1..10_000)}")
2224
end
23-
end
25+
}
2426

25-
tt_put_100k = benchmark.report("DiskStore#put : 100k records") do
27+
tt_put_100k = benchmark.report('DiskStore#put : 100k records') {
2628
100_000.times do |n_time|
27-
disk_store.put("100_000" + n_time.to_s, value)
29+
disk_store.put("100_000#{n_time}", value)
2830
end
29-
end
31+
}
3032

31-
tt_get_100k = benchmark.report("DiskStore#get : 100k records") do
33+
tt_get_100k = benchmark.report('DiskStore#get : 100k records') {
3234
100_000.times do
33-
disk_store.get("100_000" + rand(1..100_000).to_s)
35+
disk_store.get("100_000#{rand(1..100_000)}")
3436
end
35-
end
37+
}
3638

37-
tt_put_1M = benchmark.report("DiskStore#put : 1M records") do
39+
tt_put_1M = benchmark.report('DiskStore#put : 1M records') {
3840
1_000_000.times do |n_time|
39-
disk_store.put("1_000_000" + n_time.to_s, value)
41+
disk_store.put("1_000_000#{n_time}", value)
4042
end
41-
end
43+
}
4244

43-
tt_get_1M = benchmark.report("DiskStore#get : 1M records") do
45+
tt_get_1M = benchmark.report('DiskStore#get : 1M records') {
4446
1_000_000.times do
45-
disk_store.get("1_000_000" + rand(1..1_000_000).to_s)
47+
disk_store.get("1_000_000#{rand(1..1_000_000)}")
4648
end
47-
end
49+
}
4850

49-
[(tt_put_10k+tt_put_100k+tt_put_1M)/(10_000+100_000+1_000_000).to_f, (tt_get_10k+tt_get_100k+tt_get_1M)/(10_000+100_000+1_000_000).to_f]
51+
[(tt_put_10k + tt_put_100k + tt_put_1M) / (10_000 + 100_000 + 1_000_000).to_f,
52+
(tt_get_10k + tt_get_100k + tt_get_1M) / (10_000 + 100_000 + 1_000_000).to_f]
5053
end
5154

5255
File.delete('bitcask_benchmark.db')
53-

image.png

-119 KB
Loading

lib/bitcask/disk_store.rb

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ def get(key)
3434
end
3535

3636
def put(key, value)
37-
size, data = serialize(epoc: Time.now.to_i, key:, value:)
37+
size, data = serialize(epoc: Time.now.to_i, key: key, value: value)
3838

3939
@key_dir[key] = key_struct(@write_pos, size, key)
4040
persist(data)
@@ -72,23 +72,30 @@ def incr_write_pos(pos)
7272
end
7373

7474
def key_struct(write_pos, size, key)
75-
OpenStruct.new(write_pos:, size:, key:)
75+
OpenStruct.new(write_pos: write_pos, size: size, key: key)
7676
end
7777

7878
def init_key_dir
7979
while (crc_and_header = @db_fh.read(crc32_header_offset))
8080

8181
header = crc_and_header[crc32_offset..]
82-
epoc, keysz, valuesz = deserialize_header(header)
82+
epoc, keysz, valuesz, key_type, value_type = deserialize_header(header)
8383

84-
key = @db_fh.read(keysz)
85-
value = @db_fh.read(valuesz)
84+
key_raw = @db_fh.read(keysz)
85+
value_raw = @db_fh.read(valuesz)
86+
87+
key_type = TYPES_LOOK_UP[key_type]
88+
value_type = TYPES_LOOK_UP[value_type]
89+
90+
key = unpack(key_raw, key_type)
91+
value = unpack(value_raw, value_type)
8692

8793
crc = crc_and_header[..crc32_offset - 1]
88-
raise Error('file corrupted') unless validate_crc32(desearlize_crc32(crc), header + key + value)
94+
raise StandardError, 'file corrupted' unless validate_crc32(desearlize_crc32(crc),
95+
header + key_raw + value_raw)
8996

9097
size = crc32_header_offset + keysz + valuesz
91-
@key_dir[key] = OpenStruct.new(write_pos: @write_pos, size:, key:)
98+
@key_dir[key] = OpenStruct.new(write_pos: @write_pos, size: size, key: key)
9299
incr_write_pos(size)
93100
end
94101
end

lib/bitcask/serializer.rb

Lines changed: 65 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,42 @@
44

55
module Bitcask
66
module Serializer
7-
# tz, keysz, valuesz
8-
# 3 - 32 bit unsigned long int with little endian byte order
9-
# 3 - 32 bit unsigned long int = 12 bytes
10-
HEADER_FORMAT = 'L<L<L<'
11-
HEADER_SIZE = 12
7+
# Header fields, in order: epoc, keysz, valuesz, key_type, value_type
8+
# 3 x 32-bit unsigned ints (L<) followed by 2 x 16-bit unsigned ints (S<), all little-endian: 12 + 4 = 16 bytes
9+
HEADER_FORMAT = 'L<L<L<S<S<'
10+
HEADER_SIZE = 16
1211

1312
# 32 bit unsigned long int
1413
CRC32_FORMAT = 'L'
1514
CRC32_SIZE = 4
1615

16+
TYPES = {
17+
'Integer' => 1,
18+
'Float' => 2,
19+
'String' => 3
20+
}.freeze
21+
22+
TYPES_LOOK_UP = {
23+
TYPES['Integer'] => 'Integer',
24+
TYPES['Float'] => 'Float',
25+
TYPES['String'] => 'String'
26+
}.freeze
27+
28+
TYPES_FORMAT = {
29+
TYPES['Integer'] => 'q<', # 64 bit signed long int
30+
TYPES['Float'] => 'E' # 64 bit double
31+
}.freeze
32+
1733
def serialize(epoc:, key:, value:)
18-
header = serialize_header(epoc:, keysz: key.length, valuesz: value.length)
19-
data = key.encode('utf-8') + value.encode('utf-8')
34+
key_type = key.class.to_s
35+
value_type = value.class.to_s
36+
37+
key_raw = pack(key, key_type)
38+
value_raw = pack(value, value_type)
39+
40+
header = serialize_header(epoc: epoc, keysz: key_raw.length, key_type: key_type, value_type: value_type,
41+
valuesz: value_raw.length)
42+
data = key_raw + value_raw
2043

2144
[crc32_header_offset + data.length, crc32(header + data) + header + data]
2245
end
@@ -27,23 +50,29 @@ def deserialize(data)
2750

2851
return 0, '', '' unless validate_crc32(desearlize_crc32(crc_data), raw_data)
2952

30-
header_data = data[crc32_offset..header_offset - 1]
31-
epoc, keysz, valuesz = deserialize_header(header_data)
53+
header_data = data[crc32_offset..crc32_header_offset - 1]
54+
epoc, keysz, valuesz, key_type, value_type = deserialize_header(header_data)
55+
56+
key_type = TYPES_LOOK_UP[key_type]
57+
value_type = TYPES_LOOK_UP[value_type]
3258

33-
key = data[crc32_header_offset..crc32_header_offset + keysz - 1]
34-
value = data[crc32_header_offset + keysz..]
59+
key_raw = data[crc32_header_offset..crc32_header_offset + keysz - 1]
60+
value_raw = data[crc32_header_offset + keysz..]
61+
62+
key = unpack(key_raw, key_type)
63+
value = unpack(value_raw, value_type)
3564

3665
[epoc, key, value]
3766
end
3867

39-
def serialize_header(epoc:, keysz:, valuesz:)
40-
[epoc, keysz, valuesz].pack(HEADER_FORMAT)
68+
def serialize_header(epoc:, key_type:, keysz:, value_type:, valuesz:)
69+
[epoc, keysz, valuesz, TYPES[key_type], TYPES[value_type]].pack(HEADER_FORMAT)
4170
end
4271

4372
def deserialize_header(header_data)
4473
header = header_data.unpack(HEADER_FORMAT)
4574

46-
[header[0], header[1], header[2]]
75+
[header[0], header[1], header[2], header[3], header[4]]
4776
end
4877

4978
def crc32_offset
@@ -69,5 +98,27 @@ def desearlize_crc32(crc_data)
6998
def validate_crc32(digest, data)
7099
digest == Zlib.crc32(data)
71100
end
101+
102+
def pack(attribute, attribute_type)
103+
case attribute_type
104+
when 'Integer', 'Float'
105+
[attribute].pack(TYPES_FORMAT[TYPES[attribute_type]])
106+
when 'String'
107+
attribute.encode('utf-8')
108+
else
109+
raise StandardError, 'Invalid attribute_type'
110+
end
111+
end
112+
113+
def unpack(attribute, attribute_type)
114+
case attribute_type
115+
when 'Integer', 'Float'
116+
attribute.unpack1(TYPES_FORMAT[TYPES[attribute_type]])
117+
when 'String'
118+
attribute
119+
else
120+
raise StandardError, 'Invalid attribute_type'
121+
end
122+
end
72123
end
73124
end

spec/bitcask/disk_store_spec.rb

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@
1818
expect(subject.put(Faker::Lorem.word, Faker::Lorem.sentence(word_count: 10))).to be_nil
1919
expect(subject.put(Faker::Lorem.word, Faker::Lorem.sentence(word_count: 100))).to be_nil
2020
expect(subject.put(Faker::Lorem.word, Faker::Lorem.sentence(word_count: 1000))).to be_nil
21-
expect(subject.put(Faker::Lorem.word, Faker::Lorem.sentence(word_count: 10_000))).to be_nil
21+
expect(subject.put(rand(1..10_000), Faker::Lorem.sentence(word_count: 10_000))).to be_nil
22+
expect(subject.put(rand(10.2..100.234), rand(1..10_000))).to be_nil
23+
expect(subject.put(rand(1..10_000), rand(10.2..100.234))).to be_nil
2224
end
2325
end
2426

@@ -37,6 +39,8 @@
3739
expect(subject.get('ut')).to eq('Voluptatum esse non vero ut vitae harum blanditiis ducimus vel nam rerum quia ipsa necessitatibus quo eaque animi ab voluptatem sed sunt non ipsam aut velit rerum perspiciatis quasi doloribus omnis eum et reprehenderit qui minima aut illo veritatis atque sequi quas eius consequatur magni saepe numquam molestias odio beatae quo maiores dignissimos illum aut sint qui porro sed in enim enim asperiores et tenetur voluptas maxime possimus quidem accusantium laudantium aliquam ipsum voluptates consequuntur et tempora cumque voluptatem dolor tempore sint nemo ex omnis repudiandae aliquid pariatur neque nostrum debitis odit qui nihil voluptatem minus temporibus voluptatem ut sit.')
3840
expect(subject.get('saepe')).to eq('Non autem magni non quaerat non eos enim amet qui molestiae pariatur quam rerum facilis nulla tempora reprehenderit ipsa rerum sunt ducimus aspernatur magni blanditiis blanditiis eveniet sed nobis quisquam iusto quia corporis in deleniti repellendus iure similique facere maxime beatae aut quidem fuga labore laborum reprehenderit suscipit eveniet molestias aspernatur vel minus sunt quo reprehenderit sint deserunt corporis velit hic recusandae et voluptatem ipsa dolores eos facere eum qui ut nam et aperiam voluptatibus minima laborum doloremque officiis optio eaque voluptatibus et sint dicta esse ab ex ut cumque temporibus alias voluptatum qui iusto laboriosam nihil qui veritatis aut pariatur et sint rerum fugiat reiciendis non quia atque blanditiis et suscipit unde magni iste ea voluptates ex ad expedita eum ut quasi et adipisci reiciendis et a earum enim excepturi autem vel accusantium qui veniam est qui odio voluptatem aut deleniti omnis quia placeat ut modi nostrum tempora est labore ipsa accusamus et sit eum delectus nobis dolores laboriosam omnis est sed voluptatem neque ut dicta et est et dolor vero quis praesentium minima ut voluptas omnis ut velit sed eum harum veniam dignissimos tempora aliquid et consectetur in ducimus cum molestiae dolor hic quisquam sed tenetur non quam eaque fugiat ea qui esse recusandae nisi officia provident dolores facere voluptatem corrupti distinctio magnam perferendis sunt soluta ut perspiciatis laudantium officia veniam quia magnam sunt libero corporis ut nisi est reprehenderit saepe ut debitis et eum ipsum illo iste animi expedita rerum totam et animi temporibus voluptatum minus amet illum inventore vel aut dolorem est necessitatibus rem fugit est omnis laudantium dolor fugit sit distinctio voluptates voluptatem aut exercitationem placeat rerum molestias corrupti earum impedit numquam minima eum voluptate in quasi illo quo veritatis perferendis et est numquam sit quo dolores dolorem animi 
consectetur voluptatem est expedita commodi reprehenderit inventore accusamus nostrum sapiente adipisci molestiae nam et itaque odit assumenda cupiditate deserunt vel aut cupiditate quidem quo ut dolores aut ut ad nihil aut dolores sed eaque modi molestiae debitis qui omnis eius dicta dolorem odit ut et eveniet et repellat sit omnis porro nemo qui ullam culpa explicabo accusantium omnis aut ipsum sed dignissimos odit fugit cumque harum nihil ut repudiandae nisi impedit aut dolor velit nemo a nulla facilis eum optio quaerat repellendus expedita dolores sunt perferendis ullam optio ullam consequatur iste id saepe officiis aspernatur ea rerum et nesciunt sit velit et dolorem quo quam alias in omnis et quos consequatur consequatur natus ut ut mollitia ut quae sed hic et ex quae ut libero doloremque unde rem esse sint maiores cumque maiores non ratione vitae perferendis ratione qui sapiente doloribus consequuntur consequatur dicta laudantium eligendi qui voluptas voluptate amet vel quas sunt qui similique tenetur culpa alias accusamus repellat eos sint facere qui labore facilis vitae aliquam voluptates ipsum voluptatem voluptate est dolorum autem quia illum ea aliquid molestias voluptatem ipsam atque eaque mollitia minus excepturi aliquam est iure impedit deleniti occaecati et nemo dignissimos totam illo laudantium temporibus magni suscipit tempora veritatis aut autem ut quis necessitatibus natus maiores dignissimos nulla alias adipisci recusandae quis doloribus ipsam consequatur et quis eum inventore et architecto neque at sed aliquam asperiores voluptatem vel libero asperiores natus tempore sint distinctio voluptatem est commodi sequi saepe molestiae tempora maxime dolor facilis nesciunt repudiandae corporis consequatur similique itaque eum tempore et ex ipsa deserunt possimus ullam odio aut repellat molestiae earum ea et sit doloribus provident fugit eveniet sit excepturi adipisci nostrum ab corrupti quia harum omnis quia non deleniti libero et maiores consequatur 
eligendi accusantium quod nihil minus ut dicta voluptatem dolor distinctio sit harum dolorum est tenetur itaque quia vel est ratione atque ex voluptas est aut molestiae est sapiente commodi saepe atque possimus rerum nihil aut neque eum repellat porro hic est debitis sequi qui optio nam tempore aut nobis assumenda blanditiis commodi occaecati reiciendis et fuga hic rerum aut aut nam ut doloribus assumenda sit assumenda et dolorem nobis error debitis maxime et quod commodi asperiores voluptas velit earum et delectus enim qui quod sit corporis est totam blanditiis mollitia dolores voluptatem consequatur sunt ut cumque a et id consequuntur molestias eos occaecati mollitia at pariatur nesciunt reiciendis corrupti voluptatibus quia laboriosam excepturi qui cum odit neque dolore accusamus cupiditate et veniam animi necessitatibus nesciunt totam dolorem qui ea dolorum doloribus et odio sit fugit eos facere culpa perspiciatis ipsa consequuntur modi velit quidem aperiam labore consequatur animi ipsam exercitationem sed ipsam aperiam labore at cum tenetur dolor provident consequatur nobis quibusdam quaerat vel alias deserunt sed voluptas et et consectetur consequatur quos deleniti in dolorum ab voluptates quo id expedita quas eius esse quo explicabo fuga tempore ut quo velit enim voluptatibus voluptas placeat voluptas id porro molestiae autem accusantium sed voluptas voluptatem maxime enim illum sint omnis qui aperiam aliquam quis aut voluptatum in aut recusandae illum quia et quae quaerat asperiores explicabo autem sunt recusandae accusamus eius praesentium qui in quis voluptas nulla aut magnam quia sapiente ea eius est error soluta autem molestias enim rerum quod sed laborum rem delectus ea adipisci rerum cumque id harum enim omnis repellat veniam sed nostrum numquam quas incidunt quibusdam esse similique nemo qui autem perspiciatis non pariatur et beatae nisi repellendus itaque officiis consequatur qui cum voluptatem et placeat non sed aspernatur amet unde aut suscipit 
cupiditate doloremque soluta veritatis magni nihil error et totam sint porro et nostrum aut unde officia dolorem quia unde corrupti libero rem et et natus est vero consectetur modi quibusdam voluptatem fugiat dolore incidunt reiciendis asperiores nihil veritatis amet velit quo et eligendi numquam in ea id ut iure id sequi in consequatur id facilis dolore voluptas voluptatem quia qui quibusdam qui quos exercitationem ad omnis vitae sunt voluptate qui quo aut quod possimus est quos dolorem quasi aspernatur provident beatae iusto officia nihil voluptatum et et praesentium explicabo ut itaque id laboriosam aliquid autem ipsum quos vero id inventore quis officiis ad temporibus quisquam sit deserunt fugiat incidunt repellendus cum magnam quidem enim consectetur maxime laborum rerum.')
3941
expect(subject.get('suscipit')).to eq('Quia sapiente maiores sunt.')
42+
expect(subject.get(1)).to eq(10_000)
43+
expect(subject.get(1.000)).to eq(10_000.000)
4044
end
4145
end
4246
end
@@ -57,17 +61,21 @@
5761
context 'when the key is present' do
5862
let(:key_1) { Faker::Lorem.word }
5963
let(:key_2) { Faker::Lorem.word }
64+
let(:key_3) { rand(1..1000) }
6065
let(:value_1) { Faker::Lorem.sentence(word_count: 10_000) }
6166
let(:value_2) { Faker::Lorem.sentence(word_count: 100_000) }
67+
let(:value_3) { rand(11.2...76.9) }
6268

6369
before do
6470
subject.put(key_1, value_1)
6571
subject.put(key_2, value_2)
72+
subject.put(key_3, value_3)
6673
end
6774

6875
it 'returns value' do
6976
expect(subject.get(key_1)).to eq(value_1)
7077
expect(subject.get(key_2)).to eq(value_2)
78+
expect(subject.get(key_3)).to eq(value_3)
7179
end
7280
end
7381
end
@@ -80,7 +88,7 @@
8088
context 'when the db file has data' do
8189
it 'returns an array of keys' do
8290
expect(subject.keys.length).to eq(5)
83-
expect(subject.keys).to eq(%w[suscipit dolores ut saepe illo])
91+
expect(subject.keys).to eq([1, 1.0, 'ut', 'saepe', 'suscipit'])
8492
end
8593
end
8694
end

0 commit comments

Comments
 (0)