From 1e90eaaaf372a24b9e36ea1b0d4e27ea34a6ab5d Mon Sep 17 00:00:00 2001 From: niamtokik Date: Tue, 28 Dec 2021 10:11:39 +0000 Subject: [PATCH] Global Cleanup This project was sleeping for too long. Too many parts are not clean at all. So, here some cleanup: - add more test unit - clean msdos date/time format for zip - add crc32 - add version support - rewrite notes - rewrite the whole interface from scratch. - update github actions - update license (MIT) - create dotzip application - update notes regarding data-structure used - fix date and time ms-dos format (issue with endianess) - fix local file header encoding - update documentation - update with new extra fields and third party support - add extended timestamp third party support - add unix info new third party support --- .github/workflows/actions.yml | 44 +- LICENSE | 30 +- README.md | 22 +- lib/dotzip.ex | 320 +++++++++++++-- lib/dotzip/central_directory_header.ex | 13 +- lib/dotzip/compression_method.ex | 8 + lib/dotzip/crc32.ex | 17 + lib/dotzip/date.ex | 122 ++++++ lib/dotzip/decode.ex | 58 +++ lib/dotzip/encode.ex | 3 + lib/dotzip/end_central_directory.ex | 2 +- lib/dotzip/extra_field.ex | 6 +- lib/dotzip/extra_field/ntfs.ex | 43 ++ lib/dotzip/extra_field/openvms.ex | 47 +++ lib/dotzip/extra_field/os2.ex | 34 ++ lib/dotzip/extra_field/patch.ex | 5 + lib/dotzip/extra_field/pkcs7.ex | 5 + lib/dotzip/extra_field/strong_encryption.ex | 5 + lib/dotzip/extra_field/unix.ex | 88 +++- lib/dotzip/extra_field/x509.ex | 11 + .../extra_field/zip64_extended_information.ex | 45 +++ lib/dotzip/format.ex | 13 - lib/dotzip/general_purpose_bit_flag.ex | 103 +++++ lib/dotzip/headers/local_file.ex | 13 + lib/dotzip/local_file_header.ex | 380 +++++++++++------- lib/dotzip/server.ex | 8 + lib/dotzip/third_party.ex | 2 + lib/dotzip/third_party/extended_timestamp.ex | 185 +++++++++ lib/dotzip/third_party/info_zip_unix_new.ex | 66 +++ lib/dotzip/time.ex | 124 ++++++ lib/dotzip/version_made_by.ex | 42 ++ 
lib/dotzip/version_needed_to.ex | 104 +++++ lib/dotzip_app.ex | 8 + lib/extension.ex | 15 + mix.exs | 6 + notes/README.md | 311 +++++++++----- test/dotzip/crc32_test.exs | 13 + test/dotzip/date_test.exs | 4 + test/dotzip/decode_test.exs | 43 ++ test/dotzip/extra_field/os2_test.exs | 4 + test/dotzip/extra_field/unix_test.exs | 26 +- test/dotzip/local_file_header.exs | 9 + .../dotzip/third_party/extended_timestamp.exs | 4 + test/dotzip/third_party/info_zip_unix_new.exs | 4 + test/dotzip/time_test.exs | 4 + test/dotzip_test.exs | 4 - test/fixtures/a.zip | Bin 0 -> 162 bytes test/fixtures/directory.zip | Bin 0 -> 472 bytes test/fixtures/directory/file | 1 + test/fixtures/file | 1 + 50 files changed, 2088 insertions(+), 337 deletions(-) create mode 100644 lib/dotzip/compression_method.ex create mode 100644 lib/dotzip/crc32.ex create mode 100644 lib/dotzip/date.ex create mode 100644 lib/dotzip/decode.ex create mode 100644 lib/dotzip/encode.ex create mode 100644 lib/dotzip/extra_field/ntfs.ex create mode 100644 lib/dotzip/extra_field/openvms.ex create mode 100644 lib/dotzip/extra_field/os2.ex create mode 100644 lib/dotzip/extra_field/patch.ex create mode 100644 lib/dotzip/extra_field/pkcs7.ex create mode 100644 lib/dotzip/extra_field/strong_encryption.ex create mode 100644 lib/dotzip/extra_field/x509.ex create mode 100644 lib/dotzip/extra_field/zip64_extended_information.ex delete mode 100644 lib/dotzip/format.ex create mode 100644 lib/dotzip/general_purpose_bit_flag.ex create mode 100644 lib/dotzip/headers/local_file.ex create mode 100644 lib/dotzip/server.ex create mode 100644 lib/dotzip/third_party.ex create mode 100644 lib/dotzip/third_party/extended_timestamp.ex create mode 100644 lib/dotzip/third_party/info_zip_unix_new.ex create mode 100644 lib/dotzip/time.ex create mode 100644 lib/dotzip/version_made_by.ex create mode 100644 lib/dotzip/version_needed_to.ex create mode 100644 lib/dotzip_app.ex create mode 100644 lib/extension.ex create mode 100644 
test/dotzip/crc32_test.exs create mode 100644 test/dotzip/date_test.exs create mode 100644 test/dotzip/decode_test.exs create mode 100644 test/dotzip/extra_field/os2_test.exs create mode 100644 test/dotzip/local_file_header.exs create mode 100644 test/dotzip/third_party/extended_timestamp.exs create mode 100644 test/dotzip/third_party/info_zip_unix_new.exs create mode 100644 test/dotzip/time_test.exs create mode 100644 test/fixtures/a.zip create mode 100644 test/fixtures/directory.zip create mode 100644 test/fixtures/directory/file create mode 100644 test/fixtures/file diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml index e3a9c22..9c95593 100644 --- a/.github/workflows/actions.yml +++ b/.github/workflows/actions.yml @@ -3,16 +3,46 @@ on: [push] jobs: compile: runs-on: ubuntu-latest + name: OTP ${{matrix.otp}} / Elixir ${{matrix.elixir}} strategy: matrix: - otp: ['21.3', '22.2', '23.3', '24.1'] - elixir: ['1.9.4', '1.12.3', '1.13.1'] + otp: ['22.2', '23.1', '24.1'] + elixir: ['1.11.3', '1.12.3', '1.13.1'] steps: - - uses: actions/checkout@v2 - - uses: erlef/setup-beam@v1 + - name: Checkout repository + uses: actions/checkout@v2 + + - name: Configure Erlang OTP and Elixir + uses: erlef/setup-beam@v1 with: otp-version: ${{matrix.otp}} elixir-version: ${{matrix.elixir}} - - run: mix deps.get - - run: mix compile - - run: mix test + + - name: Fetch dependencies + run: mix deps.get + + - name: Compile application + run: mix compile + + - name: Test application + run: mix test + + - name: Generate documentation + run: mix docs + + - name: Upload documentation + uses: actions/upload-artifact@v2 + with: + name: documentation + path: doc + + - name: Generate hex release + run: mix hex.build + + - name: Upload hex release + uses: actions/upload-artifact@v2 + with: + name: hex + path: dotzip-*.tar + + \ No newline at end of file diff --git a/LICENSE b/LICENSE index d9f6448..a84ac17 100644 --- a/LICENSE +++ b/LICENSE @@ -1,14 +1,22 @@ +The MIT License 
(MIT) + Copyright (c) 2021 Mathieu Kerjouan -Permission to use, copy, modify, and distribute this software for any -purpose with or without fee is hereby granted, provided that the above -copyright notice and this permission notice appear in all copies. +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +“Software”), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL -WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE -AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL -DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR -PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER -TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR -PERFORMANCE OF THIS SOFTWARE. \ No newline at end of file +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/README.md b/README.md index fb6d9d6..84b8a49 100644 --- a/README.md +++ b/README.md @@ -6,17 +6,26 @@ > is defined by this format and no specific implementation guidance is > provided. This document provides details on the storage format for > creating ZIP files. Information is provided on the records and -> fields that describe what a ZIP file is. -- from [official -> specification +> fields that describe what a ZIP file is. +> +> -- from [official specification > file](https://pkware.cachefly.net/webdocs/APPNOTE/APPNOTE-6.3.3.TXT) -Note: This project is a work in progress. Please don't use it in -production. +NOTE: This project is a work in progress. Please don't use it in +production (even in staging). Things are moving, and nothing is +stable. Many notes are present in `notes` directory, feel free to +react. + +More information can be found in `notes/` directory. This code is +generated using TDD and literate programming. All function or modules +added must be documented (with examples at least) and tested before +commit. ## Installation -If [available in Hex](https://hex.pm/docs/publish), the package can be installed -by adding `dotzip` to your list of dependencies in `mix.exs`: +If [available in Hex](https://hex.pm/docs/publish), the package can be +installed by adding `dotzip` to your list of dependencies in +`mix.exs`: ```elixir def deps do @@ -70,4 +79,3 @@ Dotzip.decode(file) Documentation can be generated with [ExDoc](https://github.com/elixir-lang/ex_doc) and published on [HexDocs](https://hexdocs.pm). Once published, the docs can be found at [https://hexdocs.pm/dotzip](https://hexdocs.pm/dotzip). - diff --git a/lib/dotzip.ex b/lib/dotzip.ex index 946be1f..4d643ab 100644 --- a/lib/dotzip.ex +++ b/lib/dotzip.ex @@ -1,45 +1,305 @@ defmodule Dotzip do - - @moduledoc """ - Elixir Implementation of ZIP File Format. + @moduledoc ~S""" + Elixir Implementation of ZIP File Format. 
This module is the main + interface to control the Dotzip application with simple, specified and + documented functions. + """ - """ + @type dotzip :: [] + @type file :: String.t() + @type opts :: Keyword.t() - def decode(data) do - {:ok, local, rest} = Dotzip.LocalFileHeader.decode(data) - {:ok, central, r} = Dotzip.CentralDirectoryHeader.decode(rest) - {:ok, e, rr} = Dotzip.EndCentralDirectory.decode(r) - {local, central, e, rr} + @doc ~S""" + `start/0` function starts the Dotzip application with default options. + + ## Examples + + iex> Dotzip.start() + :ok + + """ + def start(), do: start([]) + + @doc ~S""" + `start/1` function starts the Dotzip application with custom options. + + ## Examples + + iex> Dotzip.start([]) + :ok + + """ + def start(_opts), do: Application.start(:dotzip) + + @doc ~S""" + `check/0` function checks if the Dotzip application is running. + + ## Examples + + iex> Dotzip.check() + :ok + + """ + def check(), do: :wip + + @doc ~S""" + `stop/0` function stops the Dotzip application. + + ## Examples + + iex> Dotzip.stop() + :ok + + """ + def stop(), do: Application.stop(:dotzip) + + @doc ~S""" + See `preload/2` function. + + ## Examples + + iex> Dotzip.preload("test/fixtures/a.zip") + {:ok, reference} + + """ + def preload(target), do: preload(target, []) + + @doc ~S""" + `preload/2` function preloads a Zip archive present on the system by + extracting metadata and other information but not the content of + compressed files. This function is mainly used when users need to + work on massive archives without impacting BEAM memory. + + ## Examples + + iex> Dotzip.preload("test/fixtures/a.zip", []) + {:ok, reference} + + """ + def preload(_target, _opts), do: :wip + + @doc ~S""" + See `load/2` function. + + ## Examples + + iex> Dotzip.load("test/fixtures/a.zip") + {:ok, reference} + + """ + def load(target), do: load(target, []) + + + @doc ~S""" + `load/2` function loads a Zip archive present on the system. 
Content + of compressed files is also stored in memory and can impact the + whole performance of the BEAM. + + ## Examples + + iex> Dotzip.load("test/fixtures/a.zip", []) + {:ok, reference} + + """ + def load(_target, _opts), do: :wip + + @doc ~S""" + See `analyze/2` function. + + ## Examples + + iex> Dotzip.analyze(reference) + {:ok, analysis} + + """ + def analyze(reference), do: analyze(reference, []) + + @doc ~S""" + `analyze/2` function is used to analyze metadata and content of + a loaded or preloaded archive. + + ## Examples + + iex> Dotzip.analyze(reference, []) + {:ok, analysis} + + """ + def analyze(_reference, _opts), do: :wip + + @doc ~S""" + + See `extract/3` function. Extracts by default in `/tmp` directory on + Unix/Linux systems. + + ## Examples + + iex> Dotzip.extract(reference) + {:ok, info} + + iex> Dotzip.extract("test/fixtures/a.zip") + {:ok, info} + + """ + def extract(reference, target), do: extract(reference, target, []) + + @doc ~S""" + `extract/3` function extracts the content of a loaded or preloaded + archive directly on the filesystem. + + ## Examples + + iex> Dotzip.extract(reference, "/tmp") + {:ok, info} + + iex> Dotzip.extract("test/fixtures/a.zip", destination: "/tmp") + {:ok, info} + + """ + def extract(_reference, _target, _opts), do: :wip + + @doc ~S""" + `unload/1` function unloads a loaded or preloaded archive. + + ## Examples + + iex> Dotzip.unload(reference) + :ok + + """ + def unload(_reference), do: :wip + + @doc ~S""" + See `new/1` function. + + ## Examples + + iex> Dotzip.new() + {:ok, reference} + + """ + @spec new() :: dotzip() + def new() do + new([]) end - def decode_file(file) do - {:ok, data} = :file.read_file(file) - decode(data) + @doc ~S""" + `new/1` function creates a new Dotzip reference, an empty archive + directly in memory. 
+ + ## Examples + + iex> Dotzip.new([]) + {:ok, reference} + + """ + @spec new(opts()) :: dotzip() + def new(_opts) do + [] end - def encode(data) do - {:error, :not_supported} + @doc ~S""" + See `add/3` function. + + ## Examples + + iex> Dotzip.new() |> Dotzip.add("test/fixtures/a.zip") + {:ok, info} + + """ + @spec add(dotzip(), file()) :: dotzip + def add(zip, file) do + add(zip, file, []) end - def encode_file(_file) do - {:error, :not_supported} + @doc ~S""" + `add/3` add a new file in the archive. + + ## Examples + + iex> Dotzip.new() |> Dotzip.add("test/fixtures/a.zip", compressed: :lz4) + {:ok, info} + + """ + @spec add(dotzip(), file(), opts()) :: dotzip + def add(zip, file, opts) when is_bitstring(file) do + add(zip, {:file, file}, opts) + end + def add(zip, {:file, file}, _opts) do + [%{name: file}|zip] + end + def add(zip, {:raw, file, content}, _opts) do + [%{name: file, content: content}|zip] + end + def add(zip, {:external, file, _url}, _opts) do + [%{name: file}|zip] + end + def add(zip, {:directory, file}, _opts) do + [%{name: file, uncompressed_size: 0, compression_size: 0 }|zip] + end + def add(_zip, _file, _opts) do end - def end_central_directory?(<>) do - end_central_directory = Dotzip.EndCentralDirectory.signature() - signature == end_central_directory + @doc ~S""" + See `delete/3` function. + + ## Examples + + iex> Dotzip.delete(reference, "/file") + :ok + + """ + @spec delete(dotzip(), file()) :: dotzip() + def delete(zip, file) do + delete(zip, file, []) end - - def central_directory_header?(<>) do - central_directory_header = Dotzip.CentralDirectoryHeader.signature() - signature == central_directory_header + + @doc ~S""" + `delete/3` function remove a file from an in memory archive. 
+ + ## Examples + + iex> Dotzip.delete(reference, "/file", []) + :ok + + """ + @spec delete(dotzip(), file(), opts()) :: dotzip() + def delete(_zip, _file, _opts) do end - - def local_file_header?(<>) do - local_file_header = Dotzip.LocalFileHeader.signature() - signature == local_file_header - end - + + @doc ~S""" + See `update/4` function. + """ + @spec update(dotzip(), file(), bitstring()) :: dotzip() + def update(zip, file, content), do: update(zip, file, content, []) + + @doc ~S""" + `update/4` function updates the content of a file; it can also alter + metadata and other elements of the archived file. + """ + @spec update(dotzip(), file(), bitstring(), opts()) :: dotzip() + def update(_zip, _file, _content, _opts), do: :wip + + @doc ~S""" + `set/2` function configures options for the whole archive. + + ## Examples + + iex> Dotzip.set(reference, compression: :lz4) + :ok + + """ + def set(reference, opts), do: set(reference, :all, opts) + + @doc ~S""" + `set/3` function configures options for individual archived files. + + ## Examples + + iex> Dotzip.set(reference, "path/to/my/file", compression: :lz4) + :ok + + """ + def set(_reference, _target, _opts), do: :wip + end - diff --git a/lib/dotzip/central_directory_header.ex b/lib/dotzip/central_directory_header.ex index 045f825..3d3bbd5 100644 --- a/lib/dotzip/central_directory_header.ex +++ b/lib/dotzip/central_directory_header.ex @@ -1,5 +1,14 @@ defmodule Dotzip.CentralDirectoryHeader do + @moduledoc """ + `Dotzip.CentralDirectoryHeader` module is a low level module used to + decode and encode the Central Directory Header data structure. This module + should not be used by developers as is, and can be changed at + any time. The only stable interfaces are `decode/1`, `decode/2`, + `encode/1` and `encode/2` functions. The generated data structure may + change during the development phase. 
+ """ + def signature() do << 0x50, 0x4b, 0x01, 0x02 >> end @@ -16,7 +25,7 @@ defmodule Dotzip.CentralDirectoryHeader do {:ok, Map.put(data, :version_made, decode_version_made_type(version)), rest} end - defp decode_version_made_type(<>) do + defp decode_version_made_type(<>) do # this algorithm seems false, at least for the version # need to be investigated attribute = case version do @@ -202,7 +211,7 @@ defmodule Dotzip.CentralDirectoryHeader do end def decode(data) when is_bitstring(data) do - {:ok, central_directory_header, rest} = signature(data) + signature(data) |> version_made() |> version_needed() |> purpose_flag() diff --git a/lib/dotzip/compression_method.ex b/lib/dotzip/compression_method.ex new file mode 100644 index 0000000..baa1723 --- /dev/null +++ b/lib/dotzip/compression_method.ex @@ -0,0 +1,8 @@ +defmodule Dotzip.CompressionMethod do + + def decode() do + end + + def encode() do + end +end diff --git a/lib/dotzip/crc32.ex b/lib/dotzip/crc32.ex new file mode 100644 index 0000000..c262854 --- /dev/null +++ b/lib/dotzip/crc32.ex @@ -0,0 +1,17 @@ +defmodule Dotzip.Crc32 do + def raw(bitstring) do + raw(bitstring, []) + end + def raw(bitstring, _opts) do + checksum = :erlang.crc32(bitstring) + {:ok, <>} + end + + def file(path) do + file(path, []) + end + def file(path, opts) do + {:ok, content} = File.read(path) + raw(content, opts) + end +end diff --git a/lib/dotzip/date.ex b/lib/dotzip/date.ex new file mode 100644 index 0000000..1c2e5ad --- /dev/null +++ b/lib/dotzip/date.ex @@ -0,0 +1,122 @@ +defmodule Dotzip.Date do + + @moduledoc """ + + This module implement MS-DOS Date format. 
Here some source if you + want Microsoft Date Format specification: + + - https://docs.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-dosdatetimetofiletime?redirectedfrom=MSDN + - https://docs.microsoft.com/en-us/cpp/c-runtime-library/32-bit-windows-time-date-formats?view=msvc-170 + + """ + + @doc ~S""" + + `decode/1` function decode a binary string and convert it in + `Date.t()` data type. + + ## Examples + + iex> Dotzip.Date.decode(<<0x9c, 0x53>>) + {:ok, ~D[2021-12-28]} + + """ + @spec decode(bitstring()) :: {:ok, Date.t()} + def decode(<> = _bitstring) do + <> = <> + Date.new(1980+offset, month, day) + end + + @doc ~S""" + + `decode!/1` function decode a binary string and convert it in + `Date.t()` data type. + + ## Examples + + iex> Dotzip.Date.decode!(<<0x9c, 0x53>>) + ~D[2021-12-28] + + """ + @spec decode!(bitstring()) :: Date.t() + def decode!(bitstring) do + {:ok, decoded} = decode(bitstring) + decoded + end + + @doc ~S""" + + `encode/1` function encode a `Date.t()` type into MS-DOS Date + Format. + + ## Examples + + iex> Dotzip.Date.encode(~D[2021-12-28]) + {:ok, <<156, 83>>} + + """ + @spec encode(Date.t()) :: {:ok, bitstring()} + def encode(date) do + case date.year >= 1980 and date.year <= 2107 do + true -> + day = date.day + month = date.month + offset = date.year - 1980 + <> = <> + {:ok, <>} + false -> + {:error, "year is less than 1980 or greater than 2108"} + end + end + + @doc ~S""" + + `encode!/1` function encode a `Date.t()` type into MS-DOS Date + Format. + + ## Examples + + iex> Dotzip.Date.encode!(~D[2021-12-28]) + <<156, 83>> + + """ + @spec encode!(Date.t()) :: bitstring() + def encode!(date) do + {:ok, encoded} = encode(date) + encoded + end + + @doc ~S""" + + `encode/3` function encode year, month and day in MS-DOS Date + Format. 
+ + ## Examples + + iex> Dotzip.Date.encode(2021,12,28) + {:ok, <<156, 83>>} + + """ + @spec encode(integer(), integer(), integer()) :: {:ok, bitstring()} + def encode(year, month, day) do + {:ok, date} = Date.new(year, month, day) + encode(date) + end + + @doc ~S""" + + `encode!/3` function encode year, month and day in MS-DOS Date + Format. + + ## Examples + + iex> Dotzip.Date.encode!(2021,12,28) + <<156, 83>> + + """ + @spec encode!(integer(), integer(), integer()) :: bitstring() + def encode!(year, month, day) do + {:ok, encoded} = encode(year, month, day) + encoded + end +end diff --git a/lib/dotzip/decode.ex b/lib/dotzip/decode.ex new file mode 100644 index 0000000..7aeb774 --- /dev/null +++ b/lib/dotzip/decode.ex @@ -0,0 +1,58 @@ +defmodule Dotzip.Decode do + + def raw(data) do + raw(data, []) + end + + defp raw(<<>>, list) do + {:ok, list} + end + + defp raw(data, list) do + pattern = { local_file_header?(data), + central_directory_header?(data), + end_central_directory?(data) } + case pattern do + {true,_,_} -> + {:ok, result, rest} = Dotzip.LocalFileHeader.decode(data) + raw(rest, [result|list]) + {_,true,_} -> + {:ok, result, rest} = Dotzip.CentralDirectoryHeader.decode(data) + raw(rest, [result|list]) + {_,_,true} -> + {:ok, result, rest} = Dotzip.EndCentralDirectory.decode(data) + raw(rest, [result|list]) + {_,_,_} -> + {:ok, list} + end + end + + def file(file) do + {:ok, data} = File.read(file) + raw(data) + end + + def encode(_data) do + {:error, :not_supported} + end + + def encode_file(_file) do + {:error, :not_supported} + end + + def end_central_directory?(<>) do + end_central_directory = Dotzip.EndCentralDirectory.signature() + signature == end_central_directory + end + + def central_directory_header?(<>) do + central_directory_header = Dotzip.CentralDirectoryHeader.signature() + signature == central_directory_header + end + + def local_file_header?(<>) do + local_file_header = Dotzip.LocalFileHeader.signature() + signature == local_file_header 
+ end + +end diff --git a/lib/dotzip/encode.ex b/lib/dotzip/encode.ex new file mode 100644 index 0000000..3dba532 --- /dev/null +++ b/lib/dotzip/encode.ex @@ -0,0 +1,3 @@ +defmodule Dotzip.Encode do + +end diff --git a/lib/dotzip/end_central_directory.ex b/lib/dotzip/end_central_directory.ex index 00d2525..163c640 100644 --- a/lib/dotzip/end_central_directory.ex +++ b/lib/dotzip/end_central_directory.ex @@ -79,7 +79,7 @@ defmodule Dotzip.EndCentralDirectory do end def decode(file) do - {:ok, end_central_directory, rest} = signature(file) + signature(file) |> number_disk() |> number_disk_start() |> total_entries_disk() diff --git a/lib/dotzip/extra_field.ex b/lib/dotzip/extra_field.ex index af3cdff..7b99a61 100644 --- a/lib/dotzip/extra_field.ex +++ b/lib/dotzip/extra_field.ex @@ -1,9 +1,13 @@ defmodule Dotzip.ExtraField do + @moduledoc """ + """ + def encode() do end - def decode() do + def decode(bitstring), do: decode(bitstring, []) + def decode(_bitstring, _opts) do end end diff --git a/lib/dotzip/extra_field/ntfs.ex b/lib/dotzip/extra_field/ntfs.ex new file mode 100644 index 0000000..fb40ab4 --- /dev/null +++ b/lib/dotzip/extra_field/ntfs.ex @@ -0,0 +1,43 @@ +defmodule Dotzip.ExtraField.Ntfs do + + @moduledoc """ + + The following is the layout of the NTFS attributes + "extra" block. (Note: At this time the Mtime, Atime + and Ctime values MAY be used on any WIN32 system.) + + Note: all fields stored in Intel low-byte/high-byte order. + + Value Size Description + ----- ---- ----------- + 0x000a 2 bytes Tag for this "extra" block type + TSize 2 bytes Size of the total "extra" block + Reserved 4 bytes Reserved for future use + Tag1 2 bytes NTFS attribute tag value #1 + Size1 2 bytes Size of attribute #1, in bytes + (var) Size1 Attribute #1 data + . + . + . 
+ TagN 2 bytes NTFS attribute tag value #N + SizeN 2 bytes Size of attribute #N, in bytes + (var) SizeN Attribute #N data + + For NTFS, values for Tag1 through TagN are as follows: + (currently only one set of attributes is defined for NTFS) + + Tag Size Description + ----- ---- ----------- + 0x0001 2 bytes Tag for attribute #1 + Size1 2 bytes Size of attribute #1, in bytes + Mtime 8 bytes File last modification time + Atime 8 bytes File last access time + Ctime 8 bytes File creation time + + """ + + defstruct tsize: 0, reserved: <<>>, mtime: 0, atime: 0, ctime: 0, tags: [] + + def tag(), do: <<0x00, 0x0a>> + +end diff --git a/lib/dotzip/extra_field/openvms.ex b/lib/dotzip/extra_field/openvms.ex new file mode 100644 index 0000000..387638b --- /dev/null +++ b/lib/dotzip/extra_field/openvms.ex @@ -0,0 +1,47 @@ +defmodule Dotzip.ExtraField.Openvms do + + @moduledoc """ + + The following is the layout of the OpenVMS attributes + "extra" block. + + Note: all fields stored in Intel low-byte/high-byte order. + + Value Size Description + ----- ---- ----------- + 0x000c 2 bytes Tag for this "extra" block type + TSize 2 bytes Size of the total "extra" block + CRC 4 bytes 32-bit CRC for remainder of the block + Tag1 2 bytes OpenVMS attribute tag value #1 + Size1 2 bytes Size of attribute #1, in bytes + (var) Size1 Attribute #1 data + . + . + . + TagN 2 bytes OpenVMS attribute tag value #N + SizeN 2 bytes Size of attribute #N, in bytes + (var) SizeN Attribute #N data + + OpenVMS Extra Field Rules: + + 4.5.6.1. There will be one or more attributes present, which + will each be preceded by the above TagX & SizeX values. + These values are identical to the ATR$C_XXXX and ATR$S_XXXX + constants which are defined in ATR.H under OpenVMS C. Neither + of these values will ever be zero. + + 4.5.6.2. No word alignment or padding is performed. + + 4.5.6.3. A well-behaved PKZIP/OpenVMS program should never produce + more than one sub-block with the same TagX value. 
Also, there will + never be more than one "extra" block of type 0x000c in a particular + directory record. + + """ + + defstruct tsize: 0, crc: 0, tags: [] + + def tag(), do: <<0x00, 0x0c>> + +end + diff --git a/lib/dotzip/extra_field/os2.ex b/lib/dotzip/extra_field/os2.ex new file mode 100644 index 0000000..23ee83f --- /dev/null +++ b/lib/dotzip/extra_field/os2.ex @@ -0,0 +1,34 @@ +defmodule Dotzip.ExtraField.Os2 do + + @moduledoc """ + + The following is the layout of the OS/2 attributes "extra" + block. (Last Revision 09/05/95) + + Note: all fields stored in Intel low-byte/high-byte order. + + Value Size Description + ----- ---- ----------- + 0x0009 2 bytes Tag for this "extra" block type + TSize 2 bytes Size for the following data block + BSize 4 bytes Uncompressed Block Size + CType 2 bytes Compression type + EACRC 4 bytes CRC value for uncompress block + (var) variable Compressed block + + The OS/2 extended attribute structure (FEA2LIST) is + compressed and then stored in its entirety within this + structure. There will only ever be one "block" of data in + VarFields[]. 
+ + """ + + defstruct ctype: 0, block: <<>> + + def tag(), do: <<0x00, 0x09>> + + def encode(_data), do: {:error, :not_implemented} + + def decode(_data), do: {:error, :not_implemented} + +end diff --git a/lib/dotzip/extra_field/patch.ex b/lib/dotzip/extra_field/patch.ex new file mode 100644 index 0000000..1adfcd4 --- /dev/null +++ b/lib/dotzip/extra_field/patch.ex @@ -0,0 +1,5 @@ +defmodule Dotzip.ExtraField.Patch do + + def tag(), do: <<0x00, 0x0f>> + +end diff --git a/lib/dotzip/extra_field/pkcs7.ex b/lib/dotzip/extra_field/pkcs7.ex new file mode 100644 index 0000000..750db5c --- /dev/null +++ b/lib/dotzip/extra_field/pkcs7.ex @@ -0,0 +1,5 @@ +defmodule Dotzip.ExtraField.Pkcs7 do + + def tag(), do: <<0x00, 0x14>> + +end diff --git a/lib/dotzip/extra_field/strong_encryption.ex b/lib/dotzip/extra_field/strong_encryption.ex new file mode 100644 index 0000000..4fe872a --- /dev/null +++ b/lib/dotzip/extra_field/strong_encryption.ex @@ -0,0 +1,5 @@ +defmodule Dotzip.ExtraField.StrongEncryption do + + def tag(), do: <<0x00, 0x17>> + +end diff --git a/lib/dotzip/extra_field/unix.ex b/lib/dotzip/extra_field/unix.ex index c071146..04342b0 100644 --- a/lib/dotzip/extra_field/unix.ex +++ b/lib/dotzip/extra_field/unix.ex @@ -2,18 +2,92 @@ defmodule Dotzip.ExtraField.Unix do @moduledoc """ - This module encode and decode Unix extra field defined in section - 4.5.7 of the official documentation. + The following is the layout of the UNIX "extra" block. + Note: all fields are stored in Intel low-byte/high-byte + order. + + Value Size Description + ----- ---- ----------- + 0x000d 2 bytes Tag for this "extra" block type + TSize 2 bytes Size for the following data block + Atime 4 bytes File last access time + Mtime 4 bytes File last modification time + Uid 2 bytes File user ID + Gid 2 bytes File group ID + (var) variable Variable length data field + + The variable length data field will contain file type + specific data. 
Currently the only values allowed are + the original "linked to" file names for hard or symbolic + links, and the major and minor device node numbers for + character and block device nodes. Since device nodes + cannot be either symbolic or hard links, only one set of + variable length data is stored. Link files will have the + name of the original file stored. This name is NOT NULL + terminated. Its size can be determined by checking TSize - + 12. Device entries will have eight bytes stored as two 4 + byte entries (in little endian format). The first entry + will be the major device number, and the second the minor + device number. """ - defstruct atime: 0, mtime: 0, uid: 0, gid: 0, var: 0 + defstruct [ + atime: 0, + mtime: 0, + uid: 0, + gid: 0, + var: 0 + ] + + @spec tag() :: bitstring() + def tag(), do: <<0x00, 0x0d>> - defp tag() do - <<0x00, 0x0d>> + @spec is?(bitstring()) :: boolean() + def is?(<>), do: tag == tag() + + @spec decode(bitstring(), Keyword.t()) :: {:ok, map()} + def decode(bitstring, opts) do + {%{}, bitstring} + |> decode_tag(opts) + |> decode_gid(opts) + |> decode_uid(opts) + |> decode_mtime(opts) + |> decode_atime(opts) + |> decode_tsize(opts) end - - defp encode_tag({:ok, data, buffer}) do + + defp decode_tag({struct, <<0x00, 0x0d, rest ::bitstring>>}, _opts) do + {struct, rest} + end + + defp decode_tsize({struct, <>}, _opts) do + {Map.put(struct, :tsize, tsize), rest} + end + + defp decode_atime({struct, <>}, _opts) do + {Map.put(struct, :atime, atime), rest} + end + + defp decode_mtime({struct, <>}, _opts) do + {Map.put(struct, :mtime, mtime), rest} + end + + defp decode_uid({struct, <>}, _opts) do + {Map.put(struct, :uid, uid), rest} + end + + defp decode_gid({struct, <>}, _opts) do + {Map.put(struct, :gid, gid), rest} + end + + defp decode_variable_data_field({%{ tsize: tsize } = struct, data}, _opts) do + << var :: binary-little-size(tsize), rest :: bitstring >> = data + {Map.put(struct, :variable_data_field, var), rest} + end + + defp 
encode_tag({data, buffer}) do + tag = tag() {:ok, data, <>} end diff --git a/lib/dotzip/extra_field/x509.ex b/lib/dotzip/extra_field/x509.ex new file mode 100644 index 0000000..64af64e --- /dev/null +++ b/lib/dotzip/extra_field/x509.ex @@ -0,0 +1,11 @@ +defmodule Dotzip.ExtraField.X509.Individual do + + def tag(), do: <<0x00, 0x15>> + +end + +defmodule Dotzip.ExtraField.X509.Central do + + def tag(), do: <<0x00, 0x16>> + +end diff --git a/lib/dotzip/extra_field/zip64_extended_information.ex b/lib/dotzip/extra_field/zip64_extended_information.ex new file mode 100644 index 0000000..b7a1180 --- /dev/null +++ b/lib/dotzip/extra_field/zip64_extended_information.ex @@ -0,0 +1,45 @@ +defmodule Dotzip.ExtraField.Zip64ExtendedInformation do + + @spec tag() :: bitstring() + def tag(), do: <<0x01, 0x00>> + + @spec decode(bitstring()) :: {:ok, map(), bitstring()} + def decode(data), do: decode(data, []) + + @spec decode(bitstring(), Keyword.t()) :: {:ok, map(), bitstring()} + def decode(data, opts) do + {struct, rest} = data + |> decode_tag(opts) + |> decode_size(opts) + |> decode_original_size(opts) + |> decode_compressed_size(opts) + |> decode_relative_header_offset(opts) + |> decode_disk_start_number(opts) + {:ok, struct, rest} + end + + defp decode_tag(<<0x01, 0x00, rest :: bitstring>>, _opts) do + {%{}, rest} + end + + defp decode_size({struct, <>}, _opts) do + {Map.put(struct, :size, size), rest} + end + + defp decode_original_size({struct, <>}, _opts) do + {Map.put(struct, :original_size, original_size), rest} + end + + defp decode_compressed_size({struct, <>}, _opts) do + {Map.put(struct, :compressed_size, compressed_size), rest} + end + + defp decode_relative_header_offset({struct, <>}, _opts) do + {Map.put(struct, :relative_header_offset, offset), rest} + end + + defp decode_disk_start_number({struct, <>}, _opts) do + {Map.put(struct, :disk_start_number, disk), rest} + end + +end diff --git a/lib/dotzip/format.ex b/lib/dotzip/format.ex deleted file mode 100644 index 
a298997..0000000 --- a/lib/dotzip/format.ex +++ /dev/null @@ -1,13 +0,0 @@ -# https://docs.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-dosdatetimetofiletime?redirectedfrom=MSDN - -defmodule Dotzip.Format.Msdos do - - def decode_date(<>) do - Date.new(1980+offset, month, day) - end - - def decode_time(<>) do - Time.new(hour, minute, second*2, 0) - end - -end diff --git a/lib/dotzip/general_purpose_bit_flag.ex b/lib/dotzip/general_purpose_bit_flag.ex new file mode 100644 index 0000000..8faf968 --- /dev/null +++ b/lib/dotzip/general_purpose_bit_flag.ex @@ -0,0 +1,103 @@ +defmodule Dotzip.GeneralPurposeBitFlag do + + defstruct [ + encrypted: false, + compression_bits: [0, 0], + data_descriptor_crc: false, + enhanced_deflating: false, + compressed_patched_data: false, + strong_encryption: false, + efs: false, + enhanced_compression: false, + masked_encryption: false + ] + + @spec decode(bitstring()) :: {:ok, map(), bitstring()} + def decode(<>) do + {struct, _r} = {%Dotzip.GeneralPurposeBitFlag{}, flags} + |> decode_encrypted() + |> decode_compression_bits() + |> decode_data_descriptor_crc32() + |> decode_enhanced_deflating() + |> decode_compressed_patched_data() + |> decode_strong_encryption() + |> decode_unused() + |> decode_unused() + |> decode_unused() + |> decode_unused() + |> decode_efs() + |> decode_enhanced_compression() + |> decode_masked_encryption() + |> decode_reserved() + |> decode_reserved() + {:ok, struct, rest} + end + + defp decode_encrypted({struct, <<0::size(1), rest :: bitstring>>}) do + {Map.put(struct, :encrypted, :false), rest} + end + defp decode_encrypted({struct, <<1::size(1), rest :: bitstring>>}) do + {Map.put(struct, :encrypted, :true), rest} + end + + defp decode_compression_bits({struct, <>}) do + {Map.put(struct, :compression_bits, {bit1, bit2}), rest} + end + + defp decode_data_descriptor_crc32({struct, <>}) do + {Map.put(struct, :data_descriptor_crc, flag), rest} + end + + defp decode_enhanced_deflating({struct, 
<<0::size(1), rest :: bitstring>>}) do + {Map.put(struct, :enhanced_deflating, false), rest} + end + defp decode_enhanced_deflating({struct, <<1::size(1), rest :: bitstring>>}) do + {Map.put(struct, :enhanced_deflating, true), rest} + end + + defp decode_compressed_patched_data({struct, <<0::size(1), rest :: bitstring>>}) do + {Map.put(struct, :compressed_patched_data, false), rest} + end + defp decode_compressed_patched_data({struct, <<1::size(1), rest :: bitstring>>}) do + {Map.put(struct, :compressed_patched_data, true), rest} + end + + defp decode_strong_encryption({struct, <<0::size(1), rest :: bitstring>>}) do + {Map.put(struct, :strong_encryption, false), rest} + end + defp decode_strong_encryption({struct, <<1::size(1), rest :: bitstring>>}) do + {Map.put(struct, :strong_encryption, true), rest} + end + + defp decode_unused({struct, <<_::size(1), rest :: bitstring>>}) do + {struct, rest} + end + + defp decode_efs({struct, <<0::size(1), rest :: bitstring>>}) do + {Map.put(struct, :efs, false), rest} + end + defp decode_efs({struct, <<1::size(1), rest :: bitstring>>}) do + {Map.put(struct, :efs, true), rest} + end + + defp decode_enhanced_compression({struct, <<0::size(1), rest :: bitstring>>}) do + {Map.put(struct, :enhanced_compression, false), rest} + end + defp decode_enhanced_compression({struct, <<1::size(1), rest :: bitstring>>}) do + {Map.put(struct, :enhanced_compression, true), rest} + end + + defp decode_masked_encryption({struct, <<0::size(1), rest :: bitstring>>}) do + {Map.put(struct, :masked_encryption, false), rest} + end + defp decode_masked_encryption({struct, <<1::size(1), rest :: bitstring>>}) do + {Map.put(struct, :masked_encryption, true), rest} + end + + defp decode_reserved({struct, <<_::size(1), rest :: bitstring>>}) do + {struct, rest} + end + + def encode() do + end +end diff --git a/lib/dotzip/headers/local_file.ex b/lib/dotzip/headers/local_file.ex new file mode 100644 index 0000000..f03618d --- /dev/null +++ 
b/lib/dotzip/headers/local_file.ex @@ -0,0 +1,13 @@ +defmodule Dotzip.Headers.LocalFile do + + defstruct [ + header_signature: <<0x04, 0x03, 0x4b, 0x50>>, # 4 bytes + version: <<>>, # 2 bytes + general_purpose_bit_flag: <<>>, # 2 bytes + compression_method: <<>>, # 2 bytes + last_modification_time: <<>>, # 2 bytes + last_modification_date: <<>>, # 2 bytes + + ] + +end diff --git a/lib/dotzip/local_file_header.ex b/lib/dotzip/local_file_header.ex index 4d25fa8..bef5117 100644 --- a/lib/dotzip/local_file_header.ex +++ b/lib/dotzip/local_file_header.ex @@ -1,186 +1,282 @@ defmodule Dotzip.LocalFileHeader do - def signature() do - << 0x50, 0x4b, 0x03, 0x04 >> - end - - defp signature(<< 0x50, 0x4b, 0x03, 0x04, rest::bitstring >>) do - {:ok, %{}, rest} + @moduledoc """ + `Dotzip.LocalFileHeader` module is a low level module used to decode + and encode Local File Header data-structure. This module should not + be used by developers as it, and can be changed at anytime. Only + stable interfaces are `decode/1`, `decode/2`, `encode/1` and + `encode/2` functions. Generated data structure may change during + development phase. + """ + + @doc """ + See `decode/2` function. + """ + @spec decode(bitstring()) :: {:ok, map(), bitstring()} + def decode(data) do + decode(data, []) end - defp encode_signature(data) when is_map(data) do - {:ok, data, signature()} + @doc """ + `decode/2` function decode a binary zip payload and convert it in + `map()` data structure. Options passed as second argument can alter + the behavior of this function. 
+ """ + @spec decode(bitstring(), Keyword.t()) :: {:ok, map(), bitstring()} + def decode(data, opts) do + {struct, rest} = data + |> decode_signature(opts) + |> decode_version(opts) + |> decode_purpose_flag(opts) + |> decode_compression_method(opts) + |> decode_last_modification_time(opts) + |> decode_last_modification_date(opts) + |> decode_crc32(opts) + |> decode_compressed_size(opts) + |> decode_uncompressed_size(opts) + |> decode_file_name_length(opts) + |> decode_extra_field_length(opts) + |> decode_file_name(opts) + |> decode_extra_field(opts) + |> decode_content(opts) + {:ok, struct, rest} end - defp version({:ok, data, << version::binary-size(2), rest::bitstring >>}) do - {:ok, Map.put(data, :version, version), rest} + @doc """ + `signature/0` returns the local file header binary signature. + """ + @spec signature() :: bitstring() + def signature(), do: << 0x50, 0x4b, 0x03, 0x04 >> + + defp decode_signature(<< 0x50, 0x4b, 0x03, 0x04, rest::bitstring >>, _opts) do + {%{}, rest} end - defp encode_version({:ok, %{ :version => version } = data, buffer}) do - {:ok, data, <>} + defp decode_version({data, << version::binary-little-size(2), rest::bitstring >>}, _opts) do + {Map.put(data, :version, version), rest} end - defp purpose_flag({:ok, data, << purpose_flag::binary-size(2), rest::bitstring >>}) do - {:ok, Map.put(data, :purpose_flag, purpose_flag), rest} - end - - defp encode_purpose_flag({:ok, %{ :purpose_flag => purpose_flag } = data, buffer }) do - {:ok, data, <> } + defp decode_purpose_flag({data, << purpose_flag::binary-little-size(2), rest::bitstring >>}, _opts) do + {Map.put(data, :purpose_flag, purpose_flag), rest} end - defp compression_method_type(data) do - case data do - 0 -> :stored - 1 -> :shrunk - 2 -> :reduced_factor1 - 3 -> :reduced_factor2 - 4 -> :reduced_factor3 - 5 -> :reduced_factor4 - 6 -> :imploded - 7 -> :tokenizing - 8 -> :deflated - 9 -> :deflate64 - 10 -> :pkware - 11 -> :reserved - 12 -> :bzip2 - 13 -> :reserved - 14 -> :lzma - 15 
-> :reserved - 16 -> :reserved - 17 -> :reserved - 18 -> :terse - 19 -> :lz77 - 97 -> :wavpack - 98 -> :ppmd - end - end - - defp compression_method({:ok, data, << compression_method::little-size(16), rest::bitstring >>}) do + @spec compression_method_type(integer() | atom()) :: atom() | integer() + defp compression_method_type(0), do: :stored + defp compression_method_type(:stored), do: 0 + + defp compression_method_type(1), do: :shrunk + defp compression_method_type(:shrunk), do: 1 + + defp compression_method_type(2), do: :reduced_factor1 + defp compression_method_type(:reduced_factor1), do: 2 + + defp compression_method_type(3), do: :reduced_factor2 + defp compression_method_type(:reduced_factor2), do: 3 + + defp compression_method_type(4), do: :reduced_factor3 + defp compression_method_type(:reduced_factor3), do: 4 + + defp compression_method_type(5), do: :reduced_factor4 + defp compression_method_type(:reduced_factor4), do: 5 + + defp compression_method_type(6), do: :imploded + defp compression_method_type(:imploded), do: 6 + + defp compression_method_type(7), do: :tokenizing + defp compression_method_type(:tokenizing), do: 7 + + defp compression_method_type(8), do: :deflated + defp compression_method_type(:deflated), do: 8 + + defp compression_method_type(9), do: :deflated64 + defp compression_method_type(:deflated64), do: 9 + + defp compression_method_type(10), do: :pkware + defp compression_method_type(:pkware), do: 10 + + defp compression_method_type(11), do: :reserved + defp compression_method_type(:reserved), do: 11 + + defp compression_method_type(12), do: :bzip2 + defp compression_method_type(:bzip2), do: 12 + + defp compression_method_type(13), do: :reserved + defp compression_method_type(:reserved), do: 13 + + defp compression_method_type(14), do: :lzma + defp compression_method_type(:lzma), do: 14 + + defp compression_method_type(15), do: :reserved + defp compression_method_type(:reserved), do: 15 + + defp compression_method_type(16), do: :reserved 
+ defp compression_method_type(:reserved), do: 16 + + defp compression_method_type(17), do: :reserved + defp compression_method_type(:reserved), do: 17 + + defp compression_method_type(18), do: :terse + defp compression_method_type(:terse), do: 18 + + defp compression_method_type(19), do: :lz77 + defp compression_method_type(:lz77), do: 19 + + defp compression_method_type(97), do: :wavpack + defp compression_method_type(:wavpack), do: 97 + + defp compression_method_type(98), do: :ppmd + defp compression_method_type(:ppmd), do: 98 + + defp decode_compression_method({data, << compression_method::little-size(16), rest::bitstring >>}, _opts) do method = compression_method_type(compression_method) - {:ok, Map.put(data, :compression_method, method), rest} + {Map.put(data, :compression_method, method), rest} end - defp encode_compression_method({:ok, %{ :compression_method => compression_method } = data, buffer}) do - {:ok, data, <> } + defp decode_last_modification_time({data, << last_modification_time::binary-little-size(2), rest::bitstring >>}, _opts) do + {:ok, decoded} = Dotzip.Time.decode(last_modification_time) + {Map.put(data, :last_modification_time, decoded), rest} end - defp last_modification_time({:ok, data, << last_modification_time::little-size(16), rest::bitstring >>}) do - {:ok, Map.put(data, :last_modification_time, last_modification_time), rest} + defp decode_last_modification_date({data, << last_modification_date::binary-little-size(2), rest::bitstring >>}, _opts) do + last_modification_date |> IO.inspect() + {:ok, decoded} = Dotzip.Date.decode(last_modification_date) + {Map.put(data, :last_modification_date, decoded), rest} end - defp encode_last_modification_time({:ok, %{ :last_modification_time => last_modification_time } = data, buffer}) do - {:ok, data, <>} + defp decode_crc32({data, << crc32::binary-size(4), rest::bitstring >>}, _opts) do + {Map.put(data, :crc32, crc32), rest} end - defp last_modification_date({:ok, data, << 
last_modification_date::little-binary-size(2), rest::bitstring >>}) do - {:ok, Map.put(data, :last_modification_date, last_modification_date), rest} + defp decode_extra_field_length({data, << extra_field_length::little-size(16), rest::bitstring >>}, _opts) do + {Map.put(data, :extra_field_length, extra_field_length), rest} end - defp encode_last_modification_date({:ok, %{ :last_modification_date => last_modification_date } = data, buffer}) do - {:ok, data, <>} + defp decode_compressed_size({data, << compressed_size::little-size(32), rest::bitstring >>}, _opts) do + {Map.put(data, :compressed_size, compressed_size), rest} end - defp crc32({:ok, data, << crc32::binary-size(4), rest::bitstring >>}) do - {:ok, Map.put(data, :crc32, crc32), rest} + defp decode_uncompressed_size({data, << uncompressed_size::little-size(32), rest::bitstring >>}, _opts) do + {Map.put(data, :uncompressed_size, uncompressed_size), rest} end - defp encode_crc32({:ok, %{ :crc32 => crc32 } = data, buffer}) do - {:ok, data, <> } + defp decode_file_name_length({data, << file_name_length::little-size(16), rest::bitstring >>}, _opts) do + {Map.put(data, :file_name_length, file_name_length), rest } end - defp compressed_size({:ok, data, << compressed_size::little-size(32), rest::bitstring >>}) do - {:ok, Map.put(data, :compressed_size, compressed_size), rest} - end - - defp encode_compressed_size({:ok, %{ :compressed_size => compressed_size } = data, buffer}) do - {:ok, data, <>} - end - - defp uncompressed_size({:ok, data, << uncompressed_size::little-size(32), rest::bitstring >>}) do - {:ok, Map.put(data, :uncompressed_size, uncompressed_size), rest} - end - - defp encode_uncompressed_size({:ok, %{ :uncompressed_size => uncompressed_size } = data, buffer }) do - {:ok, data, <>} - end - - defp file_name_length({:ok, data, << file_name_length::little-size(16), rest::bitstring >>}) do - {:ok, Map.put(data, :file_name_length, file_name_length), rest } - end - - defp encode_file_name_length({:ok, %{ 
:file_name_length => file_name_length } = data, buffer}) do - {:ok, data, <> } - end - - defp extra_field_length({:ok, data, << extra_field_length::little-size(16), rest::bitstring >>}) do - {:ok, Map.put(data, :extra_field_length, extra_field_length), rest} - end - - defp encode_extra_field_length({:ok, %{ :extra_field_length => extra_field_length } = data, buffer}) do - {:ok, data, <>} - end - - defp file_name({:ok, data, rest}) do + defp decode_file_name({data, rest}, _opts) do %{ :file_name_length => file_name_length } = data <> = rest - {:ok, Map.put(data, :file_name, file_name), r} + {Map.put(data, :file_name, file_name), r} end - defp encode_file_name({:ok, %{ :file_name => file_name, :file_name_length => file_name_length } = data, buffer}) do - {:ok, data, <>} - end - - defp extra_field({:ok, data, rest}) do + defp decode_extra_field({data, rest}, _opts) do %{ :extra_field_length => extra_field_length } = data <> = rest - {:ok, Map.put(data, :extra_field, extra_field), r} + {Map.put(data, :extra_field, extra_field), r} end - defp encode_extra_field({:ok, %{ :extra_field => extra_field, :extra_field_length => extra_field_length } = data, buffer}) do - {:ok, data, <>} + defp decode_content({data, rest}, opts) do + preload = Keyword.get(opts, :preload, :false) + case preload do + false -> + %{ compressed_size: compressed_size } = data + <> = rest + {Map.put(data, :content, content), r} + true -> + %{ compressed_size: compressed_size } = data + <<_::binary-size(compressed_size), r::bitstring>> = rest + {Map.put(data, :content, {:ref, :wip}), r} + end end - defp content({:ok, data, rest}) do - %{ :compressed_size => compressed_size } = data - <> = rest - {:ok, Map.put(data, :content, content), r} + + @doc """ + See `encode/2` function. 
+ """ + @spec encode(map()) :: {:ok, bitstring()} + def encode(struct) do + encode(struct, []) end - defp encode_content({:ok, %{ :compressed_size => compressed_size, :content => content } = data, buffer}) do - {:ok, data, <>} - end - - def decode(data) do - signature(data) - |> version() - |> purpose_flag() - |> compression_method() - |> last_modification_time() - |> last_modification_date() - |> crc32() - |> compressed_size() - |> uncompressed_size() - |> file_name_length() - |> extra_field_length() - |> file_name() - |> extra_field() - |> content() + @doc """ + `encode/2` function takes a `map()` structure and encode it in + `bitstring()`. Options can alter the behaviour of the encoding. + """ + @spec encode(map(), Keyword.t()) :: bitstring() + def encode(struct, opts) do + ret = encode_signature(struct, opts) + |> encode_version(opts) + |> encode_purpose_flag(opts) + |> encode_compression_method(opts) + |> encode_last_modification_time(opts) + |> encode_last_modification_date(opts) + |> encode_crc32(opts) + |> encode_compressed_size(opts) + |> encode_uncompressed_size(opts) + |> encode_file_name_length(opts) + |> encode_extra_field_length(opts) + |> encode_file_name(opts) + |> encode_extra_field(opts) + |> encode_content(opts) + {:ok, ret} end - def encode(data) do - encode_signature(data) - |> encode_version() - |> encode_purpose_flag() - |> encode_compression_method() - |> encode_last_modification_time() - |> encode_last_modification_date() - |> encode_crc32() - |> encode_compressed_size() - |> encode_uncompressed_size() - |> encode_file_name_length() - |> encode_extra_field_length() - |> encode_file_name() - |> encode_extra_field() - |> encode_content() - end + defp encode_signature(data, _opts) when is_map(data) do + {data, signature()} + end + + defp encode_version({%{ :version => version } = data, buffer}, _opts) do + {data, <>} + end + + defp encode_purpose_flag({%{ :purpose_flag => purpose_flag } = data, buffer }, _opts) do + {data, <> } + end + + defp 
encode_compression_method({%{ :compression_method => compression_method } = data, buffer}, _opts) do + type = compression_method_type(compression_method) + {data, <> } + end + + defp encode_last_modification_time({ %{ :last_modification_time => last_modification_time } = data, buffer}, _opts) do + {:ok, encoded} = Dotzip.Time.encode(last_modification_time) + {data, <>} + end + + defp encode_last_modification_date({ %{ :last_modification_date => last_modification_date } = data, buffer}, _opts) do + {:ok, encoded} = Dotzip.Date.encode(last_modification_date) + {data, <>} + end + + defp encode_crc32({ %{ :crc32 => crc32 } = data, buffer}, _opts) do + {data, <> } + end + + defp encode_compressed_size({ %{ :compressed_size => compressed_size } = data, buffer}, _opts) do + {data, <>} + end + + defp encode_uncompressed_size({ %{ :uncompressed_size => uncompressed_size } = data, buffer }, _opts) do + {data, <>} + end + + defp encode_file_name_length({ %{ :file_name_length => file_name_length } = data, buffer}, _opts) do + {data, <> } + end + + defp encode_extra_field_length({ %{ :extra_field_length => extra_field_length } = data, buffer}, _opts) do + {data, <>} + end + + defp encode_file_name({ %{ :file_name => file_name, :file_name_length => file_name_length } = data, buffer}, _opts) do + {data, <>} + end + + defp encode_extra_field({%{ :extra_field => extra_field, :extra_field_length => extra_field_length } = data, buffer}, _opts) do + {data, <>} + end + + defp encode_content({%{ :compressed_size => compressed_size, :content => content } = data, buffer}, _opts) do + {data, <>} + end end diff --git a/lib/dotzip/server.ex b/lib/dotzip/server.ex new file mode 100644 index 0000000..7fb611e --- /dev/null +++ b/lib/dotzip/server.ex @@ -0,0 +1,8 @@ +defmodule Dotzip.Server do + + use GenServer + + def init(_args) do + {:ok, %{}} + end +end diff --git a/lib/dotzip/third_party.ex b/lib/dotzip/third_party.ex new file mode 100644 index 0000000..85865e7 --- /dev/null +++ 
b/lib/dotzip/third_party.ex @@ -0,0 +1,2 @@ +defmodule Dotzip.ThirdParty do +end diff --git a/lib/dotzip/third_party/extended_timestamp.ex b/lib/dotzip/third_party/extended_timestamp.ex new file mode 100644 index 0000000..3b9d345 --- /dev/null +++ b/lib/dotzip/third_party/extended_timestamp.ex @@ -0,0 +1,185 @@ +defmodule Dotzip.ThirdParty.ExtendedTimestamp do + + @moduledoc ~S""" + + Extended Timestamp Extra Field Naive Implementation. This code is + currently not safe. This module is a (really) low interface to + generate extension. + + """ + + @spec tag() :: bitstring() + def tag(), do: <<0x55, 0x54>> + + @doc ~S""" + See `decode/2` function. + """ + @spec decode(bitstring()) :: {:ok, map(), bitstring()} + def decode(data), do: decode(data, []) + + @doc ~S""" + + This code is currently not safe. + + ## Examples + + iex> Dotzip.ThirdParty.ExtendedTimestamp.decode(<<85, 84, 9, 0, 3, 78, 231, 202, 97, 78, 231, 202, 97>>) + {:ok, %{ atime: ~U[2021-12-28 10:30:38Z], flags: %{atime: true, ctime: false, mtime: true}, mtime: ~U[2021-12-28 10:30:38Z], + tsize: 9}, ""} + + """ + @spec decode(bitstring(), Keyword.t()) :: {:ok, map(), bitstring()} + def decode(data, opts) do + {struct, rest} = data + |> decode_tag(opts) + |> decode_tsize(opts) + |> decode_flags(opts) + |> decode_mtime(opts) + |> decode_atime(opts) + |> decode_ctime(opts) + {:ok, struct, rest} + end + + @doc ~S""" + See `decode/2` function. + """ + @spec decode!(bitstring()) :: {map(), bitstring()} + def decode!(data), do: decode!(data, []) + + @doc ~S""" + See `decode/2` function. 
+ """ + @spec decode!(bitstring(), Keyword.t()) :: {map(), bitstring()} + def decode!(data, opts) do + {:ok, struct, rest} = decode(data, opts) + {struct, rest} + end + + defp decode_tag(<<0x55, 0x54, rest :: bitstring>>, _opts) do + {%{}, rest} + end + + defp decode_tsize({struct, <>}, _opts) do + {Map.put(struct, :tsize, tsize), rest} + end + + defp decode_flags({struct, <>}, _opts) do + << _reserved :: size(5), ctime :: size(1), atime :: size(1), mtime :: size(1) >> = flags + decoded = %{ + mtime: to_boolean(mtime), + atime: to_boolean(atime), + ctime: to_boolean(ctime) + } + {Map.put(struct, :flags, decoded), rest} + end + + defp decode_mtime({%{ flags: %{ mtime: true } } = struct, <>}, _opts) do + {:ok, decoded} = DateTime.from_unix(mtime) + {Map.put(struct, :mtime, decoded), rest} + end + defp decode_mtime({struct, rest}, _opts), do: {struct, rest} + + defp decode_atime({%{ flags: %{ atime: true } } = struct, <>}, _opts) do + {:ok, decoded} = DateTime.from_unix(atime) + {Map.put(struct, :atime, decoded), rest} + end + defp decode_atime({struct, rest}, _opts), do: {struct, rest} + + defp decode_ctime({%{ flags: %{ ctime: true }} = struct, <>}, _opts) do + {:ok, decoded} = DateTime.from_unix(ctime) + {Map.put(struct, :ctime, decoded), rest} + end + defp decode_ctime({struct, rest}, _opts), do: {struct, rest} + + defp to_boolean(0), do: false + defp to_boolean(1), do: true + + defp from_boolean(:false), do: 0 + defp from_boolean(:true), do: 1 + + @doc ~S""" + See `encode/2` function. + """ + @spec encode(map()) :: {:ok, bitstring()} + def encode(decoded), do: encode(decoded, []) + + @doc ~S""" + + Warning: This code is currently not safe. + + `encode/2` function encode a map structure in to bitstring. 
+ + ## Examples + + iex> Dotzip.ThirdParty.ExtendedTimestamp.encode(%{ atime: 1640687438, flags: %{atime: true, ctime: false, mtime: true}, mtime: 1640687438}), + {:ok, <<85, 84, 9, 0, 3, 78, 231, 202, 97, 78, 231, 202, 97>>} + + """ + @spec encode(map(), Keyword.t()) :: {:ok, bitstring()} + def encode(decoded, opts) do + {_, encoded} = decoded + |> encode_tag(opts) + |> encode_tsize(opts) + |> encode_flags(opts) + |> encode_mtime(opts) + |> encode_atime(opts) + |> encode_ctime(opts) + {:ok, encoded} + end + + @doc ~S""" + See `encode/2` function. + """ + @spec encode!(map()) :: bitstring() + def encode!(decoded), do: encode(decoded, []) + + @doc ~S""" + See `encode/2` function. + """ + @spec encode!(map(), Keyword.t()) :: bitstring() + def encode!(decoded, opts) do + {:ok, encoded} = encode(decoded, opts) + encoded + end + + defp encode_tag(struct, _opts) do + {struct, tag()} + end + + @spec make_tsize(map(), integer()) :: integer() + defp make_tsize(flags, init) do + Enum.reduce(flags, init, fn + ({_, true}, a) -> a+1 + ({_, false}, a) -> a + end) + end + + defp encode_tsize({ %{ flags: flags }= struct, buffer}, _opts) do + tsize = (make_tsize(flags, 0)*4)+1 + {struct, <>} + end + + defp encode_flags({ %{ flags: %{ atime: atime, ctime: ctime, mtime: mtime } } = struct, buffer}, _opts) do + activated = <> + {struct, <>} + end + + defp encode_mtime({%{ flags: %{ mtime: true }, mtime: mtime } = struct, buffer}, _opts) do + encoded = DateTime.to_unix(mtime) + {struct, <>} + end + defp encode_mtime({struct, rest}, _opts), do: {struct, rest} + + defp encode_atime({%{ flags: %{ atime: true }, atime: atime } = struct, buffer}, _opts) do + encoded = DateTime.to_unix(atime) + {struct, <>} + end + defp encode_atime({struct, rest}, _opts), do: {struct, rest} + + defp encode_ctime({%{ flags: %{ ctime: true }, ctime: ctime} = struct, buffer}, _opts) do + encoded = DateTime.to_unix(ctime) + {struct, <>} + end + defp encode_ctime({struct, rest}, _opts), do: {struct, rest} + +end 
diff --git a/lib/dotzip/third_party/info_zip_unix_new.ex b/lib/dotzip/third_party/info_zip_unix_new.ex new file mode 100644 index 0000000..fecfb96 --- /dev/null +++ b/lib/dotzip/third_party/info_zip_unix_new.ex @@ -0,0 +1,66 @@ +defmodule Dotzip.ThirdParty.InfoZipUnixNew do + + @moduledoc """ + + Info-ZIP New Unix Extra Field Third Party Naive Implementation. This + code is currently not safe. + + """ + + @spec tag() :: bitstring() + def tag(), do: <<0x78, 0x75>> + + @spec decode(bitstring()) :: {:ok, map(), bitstring()} + def decode(data), do: decode(data, []) + + @doc """ + + This code is currently not safe. + + ## Examples + + iex> Dotzip.ThirdParty.InfoZipUnixNew.decode(<<117, 120, 11, 0, 1, 4, 232, 3, 0, 0, 4, 232, 3, 0, 0>>) + {:ok, %{gid: 1000, gid_size: 32, tsize: 11, uid: 1000, uid_size: 32, version: 1}, ""} + + """ + @spec decode(bitstring(), Keyword.t()) :: {:ok, map(), bitstring()} + def decode(data, opts) do + {struct, rest} = data + |> decode_tag(opts) + |> decode_tsize(opts) + |> decode_version(opts) + |> decode_uid_size(opts) + |> decode_uid(opts) + |> decode_gid_size(opts) + |> decode_gid(opts) + {:ok, struct, rest} + end + + defp decode_tag(<<0x75, 0x78, rest :: bitstring>>, _opts), do: {%{}, rest} + + defp decode_tsize({ struct, <>}, _opts) do + {Map.put(struct, :tsize, tsize), rest} + end + + defp decode_version({ struct, <>}, _opts) do + {Map.put(struct, :version, version), rest} + end + + defp decode_uid_size({ struct, <>}, _opts) do + {Map.put(struct, :uid_size, uid_size*8), rest} + end + + defp decode_uid({ %{ uid_size: uid_size } = struct, data}, _opts) do + << uid :: little-size(uid_size), rest :: bitstring >> = data + {Map.put(struct, :uid, uid), rest} + end + + defp decode_gid_size({ struct, <>}, _opts) do + {Map.put(struct, :gid_size, gid_size*8), rest} + end + + defp decode_gid({ %{ gid_size: gid_size } = struct, data}, _opts) do + << gid :: little-size(gid_size), rest :: bitstring >> = data + {Map.put(struct, :gid, gid), rest} + end 
+end diff --git a/lib/dotzip/time.ex b/lib/dotzip/time.ex new file mode 100644 index 0000000..22dd888 --- /dev/null +++ b/lib/dotzip/time.ex @@ -0,0 +1,124 @@ +defmodule Dotzip.Time do + + @moduledoc """ + + This module implement MS-DOS Time format. Here some source if you + want Microsoft Time Format specification: + + - https://docs.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-dosdatetimetofiletime?redirectedfrom=MSDN + - https://docs.microsoft.com/en-us/cpp/c-runtime-library/32-bit-windows-time-date-formats?view=msvc-170 + + """ + + @doc """ + + `decode/1` function decode MS-DOS Time format. This function + explicitely convert data from big-endian to little-endian. + + ## Examples + + This following example is from `a.zip` text file present in + `test/fixtures/a.zip`. + + iex> Dotzip.Time.decode(<<0xd3, 0x53>>) + {:ok, ~T[10:30:38.000000]} + + """ + @spec decode(bitstring()) :: {:ok, Time.t()} + def decode(<> = _bitstring) do + decode2(<>) + end + + + @doc """ + + `decode!/1` function decode MS-DOS Time format. This function + explicitely convert data from big-endian to little-endian. + + ## Examples + + This following example is from `a.zip` text file present in + `test/fixtures/a.zip`. + + iex> Dotzip.Time.decode!(<<0xd3, 0x53>>) + ~T[10:30:38.000000] + + """ + @spec decode!(bitstring()) :: Time.t() + def decode!(bitstring) do + {:ok, time} = decode(bitstring) + time + end + + defp decode2(<>) do + Time.new(hour, minute, 59, 0) + end + defp decode2(<>) do + Time.new(hour, minute, second*2, 0) + end + + @doc """ + `encode/1` function encode time in little-endian format. + + ## Examples + + iex> Dotzip.Time.encode(~T[10:30:38.000000]) + {:ok, <<211, 83>>} + + """ + @spec encode(Time.t()) :: {:ok, bitstring()} + def encode(time) do + second = :erlang.round(time.second/2) + minute = time.minute + hour = time.hour + <> = <> + {:ok, <>} + end + + @doc """ + `encode!/1` function encode time in little-endian format. 
+ + ## Examples + + iex> Dotzip.Time.encode!(~T[10:30:38.000000]) + <<211, 83>> + + """ + @spec encode!(Time.t()) :: bitstring() + def encode!(time) do + {:ok, encoded} = encode(time) + encoded + end + + @doc """ + `encode/3` encodes a time given as hour, minute and second in little-endian format. + + ## Examples + + iex> Dotzip.Time.encode(10,30,38) + {:ok, <<211, 83>>} + + """ + @spec encode(integer(), integer(), integer()) :: {:ok, bitstring()} | {:error, atom()} + def encode(hour, minute, second) do + case Time.new(hour, minute, second) do + {:ok, time} -> encode(time) + {:error, error} -> {:error, error} + end + end + + @doc """ + `encode!/3` works like `encode/3` but returns the bare bitstring and raises `MatchError` on invalid input. + + ## Examples + + iex> Dotzip.Time.encode!(10,30,38) + <<211, 83>> + + """ + @spec encode!(integer(), integer(), integer()) :: bitstring() + def encode!(hour, minute, second) do + {:ok, encoded} = encode(hour, minute, second) + encoded + end +end diff --git a/lib/dotzip/version_made_by.ex b/lib/dotzip/version_made_by.ex new file mode 100644 index 0000000..c1e5726 --- /dev/null +++ b/lib/dotzip/version_made_by.ex @@ -0,0 +1,46 @@ +defmodule Dotzip.VersionMadeBy do + @moduledoc """ + Maps the 16-bit "version made by" code to a host-system atom (`decode/1`) and back (`encode/1`, `encode/2`). + """ + + @spec decode(bitstring() | integer()) :: {:ok, atom(), bitstring()} + def decode(<<00::size(16), rest::bitstring>>), do: {:ok, :msdos, rest} + def decode(<<01::size(16), rest::bitstring>>), do: {:ok, :amiga, rest} + def decode(<<02::size(16), rest::bitstring>>), do: {:ok, :openvms, rest} + def decode(<<03::size(16), rest::bitstring>>), do: {:ok, :unix, rest} + def decode(<<04::size(16), rest::bitstring>>), do: {:ok, :vmcms, rest} + def decode(<<05::size(16), rest::bitstring>>), do: {:ok, :atarist, rest} + def decode(<<06::size(16), rest::bitstring>>), do: {:ok, :os2, rest} + def decode(<<07::size(16), rest::bitstring>>), do: {:ok, :macintosh, rest} + def decode(<<08::size(16), rest::bitstring>>), do: {:ok, :zsystem, rest} + def decode(<<09::size(16), rest::bitstring>>), do: {:ok, :cpm, rest} + def decode(<<10::size(16), rest::bitstring>>), do: {:ok, :ntfs, rest} + def decode(<<11::size(16), rest::bitstring>>), do: {:ok, :mvs, rest} + def decode(<<12::size(16), rest::bitstring>>), do: {:ok, :vse, rest} + 
def decode(<<13::size(16), rest::bitstring>>), do: {:ok, :acorn, rest} + def decode(<<14::size(16), rest::bitstring>>), do: {:ok, :vfat, rest} + def decode(<<15::size(16), rest::bitstring>>), do: {:ok, :alternatemvs, rest} + def decode(<<16::size(16), rest::bitstring>>), do: {:ok, :beos, rest} + def decode(<<17::size(16), rest::bitstring>>), do: {:ok, :tandem, rest} + def decode(<<18::size(16), rest::bitstring>>), do: {:ok, :os400, rest} + def decode(<<19::size(16), rest::bitstring>>), do: {:ok, :osx, rest} + def decode(<<_::size(16), rest::bitstring>>), do: {:ok, :unused, rest} + def decode(integer) when is_integer(integer), do: decode(<<integer::size(16)>>) + + @spec encode(atom()) :: {:ok, bitstring()} | {:error, any()} + def encode(:msdos), do: {:ok, <<0::size(16)>>} + def encode(:amiga), do: {:ok, <<1::size(16)>>} + def encode(:openvms), do: {:ok, <<2::size(16)>>} + def encode(:unix), do: {:ok, <<3::size(16)>>} + def encode(_), do: {:error, :unsupported} + + @spec encode(atom(), bitstring()) :: {:ok, bitstring()} | {:error, any()} + def encode(version, data) do + case encode(version) do + # NOTE(review): binary reconstructed as encoded-then-data; confirm operand order against upstream. + {:ok, encoded} -> {:ok, <<encoded::bitstring, data::bitstring>>} + {:error, error} -> {:error, error} + end + end + +end diff --git a/lib/dotzip/version_needed_to.ex b/lib/dotzip/version_needed_to.ex new file mode 100644 index 0000000..6385c00 --- /dev/null +++ b/lib/dotzip/version_needed_to.ex @@ -0,0 +1,108 @@ +defmodule Dotzip.VersionNeededTo do + @moduledoc """ + Maps the 16-bit "version needed to extract" value to a version string (`decode/1`) and back (`encode/1`, `encode/2`). + """ + + @spec decode(bitstring()) :: {:ok, bitstring(), bitstring()} | {:error, :unsupported} + def decode(<<10::size(16), rest::bitstring>>), do: {:ok, "1.0", rest} + def decode(<<11::size(16), rest::bitstring>>), do: {:ok, "1.1", rest} + def decode(<<20::size(16), rest::bitstring>>), do: {:ok, "2.0", rest} + def decode(<<21::size(16), rest::bitstring>>), do: {:ok, "2.1", rest} + def decode(<<25::size(16), rest::bitstring>>), do: {:ok, "2.5", rest} + def decode(<<27::size(16), rest::bitstring>>), do: {:ok, "2.7", rest} + def decode(<<45::size(16), rest::bitstring>>), do: {:ok, "4.5", rest} + def decode(<<46::size(16), rest::bitstring>>), do: {:ok, "4.6", rest} + def decode(<<50::size(16), rest::bitstring>>), do: {:ok, "5.0", rest} + def decode(<<51::size(16), rest::bitstring>>), do: {:ok, "5.1", rest} + def decode(<<52::size(16), 
rest::bitstring>>), do: {:ok, "5.2", rest} + def decode(<<61::size(16), rest::bitstring>>), do: {:ok, "6.1", rest} + def decode(<<62::size(16), rest::bitstring>>), do: {:ok, "6.2", rest} + def decode(<<63::size(16), rest::bitstring>>), do: {:ok, "6.3", rest} + def decode(_), do: {:error, :unsupported} + + @spec encode(bitstring() | atom()) :: {:ok, bitstring()} | {:error, :unsupported} + # 1.0 - Default value + def encode("1.0"), do: encode(:default) + def encode(:default), do: {:ok, <<10::size(16)>>} + + # 1.1 - File is a volume label + def encode("1.1"), do: encode(:volume) + def encode(:volume), do: {:ok, <<11::size(16)>>} + + # 2.0 - File is a folder (directory) + # 2.0 - File is compressed using Deflate compression + # 2.0 - File is encrypted using traditional PKWARE encryption + def encode("2.0"), do: encode(:folder) + def encode(:folder), do: {:ok, <<20::size(16)>>} + def encode(:deflate), do: {:ok, <<20::size(16)>>} + def encode(:pkware_encryption), do: {:ok, <<20::size(16)>>} + + # 2.1 - File is compressed using Deflate64(tm) + def encode("2.1"), do: encode(:deflate64) + def encode(:deflate64), do: {:ok, <<21::size(16)>>} + + # 2.5 - File is compressed using PKWARE DCL Implode + def encode("2.5"), do: encode(:pkware_dcl_implode) + def encode(:pkware_dcl_implode), do: {:ok, <<25::size(16)>>} + + # 2.7 - File is a patch data set + def encode("2.7"), do: encode(:patch_data) + def encode(:patch_data), do: {:ok, <<27::size(16)>>} + + # 4.5 - File uses ZIP64 format extensions + def encode("4.5"), do: encode(:zip64) + def encode(:zip64), do: {:ok, <<45::size(16)>>} + + # 4.6 - File is compressed using BZIP2 compression* + def encode("4.6"), do: encode(:bzip2) + def encode(:bzip2), do: {:ok, <<46::size(16)>>} + + # 5.0 - File is encrypted using DES + # 5.0 - File is encrypted using 3DES + # 5.0 - File is encrypted using original RC2 encryption + # 5.0 - File is encrypted using RC4 encryption + def encode("5.0"), do: encode(:des) + def encode(:des), do: {:ok, <<50::size(16)>>} + def encode(:'3des'), do: {:ok, <<50::size(16)>>} + def 
encode(:rc2), do: {:ok, <<50::size(16)>>} + def encode(:rc4), do: {:ok, <<50::size(16)>>} + + # 5.1 - File is encrypted using AES encryption + # 5.1 - File is encrypted using corrected RC2 encryption** + def encode("5.1"), do: encode(:aes) + def encode(:aes), do: {:ok, <<51::size(16)>>} + def encode(:rc2_corrected), do: {:ok, <<51::size(16)>>} + + # 5.2 - File is encrypted using corrected RC2-64 encryption** + def encode("5.2"), do: encode(:rc264_corrected) + def encode(:rc264_corrected), do: {:ok, <<52::size(16)>>} + + # 6.1 - File is encrypted using non-OAEP key wrapping*** + def encode("6.1"), do: encode(:oaep) + def encode(:oaep), do: {:ok, <<61::size(16)>>} + + # 6.2 - Central directory encryption + def encode("6.2"), do: encode(:directory_encryption) + def encode(:directory_encryption), do: {:ok, <<62::size(16)>>} + + # 6.3 - File is compressed using LZMA + # 6.3 - File is compressed using PPMd+ + # 6.3 - File is encrypted using Blowfish + # 6.3 - File is encrypted using Twofish + def encode("6.3"), do: encode(:lzma) + def encode(:lzma), do: {:ok, <<63::size(16)>>} + def encode(:ppmd), do: {:ok, <<63::size(16)>>} + def encode(:blowfish), do: {:ok, <<63::size(16)>>} + def encode(:twofish), do: {:ok, <<63::size(16)>>} + + def encode(_), do: {:error, :unsupported} + + @spec encode(bitstring() | atom(), bitstring()) :: {:ok, bitstring()} | {:error, :unsupported} + def encode(version, data) do + case encode(version) do + # NOTE(review): binary reconstructed as content-then-data; confirm operand order against upstream. + {:ok, content} -> {:ok, <<content::bitstring, data::bitstring>>} + {:error, error} -> {:error, error} + end + end +end diff --git a/lib/dotzip_app.ex b/lib/dotzip_app.ex new file mode 100644 index 0000000..d26d5d7 --- /dev/null +++ b/lib/dotzip_app.ex @@ -0,0 +1,8 @@ +defmodule DotzipApp do + use Application + + def start(_type, _args) do + children = [] + Supervisor.start_link(children, strategy: :one_for_one) + end +end diff --git a/lib/extension.ex b/lib/extension.ex new file mode 100644 index 0000000..7e23326 --- /dev/null +++ b/lib/extension.ex @@ -0,0 +1,15 @@ +defmodule Dotzip.Extensions do + + @moduledoc """ + 
+ List of supported ZIP extensions. This module is used to do a + quickcheck on filenames. + + """ + + def supported do + ["zip", "zipx", "jar", "war", "docx", "xlsx", "pptx", "odt", + "ods", "odp"] + end + +end diff --git a/mix.exs b/mix.exs index d9063d4..3f16d66 100644 --- a/mix.exs +++ b/mix.exs @@ -4,6 +4,11 @@ defmodule Dotzip.MixProject do def project do [ app: :dotzip, + description: "ZIP format implementation in Elixir", + package: %{ + licenses: ["MIT"], + links: %{ "GitHub" => "https://github.com/niamtokik/dotzip" } + }, version: "0.1.0", elixir: "~> 1.11", start_permanent: Mix.env() == :prod, @@ -20,6 +25,7 @@ defmodule Dotzip.MixProject do def application do [ + mod: {DotzipApp, []}, extra_applications: [:logger] ] end diff --git a/notes/README.md b/notes/README.md index 9edff8c..2ead44e 100644 --- a/notes/README.md +++ b/notes/README.md @@ -1,117 +1,228 @@ ---- ---- - This documentation is a work in progress regarding the way to use -Dotzip Elixir module. It should: +Dotzip Elixir module. This module should: - * be easy to understand (e.g. easy API) - * compatible with Erlang/Elixir release - * portable to any systems supported by Erlang/Elixir - * usable as stream of data - * offering an high level representation of the data/metadata - * easy to debug + * **be easy to understand (e.g. easy API)**: interfaces should follow + OTP and/or Elixir principles. Anyone who wants to use it should + simply read the introduction page. The documentation should cover 99% + of user requirements but can also offer some "expert" features. + + * **be compatible with Erlang/Elixir release**: this project should + be compatible with the BEAM virtual machine and usable with other + languages like Joxa, Clojerl, Erlang and Elixir. + + * **be portable to any systems supported by Erlang/Elixir**: it + should work on any "recent" version of OTP (>R19). 
+ + * **be usable as a stream of data**: this project should not have a + high memory impact; if an archive is too big, it should not be a + problem to use it in small systems. + + * **offer a high-level representation of the data/metadata**: a + clean representation of a ZIP archive should be generated and + hackable. Anyone who wants to design their own module or feature + should have all the information needed to do it. + + * **have no external requirements or dependencies**: this project + should not use any external project, except if the dependency is + vital for the project. + + * **be easy to debug**: parsing, encoding and decoding files can be + quite complex; this project should offer enough functions to let + anyone debug this project and other ZIP-related projects. + + * **offer a framework**: this project is a first step to create an + archive framework, where anyone can archive and compress data in + any kind of format. + + * **offer benchmarks**: this project should be benchmarked and + generate stats. -# Elixir + * **offer different ways to use it**: the first target is to use this + project as a library, but it could be nice to use it as a compression + daemon and/or system tool. + +# Dotzip Documentation Draft + +(Work in progress) Dotzip can be used as a library or as an OTP +application. As a library, Dotzip acts as a high-level interface for +creating Zip files. As an OTP application, Dotzip acts as a framework to +create, analyze or extract Zip archives by using optimized +functions. To use it as an application, users will need to start the `Dotzip` +application. + +``` +Application.start(:dotzip) +``` + +(Work in progress) One can also stop it. + +``` +Application.stop(:dotzip) +``` + +## Dotzip Library + +(Work in progress) To decode a Zip file from a bitstring, one can use the +`Dotzip.decode/1` or `Dotzip.decode/2` functions. 
+ +```elixir +{:ok, dotzip} = Dotzip.decode(bitstring) +``` + +(Work in progress) On the other hand, to encode an abstract Dotzip data +structure as a Zip file, one can use the `Dotzip.encode/1` or +`Dotzip.encode/2` functions. + +```elixir +{:ok, bitstring} = Dotzip.encode(dotzip) +``` + +(Work in progress) The structure used must be easy to understand and +should contain all information required. A Zip file is mainly divided +in 2 parts: a central directory record containing global information +about the zip file, and a list of files, each one with its own +header. + +NOTE: static data-structures vs dynamic data-structures: here two +worlds are colliding. A strict decomposition of the data can be done +by using `tuples` or by using `maps`. `tuples` can be used on +practically any version of OTP but will require more work on the +library. On the other hand, using `maps` can help to design a flexible +library but old OTP versions will be impacted. The first +implementation will use a mix between tuples and maps; all important +Dotzip data-structures will be tagged with a `:dotzip_*` tag. + +Everything that follows is a draft. + +### File(s) Structure(s) + +To be defined + +```elixir +@type dotzip_encryption_header() :: %{} +@type dotzip_file_data() :: <<>> | {:dotzip_file_ref, <<>>} +@type dotzip_data_descriptor() :: %{} +``` + +```elixir +@type dotzip_file() :: {:dotzip_file, + %{ dotzip_file_header, + :dotzip_encryption_header => dotzip_encryption_header(), + :dotzip_file_data => dotzip_file_data(), + :dotzip_data_descriptor => dotzip_data_descriptor() + } +} +``` + +```elixir +@type dotzip_files() :: [dotzip_file(), ...] 
+``` + +### Central Directory Record Structure(s) + +To be defined + +```elixir +@type dotzip_central_directory_record() :: %{ + +} +``` + +```elixir +@typedoc "" +@type dotzip_struct() :: {:dotzip, + %{ + :dotzip_central_directory_record => dotzip_central_directory_record(), + :dotzip_files => dotzip_files() + } +} +``` + +## ZIP File Extraction and Analysis + +(Work in progress) A Zip file can contain many files, and sometimes +big ones. To avoid using the whole memory of the system, Dotzip can +load only metadata instead of the whole archive by using the +`Dotzip.preload/1` or `Dotzip.preload/2` functions. + +```elixir +{:ok, reference_preload} = Dotzip.preload("/path/to/archive.zip") +``` + +(Work in progress) On the other hand, a file can be fully loaded by using the +`Dotzip.load/1` or `Dotzip.load/2` functions. + +```elixir +{:ok, reference_load} = Dotzip.load("/path/to/archive.zip") +``` + +(Work in progress) Dotzip can analyze the content of the archive by +using the `Dotzip.analyze/1` or `Dotzip.analyze/2` functions. These +functions will ensure the file is in a good state or alert if something +is not correct. `Dotzip.analyze` features may be extended by +creating a `Dotzip.Analyzer`. + +```elixir +{:ok, analysis} = Dotzip.analyze(reference) +``` + +(Work in progress) The whole archive can be extracted by using the +`Dotzip.extract/2` or `Dotzip.extract/3` functions. + +```elixir +{:ok, info} = Dotzip.extract(reference, "/path/to/extract") +{:ok, info} = Dotzip.extract(reference, "/other/path/to/extract", verbose: true) +``` + +(Work in progress) When a file is not required anymore, this file can +be unloaded by using the `Dotzip.unload/1` function. Either the path of the +archive or the reference can be used. + +```elixir +:ok = Dotzip.unload("/path/to/archive.zip") +:ok = Dotzip.unload(reference) +``` ## ZIP File Creation -Some example of the usage. Creating a zip file should be easy and only -based on a simple object creation. +(Work in progress) Some examples of usage. 
Creating a zip file +should be easy and only based on a simple object creation. To create a +new empty archive, the `Dotzip.new/0` or `Dotzip.new/1` functions can be +used. ```elixir -Dotzip.new() -|> Dotzip.to_binary() +reference = Dotzip.new() ``` -Adding file should also be easy. Those files are loaded only when the -file is converted in binary. +(Work in progress) Adding files must also be quite +easy. The `Dotzip.add/2` or `Dotzip.add/3` functions can be used to add +files based on different sources. By default, absolute paths are +converted to relative paths by removing the root part of the path. ```elixir -Dotzip.new() -|> Dotzip.file("/path/to/file/one", "/one") -|> Dotzip.file("/path/to/file/two", "/two") -|> Dotzip.to_binary() +# add a file from an absolute path +{:ok, info} = Dotzip.add(reference, "/path/to/my/file") + +# add a directory and its whole content from an absolute path +{:ok, info} = Dotzip.add(reference, "/path/to/my/directory", recursive: true) + +# create a new directory +{:ok, info} = Dotzip.add(reference, {:directory, "/my/directory"}) + +# create a new file in the archive from a bitstring +{:ok, info} = Dotzip.add(reference, {:raw, "/my/file", "content\n"}, compression: :lz4) + +# create a new file from an external url +{:ok, info} = Dotzip.add(reference, {:url, "/my/other/file", "https://my.super.site.com/file"}) ``` -It should also be possible to add recursively the content of a -directory. +(Work in progress) The whole archive can also share some specific +options, like encryption or compression. ```elixir -Dotzip.new() -|> Dotzip.directory("/path/to/directory", recursive: true) -|> Dotzip.to_binary() -``` - -A blob is any kind of data direcly stored in memory, from the BEAM. - -```elixir -Dotzip.new() -|> Dotzip.blob("my raw data here", "/file_path") -|> Dotzip.blob("another content", "/file_path2") -|> Dotzip.to_binary() -``` - -The option of the zip file can be added directly when the zip is -created. 
- -```elixir -Dotzip.new(compression: :unshrink) -``` - -A list of supported compression methods can be found directly in the -library. - -```elixir -Dotzip.compression_methods() -``` - -Encrypted archive should also be made during the ZIP file creation. - -```elixir -Dotzip.new(encryption: :aes_cbc256) -``` - -or by configuring it after the object was created. - -```elixir -Dotzip.new() -|> Dotzip.hash(:md5) -|> Dotzip.encryption(:aes_cbc256, password: "my_password") -``` - -Supported method can be printed. - -```elixir -Dotzip.encryption_methods() -``` - -## ZIP File Extraction - -Extract all file from a local archive, present on the filesystem. - -```elixir -Dotzip.open_file("/path/to/file.zip") -|> Dotzip.extract_all() -``` - -Extract only one or many files from the local archive. - -```elixir -Dotzip.open_file("/path/to/file.zip") -|> Dotzip.extract("/path/compressed/file") -|> Dotzip.extract("/path/to/compressed.data") -``` - -Convert the full archive in erlang/elixir term. - -```elixir -Dotzip.open_file("/path/to/file.zip") -|> Dotzip.to_term() -``` - -Convert a stream archive to erlang/elixir term. 
- -```elixir -Dotzip.open_stream(mydata) -|> Dotzip.to_term() +# set compression to lz4 +Dotzip.set(reference, compression: :lz4) + +Dotzip.set(reference, encryption: :aes_cbc256) +Dotzip.set(reference, passphrase: "my passphrase") ``` diff --git a/test/dotzip/crc32_test.exs b/test/dotzip/crc32_test.exs new file mode 100644 index 0000000..07db926 --- /dev/null +++ b/test/dotzip/crc32_test.exs @@ -0,0 +1,13 @@ +defmodule Dotzip.Crc32_test do + use ExUnit.Case, async: true + + test "crc32 on bitstring" do + {:ok, <<161, 7>>} = Dotzip.Crc32.raw("a\n") + {:ok, <<137, 193>>} = Dotzip.Crc32.raw("file\n") + end + + test "crc32 on file" do + {:ok, "B5"} = Dotzip.Crc32.file("test/fixtures/a.zip") + end + +end diff --git a/test/dotzip/date_test.exs b/test/dotzip/date_test.exs new file mode 100644 index 0000000..01fc340 --- /dev/null +++ b/test/dotzip/date_test.exs @@ -0,0 +1,4 @@ +defmodule Dotzip.DateTest do + use ExUnit.Case, async: true + doctest Dotzip.Date +end diff --git a/test/dotzip/decode_test.exs b/test/dotzip/decode_test.exs new file mode 100644 index 0000000..13a465a --- /dev/null +++ b/test/dotzip/decode_test.exs @@ -0,0 +1,43 @@ +defmodule Dotzip.DecodeTest do + use ExUnit.Case, async: true + + test "decode a simple archive with one file" do + file = "test/fixtures/a.zip" + _decoded = [ + %{ + :type => :file, + :name => "a.txt", + :crc => <<0xdd, 0xea, 0xa1, 0x07>>, + :offset => 0, + :origin => "Unix", + :time => <<>>, + :date => <<>>, + :version => "3.0", + :compression => :none, + :encryption => :none, + :extended_local_header => false, + :compressed_size => 2, + :uncompressed_size => 2, + :filename_length => 5, + :extra_field_length => 24, + :comment_length => 0, + :method => :stored, + :command => :none, + :extra_field => %{ + :unix => %{ + + } + }, + :content => "a\n" + } + ] + {:ok, _content} = File.read(file) + :ok + end + + # @file "test/fixtures/directory.zip" + # test "decode a simple archive with 2 files and a directory" do + # {:ok, _content} = 
File.read(@file) + # end + +end diff --git a/test/dotzip/extra_field/os2_test.exs b/test/dotzip/extra_field/os2_test.exs new file mode 100644 index 0000000..00c530b --- /dev/null +++ b/test/dotzip/extra_field/os2_test.exs @@ -0,0 +1,4 @@ +defmodule Dotzip.ExtraField.Os2Test do + use ExUnit.Case, async: true + +end diff --git a/test/dotzip/extra_field/unix_test.exs b/test/dotzip/extra_field/unix_test.exs index 1a96194..4767426 100644 --- a/test/dotzip/extra_field/unix_test.exs +++ b/test/dotzip/extra_field/unix_test.exs @@ -1,17 +1,19 @@ defmodule Dotzip.ExtraField.UnixTest do use ExUnit.Case, async: true + doctest Dotzip.ExtraField.Unix - test "decode an empty Unix field" do - struct = %{atime: 0, gid: 0, mtime: 0, uid: 0, tsize: 12} - {:ok, struct, data} = Dotzip.ExtraField.Unix.encode(struct) - {:ok, decoded_struct, decoded_data} = Dotzip.ExtraField.Unix.decode(data) - assert struct == decoded_struct - end + # test "decode an empty Unix field" do + # struct = %{atime: 0, gid: 0, mtime: 0, uid: 0, tsize: 12} + # {:ok, struct, data} = Dotzip.ExtraField.Unix.encode(struct) + # {:ok, decoded_struct, _decoded_data} = Dotzip.ExtraField.Unix.decode(data) + # assert struct == decoded_struct + # end + + # test "encode an empty Unix field" do + # data = <<0, 13, 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>> + # {:ok, decoded_struct, _} = Dotzip.ExtraField.Unix.decode(data) + # {:ok, struct, _encoded_data} = Dotzip.ExtraField.Unix.encode(decoded_struct) + # assert struct == decoded_struct + # end - test "encode an empty Unix field" do - data = <<0, 13, 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>> - {:ok, decoded_struct, _} = Dotzip.ExtraField.Unix.decode(data) - {:ok, struct, encoded_data} = Dotzip.ExtraField.Unix.encode(decoded_struct) - assert struct == decoded_struct - end end diff --git a/test/dotzip/local_file_header.exs b/test/dotzip/local_file_header.exs new file mode 100644 index 0000000..76427eb --- /dev/null +++ b/test/dotzip/local_file_header.exs @@ -0,0 +1,9 @@ 
+defmodule Dotzip.LocalFileHeaderTest do + use ExUnit.Case, async: true + + test "decode simple craft local file header" do + local_file_header= <<>> + Dotzip.LocalFileheader.decode(local_file_header) + end + +end diff --git a/test/dotzip/third_party/extended_timestamp.exs b/test/dotzip/third_party/extended_timestamp.exs new file mode 100644 index 0000000..7b40fef --- /dev/null +++ b/test/dotzip/third_party/extended_timestamp.exs @@ -0,0 +1,4 @@ +defmodule Dotzip.ThirdParty.ExtendedTimestampTest do + use ExUnit.Case, async: true + doctest Dotzip.ThirdParty.ExtendedTimestamp +end diff --git a/test/dotzip/third_party/info_zip_unix_new.exs b/test/dotzip/third_party/info_zip_unix_new.exs new file mode 100644 index 0000000..8e6d9bc --- /dev/null +++ b/test/dotzip/third_party/info_zip_unix_new.exs @@ -0,0 +1,4 @@ +defmodule Dotzip.ThirdParty.InfoZipUnixNewTest do + use ExUnit.Case, async: true + doctest Dotzip.ThirdParty.InfoZipUnixNew +end diff --git a/test/dotzip/time_test.exs b/test/dotzip/time_test.exs new file mode 100644 index 0000000..c72eb54 --- /dev/null +++ b/test/dotzip/time_test.exs @@ -0,0 +1,4 @@ +defmodule Dotzip.TimeTest do + use ExUnit.Case, async: true + doctest Dotzip.Time +end diff --git a/test/dotzip_test.exs b/test/dotzip_test.exs index e787674..8cd8571 100644 --- a/test/dotzip_test.exs +++ b/test/dotzip_test.exs @@ -1,8 +1,4 @@ defmodule DotzipTest do use ExUnit.Case - doctest Dotzip - test "local file header" do - assert :world == :world - end end diff --git a/test/fixtures/a.zip b/test/fixtures/a.zip new file mode 100644 index 0000000000000000000000000000000000000000..b97a34388ab3dc7900ac3573c4905f48d683445a GIT binary patch literal 162 zcmWIWW@h1H0D;TFbAs6yzPifm(gA7BWUP(nsXb2|*v)}Vmi6C5B!Og(P@`9Ox y0Zb%v1$Z+u$uZ+HLIPwm10xVGX#}yL2C_m7L^CYFo0Scuh!F_AfwUut!vFwF%paQo literal 0 HcmV?d00001 diff --git a/test/fixtures/directory.zip b/test/fixtures/directory.zip new file mode 100644 index 
0000000000000000000000000000000000000000..f7dc8b2c5a10eaeacd311dc5bf187f16076c8ee1 GIT binary patch literal 472 zcmWIWW@h1H00E8AIl*8Cl;C2JVMxg=N=+`wFRIiJ4dG;9KJ)Ta;xiyFt>9*0WO>2N zzyKx!;AZFm%{bWkMxPaE1_<+^n30y5lL|KH3eXIYS!m{fm&0yGS6#6_U? z6F7}vWRhdX