diff --git a/poetry.lock b/poetry.lock index 2d7b19d..4facf1b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. [[package]] name = "anyio" @@ -577,6 +577,18 @@ typing-extensions = ">=4.2.0" dotenv = ["python-dotenv (>=0.10.4)"] email = ["email-validator (>=1.0.3)"] +[[package]] +name = "pysocks" +version = "1.7.1" +description = "A Python SOCKS client module. See https://github.com/Anorov/PySocks for more information." +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "PySocks-1.7.1-py27-none-any.whl", hash = "sha256:08e69f092cc6dbe92a0fdd16eeb9b9ffbc13cadfe5ca4c7bd92ffb078b293299"}, + {file = "PySocks-1.7.1-py3-none-any.whl", hash = "sha256:2725bd0a9925919b9b51739eea5f9e2bae91e83288108a9ad338b2e3a4435ee5"}, + {file = "PySocks-1.7.1.tar.gz", hash = "sha256:3f8804571ebe159c380ac6de37643bb4685970655d3bba243530d6558b799aa0"}, +] + [[package]] name = "python-dateutil" version = "2.8.2" @@ -652,6 +664,7 @@ files = [ certifi = ">=2017.4.17" charset-normalizer = ">=2,<4" idna = ">=2.5,<4" +PySocks = {version = ">=1.5.6,<1.5.7 || >1.5.7", optional = true, markers = "extra == \"socks\""} urllib3 = ">=1.21.1,<3" [package.extras] @@ -858,7 +871,21 @@ typing-extensions = {version = ">=4.0", markers = "python_version < \"3.11\""} [package.extras] standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"] +[[package]] +name = "virustotal-python" +version = "1.0.2" +description = "A Python library to interact with the public VirusTotal v3 and v2 APIs." 
+optional = false +python-versions = ">=3.7,<4.0" +files = [ + {file = "virustotal_python-1.0.2-py3-none-any.whl", hash = "sha256:daad06f702f9566d74c5882d742e5d6b229865787ca41503a0865e1cf3896cc5"}, + {file = "virustotal_python-1.0.2.tar.gz", hash = "sha256:541634d7e23bcff7019eafc9125b12a00352e058958335fa4b10467a3cf24737"}, +] + +[package.dependencies] +requests = {version = ">=2.26.0,<3.0.0", extras = ["socks"]} + [metadata] lock-version = "2.0" python-versions = "^3.8" -content-hash = "bd6b5b316bf690cb0128584793a1f1d0caa82fa3987be86d3439afac56c9b0fe" +content-hash = "db27ceb0744267ef9fda1d8c0d2fb2a55630f8863ee0a29fde03edfc06b13fd8" diff --git a/pyproject.toml b/pyproject.toml index 333b693..f2ed366 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ uvicorn = "^0.23.2" fastapi-utils = "^0.2.1" boto3 = "^1.28.21" python-multipart = "^0.0.6" +virustotal-python = "^1.0.2" [tool.poetry.group.dev.dependencies] diff --git a/storage_service/config/config_virus_checker.py b/storage_service/config/config_virus_checker.py new file mode 100644 index 0000000..dfa4e54 --- /dev/null +++ b/storage_service/config/config_virus_checker.py @@ -0,0 +1,11 @@ +from dotenv import load_dotenv + +import os + + +def get_virus_checker_api_key(): + load_dotenv() + + return { + "api_key": os.environ.get("VIRUS_CHECKER_API_KEY") + } diff --git a/storage_service/controller/storage_controller.py b/storage_service/controller/storage_controller.py index 31579b4..ccfdb54 100644 --- a/storage_service/controller/storage_controller.py +++ b/storage_service/controller/storage_controller.py @@ -4,12 +4,12 @@ from storage_service.depends.depend_queue import dependency_queue from storage_service.depends.depend_s3_service import ( dependency_storage_service, ) -from storage_service.service.storage_service import StorageService +from storage_service.service.storage.storage_service import StorageService from storage_service.utils.enums.file_type import FileType from 
storage_service.utils.file_name_hash import file_name_hash from storage_service.worker.storage_file_worker import storage_file_worker -from fastapi import Body, Depends, Form +from fastapi import Body, Depends from fastapi_utils.cbv import cbv from fastapi_utils.inferring_router import InferringRouter from rq import Queue @@ -26,7 +26,7 @@ class StorageController: dependency_storage_service, use_cache=True ) - @s3_router.post("/new_file_url/", status_code=200) + @s3_router.post("/file/", status_code=200) def new_file_url( self, username: Annotated[str, Body(embed=True)], @@ -37,16 +37,24 @@ class StorageController: file_name_hash(username, file_postfix), file_type ) - @s3_router.get("/file_url/", status_code=200) + @s3_router.get("/file/", status_code=200) def file_url(self, username: str, file_postfix: str) -> dict[str, str | None]: return self.storage_service.get_temp_read_link( file_name_hash(username, file_postfix) ) - @s3_router.post("/process_file/", status_code=200) + @s3_router.delete("/file/", status_code=204) + def delete_file(self, username: str, file_postfix: str): + return self.storage_service.delete_file( + file_name_hash(username, file_postfix) + ) + + @s3_router.post("/file/process", status_code=200) def process_file( self, username: Annotated[str, Body(embed=True)], file_postfix: Annotated[str, Body(embed=True)], ): self.queue.enqueue(storage_file_worker, username, file_postfix) + + diff --git a/storage_service/depends/depend_s3_service.py b/storage_service/depends/depend_s3_service.py index 0e7f46a..ae1834e 100644 --- a/storage_service/depends/depend_s3_service.py +++ b/storage_service/depends/depend_s3_service.py @@ -1,6 +1,6 @@ from storage_service.config.config_s3 import get_config_s3 -from storage_service.service.amazon_s3_service import AmazonS3Service -from storage_service.service.storage_service import StorageService +from storage_service.service.storage.amazon_s3_service import AmazonS3Service +from 
storage_service.service.storage.storage_service import StorageService from storage_service.utils.enums.storage_type import StorageType from dotenv import load_dotenv diff --git a/storage_service/depends/depend_virus_checker_service.py b/storage_service/depends/depend_virus_checker_service.py new file mode 100644 index 0000000..7fba733 --- /dev/null +++ b/storage_service/depends/depend_virus_checker_service.py @@ -0,0 +1,32 @@ +import os +from functools import cache + +from storage_service.config.config_virus_checker import get_virus_checker_api_key +from storage_service.service.virus_checker.virus_total_service import VirusTotalService +from storage_service.service.virus_checker.virus_checker_service import VirusCheckerService + +from dotenv import load_dotenv + +from storage_service.utils.enums.virus_checker_type import VirusCheckerType + + +@cache +def dependency_virus_checker_service() -> VirusCheckerService: + load_dotenv() + + virus_checker_config = get_virus_checker_api_key() + + if not virus_checker_config["api_key"]: + raise RuntimeError("Virus Checker API Key not found") + + virus_checker_type_var = os.environ.get("VIRUS_CHECKER_TYPE") + try: + virus_checker_type = VirusCheckerType(virus_checker_type_var) + except ValueError as err: + # Enum lookup raises ValueError for a missing or unknown value; report it as the configuration error below. + raise RuntimeError("Invalid Virus Checker Type") from err + + if virus_checker_type == VirusCheckerType.TOTAL_VIRUS: + return VirusTotalService(**virus_checker_config) + + raise RuntimeError("Invalid Virus Checker Type") diff --git a/storage_service/service/storage/__init__.py b/storage_service/service/storage/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/storage_service/service/amazon_s3_service.py b/storage_service/service/storage/amazon_s3_service.py similarity index 85% rename from storage_service/service/amazon_s3_service.py rename to storage_service/service/storage/amazon_s3_service.py index c0d2e75..59625d9 100644 --- a/storage_service/service/amazon_s3_service.py +++ b/storage_service/service/storage/amazon_s3_service.py @@ -1,6 +1,7 @@ from __future__ import annotations -from storage_service.service.storage_service import
StorageService +from storage_service.depends.depend_virus_checker_service import dependency_virus_checker_service +from storage_service.service.storage.storage_service import StorageService from storage_service.utils.enums.file_type import FileType from storage_service.utils.file_handler import FILE_HANDLER @@ -11,6 +12,9 @@ from typing import Any class AmazonS3Service(StorageService): + + virus_checker_service = dependency_virus_checker_service() + def __init__(self, **kwargs): super().__init__(**kwargs) @@ -39,8 +43,17 @@ class AmazonS3Service(StorageService): def get_temp_read_link(self, file_name) -> dict[str, str | None]: return {"presigned_url": self._get_presigned_read_url(file_name)} + def delete_file(self, file_name: str) -> None: + self._delete_file(file_name) + def process_file(self, file_name: str, file_type: FileType = FileType.PNG) -> None: file_bytes = self._get_file_obj(file_name) + + if not self.virus_checker_service.check_virus(file_bytes): + self._delete_file(file_name) + # Flagged content must not be processed and written back under the same key. + return + handler = FILE_HANDLER[file_type]["handler"] self._upload_file(file_name, handler(file_bytes)) @@ -78,6 +91,9 @@ class AmazonS3Service(StorageService): def _upload_file(self, file_name: str, file_bytes: io.BytesIO) -> None: self.s3.upload_fileobj(file_bytes, Bucket=self.bucket_name, Key=file_name) + def _delete_file(self, file_name: str) -> None: + self.s3.delete_object(Bucket=self.bucket_name, Key=file_name) + @staticmethod def __validate_config(**kwargs): if not kwargs.get("bucket_name"): diff --git a/storage_service/service/storage_service.py b/storage_service/service/storage/storage_service.py similarity index 87% rename from storage_service/service/storage_service.py rename to storage_service/service/storage/storage_service.py index 5967962..bf81cee 100644 --- a/storage_service/service/storage_service.py +++ b/storage_service/service/storage/storage_service.py @@ -20,6 +20,10 @@ class StorageService(ABC): def get_temp_read_link(self, file_name) -> dict[str, str | None]: 
pass + @abstractmethod + def delete_file(self, file_name: str) -> None: + pass + @abstractmethod def process_file(self, file_name: str, file_type: FileType) -> None: pass diff --git a/storage_service/service/virus_checker/__init__.py b/storage_service/service/virus_checker/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/storage_service/service/virus_checker/virus_checker_service.py b/storage_service/service/virus_checker/virus_checker_service.py new file mode 100644 index 0000000..32d89e1 --- /dev/null +++ b/storage_service/service/virus_checker/virus_checker_service.py @@ -0,0 +1,8 @@ +from abc import ABC, abstractmethod +from io import BytesIO + + +class VirusCheckerService(ABC): + @abstractmethod + def check_virus(self, file_data: BytesIO) -> bool: + pass diff --git a/storage_service/service/virus_checker/virus_total_service.py b/storage_service/service/virus_checker/virus_total_service.py new file mode 100644 index 0000000..3078e27 --- /dev/null +++ b/storage_service/service/virus_checker/virus_total_service.py @@ -0,0 +1,45 @@ +import time +from io import BytesIO +from virustotal_python import Virustotal + +from storage_service.service.virus_checker.virus_checker_service import VirusCheckerService + + +class VirusTotalService(VirusCheckerService): + # Seconds to wait between analysis polls, and the maximum number of polls before giving up. + POLL_INTERVAL_SECONDS = 15 + MAX_POLL_ATTEMPTS = 20 + + def __init__(self, api_key: str): + self.api_key = api_key + + def check_virus(self, file_data: BytesIO) -> bool: + files = {"file": ("image_file", file_data)} + + with Virustotal(self.api_key) as vtotal: + resp = vtotal.request("files", files=files, method="POST") + + file_attributes = self._get_analysis(resp.json()["data"]["id"]) + + return self._is_valid_file(file_attributes["data"]["attributes"]["stats"]) + + def _get_analysis(self, file_id: str) -> dict: + # A fresh upload is only queued; its stats stay at zero until the analysis status is "completed". + with Virustotal(self.api_key) as vtotal: + for _ in range(self.MAX_POLL_ATTEMPTS): + analysis = vtotal.request(f"analyses/{file_id}").json() + if analysis["data"]["attributes"]["status"] == "completed": + return analysis + time.sleep(self.POLL_INTERVAL_SECONDS) + + raise TimeoutError(f"VirusTotal analysis {file_id} did not complete") + + @staticmethod + def _is_valid_file(file_stats: dict) -> bool: + if 'malicious' in file_stats and file_stats['malicious'] > 0: + return
False + + if 'suspicious' in file_stats and file_stats['suspicious'] > 0: + return False + + return True diff --git a/storage_service/utils/enums/virus_checker_type.py b/storage_service/utils/enums/virus_checker_type.py new file mode 100644 index 0000000..077e95b --- /dev/null +++ b/storage_service/utils/enums/virus_checker_type.py @@ -0,0 +1,5 @@ +from enum import Enum + + +class VirusCheckerType(Enum): + TOTAL_VIRUS = "total_virus" \ No newline at end of file