Merge pull request #16 from HideyoshiSolutions/new-api-implementation

New API and Tests Implementation
Committed by GitHub on 2024-05-26 02:29:21 -03:00
46 changed files with 1769 additions and 333 deletions

.githooks/pre-commit-config.yaml (new file, 13 lines)

@@ -0,0 +1,13 @@
repos:
  - repo: https://github.com/psf/black
    rev: 24.4.2
    hooks:
      - id: black
        args: [--config=pyproject.toml]
  - repo: https://github.com/pycqa/isort
    rev: 5.13.2
    hooks:
      - id: isort
        files: "\\.(py)$"
        args: [--settings-path=pyproject.toml]

.githooks/set-hooks.sh (new file, 5 lines)

@@ -0,0 +1,5 @@
#!/bin/bash
pre-commit install --config .githooks/pre-commit-config.yaml
pre-commit autoupdate --config .githooks/pre-commit-config.yaml

.github/workflows/run-tests.yml (new file, 27 lines)

@@ -0,0 +1,27 @@
name: ci

on:
  push

jobs:
  run-tests:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          python-version: '3.12'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install poetry
          poetry install

      - name: Run tests
        run: |
          poetry run python -m unittest

.gitignore (1 line)

@@ -1,4 +1,5 @@
.env*
.coverage*
.idea


@@ -1,64 +1,11 @@
# `python-base` sets up all our shared environment variables
FROM python:3.12-slim as python-base
FROM python:3.12
# python
ENV PYTHONUNBUFFERED=1 \
# prevents python creating .pyc files
PYTHONDONTWRITEBYTECODE=1 \
\
# pip
PIP_NO_CACHE_DIR=off \
PIP_DISABLE_PIP_VERSION_CHECK=on \
PIP_DEFAULT_TIMEOUT=100 \
\
# poetry
# https://python-poetry.org/docs/configuration/#using-environment-variables
POETRY_VERSION=1.5.1 \
# make poetry install to this location
POETRY_HOME="/opt/poetry" \
# make poetry create the virtual environment in the project's root
# it gets named `.venv`
POETRY_VIRTUALENVS_IN_PROJECT=true \
# do not ask any interactive question
POETRY_NO_INTERACTION=1 \
\
# paths
# this is where our requirements + virtual environment will live
PYSETUP_PATH="/opt/pysetup" \
VENV_PATH="/opt/pysetup/.venv"
WORKDIR /app
RUN pip install poetry
# prepend poetry and venv to path
ENV PATH="$POETRY_HOME/bin:$VENV_PATH/bin:$PATH"
COPY ./ /app/
RUN poetry install
# `builder-base` stage is used to build deps + create our virtual environment
FROM python-base as builder-base
RUN apt-get update \
&& apt-get install --no-install-recommends -y \
# deps for installing poetry
curl \
# deps for building python deps
build-essential
# install poetry - respects $POETRY_VERSION & $POETRY_HOME
RUN curl -sSL https://install.python-poetry.org | python3 -
# copy project requirement files here to ensure they will be cached.
WORKDIR $PYSETUP_PATH
COPY . .
# install runtime deps - uses $POETRY_VIRTUALENVS_IN_PROJECT internally
RUN poetry install --no-dev
# `builder-base` stage is used to build deps + create our virtual environment
FROM python-base as production
COPY --from=builder-base $PYSETUP_PATH $PYSETUP_PATH
WORKDIR $PYSETUP_PATH
EXPOSE 5000-9000
# Run your app
CMD [ "./run-queue.sh" ]
ENTRYPOINT ["poetry", "run", "python", "-m", "storage_service"]

poetry.lock (generated, 1215 lines): diff suppressed because it is too large.


@@ -9,21 +9,35 @@ packages = [{include = "storage_service"}]
[tool.poetry.dependencies]
python = "^3.12"
pillow = "^10.0.0"
redis = "^5.0.3"
requests = "^2.31.0"
rq = "^1.15.1"
redis = "^5.0.4"
requests = "^2.32.1"
rq = "^1.16.1"
python-dotenv = "^1.0.0"
fastapi = "^0.110.1"
fastapi = "^0.111.0"
uvicorn = "^0.29.0"
boto3 = "^1.28.21"
boto3 = "^1.34.109"
python-multipart = "^0.0.9"
virustotal-python = "^1.0.2"
fastapi-utils = "^0.6.0"
typing-inspect = "^0.9.0"
poethepoet = "^0.26.1"
[tool.poetry.group.dev.dependencies]
isort = "^5.12.0"
black = "^23.7.0"
coverage = "^7.5.1"
pre-commit = "^3.7.1"
[tool.poe.tasks]
'run' = "python -m storage_service"
'run:queue' = "python -m storage_service --queue"
'run:dev' = "python -m storage_service --dev"
'create-hooks' = "bash .githooks/set-hooks.sh"
'test' = "coverage run -m unittest -v"
[build-system]
requires = ["poetry-core"]


@@ -1,11 +0,0 @@
#!/bin/bash
if [[ $1 == "--queue" || $1 == "-q" ]]; then
    rq worker --with-scheduler
    exit 0
else
    python -m storage_service
fi

exec "$@"


@@ -0,0 +1,28 @@
from storage_service.config.config_allowed_origins import get_allowed_origins
from storage_service.controller import health_router, storage_router
from storage_service.utils.exception_handler import (
    http_exception_handler,
    validation_exception_handler,
)

from fastapi import FastAPI, HTTPException
from fastapi.exceptions import RequestValidationError
from fastapi.middleware.cors import CORSMiddleware

app = FastAPI()

app.add_exception_handler(HTTPException, http_exception_handler)
app.add_exception_handler(RequestValidationError, validation_exception_handler)

app.add_middleware(
    CORSMiddleware,
    allow_origins=get_allowed_origins(),
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

app.include_router(storage_router)
app.include_router(health_router)


@@ -1,14 +1,42 @@
from storage_service.config.config_server import get_config_server
from storage_service.controller import app
from storage_service.depends.depend_queue import dependency_queue_worker
import uvicorn
import argparse
def main():
config = get_config_server()
uvicorn.run(app, host=config["host"], port=config["port"])
def main(is_queue=False, is_dev=False):
if is_queue:
dependency_queue_worker().work(with_scheduler=True)
else:
config = {
**get_config_server(),
"reload": is_dev,
}
uvicorn.run("storage_service.__init__:app", **config)
if __name__ == "__main__":
main()
parser = argparse.ArgumentParser(description="Storage Service")
parser.add_argument(
"-q",
"--queue",
dest="queue",
default=False,
action="store_true",
help="Runs the worker to process the queue",
)
parser.add_argument(
"-d",
"--dev",
dest="dev_mode",
default=False,
action="store_true",
help="Run the server in development mode.",
)
args = parser.parse_args()
main(args.queue, args.dev_mode)


@@ -3,7 +3,12 @@ from dotenv import load_dotenv
import os
def get_virus_checker_api_key():
def get_virus_checker_api_key() -> str:
load_dotenv()
return {"api_key": os.environ.get("VIRUS_CHECKER_API_KEY")}
api_key = os.environ.get("VIRUS_CHECKER_API_KEY")
if not api_key:
raise RuntimeError("Virus Checker API Key not found")
return api_key


@@ -1,20 +1,2 @@
from storage_service.config.config_allowed_origins import get_allowed_origins
from storage_service.controller.health_checker_controller import health_router
from storage_service.controller.storage_controller import s3_router
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
app = FastAPI()
app.add_middleware(
CORSMiddleware,
allow_origins=get_allowed_origins(),
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
app.include_router(s3_router)
app.include_router(health_router)
from .health_checker_controller import router as health_router
from .storage_controller import router as storage_router


@@ -1,11 +1,15 @@
from storage_service.model.health_check.health_check_response import (
HealthCheckResponse,
)
from fastapi import APIRouter
from fastapi_utils.cbv import cbv
health_router = APIRouter()
router = APIRouter(tags=["health"])
@cbv(health_router)
@cbv(router)
class HealthCheckerController:
@health_router.get("/health", status_code=200)
def health(self) -> dict[str, str]:
return {"status": "healthy"}
@router.get("/health", status_code=200)
def health(self) -> HealthCheckResponse:
return HealthCheckResponse(status="healthy")


@@ -4,52 +4,61 @@ from storage_service.depends.depend_queue import dependency_queue
from storage_service.depends.depend_s3_service import (
dependency_storage_service,
)
from storage_service.model.storage.new_file_request import NewFileURLRequest
from storage_service.model.storage.process_file_request import (
ProcessFileRequest,
)
from storage_service.model.storage.signed_url_response import SignedUrlResponse
from storage_service.service.storage.storage_service import StorageService
from storage_service.utils.enums.file_type import FileType
from storage_service.utils.file_name_hash import file_name_hash
from storage_service.utils.exceptions.file_not_found_exception import (
FileNotFoundException,
)
from storage_service.utils.file.file_hash_generator import generate_file_hash
from storage_service.worker.storage_file_worker import storage_file_worker
from fastapi import APIRouter, Body, Depends
from fastapi import APIRouter, Depends, HTTPException
from fastapi_utils.cbv import cbv
from rq import Queue
from typing import Annotated
s3_router = APIRouter()
router = APIRouter(tags=["storage"])
@cbv(s3_router)
@cbv(router)
class StorageController:
queue: Queue = Depends(dependency_queue, use_cache=True)
storage_service: StorageService = Depends(
dependency_storage_service, use_cache=True
)
@s3_router.post("/file/", status_code=200)
def new_file_url(
self,
username: Annotated[str, Body(embed=True)],
file_postfix: Annotated[str, Body(embed=True)],
file_type: Annotated[FileType, Body(embed=True)],
) -> dict[str, str]:
@router.post("/file", status_code=200)
def new_file_url(self, new_file_request: NewFileURLRequest) -> SignedUrlResponse:
hashed_file_name = generate_file_hash(
new_file_request.file_key, new_file_request.file_postfix
)
return self.storage_service.get_temp_upload_link(
file_name_hash(username, file_postfix), file_type
hashed_file_name, new_file_request.file_type
)
@s3_router.get("/file/", status_code=200)
def file_url(self, username: str, file_postfix: str) -> dict[str, str | None]:
@router.get("/file", status_code=200)
def file_url(self, file_key: str, file_postfix: str) -> SignedUrlResponse:
try:
return self.storage_service.get_temp_read_link(
file_name_hash(username, file_postfix)
generate_file_hash(file_key, file_postfix)
)
except Exception as _:
raise FileNotFoundException("File not found")
@router.delete("/file", status_code=204)
def delete_file(self, file_key: str, file_postfix: str):
return self.storage_service.delete_file(
generate_file_hash(file_key, file_postfix)
)
@s3_router.delete("/file/", status_code=204)
def delete_file(self, username: str, file_postfix: str):
return self.storage_service.delete_file(file_name_hash(username, file_postfix))
@s3_router.post("/file/process", status_code=200)
def process_file(
self,
username: Annotated[str, Body(embed=True)],
file_postfix: Annotated[str, Body(embed=True)],
):
self.queue.enqueue(storage_file_worker, username, file_postfix)
@router.post("/file/process", status_code=200)
def process_file(self, process_file_request: ProcessFileRequest):
self.queue.enqueue(
storage_file_worker,
process_file_request.file_key,
process_file_request.file_postfix,
)
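The controller now takes Pydantic request models (NewFileURLRequest, ProcessFileRequest) instead of Body()-embedded fields, so clients send a single JSON object per call. A minimal sketch of exercising the reworked endpoints with FastAPI's TestClient; the file_key/file_postfix values are illustrative, and configured storage and Redis backends are assumed:

from fastapi.testclient import TestClient

from storage_service import app

client = TestClient(app)

# POST /file takes a NewFileURLRequest JSON body and returns a SignedUrlResponse.
resp = client.post(
    "/file",
    json={"file_key": "alice", "file_postfix": "avatar.png", "file_type": "png"},
)
resp.json()  # {"signed_url": "...", "expires_in": 3600} (expires_in defaults to 3600)

# GET /file keeps plain query parameters and raises a 404 via FileNotFoundException on a miss.
client.get("/file", params={"file_key": "alice", "file_postfix": "avatar.png"})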


@@ -1,8 +1,16 @@
from storage_service.config.config_redis import get_config_redis
from redis import Redis
from rq import Queue
from rq import Queue, Worker
from functools import cache
def dependency_queue():
return Queue(connection=Redis(**get_config_redis()))
@cache
def dependency_queue() -> Queue:
return Queue(name="default", connection=Redis(**get_config_redis()))
@cache
def dependency_queue_worker() -> Worker:
return Worker(["default"], connection=Redis(**get_config_redis()))
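Both queue dependencies are now memoized with functools.cache and pinned to the explicit "default" queue, so the API process and the worker process talk through the same Redis queue. A rough sketch of the two sides, assuming a reachable Redis as configured by get_config_redis() (the sample arguments are illustrative):

from storage_service.depends.depend_queue import dependency_queue, dependency_queue_worker
from storage_service.worker.storage_file_worker import storage_file_worker

# API process: enqueue a job on the shared "default" queue, as StorageController.process_file does.
dependency_queue().enqueue(storage_file_worker, "alice", "avatar.png")

# Worker process (started with `python -m storage_service --queue`): drain the same queue.
dependency_queue_worker().work(with_scheduler=True)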


@@ -3,6 +3,8 @@ from storage_service.service.storage.amazon_s3_service import AmazonS3Service
from storage_service.service.storage.storage_service import StorageService
from storage_service.utils.enums.storage_type import StorageType
import boto3
import botocore.client
from dotenv import load_dotenv
import os
@@ -14,6 +16,27 @@ def dependency_storage_service() -> StorageService:
load_dotenv()
if StorageType(os.environ["STORAGE_TYPE"]) == StorageType.S3_STORAGE:
return AmazonS3Service(**get_config_s3())
s3_config = get_config_s3()
if "aws_access_key_id" not in s3_config:
raise RuntimeError("Invalid S3 Config: Missing aws_access_key_id")
if "aws_secret_access_key" not in s3_config:
raise RuntimeError("Invalid S3 Config: Missing aws_secret_access_key")
if "region_name" not in s3_config:
raise RuntimeError("Invalid S3 Config: Missing region_name")
s3_client = boto3.client(
"s3",
region_name=s3_config["region_name"],
aws_access_key_id=s3_config["aws_access_key_id"],
aws_secret_access_key=s3_config["aws_secret_access_key"],
)
return AmazonS3Service(
s3_client,
s3_config["bucket_name"],
)
raise RuntimeError("Invalid Storage Type")


@@ -10,6 +10,7 @@ from storage_service.service.virus_checker.virus_total_service import (
from storage_service.utils.enums.virus_checker_type import VirusCheckerType
from dotenv import load_dotenv
from virustotal_python import Virustotal
import os
from functools import cache
@@ -19,13 +20,12 @@ from functools import cache
def dependency_virus_checker_service() -> VirusCheckerService:
load_dotenv()
virus_checker_config = get_virus_checker_api_key()
if not virus_checker_config["api_key"]:
raise RuntimeError("Virus Checker API Key not found")
virus_checker_type_var = os.environ.get("VIRUS_CHECKER_TYPE")
if VirusCheckerType(virus_checker_type_var) == VirusCheckerType.TOTAL_VIRUS:
return VirusTotalService(**get_virus_checker_api_key())
try:
type = VirusCheckerType(os.environ["VIRUS_CHECKER_TYPE"])
except ValueError:
raise RuntimeError("Invalid Virus Checker Type")
match type:
case VirusCheckerType.TOTAL_VIRUS:
virus_checker = Virustotal(get_virus_checker_api_key())
return VirusTotalService(virus_checker)


@@ -0,0 +1,5 @@
from pydantic import BaseModel


class HealthCheckResponse(BaseModel):
    status: str


@@ -0,0 +1,9 @@
from storage_service.utils.enums.file_type import FileType

from pydantic import BaseModel


class NewFileURLRequest(BaseModel):
    file_key: str
    file_postfix: str
    file_type: FileType


@@ -0,0 +1,6 @@
from pydantic import BaseModel


class ProcessFileRequest(BaseModel):
    file_key: str
    file_postfix: str


@@ -0,0 +1,6 @@
from pydantic import BaseModel


class SignedUrlResponse(BaseModel):
    signed_url: str
    expires_in: int


@@ -0,0 +1,2 @@
from .amazon_s3_service import AmazonS3Service
from .storage_service import StorageService


@@ -3,111 +3,128 @@ from __future__ import annotations
from storage_service.depends.depend_virus_checker_service import (
dependency_virus_checker_service,
)
from storage_service.model.storage.signed_url_response import SignedUrlResponse
from storage_service.service.storage.storage_service import StorageService
from storage_service.service.virus_checker.virus_checker_service import (
VirusCheckerService,
)
from storage_service.utils.enums.file_type import FileType
from storage_service.utils.file_handler import FILE_HANDLER
import boto3
from botocore.client import BaseClient
import io
from typing import Any
import logging
logger = logging.getLogger(__name__)
class AmazonS3Service(StorageService):
virus_checker_service = dependency_virus_checker_service()
virus_checker_service: VirusCheckerService
def __init__(self, **kwargs):
super().__init__(**kwargs)
s3_client: BaseClient
bucket_name: str
self.__validate_config(**kwargs)
expires_in: int = 3600
self.bucket_name = kwargs.get("bucket_name")
self.region_name = kwargs.get("region_name")
def __init__(
self,
s3_client: BaseClient,
bucket_name: str,
virus_checker_service=None,
**kwargs,
):
self.virus_checker_service = virus_checker_service
self.expires_in = kwargs.get("expires_in")
if s3_client is None:
raise RuntimeError("Invalid S3 Config: Missing s3_client")
self.s3_client = s3_client
self.s3 = boto3.client(
"s3",
aws_access_key_id=kwargs.get("aws_access_key_id"),
aws_secret_access_key=kwargs.get("aws_secret_access_key"),
region_name=kwargs.get("region_name"),
if bucket_name is None:
raise RuntimeError("Invalid S3 Config: Missing bucket_name")
self.bucket_name = bucket_name
if virus_checker_service is None:
self.virus_checker_service = dependency_virus_checker_service()
if "expires_in" in kwargs:
self.expires_in = kwargs["expires_in"]
def get_temp_upload_link(self, file_name, file_type: FileType) -> SignedUrlResponse:
return SignedUrlResponse(
signed_url=self._get_presigned_write_url(file_name, file_type),
expires_in=self.expires_in,
)
def get_temp_upload_link(
self, file_name, file_type: FileType
) -> dict[str, str | Any]:
return {
"presigned_url": self._get_presigned_write_url(file_name, file_type),
"file_key": self._get_object_url(file_name),
}
def get_temp_read_link(self, file_name) -> dict[str, str | None]:
return {"presigned_url": self._get_presigned_read_url(file_name)}
def get_temp_read_link(self, file_name) -> SignedUrlResponse:
return SignedUrlResponse(
signed_url=self._get_presigned_read_url(file_name),
expires_in=self.expires_in,
)
def delete_file(self, file_name: str) -> None:
self._delete_file(file_name)
def process_file(self, file_name: str, file_type: FileType = FileType.PNG) -> None:
def process_file(self, file_name: str, file_type: FileType = FileType.PNG) -> dict:
try:
file_bytes = self._get_file_obj(file_name)
except Exception as _:
raise FileNotFoundError("File not found")
if not self.virus_checker_service.check_virus(file_bytes):
self._delete_file(file_name)
raise ValueError("Virus Detected")
handler = FILE_HANDLER[file_type]["handler"]
try:
old_size = file_bytes.getbuffer().nbytes
self._upload_file(file_name, handler(file_bytes))
file_bytes = file_type.get_validator()(file_bytes)
def _get_object_url(self, file_name: str) -> str:
return f"https://{self.bucket_name}.s3.{self.region_name}.amazonaws.com/{file_name}"
new_size = file_bytes.getbuffer().nbytes
except Exception as _:
raise RuntimeError("Error Processing")
self._upload_file(file_name, file_bytes)
return {
"previous_size": old_size,
"current_size": new_size,
}
def _get_presigned_write_url(self, file_name, file_type: FileType) -> str:
return self.s3.generate_presigned_url(
return self.s3_client.generate_presigned_url(
"put_object",
Params={
"Bucket": self.bucket_name,
"Key": file_name,
"ContentType": FILE_HANDLER[file_type]["content_type"],
"ContentType": file_type.get_content_type(),
},
ExpiresIn=self.expires_in,
)
def _get_presigned_read_url(self, file_name) -> str | None:
result = self.s3.list_objects(Bucket=self.bucket_name, Prefix=file_name)
result = self.s3_client.list_objects(Bucket=self.bucket_name, Prefix=file_name)
if "Contents" in result and file_name in map(
lambda x: x["Key"], result["Contents"]
):
return self.s3.generate_presigned_url(
return self.s3_client.generate_presigned_url(
"get_object",
Params={"Bucket": self.bucket_name, "Key": file_name},
ExpiresIn=self.expires_in,
)
return None
raise FileNotFoundError("File not found")
def _get_file_obj(self, file_name: str) -> io.BytesIO:
return io.BytesIO(
self.s3.get_object(Bucket=self.bucket_name, Key=file_name)["Body"].read()
self.s3_client.get_object(Bucket=self.bucket_name, Key=file_name)[
"Body"
].read()
)
def _upload_file(self, file_name: str, file_bytes: io.BytesIO) -> None:
self.s3.upload_fileobj(file_bytes, Bucket=self.bucket_name, Key=file_name)
self.s3_client.upload_fileobj(
file_bytes, Bucket=self.bucket_name, Key=file_name
)
def _delete_file(self, file_name: str) -> None:
self.s3.delete_object(Bucket=self.bucket_name, Key=file_name)
@staticmethod
def __validate_config(**kwargs):
if not kwargs.get("bucket_name"):
raise RuntimeError("bucket_name is required")
if not kwargs.get("aws_access_key_id"):
raise RuntimeError("aws_access_key_id is required")
if not kwargs.get("aws_secret_access_key"):
raise RuntimeError("aws_secret_access_key is required")
if not kwargs.get("region_name"):
raise RuntimeError("region_name is required")
if not kwargs.get("bucket_name"):
raise RuntimeError("bucket_name is required")
self.s3_client.delete_object(Bucket=self.bucket_name, Key=file_name)


@@ -1,23 +1,18 @@
from __future__ import annotations
from storage_service.model.storage.signed_url_response import SignedUrlResponse
from storage_service.utils.enums.file_type import FileType
from abc import ABC, abstractmethod
from typing import Any
class StorageService(ABC):
def __init__(self, **kwargs):
@abstractmethod
def get_temp_upload_link(self, file_name, file_type: FileType) -> SignedUrlResponse:
pass
@abstractmethod
def get_temp_upload_link(
self, file_name, file_type: FileType
) -> dict[str, str | Any]:
pass
@abstractmethod
def get_temp_read_link(self, file_name) -> dict[str, str | None]:
def get_temp_read_link(self, file_name) -> SignedUrlResponse:
pass
@abstractmethod
@@ -25,5 +20,5 @@ class StorageService(ABC):
pass
@abstractmethod
def process_file(self, file_name: str, file_type: FileType) -> None:
def process_file(self, file_name: str, file_type: FileType) -> dict:
pass


@@ -8,31 +8,33 @@ from io import BytesIO
class VirusTotalService(VirusCheckerService):
def __init__(self, api_key: str):
self.api_key = api_key
virus_checker: Virustotal
def __init__(self, virus_checker: Virustotal):
self.virus_checker = virus_checker
def check_virus(self, file_data: BytesIO) -> bool:
file_id = self._upload_file(file_data)
file_attributes = self._get_analysis(file_id)
return self._is_valid_file(file_attributes)
def _upload_file(self, file_data: BytesIO) -> str:
files = {"file": ("image_file", file_data)}
with Virustotal(self.api_key) as vtotal:
resp = vtotal.request("files", files=files, method="POST")
resp = self.virus_checker.request("files", files=files, method="POST")
file_attributes = self._get_analysis(resp.json()["data"]["id"])
return self._is_valid_file(file_attributes["data"]["attributes"]["stats"])
return resp.data["id"]
def _get_analysis(self, file_id: str) -> dict:
with Virustotal(self.api_key) as vtotal:
resp = vtotal.request(f"analyses/{file_id}")
resp = self.virus_checker.request(f"analyses/{file_id}")
return resp.json()
return resp.json()["data"]["attributes"]["stats"]
@staticmethod
def _is_valid_file(file_stats: dict) -> bool:
if "malicious" in file_stats and file_stats["malicious"] > 0:
return False
if "suspicious" in file_stats and file_stats["suspicious"] > 0:
return False
match file_stats:
case {"malicious": 0, "suspicious": 0, "harmless": 0}:
return True
case _:
return False
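_is_valid_file now uses a structural match: only a stats block whose malicious, suspicious, and harmless counts are all zero is treated as clean (mapping patterns ignore extra keys), and anything else, including a report with nonzero harmless hits, falls through to False. A small illustration with invented stats values:

# Invented stats dicts run through the new match-based check.
clean = {"malicious": 0, "suspicious": 0, "harmless": 0, "undetected": 60}
flagged = {"malicious": 2, "suspicious": 0, "harmless": 0}

VirusTotalService._is_valid_file(clean)    # True: the pattern matches; the extra "undetected" key is ignored
VirusTotalService._is_valid_file(flagged)  # False: falls through to the wildcard case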


@@ -1,6 +1,26 @@
from storage_service.utils.file.validators import image_validator
from enum import Enum
from io import BytesIO
from typing import Callable
class FileType(Enum):
PNG = "png"
JPEG = "jpeg"
def get_content_type(self) -> str:
match self:
case FileType.PNG:
return "image/png"
case FileType.JPEG:
return "image/jpeg"
case _:
raise ValueError("File Type Not Implemented")
def get_validator(self) -> Callable[[BytesIO], BytesIO]:
match self:
case FileType.PNG | FileType.JPEG:
return image_validator
case _:
raise ValueError("File Type Not Implemented")
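FileType now carries its own content type and validator, replacing the old FILE_HANDLER lookup table. A minimal usage sketch (the uploaded bytes are assumed, not shown):

from storage_service.utils.enums.file_type import FileType

file_type = FileType("jpeg")          # parsed from NewFileURLRequest.file_type
file_type.get_content_type()          # "image/jpeg", used for the presigned PUT
validate = file_type.get_validator()  # image_validator for both PNG and JPEG
# validate(BytesIO(uploaded_bytes)) would re-encode the image and strip EXIF metadata.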


@@ -0,0 +1,2 @@
from .http_exception_handler import http_exception_handler
from .validation_exception_handler import validation_exception_handler


@@ -0,0 +1,15 @@
from starlette.exceptions import HTTPException
from starlette.requests import Request
from starlette.responses import JSONResponse
async def http_exception_handler(request: Request, exc: HTTPException):
    return JSONResponse(
        status_code=exc.status_code,
        content={
            "error": {
                "message": exc.detail,
                "status_code": exc.status_code,
            }
        },
    )


@@ -0,0 +1,20 @@
from fastapi.exceptions import RequestValidationError
from starlette import status
from starlette.requests import Request
from starlette.responses import JSONResponse
async def validation_exception_handler(request: Request, exc: RequestValidationError):
    status_code = status.HTTP_422_UNPROCESSABLE_ENTITY
    return JSONResponse(
        status_code=status_code,
        content={
            "error": {
                "details": {
                    "body": exc.body,
                    "errors": exc.errors(),
                },
                "status_code": status_code,
            }
        },
    )


@@ -0,0 +1,6 @@
from fastapi import HTTPException, status
class FileNotFoundException(HTTPException):
    def __init__(self, message: str):
        super().__init__(status.HTTP_404_NOT_FOUND, detail=message)
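Together with the registered http_exception_handler, this subclass turns a missing object into a consistent JSON envelope. A sketch of what a failed lookup would return:

from storage_service.utils.exceptions.file_not_found_exception import FileNotFoundException

# Raised by StorageController.file_url when get_temp_read_link fails; rendered by
# http_exception_handler as an HTTP 404 with a body roughly like:
#   {"error": {"message": "File not found", "status_code": 404}}
raise FileNotFoundException("File not found")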


@@ -0,0 +1,9 @@
import base64
from hashlib import md5
def generate_file_hash(file_key: str, file_postfix: str) -> str:
    hashed_file_key = md5(file_key.encode("utf-8")).digest()
    hashed_file_key = base64.b64encode(hashed_file_key).decode()
    return f"{hashed_file_key}_{file_postfix}"
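generate_file_hash derives the object name from an MD5 of the file key, base64-encoded and joined to the postfix, so the same key and postfix always map to the same object. A small sketch (sample inputs are illustrative; the digest itself is not spelled out):

from storage_service.utils.file.file_hash_generator import generate_file_hash

name = generate_file_hash("alice", "avatar.png")
# name has the shape "<24-character base64 MD5 digest>_avatar.png"
assert name == generate_file_hash("alice", "avatar.png")  # deterministic for the same key
assert name != generate_file_hash("bob", "avatar.png")    # different key, different object name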


@@ -0,0 +1 @@
from .image_handler import image_validator


@@ -3,10 +3,10 @@ from PIL import Image
import io
def image_handler(file_bytes: io.BytesIO) -> io.BytesIO:
def image_validator(file_bytes: io.BytesIO) -> io.BytesIO:
img = Image.open(file_bytes)
img.thumbnail((320, 320))
img.thumbnail((180, 180))
data = list(img.getdata())
image_without_exif = Image.new(img.mode, img.size)


@@ -1,9 +0,0 @@
from storage_service.utils.enums.file_type import FileType
from storage_service.utils.file_handler.handlers.image_handler import (
image_handler,
)
FILE_HANDLER = {
FileType.PNG: {"content_type": "image/png", "handler": image_handler},
FileType.JPEG: {"content_type": "image/jpeg", "handler": image_handler},
}


@@ -1,9 +0,0 @@
import base64
from hashlib import md5
def file_name_hash(username: str, file_postfix: str) -> str:
    hashed_username = md5(username.encode("utf-8")).digest()
    hashed_username = base64.b64encode(hashed_username).decode()
    return f"{hashed_username}_{file_postfix}"


@@ -1,9 +1,29 @@
from storage_service.depends.depend_s3_service import (
dependency_storage_service,
)
from storage_service.utils.enums.file_type import FileType
from storage_service.utils.file_name_hash import file_name_hash
from storage_service.utils.file.file_hash_generator import generate_file_hash
import logging
logger = logging.getLogger(__name__)
def storage_file_worker(username: str, file_postfix: str) -> None:
dependency_storage_service().process_file(file_name_hash(username, file_postfix))
storage_service = dependency_storage_service()
file_name = generate_file_hash(username, file_postfix)
try:
stats = storage_service.process_file(file_name)
previous_size_kb = stats["previous_size"] / 1_000
current_size_kb = stats["current_size"] / 1_000
print(
f"File processed: {file_name} - "
f"Previous Size: {previous_size_kb}kb - "
f"New Size: {current_size_kb}kb"
)
except Exception as e:
print(f"Error processing file: {e}." f" Deleting file: {file_name}.")
storage_service.delete_file(file_name)

tests/__init__.py (new, empty file)

@@ -0,0 +1,140 @@
from unittest import TestCase
from unittest.mock import Mock

from storage_service.service.storage import AmazonS3Service
from storage_service.utils.enums.file_type import FileType


class TestAmazonS3Service(TestCase):
    def setUp(self):
        self.s3_client_mock = Mock()
        self.virus_checker_service_mock = Mock()

    def test_get_temp_upload_link(self):
        self.s3_client_mock.generate_presigned_url.return_value = "https://test.com"

        storage_service = AmazonS3Service(
            s3_client=self.s3_client_mock,
            bucket_name="test_bucket",
            virus_checker_service=self.virus_checker_service_mock,
        )

        response = storage_service.get_temp_upload_link("test_file", FileType.JPEG)

        self.assertEqual(response.signed_url, "https://test.com")
        self.assertEqual(response.expires_in, 3600)
        self.s3_client_mock.generate_presigned_url.assert_called_once_with(
            "put_object",
            Params={
                "Bucket": "test_bucket",
                "Key": "test_file",
                "ContentType": "image/jpeg",
            },
            ExpiresIn=3600,
        )

    def test_get_temp_read_link(self):
        self.s3_client_mock.generate_presigned_url.return_value = "https://test.com"
        self.s3_client_mock.list_objects.return_value = {
            "Contents": [{"Key": "test_file"}]
        }

        storage_service = AmazonS3Service(
            s3_client=self.s3_client_mock,
            bucket_name="test_bucket",
            virus_checker_service=self.virus_checker_service_mock,
        )

        response = storage_service.get_temp_read_link("test_file")

        self.assertEqual(response.signed_url, "https://test.com")
        self.assertEqual(response.expires_in, 3600)
        self.s3_client_mock.generate_presigned_url.assert_called_once_with(
            "get_object",
            Params={"Bucket": "test_bucket", "Key": "test_file"},
            ExpiresIn=3600,
        )

    def test_delete_file(self):
        storage_service = AmazonS3Service(
            s3_client=self.s3_client_mock,
            bucket_name="test_bucket",
            virus_checker_service=self.virus_checker_service_mock,
        )

        storage_service.delete_file("test_file")

        self.s3_client_mock.delete_object.assert_called_once_with(
            Bucket="test_bucket", Key="test_file"
        )

    def test_process_file_if_file_invalid(self):
        mock_body = Mock()
        mock_body.read.return_value = b"test_file"
        self.s3_client_mock.get_object.return_value = {"Body": mock_body}
        self.virus_checker_service_mock.check_virus.return_value = True

        storage_service = AmazonS3Service(
            s3_client=self.s3_client_mock,
            bucket_name="test_bucket",
            virus_checker_service=self.virus_checker_service_mock,
        )

        with self.assertRaises(RuntimeError):
            storage_service.process_file("test_file", FileType.JPEG)

    def test_process_file_if_file_is_virus(self):
        mock_body = Mock()
        mock_body.read.return_value = b"test_file"
        self.s3_client_mock.get_object.return_value = {"Body": mock_body}

        mock_file_type = Mock()
        mock_file_type.get_validator.return_value = lambda x: x
        mock_file_type.get_content_type.return_value = "image/fake"

        self.virus_checker_service_mock.check_virus.return_value = False

        storage_service = AmazonS3Service(
            s3_client=self.s3_client_mock,
            bucket_name="test_bucket",
            virus_checker_service=self.virus_checker_service_mock,
        )

        with self.assertRaises(ValueError):
            storage_service.process_file("test_file", mock_file_type)

    def test_process_file(self):
        mock_body = Mock()
        mock_body.read.return_value = b"test_file"
        self.s3_client_mock.get_object.return_value = {"Body": mock_body}
        self.virus_checker_service_mock.check_virus.return_value = True

        mock_file_type = Mock()
        mock_file_type.get_validator.return_value = lambda x: x
        mock_file_type.get_content_type.return_value = "image/fake"

        storage_service = AmazonS3Service(
            s3_client=self.s3_client_mock,
            bucket_name="test_bucket",
            virus_checker_service=self.virus_checker_service_mock,
        )

        storage_service.process_file("test_file", mock_file_type)

        self.s3_client_mock.upload_fileobj.assert_called()


@@ -0,0 +1,33 @@
from io import BytesIO
from unittest import TestCase
from unittest.mock import Mock
from storage_service.service.virus_checker.virus_total_service import VirusTotalService


class TestVirusTotalService(TestCase):
    def test_check_virus_invalid(self):
        mock_virus_checker = Mock()
        mock_virus_checker.request.side_effect = [
            Mock(data={"id": "file_id"}),
            Mock(json=Mock(return_value={"data": {"attributes": {"stats": {"malicious": 1, "suspicious": 1, "harmless": 1}}}})),
        ]

        virus_total_service = VirusTotalService(mock_virus_checker)
        result = virus_total_service.check_virus(BytesIO(b"file_data"))

        self.assertFalse(result)

    def test_check_virus_valid(self):
        mock_virus_checker = Mock()
        mock_virus_checker.request.side_effect = [
            Mock(data={"id": "file_id"}),
            Mock(json=Mock(return_value={"data": {"attributes": {"stats": {"malicious": 0, "suspicious": 0, "harmless": 0}}}})),
        ]

        virus_total_service = VirusTotalService(mock_virus_checker)
        result = virus_total_service.check_virus(BytesIO(b"file_data"))

        self.assertTrue(result)