Pular para conteúdo

Reader

JSONReader

Bases: Reader

Concrete class to read data from a JSON file.

Source code in cartola_project/reader.py
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
class JSONReader(Reader):
    """Reader that downloads JSON content from cloud storage and parses it."""

    def __init__(
        self,
        cloud_storage: CloudStorage,
        bucket_name: str,
        file_path: str,
    ):
        # Keep the storage client together with the location it reads from.
        self.file_path = file_path
        self.bucket_name = bucket_name
        self.cloud_storage = cloud_storage

    def read(self) -> dict:
        """Download the object stored at ``file_path`` and parse it as JSON.

        Returns:
            The parsed JSON content.
        """
        raw = self.cloud_storage.download(self.bucket_name, self.file_path)
        # The downloaded payload is decoded as UTF-8 text before parsing.
        text = raw.decode("utf-8")
        return json.loads(text)

    def read_all_files(self) -> list[dict]:
        """Download and parse every file listed for ``file_path``.

        The names come from ``cloud_storage.list_files``; all of them are
        downloaded first and only then parsed.

        Returns:
            One parsed JSON object per listed file, in listing order.
        """
        storage, bucket = self.cloud_storage, self.bucket_name
        names = storage.list_files(bucket, self.file_path)
        payloads = [storage.download(bucket, name) for name in names]
        return [json.loads(payload.decode("utf-8")) for payload in payloads]

read()

Download the data from the storage.

Returns:

Type Description
dict

Json object.

Source code in cartola_project/reader.py
39
40
41
42
43
44
45
46
47
48
49
50
def read(self) -> dict:
    """Download the object stored at ``file_path`` and parse it as JSON.

    Returns:
        The parsed JSON content.
    """
    raw = self.cloud_storage.download(self.bucket_name, self.file_path)
    # The downloaded payload is decoded as UTF-8 text before parsing.
    text = raw.decode("utf-8")
    return json.loads(text)

read_all_files()

Read all the data from the storage.

Returns:

Type Description
list[dict]

List of Json objects.

Source code in cartola_project/reader.py
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
def read_all_files(self) -> list[dict]:
    """Download and parse every file listed for ``file_path``.

    The names come from ``cloud_storage.list_files``; all of them are
    downloaded first and only then parsed.

    Returns:
        One parsed JSON object per listed file, in listing order.
    """
    storage, bucket = self.cloud_storage, self.bucket_name
    names = storage.list_files(bucket, self.file_path)
    payloads = [storage.download(bucket, name) for name in names]
    return [json.loads(payload.decode("utf-8")) for payload in payloads]

ParquetReader

Bases: Reader

Concrete class to read data from a Parquet file.

Source code in cartola_project/reader.py
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
class ParquetReader(Reader):
    """Concrete class to read data from a Parquet file."""

    def __init__(
        self,
        cloud_storage: CloudStorage,
        bucket_name: str,
        file_path: str,
    ):
        self.file_path = file_path
        self.bucket_name = bucket_name
        self.cloud_storage = cloud_storage

    def read(self) -> pd.DataFrame:
        """Read a single Parquet file from the storage.

        The downloaded bytes are wrapped in an in-memory buffer and parsed
        with pandas.

        Returns:
            Pandas DataFrame with the file contents.
        """
        file = self.cloud_storage.download(
            self.bucket_name,
            self.file_path,
        )
        return pd.read_parquet(BytesIO(file))

    def read_all_files(self) -> pd.DataFrame:
        """Read every Parquet file listed for ``file_path`` from the storage.

        Each listed file is downloaded, parsed with pandas and the resulting
        frames are concatenated in listing order.

        Returns:
            Pandas DataFrame with the rows of all files, or an empty
            DataFrame when no files are listed.
        """
        files = self.cloud_storage.list_files(
            self.bucket_name,
            self.file_path,
        )
        files_download = [
            self.cloud_storage.download(
                self.bucket_name,
                file,
            )
            for file in files
        ]
        if not files_download:
            # pd.concat raises ValueError("No objects to concatenate") on an
            # empty list, so return an empty frame instead.
            return pd.DataFrame()
        return pd.concat([pd.read_parquet(BytesIO(file)) for file in files_download])

read()

Read the data from the storage. Use pandas to read the data and return a pandas DataFrame.

Returns:

Type Description
pd.DataFrame

Pandas DataFrame.

Source code in cartola_project/reader.py
85
86
87
88
89
90
91
92
93
94
95
96
97
def read(self) -> pd.DataFrame:
    """Read a single Parquet file from the storage.

    The downloaded bytes are wrapped in an in-memory buffer and parsed
    with pandas.

    Returns:
        Pandas DataFrame with the file contents.
    """
    raw = self.cloud_storage.download(self.bucket_name, self.file_path)
    return pd.read_parquet(BytesIO(raw))

read_all_files()

Read all the data from the storage. Use pandas to read the data and return a pandas DataFrame.

Returns:

Type Description
pd.DataFrame

Pandas DataFrame.

Source code in cartola_project/reader.py
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
def read_all_files(self) -> pd.DataFrame:
    """Read every Parquet file listed for ``file_path`` from the storage.

    Each listed file is downloaded, parsed with pandas and the resulting
    frames are concatenated in listing order.

    Returns:
        Pandas DataFrame with the rows of all files, or an empty
        DataFrame when no files are listed.
    """
    files = self.cloud_storage.list_files(
        self.bucket_name,
        self.file_path,
    )
    files_download = [
        self.cloud_storage.download(
            self.bucket_name,
            file,
        )
        for file in files
    ]
    if not files_download:
        # pd.concat raises ValueError("No objects to concatenate") on an
        # empty list, so return an empty frame instead.
        return pd.DataFrame()
    return pd.concat([pd.read_parquet(BytesIO(file)) for file in files_download])

Reader

Bases: ABC

Source code in cartola_project/reader.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
class Reader(ABC):
    """Abstract interface that concrete readers must implement."""

    @abstractmethod
    def read(self):
        """Abstract method to read the data from the file."""
        raise NotImplementedError()

    @abstractmethod
    def read_all_files(self, *args):
        """Abstract method to read all the data from a list of files.

        Args:
            *args: Positional arguments; not used by this base method.
        """
        raise NotImplementedError()

read() abstractmethod

Abstract method to read the data from the file.

Source code in cartola_project/reader.py
11
12
13
14
@abstractmethod
def read(self):
    """Abstract method to read the data from the file; subclasses override it."""
    raise NotImplementedError()

read_all_files(*args) abstractmethod

Abstract method to read all the data from a list of files.

Parameters:

Name Type Description Default
*args ()
Source code in cartola_project/reader.py
16
17
18
19
20
21
22
23
@abstractmethod
def read_all_files(self, *args):
    """Abstract method to read all the data from a list of files.

    Args:
        *args: Positional arguments; not used by this base method.
    """
    raise NotImplementedError()