studia/jezyki-skryptowe/lista8/parser.py
2024-06-14 16:53:58 +02:00

41 lines
1.2 KiB
Python

import re
from dataclasses import dataclass
from datetime import datetime
from typing import Optional
import locale
# Matches the leading fields of an Apache access-log line:
#     host - - [time] "request" status_code bytes_sent
# The pattern is anchored only at the start (it is used with ``match``), so
# any extra trailing fields on the line are ignored.
LOG_PATTERN = re.compile(
    r'(?P<host>\S+) - - '        # client host, then the two "-" placeholders
    r'\[(?P<time>.+)\] '         # bracketed timestamp
    r'"(?P<request>.*)" '        # quoted request line (may be empty)
    r'(?P<status_code>\d+) '     # numeric status code
    r'(?P<bytes_sent>\d+|-)'     # byte count, or "-" when not reported
)
@dataclass
class ApacheLogEntry:
    """One parsed line of an Apache access log.

    Instances are normally built with :meth:`from_log`, which returns
    ``None`` instead of raising when a line cannot be parsed.
    """

    # Client host: the first whitespace-delimited field of the line.
    host: str
    # Request time; timezone-aware because the log's UTC offset is parsed.
    timestamp: datetime
    # First space-separated token of the quoted request field.
    method: str
    # Second space-separated token of the quoted request field.
    path: str
    # HTTP status code of the response.
    status_code: int
    # Number of bytes sent, or None when the log shows "-".
    bytes_sent: Optional[int]
    # The line exactly as it was passed to from_log (not stripped).
    original_line: str

    # English month abbreviations as written in the log timestamp, keyed in
    # lower case. Deliberately left unannotated so that @dataclass does not
    # turn it into an instance field.
    _MONTH_NUMBERS = {
        "jan": 1, "feb": 2, "mar": 3, "apr": 4, "may": 5, "jun": 6,
        "jul": 7, "aug": 8, "sep": 9, "oct": 10, "nov": 11, "dec": 12,
    }

    @classmethod
    def _parse_timestamp(cls, text: str) -> datetime:
        """Parse a ``dd/Mon/yyyy:HH:MM:SS +zzzz`` timestamp.

        The month name is resolved through a fixed English table rather than
        ``%b``, so the result does not depend on (and this method never
        changes) the process-wide locale.

        Raises:
            ValueError: if the text does not have the expected shape.
            KeyError: if the month abbreviation is not an English one.
        """
        day, month_name, rest = text.split("/", 2)
        month = cls._MONTH_NUMBERS[month_name.lower()]
        # Only numeric directives remain, all of which are locale-independent.
        return datetime.strptime(
            f"{day}/{month:02d}/{rest}", "%d/%m/%Y:%H:%M:%S %z"
        )

    @classmethod
    def from_log(cls, log_string: str) -> Optional["ApacheLogEntry"]:
        """Build an entry from a single access-log line.

        Args:
            log_string: One raw log line; surrounding whitespace is ignored
                for matching but preserved in ``original_line``.

        Returns:
            The parsed entry, or ``None`` when the line does not match
            ``LOG_PATTERN`` or one of its fields is malformed (bad
            timestamp, request without both a method and a path, ...).
        """
        match = LOG_PATTERN.match(log_string.strip())
        if match is None:
            return None

        try:
            timestamp = cls._parse_timestamp(match.group("time"))
            # A request with fewer than two tokens fails the unpacking with
            # ValueError and is reported as unparsable.
            method, path = match.group("request").split(" ")[:2]
            status_code = int(match.group("status_code"))
            raw_bytes = match.group("bytes_sent")
            bytes_sent = None if raw_bytes == "-" else int(raw_bytes)
        except (ValueError, KeyError):
            # Malformed field: skip the line, but let unrelated errors
            # (KeyboardInterrupt, programming mistakes) propagate.
            return None

        return cls(
            match.group("host"),
            timestamp,
            method,
            path,
            status_code,
            bytes_sent,
            log_string,
        )