diff --git a/bot/__pycache__/exception.cpython-310.pyc b/bot/__pycache__/exception.cpython-310.pyc new file mode 100644 index 0000000..08003b1 Binary files /dev/null and b/bot/__pycache__/exception.cpython-310.pyc differ diff --git a/bot/__pycache__/msgprocessor.cpython-310.pyc b/bot/__pycache__/msgprocessor.cpython-310.pyc new file mode 100644 index 0000000..6ec92f1 Binary files /dev/null and b/bot/__pycache__/msgprocessor.cpython-310.pyc differ diff --git a/bot/msgprocessor.py b/bot/msgprocessor.py index 229305d..79b10f1 100644 --- a/bot/msgprocessor.py +++ b/bot/msgprocessor.py @@ -2,15 +2,16 @@ from typing import Callable from dataclasses import dataclass from exception import UrlRemoverNotImplementedException from urllib.parse import urlparse, parse_qs, urlencode, urlunparse +import re -@dataclass(init=True) +@dataclass(init=True, eq=True) class TrackerRemovalProcessorMessage: fromUsername: str text: str -@dataclass(frozen=True, init=True) +@dataclass(frozen=True, init=True, eq=True) class TrackerRemovalResult: needsToReply: bool text: str @@ -35,7 +36,9 @@ class TrackerRemovalMsgProcessor: SCHEMES = ["http://", "https://"] return len([s for s in SCHEMES if url.startswith(s)]) != 0 - lexems = self.__msg.text.split() + SEPARATOR_CHARS = [" ", "\n"] + separator_regex = "("+"|".join(SEPARATOR_CHARS)+")" + lexems = re.split(separator_regex, self.__msg.text) for i, l in enumerate(lexems): if not is_url(l): continue @@ -47,12 +50,15 @@ class TrackerRemovalMsgProcessor: trackers_extracted = True lexems[i] = removed_trackers_url - self.__msg.text = " ".join(lexems) + self.__msg.text = "".join(lexems) return trackers_extracted @staticmethod def __remove_tracker(url: str) -> str: - parsed_url = urlparse(url) + try: + parsed_url = urlparse(url) + except Exception: + return url if parsed_url.hostname is None: return url hostname = str(parsed_url.hostname) @@ -62,7 +68,7 @@ class TrackerRemovalMsgProcessor: return url def __emplace_sender_into_msg_text(self): - self.__msg.text = f'Message from {self.__msg.fromUsername}:\n\n{self.__msg.text}' + self.__msg.text = f'Message from @{self.__msg.fromUsername}:\n\n{self.__msg.text}' class TrackerRemoverFactory: @@ -82,9 +88,9 @@ class TrackerRemoverFactory: ) ] remover_one = [ - r + r for r in removers_by_domain - if len([d for d in r.domains if d.endswith(domain)]) != 0 + if len([d for d in r.domains if domain.endswith(d)]) != 0 ] if len(remover_one) == 0: raise UrlRemoverNotImplementedException(domain) diff --git a/bot/msgprocessor_test.py b/bot/msgprocessor_test.py new file mode 100644 index 0000000..5cff4de --- /dev/null +++ b/bot/msgprocessor_test.py @@ -0,0 +1,117 @@ +import unittest +from msgprocessor import ( + TrackerRemovalMsgProcessor, + TrackerRemoverFactory, + TrackerRemovalProcessorMessage, + TrackerRemovalResult, +) + + +class TestRemoverFactory(unittest.TestCase): + factory = TrackerRemoverFactory() + + def test_remove_strategy_constructor(self): + test_case_data = [ + { + "domain": "youtube.com", + "remover": self.factory.remove_yt_trackers + }, + { + "domain": "lowerlevel.youtube.com", + "remover": self.factory.remove_yt_trackers + }, + { + "domain": "youtu.be", + "remover": self.factory.remove_yt_trackers, + }, + { + "domain": "something.youtu.be", + "remover": self.factory.remove_yt_trackers, + } + ] + for test_case in test_case_data: + self.assertIs( + self.factory.make_remover(test_case["domain"]), + self.factory.remove_yt_trackers, + ) + + def test_remove_yt_si(self): + test_case_data = [ + { + "url": "https://youtu.be/jNQXAC9IVRw?si=qLIZT1rvs99_jbgy", + "expected_url": "https://youtu.be/jNQXAC9IVRw" + }, + { + "url": "https://youtu.be/jNQXAC9IVRw?si=qLIZT1rvs99_jbgy&t=16", + "expected_url": "https://youtu.be/jNQXAC9IVRw?t=16" + }, + { + "url": "https://www.youtube.com/watch?v=jNQXAC9IVRw", + "expected_url": "https://www.youtube.com/watch?v=jNQXAC9IVRw" + }, + { + "url": "http://www.youtube.com/watch?v=jNQXAC9IVRw&si=qLIZT1rvs99_jbgy&t=16", + "expected_url": "http://www.youtube.com/watch?v=jNQXAC9IVRw&t=16" + } + ] + for test_case in test_case_data: + self.assertEqual(self.factory.remove_yt_trackers(test_case["url"]), test_case["expected_url"]) + + +class TestRemovalMsgProcessor(unittest.TestCase): + def test_remove_links(self): + test_case_data = [ + { + "msg_text": "https://youtu.be/jNQXAC9IVRw?si=qLIZT1rvs99_jbgy", + "sender_username": "Ghytro", + "bot_responded": True, + "bot_response": "Message from @Ghytro:\n\nhttps://youtu.be/jNQXAC9IVRw" + }, + { + "msg_text": "чекай https://youtu.be/jNQXAC9IVRw?si=qLIZT1rvs99_jbgy\nнаш слон хд", + "sender_username": "OllyHearn", + "bot_responded": True, + "bot_response": "Message from @OllyHearn:\n\nчекай https://youtu.be/jNQXAC9IVRw\nнаш слон хд" + }, + { + "msg_text": "а я такая нитакуся без si ссылки шлю сразу https://youtu.be/jNQXAC9IVRw и по нескольку штук\nhttp://www.youtube.com/watch?v=jNQXAC9IVRw&si=qLIZT1rvs99_jbgy&t=16 дада", + "sender_username": "OllyHearn", + "bot_responded": True, + "bot_response": "Message from @OllyHearn:\n\nа я такая нитакуся без si ссылки шлю сразу https://youtu.be/jNQXAC9IVRw и по нескольку штук\nhttp://www.youtube.com/watch?v=jNQXAC9IVRw&t=16 дада" + }, + { + "msg_text": "asdasdasdasdasdasdasd asdasd asdasd asdad sasa dadsas", + "sender_username": "Ghytro", + "bot_responded": False, + "bot_response": "" + } + ] + for test_case in test_case_data: + result = TrackerRemovalMsgProcessor( + TrackerRemovalProcessorMessage( + fromUsername=test_case["sender_username"], + text=test_case["msg_text"] + ) + ).process() + self.assertEqual( + result, + TrackerRemovalResult( + needsToReply=test_case["bot_responded"], + text=test_case["bot_response"] + ) + ) + +if __name__ == "__main__": + test_classes_to_run = [TestRemoverFactory, TestRemovalMsgProcessor] + + loader = unittest.TestLoader() + + suites_list = [] + for test_class in test_classes_to_run: + suite = loader.loadTestsFromTestCase(test_class) + suites_list.append(suite) + + big_suite = unittest.TestSuite(suites_list) + + runner = unittest.TextTestRunner() + results = runner.run(big_suite)