From 3ae891c62e3ca4953773e7777226af68060e4740 Mon Sep 17 00:00:00 2001
From: Anthony Stirling <77850077+Frooodle@users.noreply.github.com.>
Date: Sun, 26 May 2024 15:58:33 +0100
Subject: [PATCH] cucumber

---
 .gitignore                                  |   5 +-
 cucumber/features/environment.py            |  16 ++
 cucumber/features/examples.feature          | 242 ++++++++++++++++++++
 cucumber/features/steps/step_definitions.py | 207 +++++++++++++++++
 cucumber/requirements.txt                   |   4 +
 5 files changed, 473 insertions(+), 1 deletion(-)
 create mode 100644 cucumber/features/environment.py
 create mode 100644 cucumber/features/examples.feature
 create mode 100644 cucumber/features/steps/step_definitions.py
 create mode 100644 cucumber/requirements.txt

diff --git a/.gitignore b/.gitignore
index 174eab39..1a241d6f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -124,4 +124,7 @@ watchedFolders/
 
 # Ignore Mac DS_Store files
 .DS_Store
-**/.DS_Store
\ No newline at end of file
+**/.DS_Store
+
+#cucumber
+/cucumber/reports/**
\ No newline at end of file
diff --git a/cucumber/features/environment.py b/cucumber/features/environment.py
new file mode 100644
index 00000000..ec7f2c9f
--- /dev/null
+++ b/cucumber/features/environment.py
@@ -0,0 +1,16 @@
+import os
+
+def before_all(context):  # behave hook: seed the context attributes the step definitions rely on
+    context.endpoint = None  # not read by any step visible in this patch
+    context.request_data = None  # request form fields; filled in by the "request data" steps
+    context.files = {}  # upload parameter name -> open file handle
+    context.response = None  # assigned by the "I send the API request" step
+
+def after_scenario(context, scenario):
+    if hasattr(context, 'files'):
+        for file in context.files.values():
+            file.close()
+    if os.path.exists('response_file'):
+        os.remove('response_file')
+    if hasattr(context, 'file_name') and os.path.exists(context.file_name):
+        os.remove(context.file_name)
diff --git a/cucumber/features/examples.feature b/cucumber/features/examples.feature
new file mode 100644
index 00000000..c18e0516
--- /dev/null
+++ b/cucumber/features/examples.feature
@@ -0,0 +1,242 @@
+Feature: API Validation
+
+  Scenario: Remove password 
+    Given I generate a PDF file as "fileInput"
+    And the pdf contains 3 pages
+    And the pdf is encrypted with password "password123"
+    And the request data includes
+      | parameter | value       |
+      | password  | password123 |
+    When I send the API request to the endpoint "/api/v1/security/remove-password"
+    Then the response content type should be "application/pdf"
+    And the response file should have size greater than 0
+    And the response PDF is not passworded
+	And the response status code should be 200
+	
+  Scenario: Remove password wrong password
+    Given I generate a PDF file as "fileInput"
+    And the pdf contains 3 pages
+    And the pdf is encrypted with password "password123"
+    And the request data includes
+      | parameter | value       |
+      | password  | wrongPassword |
+    When I send the API request to the endpoint "/api/v1/security/remove-password"
+    Then the response status code should be 500
+    And the response should contain error message "Internal Server Error"
+
+  Scenario: Get info
+    Given I generate a PDF file as "fileInput"
+    When I send the API request to the endpoint "/api/v1/security/get-info-on-pdf"
+    Then the response content type should be "application/json"
+    And the response file should have size greater than 100
+	And the response status code should be 200
+
+  Scenario: Add password
+    Given I generate a PDF file as "fileInput"
+    And the pdf contains 3 pages
+    And the request data includes
+      | parameter | value       |
+      | password  | password123 |
+    When I send the API request to the endpoint "/api/v1/security/add-password"
+    Then the response content type should be "application/pdf"
+    And the response file should have size greater than 100
+    And the response PDF is passworded
+	And the response status code should be 200
+	
+  Scenario: Add password with other params 
+    Given I generate a PDF file as "fileInput"
+    And the pdf contains 3 pages
+    And the request data includes
+      | parameter      | value       |
+      | ownerPassword  | ownerPass   |
+      | password       | password123 |
+      | keyLength      | 256         |
+      | canPrint       | true        |
+      | canModify      | false       |
+    When I send the API request to the endpoint "/api/v1/security/add-password"
+    Then the response content type should be "application/pdf"
+    And the response file should have size greater than 100
+    And the response PDF is passworded
+	And the response status code should be 200
+	
+	
+  Scenario: Add watermark
+    Given I generate a PDF file as "fileInput"
+    And the pdf contains 3 pages
+    And the request data includes
+      | parameter     | value            |
+      | watermarkType | text             |
+      | watermarkText | Sample Watermark |
+      | fontSize      | 30               |
+      | rotation      | 45               |
+      | opacity       | 0.5              |
+      | widthSpacer   | 50               |
+      | heightSpacer  | 50               |
+    When I send the API request to the endpoint "/api/v1/security/add-watermark"
+    Then the response content type should be "application/pdf"
+    And the response file should have size greater than 100
+	And the response status code should be 200
+	
+
+
+  Scenario: Repair PDF
+    Given I generate a PDF file as "fileInput"
+    When I send the API request to the endpoint "/api/v1/misc/repair"
+    Then the response content type should be "application/pdf"
+    And the response file should have size greater than 0
+	And the response status code should be 200
+	
+
+
+  Scenario: Remove blank pages
+    Given I generate a PDF file as "fileInput"
+	And the pdf contains 3 blank pages
+    And the request data includes
+      | parameter    | value       |
+      | threshold    | 90          |
+      | whitePercent | 99.9        |
+    When I send the API request to the endpoint "/api/v1/misc/remove-blanks"
+    Then the response content type should be "application/pdf"
+    And the response file should have size greater than 0
+    And the response PDF should contain 0 pages
+	And the response status code should be 200
+	
+  @ocr
+  Scenario: Process PDF with OCR
+    Given I generate a PDF file as "fileInput"
+    And the request data includes
+      | parameter        | value       |
+      | languages        | eng         |
+      | sidecar          | false        |
+      | deskew           | true        |
+      | clean            | true        |
+      | cleanFinal       | true        |
+      | ocrType          | Normal      |
+      | ocrRenderType    | hocr        |
+      | removeImagesAfter| false       |
+    When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
+    Then the response content type should be "application/pdf"
+    And the response file should have size greater than 0
+	And the response status code should be 200
+
+  @ocr
+  Scenario: Process PDF with text and OCR with type normal 
+    Given I generate a PDF file as "fileInput"
+    And the pdf contains 3 pages with random text
+    And the request data includes
+      | parameter        | value       |
+      | languages        | eng         |
+      | sidecar          | false        |
+      | deskew           | true        |
+      | clean            | true        |
+      | cleanFinal       | true        |
+      | ocrType          | Normal      |
+      | ocrRenderType    | hocr        |
+      | removeImagesAfter| false       |
+    When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
+	Then the response status code should be 500
+	
+  @ocr
+  Scenario: Process PDF with OCR with type Force
+    Given I generate a PDF file as "fileInput"
+    And the request data includes
+      | parameter        | value       |
+      | languages        | eng         |
+      | sidecar          | false        |
+      | deskew           | true        |
+      | clean            | true        |
+      | cleanFinal       | true        |
+      | ocrType          | Force      |
+      | ocrRenderType    | hocr        |
+      | removeImagesAfter| false       |
+    When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
+    Then the response content type should be "application/pdf"
+    And the response file should have size greater than 0
+	And the response status code should be 200
+	
+  @ocr
+  Scenario: Process PDF with OCR with sidecar
+    Given I generate a PDF file as "fileInput"
+    And the request data includes
+      | parameter        | value       |
+      | languages        | eng         |
+      | sidecar          | true        |
+      | deskew           | true        |
+      | clean            | true        |
+      | cleanFinal       | true        |
+      | ocrType          | Force      |
+      | ocrRenderType    | hocr        |
+      | removeImagesAfter| false       |
+    When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
+    Then the response content type should be "application/octet-stream"
+	And the response file should have extension ".zip"
+    And the response file should have size greater than 0
+	And the response status code should be 200
+
+	
+  Scenario: Flatten PDF
+    Given I generate a PDF file as "fileInput"
+    And the request data includes
+      | parameter         | value   |
+      | flattenOnlyForms  | false    |
+    When I send the API request to the endpoint "/api/v1/misc/flatten"
+    Then the response content type should be "application/pdf"
+    And the response file should have size greater than 0
+	And the response status code should be 200
+	
+  Scenario: Update metadata
+    Given I generate a PDF file as "fileInput"
+    And the request data includes
+      | parameter        | value             |
+      | author           | John Doe          |
+      | title            | Sample Title      |
+      | subject          | Sample Subject    |
+      | keywords         | sample, test      |
+      | producer         | Test Producer     |
+    When I send the API request to the endpoint "/api/v1/misc/update-metadata"
+    Then the response content type should be "application/pdf"
+    And the response file should have size greater than 0
+    And the response PDF metadata should include "Author" as "John Doe"
+	And the response PDF metadata should include "Keywords" as "sample, test"
+	And the response PDF metadata should include "Subject" as "Sample Subject"
+	And the response PDF metadata should include "Title" as "Sample Title"
+	And the response status code should be 200
+
+  @libre
+  Scenario: Convert PDF to DOCX
+    Given I generate a PDF file as "fileInput"
+    And the pdf contains 3 pages with random text
+	And the request data includes
+      | parameter        | value      |
+      | outputFormat     | docx       |
+    When I send the API request to the endpoint "/api/v1/convert/pdf/word"
+	Then the response status code should be 200
+    And the response file should have size greater than 100
+    And the response file should have extension ".docx"
+#    And the response DOCX should contain 3 pages
+
+  @libre
+  Scenario: Convert PDF to ODT
+    Given I generate a PDF file as "fileInput"
+    And the pdf contains 3 pages with random text
+	And the request data includes
+      | parameter        | value     |
+      | outputFormat     | odt       |
+    When I send the API request to the endpoint "/api/v1/convert/pdf/word"
+	Then the response status code should be 200
+    And the response file should have size greater than 100
+    And the response file should have extension ".odt"
+#   And the response ODT should contain 3 pages
+
+  @libre
+  Scenario: Convert PDF to DOC
+    Given I generate a PDF file as "fileInput"
+    And the pdf contains 3 pages with random text
+	And the request data includes
+      | parameter        | value     |
+      | outputFormat     | doc       |
+    When I send the API request to the endpoint "/api/v1/convert/pdf/word"
+	Then the response status code should be 200
+    And the response file should have extension ".doc"
+    And the response file should have size greater than 100
+#    And the response DOC should contain 3 pages
\ No newline at end of file
diff --git a/cucumber/features/steps/step_definitions.py b/cucumber/features/steps/step_definitions.py
new file mode 100644
index 00000000..50022d4e
--- /dev/null
+++ b/cucumber/features/steps/step_definitions.py
@@ -0,0 +1,207 @@
+import os
+import requests
+from behave import given, when, then
+from PyPDF2 import PdfWriter, PdfReader
+import io
+import random
+import string
+from reportlab.lib.pagesizes import letter
+from reportlab.pdfgen import canvas
+import mimetypes
+import requests
+
+#########
+# GIVEN #
+#########
+
+@given('I generate a PDF file as "{fileInput}"')
+def step_generate_pdf(context, fileInput):  # write a one-page blank PDF and register it as the upload named fileInput
+    context.param_name = fileInput  # form field name the file is uploaded under
+    context.file_name = "genericNonCustomisableName.pdf"  # fixed scratch path shared by all the PDF-building steps
+    writer = PdfWriter()
+    writer.add_blank_page(width=72, height=72)  # Single blank page
+    with open(context.file_name, 'wb') as f:
+        writer.write(f)
+    if not hasattr(context, 'files'):
+        context.files = {}
+    context.files[context.param_name] = open(context.file_name, 'rb')  # kept open on purpose: the request step sends this handle
+
+@given('the pdf contains {page_count:d} pages')
+def step_pdf_contains_pages(context, page_count):  # overwrite the scratch PDF with page_count blank pages
+    writer = PdfWriter()
+    for i in range(page_count):
+        writer.add_blank_page(width=72, height=72)
+    with open(context.file_name, 'wb') as f:
+        writer.write(f)
+    context.files[context.param_name].close()  # this handle was opened before the file was rewritten
+    context.files[context.param_name] = open(context.file_name, 'rb')  # fresh handle positioned at the start of the new content
+
+# Duplicate of step_pdf_contains_pages for now...
+@given('the pdf contains {page_count:d} blank pages')
+def step_pdf_contains_blank_pages(context, page_count):  # overwrite the scratch PDF with page_count blank pages
+    writer = PdfWriter()
+    for i in range(page_count):
+        writer.add_blank_page(width=72, height=72)
+    with open(context.file_name, 'wb') as f:
+        writer.write(f)
+    context.files[context.param_name].close()  # this handle was opened before the file was rewritten
+    context.files[context.param_name] = open(context.file_name, 'rb')  # fresh handle positioned at the start of the new content
+
+@given('the pdf contains {page_count:d} pages with random text')
+def step_pdf_contains_pages_with_random_text(context, page_count):  # replace the scratch PDF with page_count letter-size pages of random text
+    buffer = io.BytesIO()  # the PDF is rendered in memory first, then written to disk below
+    c = canvas.Canvas(buffer, pagesize=letter)
+    width, height = letter
+    
+    for _ in range(page_count):
+        text = ''.join(random.choices(string.ascii_letters + string.digits, k=100))  # 100 random alphanumeric characters
+        c.drawString(100, height - 100, text)
+        c.showPage()  # one showPage call per requested page
+        
+    c.save()
+    
+    with open(context.file_name, 'wb') as f:
+        f.write(buffer.getvalue())
+        
+    context.files[context.param_name].close()  # this handle was opened before the file was rewritten
+    context.files[context.param_name] = open(context.file_name, 'rb')  # fresh handle positioned at the start of the new content
+
+@given('the pdf pages all contain the text "{text}"')
+def step_pdf_pages_contain_text(context, text):  # rebuild the scratch PDF with the same page count, drawing text on every page
+    buffer = io.BytesIO()  # the PDF is rendered in memory first, then written to disk below
+    c = canvas.Canvas(buffer, pagesize=letter)
+    width, height = letter
+    
+    for _ in range(len(PdfReader(context.file_name).pages)):  # only the page count of the existing file is reused
+        c.drawString(100, height - 100, text)
+        c.showPage()  # one showPage call per existing page
+        
+    c.save()
+    
+    with open(context.file_name, 'wb') as f:
+        f.write(buffer.getvalue())
+        
+    context.files[context.param_name].close()  # this handle was opened before the file was rewritten
+    context.files[context.param_name] = open(context.file_name, 'rb')  # fresh handle positioned at the start of the new content
+
+@given('the pdf is encrypted with password "{password}"')
+def step_encrypt_pdf(context, password):  # rewrite the scratch PDF encrypted with password, keeping its pages
+    writer = PdfWriter()
+    reader = PdfReader(context.file_name)
+    for i in range(len(reader.pages)):
+        writer.add_page(reader.pages[i])  # copy every existing page into the new writer
+    writer.encrypt(password)
+    with open(context.file_name, 'wb') as f:
+        writer.write(f)
+    context.files[context.param_name].close()  # this handle was opened before the file was rewritten
+    context.files[context.param_name] = open(context.file_name, 'rb')  # fresh handle positioned at the start of the new content
+
+@given('the request data is')
+def step_request_data(context):  # set the request fields from the step's multi-line text block
+    context.request_data = eval(context.text)  # NOTE(review): eval executes the feature text as Python; only acceptable for trusted feature files (ast.literal_eval would be safer)
+
+@given('the request data includes')
+def step_request_data_table(context):  # set the request fields from the step's table
+    context.request_data = {row['parameter']: row['value'] for row in context.table}  # one field per row, keyed by the 'parameter' column
+
+@given('save the generated PDF file as "{filename}" for debugging')
+def save_generated_pdf(context, filename):
+    with open(filename, 'wb') as f:
+        f.write(context.files[context.param_name].read())
+    print(f"Saved generated PDF content to {filename}")
+
+########
+# WHEN #
+########
+
+@when('I send the API request to the endpoint "{endpoint}"')
+def step_send_api_request(context, endpoint):  # POST the collected fields and files to endpoint and keep the response on the context
+    url = f"http://localhost:8080{endpoint}"  # base URL is hard-coded: the server under test must listen on localhost:8080
+    files = context.files if hasattr(context, 'files') else {}
+
+    if not hasattr(context, 'request_data') or context.request_data is None:
+        context.request_data = {}
+
+    form_data = []  # list of (field name, tuple) pairs handed to requests via files=
+    for key, value in context.request_data.items():
+        form_data.append((key, (None, value)))  # filename None: a plain form field rather than a file part
+
+    for key, file in files.items():
+        mime_type, _ = mimetypes.guess_type(file.name)
+        mime_type = mime_type or 'application/octet-stream'  # fallback when the type cannot be guessed from the file name
+        print("form_data " + file.name + " with " + mime_type)
+        form_data.append((key, (file.name, file, mime_type)))
+
+    response = requests.post(url, files=form_data)  # NOTE(review): no timeout is passed, so a hung server would block the run
+    context.response = response
+
+########
+# THEN #
+########
+
+@then('the response content type should be "{content_type}"')
+def step_check_response_content_type(context, content_type):  # assert the Content-Type header begins with content_type
+    actual_content_type = context.response.headers.get('Content-Type', '')  # missing header is treated as an empty string
+    assert actual_content_type.startswith(content_type), f"Expected {content_type} but got {actual_content_type}. Response content: {context.response.content}"  # prefix match, so header parameters after the type are accepted
+
+@then('the response file should have size greater than {size:d}')
+def step_check_response_file_size(context, size):  # assert the response body is strictly larger than size bytes
+    response_file = io.BytesIO(context.response.content)
+    assert len(response_file.getvalue()) > size  # strict comparison: a body of exactly size bytes fails
+
+@then('the response PDF is not passworded')
+def step_check_response_pdf_not_passworded(context):  # assert the response body parses as a PDF that is not encrypted
+    response_file = io.BytesIO(context.response.content)  # parse straight from memory; nothing is written to disk
+    reader = PdfReader(response_file)
+    assert not reader.is_encrypted
+
+@then('the response PDF is passworded')
+def step_check_response_pdf_passworded(context):
+    response_file = io.BytesIO(context.response.content)
+    try:
+        reader = PdfReader(response_file)
+        assert reader.is_encrypted
+    except PdfReadError as e:
+        raise AssertionError(f"Failed to read PDF: {str(e)}. Response content: {context.response.content}")
+    except Exception as e:
+        raise AssertionError(f"An error occurred: {str(e)}. Response content: {context.response.content}")
+
+@then('the response status code should be {status_code:d}')
+def step_check_response_status_code(context, status_code):  # assert the HTTP status of the last response equals status_code
+    assert context.response.status_code == status_code, f"Expected status code {status_code} but got {context.response.status_code}"  # exact match, no ranges
+
+@then('the response should contain error message "{message}"')
+def step_check_response_error_message(context, message):  # assert the JSON body's 'error' field equals message
+    response_json = context.response.json()  # no guard here: a body that is not JSON makes this call raise
+    assert response_json.get('error') == message, f"Expected error message '{message}' but got '{response_json.get('error')}'"
+
+@then('the response PDF should contain {page_count:d} pages')
+def step_check_response_pdf_page_count(context, page_count):  # assert the response PDF has exactly page_count pages
+    response_file = io.BytesIO(context.response.content)  # parse straight from memory; nothing is written to disk
+    reader = PdfReader(response_file)
+    assert len(reader.pages) == page_count, f"Expected {page_count} pages but got {len(reader.pages)} pages"
+
+@then('the response PDF metadata should include "{metadata_key}" as "{metadata_value}"')
+def step_check_response_pdf_metadata(context, metadata_key, metadata_value):
+    response_file = io.BytesIO(context.response.content)
+    reader = PdfReader(response_file)
+    metadata = reader.metadata
+    assert metadata.get("/" + metadata_key) == metadata_value, f"Expected {metadata_key} to be '{metadata_value}' but got '{metadata.get(metadata_key)}'"
+
+@then('the response file should have extension "{extension}"')
+def step_check_response_file_extension(context, extension):
+    content_disposition = context.response.headers.get('Content-Disposition', '')
+    filename = ""
+    if content_disposition:
+        parts = content_disposition.split(';')
+        for part in parts:
+            if part.strip().startswith('filename'):
+                filename = part.split('=')[1].strip().strip('"')
+                break
+    assert filename.endswith(extension), f"Expected file extension {extension} but got {filename}. Response content: {context.response.content}"
+
+@then('save the response file as "{filename}" for debugging')
+def step_save_response_file(context, filename):  # debugging aid: dump the raw response body to filename
+    with open(filename, 'wb') as f:
+        f.write(context.response.content)  # the body is written as-is, whatever its content type
+    print(f"Saved response content to {filename}")
diff --git a/cucumber/requirements.txt b/cucumber/requirements.txt
new file mode 100644
index 00000000..52f20fd2
--- /dev/null
+++ b/cucumber/requirements.txt
@@ -0,0 +1,4 @@
+behave
+requests
+PyPDF2
+reportlab
\ No newline at end of file