commit a8bd5ace1e2184b1f6990245cf3e6bfbdce55523
Author: ghaymah_dev
Date: Fri Apr 24 09:02:46 2026 +0000
Add swagger.yaml
diff --git a/swagger.yaml b/swagger.yaml
new file mode 100644
index 0000000..ef5e2b1
--- /dev/null
+++ b/swagger.yaml
@@ -0,0 +1,728 @@
+openapi: 3.0.0  # OpenAPI specification version this document is written against
+info:  # human-facing metadata; nothing here affects request/response shapes
+ title: ProbLab Pipeline API
+ description: |
+ بسم الله الرحمن الرحيم
+
+ API for ProbLab data processing, probability computations, and machine learning operations.
+ This API provides endpoints for dataset validation, processing, probability calculations,
+ predictions, and data discovery operations.
+ version: 1.0.0  # version of this API description, not of the OpenAPI spec
+ contact:
+ name: ProbLab Team
+ email: support@problab.com
+ license:
+ name: Proprietary
+ url: https://problab.com/license
+
+servers:  # base URLs that the entries under `paths` are appended to
+ - url: https://problab-api-0004c00ee319.hosted.ghaymah.systems
+ description: Production server
+ - url: http://localhost:5000  # plain HTTP on port 5000
+ description: Local development server
+
+tags:  # grouping labels; each operation below names exactly one of these in its own `tags` list
+ - name: Dataset Management  # /validate_dataset, /process_dataset
+ description: Endpoints for dataset validation and processing
+ - name: Probability Computation  # /compute_expr, /table_prob
+ description: Endpoints for probability calculations and queries
+ - name: Predictions  # /batch_pred, /prob_predict
+ description: Endpoints for batch and single predictions
+ - name: Discovery  # /describe_prob, /discovery
+ description: Endpoints for data discovery operations
+ - name: Visualization  # /prob_plot
+ description: Endpoints for probability plots and visualizations
+ - name: Recommendations  # /recommend-item, /recommend-by-country
+ description: Endpoints for recommendation systems
+
+paths:
+ /validate_dataset:  # POST a dataset file as form data; the 200 body reports validity and excluded columns
+ post:
+ tags:
+ - Dataset Management
+ summary: Validate a dataset
+ description: |
+ Validate a dataset file and return excluded columns based on data characteristics.
+ Uploads a file and validates it against various criteria.
+ operationId: validateDataset
+ requestBody:
+ required: true
+ content:
+ multipart/form-data:  # file upload, so every field arrives as a form part
+ schema:
+ type: object
+ required:  # sheetName is the only field not listed here
+ - file
+ - hasHeader
+ - dataType
+ - delimiter
+ properties:
+ file:
+ type: string
+ format: binary
+ description: Dataset file to validate (CSV, Excel, JSON, or TXT)
+ hasHeader:
+ type: string
+ enum: ["true", "false"]  # quoted: bare true/false are YAML booleans and can never match type string
+ description: Whether the dataset has a header row
+ dataType:
+ type: string
+ enum: [csv, xlsx, json, txt]
+ description: Type of the dataset file
+ delimiter:  # NOTE(review): required for every dataType although described as CSV-specific; confirm with the server
+ type: string
+ description: Column delimiter for CSV files (e.g., ',', ';', '\t')
+ sheetName:
+ type: string
+ description: Sheet name for Excel files (optional)
+ responses:
+ '200':
+ description: Dataset validation result
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ is_valid:
+ type: boolean
+ description: Whether the dataset is valid
+ excluded_cols:  # array of pairs; see the example below
+ type: array
+ items:
+ type: array
+ items:
+ oneOf: [{type: string}, {type: integer}]  # pair mixes a column name with an integer index, as in the example
+ description: List of excluded columns with their indices
+ example:
+ is_valid: true
+ excluded_cols: [["column_name", 5]]
+ '400':  # no response body is declared for the error statuses
+ description: Invalid request or missing parameters
+ '500':
+ description: Internal server error
+
+ /process_dataset:  # POST a JSON event envelope carrying the file; the 200 body is a plain-text path
+ post:
+ tags:
+ - Dataset Management
+ summary: Process a dataset
+ description: |
+ Process a dataset from uploaded file data, discover schema, encode it, and store results locally.
+ This endpoint is typically triggered by webhook events.
+ operationId: processDataset
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ type: object
+ required:  # only the outer envelope key is mandatory at schema level
+ - event
+ properties:
+ event:
+ type: object
+ properties:
+ data:
+ type: object
+ properties:
+ new:  # the dataset record itself lives at event.data.new (see example)
+ type: object
+ properties:
+ id:
+ type: string
+ description: Dataset ID
+ file_data:
+ type: string
+ format: byte  # OpenAPI 3.0 format name for base64-encoded data; "base64" is not a defined format
+ description: Base64 encoded file data
+ file_name:
+ type: string
+ description: Name of the uploaded file
+ hasHeader:
+ type: boolean  # a JSON boolean here, unlike the string form field of /validate_dataset
+ description: Whether the dataset has a header row
+ delimiter:
+ type: string
+ description: Column delimiter for CSV files
+ sheetName:
+ type: string
+ description: Sheet name for Excel files
+ example:
+ event:
+ data:
+ new:
+ id: "dataset123"
+ file_data: "base64-encoded-file-data-here"
+ file_name: "sample.csv"
+ hasHeader: true
+ delimiter: ","
+ sheetName: "Sheet1"
+ responses:
+ '200':
+ description: Dataset processed successfully
+ content:
+ text/plain:  # success body is a bare string, not JSON
+ schema:
+ type: string
+ description: Local file path of the processed file
+ example: "./datasets/dataset123/data.bit"
+ '400':  # no response body is declared for the error statuses
+ description: No file data provided or invalid request
+ '500':
+ description: Internal server error
+
+ /compute_expr:  # POST {expression, dataset_id} as JSON; the 200 body wraps the result in `output`
+ post:
+ tags:
+ - Probability Computation
+ summary: Compute probability expression
+ description: |
+ Compute probability for a given expression on a dataset.
+ Supports both conditional and joint probability queries.
+ operationId: computeExpression
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ type: object
+ required:  # both declared properties are mandatory
+ - expression
+ - dataset_id
+ properties:
+ expression:
+ type: string
+ description: |
+ Probability expression in format "query_event|query_condition"
+ Example: "GENERATION:genbb|PARTICIPATION_TYPE:type1"
+ Where query_event is "key1:value1;key2:value2" and query_condition is optional
+ dataset_id:
+ type: string
+ description: ID of the dataset to query
+ example:
+ expression: "GENERATION:genbb|PARTICIPATION_TYPE:type1"
+ dataset_id: "dataset789"
+ responses:
+ '200':
+ description: Probability computation result
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ output:
+ type: string  # string-typed: per its description it may carry an error message instead of a number
+ description: Probability result or error message
+ example:
+ output: "0.75"
+ '400':  # no response body is declared for the error statuses
+ description: Invalid expression or missing parameters
+ '500':
+ description: Internal server error
+
+ /table_prob:  # POST {expression, dataset_id} as JSON; the 200 body returns the table as a string in `output`
+ post:
+ tags:
+ - Probability Computation
+ summary: Generate probability table
+ description: |
+ Generate a probability table for given variables.
+ Creates conditional probability tables for visualization.
+ operationId: tableProbability
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ type: object
+ required:  # both declared properties are mandatory
+ - expression
+ - dataset_id
+ properties:
+ expression:
+ type: string
+ description: |
+ Expression defining column and row variables
+ Format: "column_vars|row_vars"
+ Example: "GENERATION:genbb|PARTICIPATION_TYPE:type1"
+ dataset_id:
+ type: string
+ description: ID of the dataset to query
+ example:
+ expression: "GENERATION:genbb|PARTICIPATION_TYPE:type1"
+ dataset_id: "dataset789"
+ responses:
+ '200':
+ description: Probability table result
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ output:
+ type: string  # the table is serialized into one string, not returned as a JSON array
+ description: Table data in string format
+ example:
+ output: "[[0.1,0.2],[0.3,0.4]]"
+ '400':  # no response body is declared for the error statuses
+ description: Invalid expression or missing parameters
+ '500':
+ description: Internal server error
+
+ /prob_plot:  # POST {expression, dataset_id, plot_type} as JSON; the 200 body has a single string `output`
+ post:
+ tags:
+ - Visualization
+ summary: Generate probability plot
+ description: |
+ Generate a plot visualization for probability distributions.
+ Creates various plot types based on probability data.
+ operationId: probabilityPlot
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ type: object
+ required:  # all three declared properties are mandatory
+ - expression
+ - dataset_id
+ - plot_type
+ properties:
+ expression:
+ type: string
+ description: |
+ Expression defining variables for plotting
+ Format: "column_vars|row_vars"
+ dataset_id:
+ type: string
+ description: ID of the dataset to query
+ plot_type:
+ type: string
+ description: Type of plot to generate
+ enum: [bar, line, scatter, heatmap]  # the only accepted plot kinds
+ example:
+ expression: "GENERATION:genbb|PARTICIPATION_TYPE:type1"
+ dataset_id: "dataset789"
+ plot_type: "bar"
+ responses:
+ '200':
+ description: Plot generation result
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ output:
+ type: string  # described as plot data or a file reference; the example shows a status message
+ description: Plot data or file reference
+ example:
+ output: "Plot generated successfully"
+ '400':  # no response body is declared for the error statuses
+ description: Invalid expression or missing parameters
+ '500':
+ description: Internal server error
+
+ /describe_prob:  # POST {expression, dataset_id, command_type} as JSON; the 200 body has a single string `output`
+ post:
+ tags:
+ - Discovery
+ summary: Describe probability distribution
+ description: |
+ Get descriptive statistics and information about a variable's probability distribution.
+ operationId: describeProbability
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ type: object
+ required:  # all three declared properties are mandatory
+ - expression
+ - dataset_id
+ - command_type
+ properties:
+ expression:
+ type: string
+ description: |
+ Expression containing the variable to describe
+ Format: "variable_name:value"
+ dataset_id:
+ type: string
+ description: ID of the dataset to query
+ command_type:
+ type: string
+ enum: [describe, summary]  # the only accepted commands
+ description: Type of description command
+ example:
+ expression: "GENERATION:genbb"
+ dataset_id: "dataset789"
+ command_type: "describe"
+ responses:
+ '200':
+ description: Description result
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ output:
+ type: string  # statistics are returned as free text, not as structured fields
+ description: Descriptive statistics
+ example:
+ output: "Variable statistics..."
+ '400':  # no response body is declared for the error statuses
+ description: Invalid expression or missing parameters
+ '500':
+ description: Internal server error
+
+ /batch_pred:  # POST {expressions[], dataset_id} as JSON; the 200 body splits results into successful/failed lists
+ post:
+ tags:
+ - Predictions
+ summary: Batch predictions
+ description: |
+ Perform batch predictions on multiple expressions.
+ Processes multiple prediction expressions in parallel.
+ operationId: batchPredict
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ type: object
+ required:  # both declared properties are mandatory
+ - expressions
+ - dataset_id
+ properties:
+ expressions:
+ type: array
+ items:
+ type: string
+ description: |
+ Array of prediction expressions
+ Each expression should be in format "query_event|query_condition"
+ example: ["GENERATION:genbb|PARTICIPATION_TYPE:type1", "GENERATION:gencc|PARTICIPATION_TYPE:type2"]
+ dataset_id:
+ type: string
+ description: ID of the dataset to query
+ example:
+ expressions: ["GENERATION:genbb|PARTICIPATION_TYPE:type1", "GENERATION:gencc|PARTICIPATION_TYPE:type2"]
+ dataset_id: "dataset789"
+ responses:
+ '200':
+ description: Batch prediction results
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ processed:
+ type: integer
+ description: Number of expressions processed
+ successful:  # one entry per expression that produced a result
+ type: array
+ items:
+ type: object
+ properties:
+ expression:
+ type: string
+ result:
+ type: object
+ properties:
+ final_prediction:
+ type: string
+ algorithm:
+ type: string
+ likelihood:
+ type: object
+ properties:
+ probability:
+ type: number
+ labels:
+ type: array
+ items:
+ type: string
+ status:
+ type: string
+ enum: [success]  # single-value enum: entries in this list always carry this status
+ failed:  # one entry per expression that errored; carries `error` instead of `result`
+ type: array
+ items:
+ type: object
+ properties:
+ expression:
+ type: string
+ error:
+ type: string
+ status:
+ type: string
+ enum: [failed]  # single-value enum: entries in this list always carry this status
+ example:
+ processed: 2
+ successful:
+ - expression: "GENERATION:genbb|PARTICIPATION_TYPE:type1"
+ result:
+ final_prediction: "Feature X"
+ algorithm: "Random Forest"
+ likelihood:
+ probability: 0.85
+ labels: ["label1", "label2"]
+ status: "success"
+ failed: []
+ '400':  # no response body is declared for the error statuses
+ description: Invalid payload structure
+ '500':
+ description: Internal server error
+
+ /prob_predict:  # POST {expression, dataset_id} as JSON; the 200 body has a single string `output`
+ post:
+ tags:
+ - Predictions
+ summary: Single prediction
+ description: |
+ Perform a single prediction based on query event and condition.
+ operationId: probabilityPredict
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ type: object
+ required:  # both declared properties are mandatory
+ - expression
+ - dataset_id
+ properties:
+ expression:
+ type: string
+ description: |
+ Prediction expression in format "query_event|query_condition"
+ Example: "GENERATION:genbb|PARTICIPATION_TYPE:type1"
+ dataset_id:
+ type: string
+ description: ID of the dataset to query
+ example:
+ expression: "GENERATION:genbb|PARTICIPATION_TYPE:type1"
+ dataset_id: "dataset789"
+ responses:
+ '200':
+ description: Prediction result
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ output:
+ type: string  # free-text result, unlike the structured `result` object of /batch_pred
+ description: Prediction output
+ example:
+ output: "Predicted Features: X\nAlgorithms = Random Forest\n('likelihood', 0.85, ['label1', 'label2'])"
+ '400':  # no response body is declared for the error statuses
+ description: Invalid expression or missing parameters
+ '500':
+ description: Internal server error
+
+ /discovery:  # POST {expression, dataset_id, discovery_type} as JSON; the 200 body has a single string `output`
+ post:
+ tags:
+ - Discovery
+ summary: Data discovery operations
+ description: |
+ Perform various data discovery operations (association, influencer, anomaly, polymaly).
+ operationId: discovery
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ type: object
+ required:  # all three declared properties are mandatory
+ - expression
+ - dataset_id
+ - discovery_type
+ properties:
+ expression:
+ type: string
+ description: |
+ Expression for discovery in format "query_event|query_condition"
+ dataset_id:
+ type: string
+ description: ID of the dataset to query
+ discovery_type:
+ type: string
+ enum: [association, influencer, anomaly, polymaly]  # NOTE(review): confirm "polymaly" is the spelling the server expects
+ description: Type of discovery operation
+ example:
+ expression: "GENERATION:genbb|PARTICIPATION_TYPE:type1"
+ dataset_id: "dataset789"
+ discovery_type: "association"
+ responses:
+ '200':
+ description: Discovery results
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ output:
+ type: string  # same string-only response shape for every discovery_type
+ description: Discovery operation output
+ example:
+ output: "Discovery results..."
+ '400':  # no response body is declared for the error statuses
+ description: Invalid expression or missing parameters
+ '500':
+ description: Internal server error
+
+ /recommend-item:  # POST a JSON body; the 200 body is a list of {item_id, score} recommendations
+ post:
+ tags:
+ - Recommendations
+ summary: Recommend items
+ description: |
+ Get item recommendations based on input data.
+ Forwards request to recommendation service.
+ operationId: recommendItem
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ type: object  # no `required` list: every property below is optional at schema level
+ properties:
+ # Placeholder fields below: define based on recommender API requirements
+ user_id:
+ type: string
+ description: User ID for recommendations
+ item_ids:
+ type: array
+ items:
+ type: string
+ description: List of item IDs
+ preferences:
+ type: object  # free-form object; no inner properties are declared
+ description: User preferences
+ example:
+ user_id: "user123"
+ item_ids: ["item1", "item2", "item3"]
+ preferences:
+ category: "electronics"
+ price_range: "medium"
+ responses:
+ '200':
+ description: Recommendation results
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ recommendations:
+ type: array
+ items:
+ type: object
+ properties:
+ item_id:
+ type: string
+ score:
+ type: number
+ example:
+ recommendations:
+ - item_id: "item456"
+ score: 0.95
+ - item_id: "item789"
+ score: 0.87
+ '500':  # only 200 and 500 are declared here; there is no 400 entry
+ description: Internal server error
+
+ /recommend-by-country:  # POST a JSON body; the 200 body is a list of {item_id, score, country_specific}
+ post:
+ tags:
+ - Recommendations
+ summary: Recommend items by country
+ description: |
+ Get item recommendations filtered by country.
+ Forwards request to recommendation service.
+ operationId: recommendByCountry
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ type: object  # no `required` list: every property below is optional at schema level
+ properties:
+ user_id:
+ type: string
+ description: User ID for recommendations
+ country:
+ type: string  # no enum or pattern constrains the code; the example uses "US"
+ description: Country code for filtering
+ item_ids:
+ type: array
+ items:
+ type: string
+ description: List of item IDs
+ example:
+ user_id: "user123"
+ country: "US"
+ item_ids: ["item1", "item2", "item3"]
+ responses:
+ '200':
+ description: Recommendation results by country
+ content:
+ application/json:
+ schema:
+ type: object
+ properties:
+ recommendations:
+ type: array
+ items:
+ type: object
+ properties:
+ item_id:
+ type: string
+ score:
+ type: number
+ country_specific:  # extra flag that /recommend-item results do not have
+ type: boolean
+ example:
+ recommendations:
+ - item_id: "item456"
+ score: 0.92
+ country_specific: true
+ - item_id: "item789"
+ score: 0.85
+ country_specific: false
+ '500':  # only 200 and 500 are declared here; there is no 400 entry
+ description: Internal server error
+
+components:  # reusable definitions; only securitySchemes.BearerAuth is referenced elsewhere in this file
+ schemas:
+ Error:  # no $ref in this file points at this schema; the 400/500 responses declare no body
+ type: object
+ properties:
+ error:
+ type: string
+ description: Error message
+ example:
+ error: "Invalid expression format"
+
+ ValidationResult:  # same fields as the inline /validate_dataset 200 schema, but not referenced from it
+ type: object
+ properties:
+ is_valid:
+ type: boolean
+ excluded_cols:
+ type: array
+ items:
+ type: array
+ items:
+ type: string
+
+ securitySchemes:
+ BearerAuth:  # named by the top-level `security` requirement
+ type: http
+ scheme: bearer
+ bearerFormat: JWT  # documentation hint only
+
+security:  # document-wide requirement; no operation in this file declares its own `security` override
+ - BearerAuth: []  # empty list: this scheme takes no scopes