---
# OpenAPI 3.0 description of the ProbLab Pipeline API.
#
# Provenance: reconstructed from a whitespace-collapsed `git show` dump of
# commit a8bd5ace1e2184b1f6990245cf3e6bfbdce55523 ("Add swagger.yaml",
# author ghaymah_dev, Fri Apr 24 09:02:46 2026 +0000), which added this
# document as swagger.yaml.
#
# Review changes relative to that commit (paths, operationIds, property
# names and enum values are unchanged):
#   - validate_dataset.hasHeader: enum values quoted so they are strings,
#     matching `type: string` (bare true/false parse as YAML booleans).
#   - process_dataset.file_data: `format: base64` -> `format: byte`, the
#     OpenAPI 3.0 format name for base64-encoded string content.
#   - excluded_cols: inner items accept string or integer, so the documented
#     example (a column name paired with an integer) validates.
#   - validate_dataset 200 response references components ValidationResult
#     instead of duplicating it inline.

openapi: 3.0.0
info:
  title: ProbLab Pipeline API
  description: |
    بسم الله الرحمن الرحيم

    API for ProbLab data processing, probability computations, and machine learning operations.
    This API provides endpoints for dataset validation, processing, probability calculations,
    predictions, and data discovery operations.
  version: '1.0.0'
  contact:
    name: ProbLab Team
    email: support@problab.com
  license:
    name: Proprietary
    url: https://problab.com/license

servers:
  - url: https://problab-api-0004c00ee319.hosted.ghaymah.systems
    description: Production server
  - url: http://localhost:5000
    description: Local development server

tags:
  - name: Dataset Management
    description: Endpoints for dataset validation and processing
  - name: Probability Computation
    description: Endpoints for probability calculations and queries
  - name: Predictions
    description: Endpoints for batch and single predictions
  - name: Discovery
    description: Endpoints for data discovery operations
  - name: Visualization
    description: Endpoints for probability plots and visualizations
  - name: Recommendations
    description: Endpoints for recommendation systems

paths:
  /validate_dataset:
    post:
      tags:
        - Dataset Management
      summary: Validate a dataset
      description: |
        Validate a dataset file and return excluded columns based on data characteristics.
        Uploads a file and validates it against various criteria.
      operationId: validateDataset
      requestBody:
        required: true
        content:
          multipart/form-data:
            schema:
              type: object
              # NOTE(review): delimiter is required for every dataType even
              # though its description scopes it to CSV files - confirm the
              # server really rejects requests that omit it.
              required:
                - file
                - hasHeader
                - dataType
                - delimiter
              properties:
                file:
                  type: string
                  format: binary
                  description: Dataset file to validate (CSV, Excel, JSON, or TXT)
                hasHeader:
                  type: string
                  # Quoted on purpose: this is a form field of type string,
                  # and bare true/false would be parsed as booleans.
                  enum: ['true', 'false']
                  description: Whether the dataset has a header row
                dataType:
                  type: string
                  enum: [csv, xlsx, json, txt]
                  description: Type of the dataset file
                delimiter:
                  type: string
                  description: Column delimiter for CSV files (e.g., ',', ';', '\t')
                sheetName:
                  type: string
                  description: Sheet name for Excel files (optional)
      responses:
        '200':
          description: Dataset validation result
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ValidationResult'
              # Kept at media-type level because siblings of $ref are
              # ignored in OpenAPI 3.0.
              example:
                is_valid: true
                excluded_cols: [["column_name", 5]]
        '400':
          description: Invalid request or missing parameters
        '500':
          description: Internal server error

  /process_dataset:
    post:
      tags:
        - Dataset Management
      summary: Process a dataset
      description: |
        Process a dataset from uploaded file data, discover schema, encode it, and store results locally.
        This endpoint is typically triggered by webhook events.
      operationId: processDataset
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              required:
                - event
              properties:
                event:
                  type: object
                  properties:
                    data:
                      type: object
                      properties:
                        new:
                          type: object
                          properties:
                            id:
                              type: string
                              description: Dataset ID
                            file_data:
                              type: string
                              # `byte` is the OpenAPI 3.0 format for
                              # base64-encoded characters.
                              format: byte
                              description: Base64 encoded file data
                            file_name:
                              type: string
                              description: Name of the uploaded file
                            hasHeader:
                              type: boolean
                              description: Whether the dataset has a header row
                            delimiter:
                              type: string
                              description: Column delimiter for CSV files
                            sheetName:
                              type: string
                              description: Sheet name for Excel files
              example:
                event:
                  data:
                    new:
                      id: "dataset123"
                      file_data: "base64-encoded-file-data-here"
                      file_name: "sample.csv"
                      hasHeader: true
                      delimiter: ","
                      sheetName: "Sheet1"
      responses:
        '200':
          description: Dataset processed successfully
          content:
            text/plain:
              schema:
                type: string
                description: Local file path of the processed file
                example: "./datasets/dataset123/data.bit"
        '400':
          description: No file data provided or invalid request
        '500':
          description: Internal server error

  /compute_expr:
    post:
      tags:
        - Probability Computation
      summary: Compute probability expression
      description: |
        Compute probability for a given expression on a dataset.
        Supports both conditional and joint probability queries.
      operationId: computeExpression
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              required:
                - expression
                - dataset_id
              properties:
                expression:
                  type: string
                  description: |
                    Probability expression in format "query_event|query_condition"
                    Example: "GENERATION:genbb|PARTICIPATION_TYPE:type1"
                    Where query_event is "key1:value1;key2:value2" and query_condition is optional
                dataset_id:
                  type: string
                  description: ID of the dataset to query
              example:
                expression: "GENERATION:genbb|PARTICIPATION_TYPE:type1"
                dataset_id: "dataset789"
      responses:
        '200':
          description: Probability computation result
          content:
            application/json:
              schema:
                type: object
                properties:
                  output:
                    type: string
                    description: Probability result or error message
                example:
                  output: "0.75"
        '400':
          description: Invalid expression or missing parameters
        '500':
          description: Internal server error

  /table_prob:
    post:
      tags:
        - Probability Computation
      summary: Generate probability table
      description: |
        Generate a probability table for given variables.
        Creates conditional probability tables for visualization.
      operationId: tableProbability
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              required:
                - expression
                - dataset_id
              properties:
                expression:
                  type: string
                  description: |
                    Expression defining column and row variables
                    Format: "column_vars|row_vars"
                    Example: "GENERATION:genbb|PARTICIPATION_TYPE:type1"
                dataset_id:
                  type: string
                  description: ID of the dataset to query
              example:
                expression: "GENERATION:genbb|PARTICIPATION_TYPE:type1"
                dataset_id: "dataset789"
      responses:
        '200':
          description: Probability table result
          content:
            application/json:
              schema:
                type: object
                properties:
                  output:
                    type: string
                    description: Table data in string format
                example:
                  output: "[[0.1,0.2],[0.3,0.4]]"
        '400':
          description: Invalid expression or missing parameters
        '500':
          description: Internal server error

  /prob_plot:
    post:
      tags:
        - Visualization
      summary: Generate probability plot
      description: |
        Generate a plot visualization for probability distributions.
        Creates various plot types based on probability data.
      operationId: probabilityPlot
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              required:
                - expression
                - dataset_id
                - plot_type
              properties:
                expression:
                  type: string
                  description: |
                    Expression defining variables for plotting
                    Format: "column_vars|row_vars"
                dataset_id:
                  type: string
                  description: ID of the dataset to query
                plot_type:
                  type: string
                  description: Type of plot to generate
                  enum: [bar, line, scatter, heatmap]
              example:
                expression: "GENERATION:genbb|PARTICIPATION_TYPE:type1"
                dataset_id: "dataset789"
                plot_type: "bar"
      responses:
        '200':
          description: Plot generation result
          content:
            application/json:
              schema:
                type: object
                properties:
                  output:
                    type: string
                    description: Plot data or file reference
                example:
                  output: "Plot generated successfully"
        '400':
          description: Invalid expression or missing parameters
        '500':
          description: Internal server error

  /describe_prob:
    post:
      tags:
        - Discovery
      summary: Describe probability distribution
      description: |
        Get descriptive statistics and information about a variable's probability distribution.
      operationId: describeProbability
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              required:
                - expression
                - dataset_id
                - command_type
              properties:
                expression:
                  type: string
                  description: |
                    Expression containing the variable to describe
                    Format: "variable_name:value"
                dataset_id:
                  type: string
                  description: ID of the dataset to query
                command_type:
                  type: string
                  enum: [describe, summary]
                  description: Type of description command
              example:
                expression: "GENERATION:genbb"
                dataset_id: "dataset789"
                command_type: "describe"
      responses:
        '200':
          description: Description result
          content:
            application/json:
              schema:
                type: object
                properties:
                  output:
                    type: string
                    description: Descriptive statistics
                example:
                  output: "Variable statistics..."
        '400':
          description: Invalid expression or missing parameters
        '500':
          description: Internal server error

  /batch_pred:
    post:
      tags:
        - Predictions
      summary: Batch predictions
      description: |
        Perform batch predictions on multiple expressions.
        Processes multiple prediction expressions in parallel.
      operationId: batchPredict
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              required:
                - expressions
                - dataset_id
              properties:
                expressions:
                  type: array
                  items:
                    type: string
                  description: |
                    Array of prediction expressions
                    Each expression should be in format "query_event|query_condition"
                  example:
                    - "GENERATION:genbb|PARTICIPATION_TYPE:type1"
                    - "GENERATION:gencc|PARTICIPATION_TYPE:type2"
                dataset_id:
                  type: string
                  description: ID of the dataset to query
              example:
                expressions:
                  - "GENERATION:genbb|PARTICIPATION_TYPE:type1"
                  - "GENERATION:gencc|PARTICIPATION_TYPE:type2"
                dataset_id: "dataset789"
      responses:
        '200':
          description: Batch prediction results
          content:
            application/json:
              schema:
                type: object
                properties:
                  processed:
                    type: integer
                    description: Number of expressions processed
                  successful:
                    type: array
                    items:
                      type: object
                      properties:
                        expression:
                          type: string
                        result:
                          type: object
                          properties:
                            final_prediction:
                              type: string
                            algorithm:
                              type: string
                            likelihood:
                              type: object
                              properties:
                                probability:
                                  type: number
                                labels:
                                  type: array
                                  items:
                                    type: string
                        status:
                          type: string
                          enum: [success]
                  failed:
                    type: array
                    items:
                      type: object
                      properties:
                        expression:
                          type: string
                        error:
                          type: string
                        status:
                          type: string
                          enum: [failed]
                example:
                  processed: 2
                  successful:
                    - expression: "GENERATION:genbb|PARTICIPATION_TYPE:type1"
                      result:
                        final_prediction: "Feature X"
                        algorithm: "Random Forest"
                        likelihood:
                          probability: 0.85
                          labels: ["label1", "label2"]
                      status: "success"
                  failed: []
        '400':
          description: Invalid payload structure
        '500':
          description: Internal server error

  /prob_predict:
    post:
      tags:
        - Predictions
      summary: Single prediction
      description: |
        Perform a single prediction based on query event and condition.
      operationId: probabilityPredict
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              required:
                - expression
                - dataset_id
              properties:
                expression:
                  type: string
                  description: |
                    Prediction expression in format "query_event|query_condition"
                    Example: "GENERATION:genbb|PARTICIPATION_TYPE:type1"
                dataset_id:
                  type: string
                  description: ID of the dataset to query
              example:
                expression: "GENERATION:genbb|PARTICIPATION_TYPE:type1"
                dataset_id: "dataset789"
      responses:
        '200':
          description: Prediction result
          content:
            application/json:
              schema:
                type: object
                properties:
                  output:
                    type: string
                    description: Prediction output
                example:
                  # Literal block with stripped trailing newline: the value
                  # is the same three-line string as before.
                  output: |-
                    Predicted Features: X
                    Algorithms = Random Forest
                    ('likelihood', 0.85, ['label1', 'label2'])
        '400':
          description: Invalid expression or missing parameters
        '500':
          description: Internal server error

  /discovery:
    post:
      tags:
        - Discovery
      summary: Data discovery operations
      description: |
        Perform various data discovery operations (association, influencer, anomaly, polymaly).
      operationId: discovery
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              required:
                - expression
                - dataset_id
                - discovery_type
              properties:
                expression:
                  type: string
                  description: |
                    Expression for discovery in format "query_event|query_condition"
                dataset_id:
                  type: string
                  description: ID of the dataset to query
                discovery_type:
                  type: string
                  # NOTE(review): `polymaly` is kept verbatim - confirm the
                  # spelling against the values the server accepts.
                  enum: [association, influencer, anomaly, polymaly]
                  description: Type of discovery operation
              example:
                expression: "GENERATION:genbb|PARTICIPATION_TYPE:type1"
                dataset_id: "dataset789"
                discovery_type: "association"
      responses:
        '200':
          description: Discovery results
          content:
            application/json:
              schema:
                type: object
                properties:
                  output:
                    type: string
                    description: Discovery operation output
                example:
                  output: "Discovery results..."
        '400':
          description: Invalid expression or missing parameters
        '500':
          description: Internal server error

  /recommend-item:
    post:
      tags:
        - Recommendations
      summary: Recommend items
      description: |
        Get item recommendations based on input data.
        Forwards request to recommendation service.
      operationId: recommendItem
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                # Define based on recommender API requirements
                # NOTE(review): the fields below read as placeholders;
                # confirm them against the recommendation service.
                user_id:
                  type: string
                  description: User ID for recommendations
                item_ids:
                  type: array
                  items:
                    type: string
                  description: List of item IDs
                preferences:
                  type: object
                  description: User preferences
              example:
                user_id: "user123"
                item_ids: ["item1", "item2", "item3"]
                preferences:
                  category: "electronics"
                  price_range: "medium"
      responses:
        '200':
          description: Recommendation results
          content:
            application/json:
              schema:
                type: object
                properties:
                  recommendations:
                    type: array
                    items:
                      type: object
                      properties:
                        item_id:
                          type: string
                        score:
                          type: number
                example:
                  recommendations:
                    - item_id: "item456"
                      score: 0.95
                    - item_id: "item789"
                      score: 0.87
        '500':
          description: Internal server error

  /recommend-by-country:
    post:
      tags:
        - Recommendations
      summary: Recommend items by country
      description: |
        Get item recommendations filtered by country.
        Forwards request to recommendation service.
      operationId: recommendByCountry
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                user_id:
                  type: string
                  description: User ID for recommendations
                country:
                  type: string
                  description: Country code for filtering
                item_ids:
                  type: array
                  items:
                    type: string
                  description: List of item IDs
              example:
                user_id: "user123"
                country: "US"
                item_ids: ["item1", "item2", "item3"]
      responses:
        '200':
          description: Recommendation results by country
          content:
            application/json:
              schema:
                type: object
                properties:
                  recommendations:
                    type: array
                    items:
                      type: object
                      properties:
                        item_id:
                          type: string
                        score:
                          type: number
                        country_specific:
                          type: boolean
                example:
                  recommendations:
                    - item_id: "item456"
                      score: 0.92
                      country_specific: true
                    - item_id: "item789"
                      score: 0.85
                      country_specific: false
        '500':
          description: Internal server error

components:
  schemas:
    # NOTE(review): Error is not referenced by any response in this document;
    # the 400/500 responses declare no body. Confirm the server's actual
    # error payload before wiring this schema into them.
    Error:
      type: object
      properties:
        error:
          type: string
          description: Error message
      example:
        error: "Invalid expression format"

    # Body of the /validate_dataset 200 response.
    ValidationResult:
      type: object
      properties:
        is_valid:
          type: boolean
          description: Whether the dataset is valid
        excluded_cols:
          type: array
          items:
            type: array
            # OpenAPI 3.0 has no tuple typing, so each entry is modelled as
            # an array whose members may be a string or an integer; the
            # documented example entry is ["column_name", 5].
            items:
              oneOf:
                - type: string
                - type: integer
          description: List of excluded columns with their indices

  securitySchemes:
    BearerAuth:
      type: http
      scheme: bearer
      bearerFormat: JWT

# Applies bearer authentication to every operation above.
security:
  - BearerAuth: []