ComputerVision - SynapseML

import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; import DocTable from "@theme/DocumentationTable";

Computer Vision

OCR

<Tabs> <TabItem value="python">

python

import os

from synapse.ml.services import *

# Prefer the COGNITIVE_API_KEY environment variable and only fall back to the
# secret lookup when it is unset or empty, so the lookup is not run needlessly.
# NOTE(review): getSecret is not defined in this snippet - presumably supplied
# by the notebook/test environment; confirm.
cognitiveKey = os.environ.get("COGNITIVE_API_KEY") or getSecret("cognitive-api-key")

# Single-column DataFrame holding the URL of the image to process.
df = spark.createDataFrame(
    [("https://mmlspark.blob.core.windows.net/datasets/OCR/test1.jpg",)],
    ["url"],
)

# Read image URLs from "url" and write the service response to "ocr".
ocr = (
    OCR()
    .setSubscriptionKey(cognitiveKey)
    .setLocation("eastus")
    .setImageUrlCol("url")
    .setDetectOrientation(True)
    .setOutputCol("ocr")
)

ocr.transform(df).show()

</TabItem> <TabItem value="scala">

scala

import com.microsoft.azure.synapse.ml.services.vision.OCR
import spark.implicits._

// Read the key as a String and fail fast with a clear message when it is
// missing. A `None` default would widen the inferred type beyond String and
// the value could no longer be passed to setSubscriptionKey.
val cognitiveKey = sys.env.getOrElse(
  "COGNITIVE_API_KEY",
  sys.error("COGNITIVE_API_KEY environment variable is not set"))

// Single-column DataFrame holding the URL of the image to process.
val df = Seq(
  "https://mmlspark.blob.core.windows.net/datasets/OCR/test1.jpg"
).toDF("url")

// Read image URLs from "url" and write the service response to "ocr".
val ocr = (new OCR()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setImageUrlCol("url")
  .setDetectOrientation(true)
  .setOutputCol("ocr"))

ocr.transform(df).show()

</TabItem> </Tabs>

AnalyzeImage

<Tabs> <TabItem value="python">

python

import os

from synapse.ml.services import *

# Prefer the COGNITIVE_API_KEY environment variable and only fall back to the
# secret lookup when it is unset or empty, so the lookup is not run needlessly.
# NOTE(review): getSecret is not defined in this snippet - presumably supplied
# by the notebook/test environment; confirm.
cognitiveKey = os.environ.get("COGNITIVE_API_KEY") or getSecret("cognitive-api-key")

# Each row pairs an image URL with an optional language code (None = not given).
df = spark.createDataFrame(
    [
        ("https://mmlspark.blob.core.windows.net/datasets/OCR/test1.jpg", "en"),
        ("https://mmlspark.blob.core.windows.net/datasets/OCR/test2.png", None),
        ("https://mmlspark.blob.core.windows.net/datasets/OCR/test3.png", "en"),
    ],
    ["image", "language"],
)

# Read image URLs from "image" and the per-row language from "language";
# the service response is written to "features".
ai = (
    AnalyzeImage()
    .setSubscriptionKey(cognitiveKey)
    .setLocation("eastus")
    .setImageUrlCol("image")
    .setLanguageCol("language")
    .setVisualFeatures(["Categories", "Tags", "Description", "Faces", "ImageType", "Color", "Adult", "Objects", "Brands"])
    .setDetails(["Celebrities", "Landmarks"])
    .setOutputCol("features")
)

ai.transform(df).show()

</TabItem> <TabItem value="scala">

scala

import com.microsoft.azure.synapse.ml.services.vision.AnalyzeImage
import spark.implicits._

// Read the key as a String and fail fast with a clear message when it is
// missing. A `None` default would widen the inferred type beyond String and
// the value could no longer be passed to setSubscriptionKey.
val cognitiveKey = sys.env.getOrElse(
  "COGNITIVE_API_KEY",
  sys.error("COGNITIVE_API_KEY environment variable is not set"))

// Each row pairs an image URL with a language code; the second row has none.
val df = Seq(
  ("https://mmlspark.blob.core.windows.net/datasets/OCR/test1.jpg", "en"),
  ("https://mmlspark.blob.core.windows.net/datasets/OCR/test2.png", null),
  ("https://mmlspark.blob.core.windows.net/datasets/OCR/test3.png", "en")
).toDF("url", "language")

// Read image URLs from "url" and the per-row language from "language";
// the service response is written to "features".
val ai = (new AnalyzeImage()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setImageUrlCol("url")
  .setLanguageCol("language")
  .setVisualFeatures(Seq("Categories", "Tags", "Description", "Faces", "ImageType", "Color", "Adult", "Objects", "Brands"))
  .setDetails(Seq("Celebrities", "Landmarks"))
  .setOutputCol("features"))

ai.transform(df).select("url", "features").show()

</TabItem> </Tabs>

RecognizeText

<Tabs> <TabItem value="python">

python

import os

from synapse.ml.services import *

# Prefer the COGNITIVE_API_KEY environment variable and only fall back to the
# secret lookup when it is unset or empty, so the lookup is not run needlessly.
# NOTE(review): getSecret is not defined in this snippet - presumably supplied
# by the notebook/test environment; confirm.
cognitiveKey = os.environ.get("COGNITIVE_API_KEY") or getSecret("cognitive-api-key")

# Single-column DataFrame holding the URLs of the images to process.
df = spark.createDataFrame(
    [
        ("https://mmlspark.blob.core.windows.net/datasets/OCR/test1.jpg",),
        ("https://mmlspark.blob.core.windows.net/datasets/OCR/test2.png",),
        ("https://mmlspark.blob.core.windows.net/datasets/OCR/test3.png",),
    ],
    ["url"],
)

# Read image URLs from "url" and write the service response to "ocr".
rt = (
    RecognizeText()
    .setSubscriptionKey(cognitiveKey)
    .setLocation("eastus")
    .setImageUrlCol("url")
    .setMode("Printed")
    .setOutputCol("ocr")
    .setConcurrency(5)
)

rt.transform(df).show()

</TabItem> <TabItem value="scala">

scala

import com.microsoft.azure.synapse.ml.services.vision.RecognizeText
import spark.implicits._

// Read the key as a String and fail fast with a clear message when it is
// missing. A `None` default would widen the inferred type beyond String and
// the value could no longer be passed to setSubscriptionKey.
val cognitiveKey = sys.env.getOrElse(
  "COGNITIVE_API_KEY",
  sys.error("COGNITIVE_API_KEY environment variable is not set"))

// Single-column DataFrame holding the URLs of the images to process.
val df = Seq(
  "https://mmlspark.blob.core.windows.net/datasets/OCR/test1.jpg",
  "https://mmlspark.blob.core.windows.net/datasets/OCR/test2.png",
  "https://mmlspark.blob.core.windows.net/datasets/OCR/test3.png"
).toDF("url")

// Read image URLs from "url" and write the service response to "ocr".
val rt = (new RecognizeText()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setImageUrlCol("url")
  .setMode("Printed")
  .setOutputCol("ocr")
  .setConcurrency(5))

rt.transform(df).show()

</TabItem> </Tabs>

ReadImage

<Tabs> <TabItem value="python">

python

import os

from synapse.ml.services import *

# Prefer the COGNITIVE_API_KEY environment variable and only fall back to the
# secret lookup when it is unset or empty, so the lookup is not run needlessly.
# NOTE(review): getSecret is not defined in this snippet - presumably supplied
# by the notebook/test environment; confirm.
cognitiveKey = os.environ.get("COGNITIVE_API_KEY") or getSecret("cognitive-api-key")

# Single-column DataFrame holding the URLs of the images to process.
df = spark.createDataFrame(
    [
        ("https://mmlspark.blob.core.windows.net/datasets/OCR/test1.jpg",),
        ("https://mmlspark.blob.core.windows.net/datasets/OCR/test2.png",),
        ("https://mmlspark.blob.core.windows.net/datasets/OCR/test3.png",),
    ],
    ["url"],
)

# Read image URLs from "url" and write the service response to "ocr".
ri = (
    ReadImage()
    .setSubscriptionKey(cognitiveKey)
    .setLocation("eastus")
    .setImageUrlCol("url")
    .setOutputCol("ocr")
    .setConcurrency(5)
)

ri.transform(df).show()

</TabItem> <TabItem value="scala">

scala

import com.microsoft.azure.synapse.ml.services.vision.ReadImage
import spark.implicits._

// Read the key as a String and fail fast with a clear message when it is
// missing. A `None` default would widen the inferred type beyond String and
// the value could no longer be passed to setSubscriptionKey.
val cognitiveKey = sys.env.getOrElse(
  "COGNITIVE_API_KEY",
  sys.error("COGNITIVE_API_KEY environment variable is not set"))

// Single-column DataFrame holding the URLs of the images to process.
val df = Seq(
  "https://mmlspark.blob.core.windows.net/datasets/OCR/test1.jpg",
  "https://mmlspark.blob.core.windows.net/datasets/OCR/test2.png",
  "https://mmlspark.blob.core.windows.net/datasets/OCR/test3.png"
).toDF("url")

// Read image URLs from "url" and write the service response to "ocr".
val ri = (new ReadImage()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setImageUrlCol("url")
  .setOutputCol("ocr")
  .setConcurrency(5))

ri.transform(df).show()

</TabItem> </Tabs>

RecognizeDomainSpecificContent

<Tabs> <TabItem value="python">

python

import os

from synapse.ml.services import *

# Prefer the COGNITIVE_API_KEY environment variable and only fall back to the
# secret lookup when it is unset or empty, so the lookup is not run needlessly.
# NOTE(review): getSecret is not defined in this snippet - presumably supplied
# by the notebook/test environment; confirm.
cognitiveKey = os.environ.get("COGNITIVE_API_KEY") or getSecret("cognitive-api-key")

# Single-column DataFrame holding the URL of the image to process.
df = spark.createDataFrame(
    [("https://mmlspark.blob.core.windows.net/datasets/DSIR/test2.jpg",)],
    ["url"],
)

# Use the "celebrities" model; read image URLs from "url" and write the
# service response to "celebs".
celeb = (
    RecognizeDomainSpecificContent()
    .setSubscriptionKey(cognitiveKey)
    .setModel("celebrities")
    .setLocation("eastus")
    .setImageUrlCol("url")
    .setOutputCol("celebs")
)

celeb.transform(df).show()

</TabItem> <TabItem value="scala">

scala

import com.microsoft.azure.synapse.ml.services.vision.RecognizeDomainSpecificContent
import spark.implicits._

// Read the key as a String and fail fast with a clear message when it is
// missing. A `None` default would widen the inferred type beyond String and
// the value could no longer be passed to setSubscriptionKey.
val cognitiveKey = sys.env.getOrElse(
  "COGNITIVE_API_KEY",
  sys.error("COGNITIVE_API_KEY environment variable is not set"))

// Single-column DataFrame holding the URL of the image to process.
val df = Seq(
  "https://mmlspark.blob.core.windows.net/datasets/DSIR/test2.jpg"
).toDF("url")

// Use the "celebrities" model; read image URLs from "url" and write the
// service response to "celebs".
val celeb = (new RecognizeDomainSpecificContent()
  .setSubscriptionKey(cognitiveKey)
  .setModel("celebrities")
  .setLocation("eastus")
  .setImageUrlCol("url")
  .setOutputCol("celebs"))

celeb.transform(df).show()

</TabItem> </Tabs>

GenerateThumbnails

<Tabs> <TabItem value="python">

python

import os

from synapse.ml.services import *

# Prefer the COGNITIVE_API_KEY environment variable and only fall back to the
# secret lookup when it is unset or empty, so the lookup is not run needlessly.
# NOTE(review): getSecret is not defined in this snippet - presumably supplied
# by the notebook/test environment; confirm.
cognitiveKey = os.environ.get("COGNITIVE_API_KEY") or getSecret("cognitive-api-key")

# Single-column DataFrame holding the URL of the image to process.
df = spark.createDataFrame(
    [("https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg",)],
    ["url"],
)

# Request 50x50 thumbnails with smart cropping enabled; read image URLs from
# "url" and write the result to "thumbnails".
gt = (
    GenerateThumbnails()
    .setSubscriptionKey(cognitiveKey)
    .setLocation("eastus")
    .setHeight(50)
    .setWidth(50)
    .setSmartCropping(True)
    .setImageUrlCol("url")
    .setOutputCol("thumbnails")
)

gt.transform(df).show()

</TabItem> <TabItem value="scala">

scala

import com.microsoft.azure.synapse.ml.services.vision.GenerateThumbnails
import spark.implicits._

// Read the key as a String and fail fast with a clear message when it is
// missing. A `None` default would widen the inferred type beyond String and
// the value could no longer be passed to setSubscriptionKey.
val cognitiveKey = sys.env.getOrElse(
  "COGNITIVE_API_KEY",
  sys.error("COGNITIVE_API_KEY environment variable is not set"))

// Single-column DataFrame holding the URL of the image to process. The type
// is left to inference: the snippet does not import DataFrame, and the other
// examples on this page do not annotate it either.
val df = Seq(
  "https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg"
).toDF("url")

// Request 50x50 thumbnails with smart cropping enabled; read image URLs from
// "url" and write the result to "thumbnails".
val gt = (new GenerateThumbnails()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setHeight(50)
  .setWidth(50)
  .setSmartCropping(true)
  .setImageUrlCol("url")
  .setOutputCol("thumbnails"))

gt.transform(df).show()

</TabItem> </Tabs>

TagImage

<Tabs> <TabItem value="python">

python

import os

from synapse.ml.services import *

# Prefer the COGNITIVE_API_KEY environment variable and only fall back to the
# secret lookup when it is unset or empty, so the lookup is not run needlessly.
# NOTE(review): getSecret is not defined in this snippet - presumably supplied
# by the notebook/test environment; confirm.
cognitiveKey = os.environ.get("COGNITIVE_API_KEY") or getSecret("cognitive-api-key")

# Single-column DataFrame holding the URL of the image to process.
df = spark.createDataFrame(
    [("https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg",)],
    ["url"],
)

# Read image URLs from "url" and write the service response to "tags".
ti = (
    TagImage()
    .setSubscriptionKey(cognitiveKey)
    .setLocation("eastus")
    .setImageUrlCol("url")
    .setOutputCol("tags")
)

ti.transform(df).show()

</TabItem> <TabItem value="scala">

scala

import com.microsoft.azure.synapse.ml.services.vision.TagImage
import spark.implicits._

// Read the key as a String and fail fast with a clear message when it is
// missing. A `None` default would widen the inferred type beyond String and
// the value could no longer be passed to setSubscriptionKey.
val cognitiveKey = sys.env.getOrElse(
  "COGNITIVE_API_KEY",
  sys.error("COGNITIVE_API_KEY environment variable is not set"))

// Single-column DataFrame holding the URL of the image to process.
val df = Seq(
  "https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg"
).toDF("url")

// Read image URLs from "url" and write the service response to "tags".
val ti = (new TagImage()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setImageUrlCol("url")
  .setOutputCol("tags"))

ti.transform(df).show()

</TabItem> </Tabs>

DescribeImage

<Tabs> <TabItem value="python">

python

import os

from synapse.ml.services import *

# Prefer the COGNITIVE_API_KEY environment variable and only fall back to the
# secret lookup when it is unset or empty, so the lookup is not run needlessly.
# NOTE(review): getSecret is not defined in this snippet - presumably supplied
# by the notebook/test environment; confirm.
cognitiveKey = os.environ.get("COGNITIVE_API_KEY") or getSecret("cognitive-api-key")

# Single-column DataFrame holding the URL of the image to process.
df = spark.createDataFrame(
    [("https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg",)],
    ["url"],
)

# Set max candidates to 3; read image URLs from "url" and write the service
# response to "descriptions".
di = (
    DescribeImage()
    .setSubscriptionKey(cognitiveKey)
    .setLocation("eastus")
    .setMaxCandidates(3)
    .setImageUrlCol("url")
    .setOutputCol("descriptions")
)

di.transform(df).show()

</TabItem> <TabItem value="scala">

scala

import com.microsoft.azure.synapse.ml.services.vision.DescribeImage
import spark.implicits._

// Read the key as a String and fail fast with a clear message when it is
// missing. A `None` default would widen the inferred type beyond String and
// the value could no longer be passed to setSubscriptionKey.
val cognitiveKey = sys.env.getOrElse(
  "COGNITIVE_API_KEY",
  sys.error("COGNITIVE_API_KEY environment variable is not set"))

// Single-column DataFrame holding the URL of the image to process.
val df = Seq(
  "https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg"
).toDF("url")

// Set max candidates to 3; read image URLs from "url" and write the service
// response to "descriptions".
val di = (new DescribeImage()
  .setSubscriptionKey(cognitiveKey)
  .setLocation("eastus")
  .setMaxCandidates(3)
  .setImageUrlCol("url")
  .setOutputCol("descriptions"))

di.transform(df).show()

</TabItem> </Tabs>